├── .clang-format
├── .gitattributes
├── .github
    ├── CONTRIBUTING.md
    ├── actions
    │   ├── build_app
    │   │   └── action.yml
    │   ├── install_openvino
    │   │   └── action.yml
    │   ├── install_python_deps
    │   │   └── action.yml
    │   └── install_wheel
    │   │   ├── .node-version
    │   │   ├── .prettierignore
    │   │   ├── .prettierrc.json
    │   │   ├── action.yml
    │   │   ├── dist
    │   │       └── index.js
    │   │   ├── package-lock.json
    │   │   ├── package.json
    │   │   └── src
    │   │       └── install_packages.js
    ├── components.yml
    ├── dependabot.yml
    ├── dependency_review.yml
    ├── labeler.yml
    ├── pull_request_template.md
    ├── scripts
    │   └── generate_reference_llava.py
    └── workflows
    │   ├── assign_issue.yml
    │   ├── cleanup_caches.yml
    │   ├── coverity.yml
    │   ├── deploy_gh_pages.yml
    │   ├── labeler.yml
    │   ├── linux.yml
    │   ├── mac.yml
    │   ├── manylinux_2_28.yml
    │   ├── sdl.yml
    │   └── windows.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── Jenkinsfile
├── LICENSE
├── README.md
├── SECURITY.md
├── bandit.yml
├── cmake
    ├── features.cmake
    ├── templates
    │   ├── OpenVINOGenAIConfig.cmake.in
    │   ├── version.cpp.in
    │   ├── version.hpp.in
    │   └── vs_version.rc.in
    ├── version.cmake
    └── vs_version.cmake
├── pyproject.toml
├── requirements-build.txt
├── samples
    ├── CMakeLists.txt
    ├── c
    │   ├── text_generation
    │   │   ├── CMakeLists.txt
    │   │   ├── README.md
    │   │   ├── benchmark_genai_c.c
    │   │   ├── chat_sample_c.c
    │   │   └── greedy_causal_lm_c.c
    │   ├── visual_language_chat
    │   │   ├── CMakeLists.txt
    │   │   ├── load_image.c
    │   │   ├── load_image.h
    │   │   └── vlm_pipeline.c
    │   └── whisper_speech_recognition
    │   │   ├── CMakeLists.txt
    │   │   ├── README.md
    │   │   ├── whisper_speech_recognition.c
    │   │   ├── whisper_utils.c
    │   │   └── whisper_utils.h
    ├── cpp
    │   ├── README.md
    │   ├── image_generation
    │   │   ├── 512x512.bmp
    │   │   ├── CMakeLists.txt
    │   │   ├── README.md
    │   │   ├── baseline.bmp
    │   │   ├── benchmark_image_gen.cpp
    │   │   ├── heterogeneous_stable_diffusion.cpp
    │   │   ├── image2image.cpp
    │   │   ├── image2image_concurrency.cpp
    │   │   ├── imageimage.bmp
    │   │   ├── imwrite.cpp
    │   │   ├── imwrite.hpp
    │   │   ├── inpainting.bmp
    │   │   ├── inpainting.cpp
    │   │   ├── load_image.cpp
    │   │   ├── load_image.hpp
    │   │   ├── lora.bmp
    │   │   ├── lora_text2image.cpp
    │   │   ├── progress_bar.hpp
    │   │   ├── stable_diffusion_export_import.cpp
    │   │   ├── text2image.cpp
    │   │   └── text2image_concurrency.cpp
    │   ├── rag
    │   │   ├── CMakeLists.txt
    │   │   ├── README.md
    │   │   ├── text_embeddings.cpp
    │   │   └── text_rerank.cpp
    │   ├── speech_generation
    │   │   ├── CMakeLists.txt
    │   │   ├── README.md
    │   │   ├── audio_utils.cpp
    │   │   ├── audio_utils.hpp
    │   │   └── text2speech.cpp
    │   ├── text_generation
    │   │   ├── CMakeLists.txt
    │   │   ├── README.md
    │   │   ├── beam_search_causal_lm.cpp
    │   │   ├── benchmark_genai.cpp
    │   │   ├── chat_sample.cpp
    │   │   ├── encrypted_model_causal_lm.cpp
    │   │   ├── greedy_causal_lm.cpp
    │   │   ├── lora_greedy_causal_lm.cpp
    │   │   ├── multinomial_causal_lm.cpp
    │   │   ├── prompt_lookup_decoding_lm.cpp
    │   │   ├── read_prompt_from_file.cpp
    │   │   ├── read_prompt_from_file.h
    │   │   ├── speculative_decoding_lm.cpp
    │   │   └── structured_output_generation.cpp
    │   ├── visual_language_chat
    │   │   ├── CMakeLists.txt
    │   │   ├── README.md
    │   │   ├── benchmark_vlm.cpp
    │   │   ├── encrypted_model_vlm.cpp
    │   │   ├── load_image.cpp
    │   │   ├── load_image.hpp
    │   │   └── visual_language_chat.cpp
    │   └── whisper_speech_recognition
    │   │   ├── CMakeLists.txt
    │   │   ├── README.md
    │   │   ├── audio_utils.cpp
    │   │   ├── audio_utils.hpp
    │   │   └── whisper_speech_recognition.cpp
    ├── deployment-requirements.txt
    ├── export-requirements.txt
    ├── generation.gif
    ├── js
    │   ├── .gitignore
    │   ├── package-lock.json
    │   ├── package.json
    │   ├── rag
    │   │   ├── README.md
    │   │   └── text_embeddings.js
    │   └── text_generation
    │   │   ├── README.md
    │   │   ├── beam_search_causal_lm.js
    │   │   ├── benchmark_genai.js
    │   │   ├── chat_sample.js
    │   │   ├── compound_grammar_generation.js
    │   │   ├── greedy_causal_lm.js
    │   │   ├── helper.js
    │   │   ├── multinomial_causal_lm.js
    │   │   ├── react_sample.js
    │   │   ├── structural_tags_generation.js
    │   │   ├── structured_output_generation.js
    │   │   └── tests
    │   │       └── usage.test.js
    ├── python
    │   ├── image_generation
    │   │   ├── README.md
    │   │   ├── benchmark_image_gen.py
    │   │   ├── heterogeneous_stable_diffusion.py
    │   │   ├── image2image.py
    │   │   ├── inpainting.py
    │   │   ├── lora_text2image.py
    │   │   ├── stable_diffusion_export_import.py
    │   │   └── text2image.py
    │   ├── rag
    │   │   ├── README.md
    │   │   ├── text_embeddings.py
    │   │   └── text_rerank.py
    │   ├── speech_generation
    │   │   ├── README.md
    │   │   ├── create_speaker_embedding.py
    │   │   └── text2speech.py
    │   ├── text_generation
    │   │   ├── README.md
    │   │   ├── beam_search_causal_lm.py
    │   │   ├── benchmark_genai.py
    │   │   ├── chat_sample.py
    │   │   ├── compound_grammar_generation.py
    │   │   ├── encrypted_model_causal_lm.py
    │   │   ├── greedy_causal_lm.py
    │   │   ├── limit_checker.py
    │   │   ├── lora_greedy_causal_lm.py
    │   │   ├── multinomial_causal_lm.py
    │   │   ├── prompt_lookup_decoding_lm.py
    │   │   ├── react_sample.py
    │   │   ├── speculative_decoding_lm.py
    │   │   ├── structural_tags_generation.py
    │   │   └── structured_output_generation.py
    │   ├── visual_language_chat
    │   │   ├── README.md
    │   │   ├── benchmark_vlm.py
    │   │   ├── encrypted_model_vlm.py
    │   │   └── visual_language_chat.py
    │   └── whisper_speech_recognition
    │   │   ├── README.md
    │   │   ├── recorder.py
    │   │   └── whisper_speech_recognition.py
    └── requirements.txt
├── site
    ├── .editorconfig
    ├── .gitignore
    ├── .prettierignore
    ├── .prettierrc
    ├── README.md
    ├── docs
    │   ├── concepts
    │   │   ├── _category_.json
    │   │   ├── beam-search.md
    │   │   ├── how-it-works.md
    │   │   ├── lora.md
    │   │   └── optimization-techniques
    │   │   │   ├── _category_.json
    │   │   │   ├── continuous-batching.md
    │   │   │   ├── kvcache-eviction-algorithm.md
    │   │   │   ├── prefix-caching.md
    │   │   │   ├── sparse-attention-prefill.md
    │   │   │   └── speculative-decoding.md
    │   ├── getting-started
    │   │   ├── _category_.json
    │   │   ├── installation.mdx
    │   │   └── introduction.mdx
    │   ├── guides
    │   │   ├── _category_.json
    │   │   ├── chat-scenario.mdx
    │   │   ├── debug-logging.mdx
    │   │   ├── lora-adapters.mdx
    │   │   ├── model-preparation
    │   │   │   ├── _category_.json
    │   │   │   ├── _use_cases_note.mdx
    │   │   │   ├── convert-to-openvino.mdx
    │   │   │   └── download-openvino-models.mdx
    │   │   ├── performance-metrics.mdx
    │   │   ├── streaming.mdx
    │   │   ├── structured-output.mdx
    │   │   └── tokenization.mdx
    │   ├── samples
    │   │   ├── _category_.json
    │   │   ├── _components
    │   │   │   └── samples-list
    │   │   │   │   └── index.tsx
    │   │   └── index.mdx
    │   ├── supported-models
    │   │   ├── _category_.json
    │   │   ├── _components
    │   │   │   ├── base-models-table
    │   │   │   │   └── index.tsx
    │   │   │   ├── image-generation-models-table
    │   │   │   │   ├── index.tsx
    │   │   │   │   └── models.ts
    │   │   │   ├── llm-models-table
    │   │   │   │   ├── index.tsx
    │   │   │   │   └── models.ts
    │   │   │   ├── speech-generation-models-table
    │   │   │   │   ├── index.tsx
    │   │   │   │   └── models.ts
    │   │   │   ├── text-embeddings-models-table
    │   │   │   │   ├── index.tsx
    │   │   │   │   └── models.ts
    │   │   │   ├── text-rerank-models-table
    │   │   │   │   ├── index.tsx
    │   │   │   │   └── models.ts
    │   │   │   ├── vlm-models-table
    │   │   │   │   ├── index.tsx
    │   │   │   │   └── models.ts
    │   │   │   └── whisper-models-table
    │   │   │   │   ├── index.tsx
    │   │   │   │   └── models.ts
    │   │   └── index.mdx
    │   └── use-cases
    │   │   ├── _category_.json
    │   │   ├── _shared
    │   │       ├── _basic_generation_configuration.mdx
    │   │       ├── _beam_search_generation.mdx
    │   │       ├── _chat_scenario.mdx
    │   │       ├── _convert_model.mdx
    │   │       ├── _generation_configuration_workflow.mdx
    │   │       └── _streaming.mdx
    │   │   ├── image-generation
    │   │       ├── _sections
    │   │       │   ├── _run_model
    │   │       │   │   ├── _image2image_cpp.mdx
    │   │       │   │   ├── _image2image_python.mdx
    │   │       │   │   ├── _inpainting_cpp.mdx
    │   │       │   │   ├── _inpainting_python.mdx
    │   │       │   │   ├── _text2image_cpp.mdx
    │   │       │   │   ├── _text2image_python.mdx
    │   │       │   │   └── index.mdx
    │   │       │   └── _usage_options
    │   │       │   │   └── index.mdx
    │   │       └── index.mdx
    │   │   ├── image-processing
    │   │       ├── _sections
    │   │       │   ├── _run_model
    │   │       │   │   ├── _code_example_cpp.mdx
    │   │       │   │   ├── _code_example_python.mdx
    │   │       │   │   └── index.mdx
    │   │       │   └── _usage_options
    │   │       │   │   └── index.mdx
    │   │       └── index.mdx
    │   │   ├── speech-recognition
    │   │       ├── _sections
    │   │       │   ├── _run_model
    │   │       │   │   ├── _code_example_cpp.mdx
    │   │       │   │   ├── _code_example_python.mdx
    │   │       │   │   └── index.mdx
    │   │       │   └── _usage_options
    │   │       │   │   └── index.mdx
    │   │       └── index.mdx
    │   │   ├── text-embedding
    │   │       ├── _sections
    │   │       │   ├── _run_model
    │   │       │   │   ├── _code_example_cpp.mdx
    │   │       │   │   ├── _code_example_python.mdx
    │   │       │   │   └── index.mdx
    │   │       │   └── _usage_options
    │   │       │   │   └── index.mdx
    │   │       └── index.mdx
    │   │   ├── text-generation
    │   │       ├── _sections
    │   │       │   ├── _run_model
    │   │       │   │   ├── _code_example_cpp.mdx
    │   │       │   │   ├── _code_example_python.mdx
    │   │       │   │   └── index.mdx
    │   │       │   └── _usage_options
    │   │       │   │   ├── _generation_parameters.mdx
    │   │       │   │   ├── _lora_adapters.mdx
    │   │       │   │   ├── _speculative_decoding.mdx
    │   │       │   │   └── index.mdx
    │   │       └── index.mdx
    │   │   └── text-rerank
    │   │       ├── _sections
    │   │           └── _run_model
    │   │           │   ├── _code_example_cpp.mdx
    │   │           │   ├── _code_example_python.mdx
    │   │           │   └── index.mdx
    │   │       └── index.mdx
    ├── docusaurus.config.ts
    ├── eslint.config.mjs
    ├── package-lock.json
    ├── package.json
    ├── sidebars.ts
    ├── src
    │   ├── components
    │   │   ├── Button
    │   │   │   ├── index.tsx
    │   │   │   └── styles.module.css
    │   │   ├── Carousel
    │   │   │   ├── index.tsx
    │   │   │   └── styles.module.css
    │   │   ├── LanguageTabs
    │   │   │   └── index.tsx
    │   │   └── OptimumCLI
    │   │   │   └── index.tsx
    │   ├── css
    │   │   ├── breadcrumbs.css
    │   │   ├── custom.css
    │   │   ├── footer.css
    │   │   ├── menu.css
    │   │   ├── navbar.css
    │   │   ├── toc.css
    │   │   └── typography.css
    │   ├── hooks
    │   │   └── use-screen-size.ts
    │   ├── pages
    │   │   ├── _sections
    │   │   │   ├── FeaturesSection
    │   │   │   │   ├── FeatureItem
    │   │   │   │   │   ├── index.tsx
    │   │   │   │   │   └── styles.module.css
    │   │   │   │   ├── index.tsx
    │   │   │   │   └── styles.module.css
    │   │   │   ├── HeroSection
    │   │   │   │   ├── PipelinesCarousel
    │   │   │   │   │   ├── index.tsx
    │   │   │   │   │   └── styles.module.css
    │   │   │   │   ├── index.tsx
    │   │   │   │   └── styles.module.css
    │   │   │   ├── InstallSection
    │   │   │   │   ├── index.tsx
    │   │   │   │   └── styles.module.css
    │   │   │   ├── UseCasesSection
    │   │   │   │   ├── components
    │   │   │   │   │   ├── UseCaseCard
    │   │   │   │   │   │   ├── index.tsx
    │   │   │   │   │   │   └── styles.module.css
    │   │   │   │   │   ├── image-generation.tsx
    │   │   │   │   │   ├── image-processing.tsx
    │   │   │   │   │   ├── speech-recognition.tsx
    │   │   │   │   │   ├── text-embedding.tsx
    │   │   │   │   │   ├── text-generation.tsx
    │   │   │   │   │   └── text-rerank.tsx
    │   │   │   │   ├── index.tsx
    │   │   │   │   └── styles.module.css
    │   │   │   └── section-styles.module.css
    │   │   ├── index.module.css
    │   │   └── index.tsx
    │   ├── plugins
    │   │   └── genai-samples-docs-plugin.ts
    │   ├── theme
    │   │   └── MDXComponents.tsx
    │   └── types
    │   │   └── images.d.ts
    ├── static
    │   ├── .nojekyll
    │   └── img
    │   │   ├── background.webp
    │   │   ├── beam_idx-drop.gif
    │   │   ├── beam_idx-fork.gif
    │   │   ├── chevron-right.svg
    │   │   ├── chevron-up.svg
    │   │   ├── favicon.png
    │   │   ├── image.svg
    │   │   ├── intel-logo.svg
    │   │   ├── kv-cache-areas-diagram.svg
    │   │   ├── linux-logo.svg
    │   │   ├── lora.png
    │   │   ├── mac-os-logo.svg
    │   │   ├── magnifying-glass.svg
    │   │   ├── openvino-genai-workflow.svg
    │   │   ├── openvino.svg
    │   │   ├── sound-on.svg
    │   │   ├── stateful.jpg
    │   │   ├── stateless.jpg
    │   │   ├── structured_output_work_example.png
    │   │   ├── text.svg
    │   │   ├── trishape.svg
    │   │   └── windows-logo.svg
    └── tsconfig.json
├── src
    ├── CMakeLists.txt
    ├── README.md
    ├── bindings_utils.hpp
    ├── c
    │   ├── CMakeLists.txt
    │   ├── include
    │   │   └── openvino
    │   │   │   └── genai
    │   │   │       └── c
    │   │   │           ├── generation_config.h
    │   │   │           ├── llm_pipeline.h
    │   │   │           ├── perf_metrics.h
    │   │   │           ├── visibility.h
    │   │   │           ├── vlm_pipeline.h
    │   │   │           ├── whisper_generation_config.h
    │   │   │           └── whisper_pipeline.h
    │   └── src
    │   │   ├── generation_config.cpp
    │   │   ├── llm_pipeline.cpp
    │   │   ├── perf_metrics.cpp
    │   │   ├── types_c.h
    │   │   ├── vlm_pipeline.cpp
    │   │   ├── whisper_generation_config.cpp
    │   │   └── whisper_pipeline.cpp
    ├── cpp
    │   ├── CMakeLists.txt
    │   ├── include
    │   │   └── openvino
    │   │   │   └── genai
    │   │   │       ├── cache_eviction.hpp
    │   │   │       ├── chat_history.hpp
    │   │   │       ├── common_types.hpp
    │   │   │       ├── continuous_batching_pipeline.hpp
    │   │   │       ├── generation_config.hpp
    │   │   │       ├── generation_handle.hpp
    │   │   │       ├── image_generation
    │   │   │           ├── autoencoder_kl.hpp
    │   │   │           ├── clip_text_model.hpp
    │   │   │           ├── clip_text_model_with_projection.hpp
    │   │   │           ├── flux_transformer_2d_model.hpp
    │   │   │           ├── generation_config.hpp
    │   │   │           ├── image2image_pipeline.hpp
    │   │   │           ├── image_generation_perf_metrics.hpp
    │   │   │           ├── inpainting_pipeline.hpp
    │   │   │           ├── scheduler.hpp
    │   │   │           ├── sd3_transformer_2d_model.hpp
    │   │   │           ├── t5_encoder_model.hpp
    │   │   │           ├── text2image_pipeline.hpp
    │   │   │           └── unet2d_condition_model.hpp
    │   │   │       ├── json_container.hpp
    │   │   │       ├── llm_pipeline.hpp
    │   │   │       ├── lora_adapter.hpp
    │   │   │       ├── parsers.hpp
    │   │   │       ├── perf_metrics.hpp
    │   │   │       ├── rag
    │   │   │           ├── text_embedding_pipeline.hpp
    │   │   │           └── text_rerank_pipeline.hpp
    │   │   │       ├── scheduler_config.hpp
    │   │   │       ├── sparse_attention.hpp
    │   │   │       ├── speculative_decoding
    │   │   │           └── perf_metrics.hpp
    │   │   │       ├── speech_generation
    │   │   │           ├── speech_generation_config.hpp
    │   │   │           ├── speech_generation_perf_metrics.hpp
    │   │   │           └── text2speech_pipeline.hpp
    │   │   │       ├── streamer_base.hpp
    │   │   │       ├── text_streamer.hpp
    │   │   │       ├── tokenizer.hpp
    │   │   │       ├── visibility.hpp
    │   │   │       ├── visual_language
    │   │   │           ├── perf_metrics.hpp
    │   │   │           └── pipeline.hpp
    │   │   │       ├── whisper_generation_config.hpp
    │   │   │       └── whisper_pipeline.hpp
    │   └── src
    │   │   ├── chat_history.cpp
    │   │   ├── circular_buffer_queue.hpp
    │   │   ├── continuous_batching
    │   │       ├── attention_output.hpp
    │   │       ├── block_manager.hpp
    │   │       ├── cache_eviction.cpp
    │   │       ├── cache_eviction.hpp
    │   │       ├── cache_manager.hpp
    │   │       ├── cache_state_dumper.hpp
    │   │       ├── kvcrush.cpp
    │   │       ├── kvcrush.hpp
    │   │       ├── model_runner.hpp
    │   │       ├── paged_attention_transformations.cpp
    │   │       ├── paged_attention_transformations.hpp
    │   │       ├── pipeline.cpp
    │   │       ├── pipeline_base.cpp
    │   │       ├── pipeline_base.hpp
    │   │       ├── pipeline_impl.cpp
    │   │       ├── pipeline_impl.hpp
    │   │       ├── scheduler.hpp
    │   │       ├── sparse_attention.cpp
    │   │       ├── sparse_attention.hpp
    │   │       ├── threaded_streamer.hpp
    │   │       └── timer.hpp
    │   │   ├── debug_utils.hpp
    │   │   ├── generation_config.cpp
    │   │   ├── generation_handle.cpp
    │   │   ├── generation_stream.hpp
    │   │   ├── gguf_utils
    │   │       ├── building_blocks.cpp
    │   │       ├── building_blocks.hpp
    │   │       ├── gguf.cpp
    │   │       ├── gguf.hpp
    │   │       ├── gguf_modeling.cpp
    │   │       ├── gguf_modeling.hpp
    │   │       ├── gguf_quants.cpp
    │   │       ├── gguf_tokenizer.cpp
    │   │       └── gguf_tokenizer.hpp
    │   │   ├── image_generation
    │   │       ├── diffusion_pipeline.hpp
    │   │       ├── flux_fill_pipeline.hpp
    │   │       ├── flux_pipeline.hpp
    │   │       ├── generation_config.cpp
    │   │       ├── image2image_pipeline.cpp
    │   │       ├── image_generation_perf_metrics.cpp
    │   │       ├── image_processor.cpp
    │   │       ├── image_processor.hpp
    │   │       ├── inpainting_pipeline.cpp
    │   │       ├── models
    │   │       │   ├── autoencoder_kl.cpp
    │   │       │   ├── clip_text_model.cpp
    │   │       │   ├── flux_transformer_2d_model.cpp
    │   │       │   ├── sd3_transformer_2d_model.cpp
    │   │       │   ├── sd3transformer_2d_inference.hpp
    │   │       │   ├── sd3transformer_2d_inference_dynamic.hpp
    │   │       │   ├── sd3transformer_2d_inference_static_bs1.hpp
    │   │       │   ├── t5_encoder_model.cpp
    │   │       │   ├── unet2d_condition_model.cpp
    │   │       │   ├── unet_inference.hpp
    │   │       │   ├── unet_inference_dynamic.hpp
    │   │       │   └── unet_inference_static_bs1.hpp
    │   │       ├── numpy_utils.cpp
    │   │       ├── numpy_utils.hpp
    │   │       ├── schedulers
    │   │       │   ├── ddim.cpp
    │   │       │   ├── ddim.hpp
    │   │       │   ├── euler_ancestral_discrete.cpp
    │   │       │   ├── euler_ancestral_discrete.hpp
    │   │       │   ├── euler_discrete.cpp
    │   │       │   ├── euler_discrete.hpp
    │   │       │   ├── flow_match_euler_discrete.cpp
    │   │       │   ├── flow_match_euler_discrete.hpp
    │   │       │   ├── ischeduler.hpp
    │   │       │   ├── lcm.cpp
    │   │       │   ├── lcm.hpp
    │   │       │   ├── pndm.cpp
    │   │       │   ├── pndm.hpp
    │   │       │   ├── scheduler.cpp
    │   │       │   ├── types.cpp
    │   │       │   └── types.hpp
    │   │       ├── stable_diffusion_3_pipeline.hpp
    │   │       ├── stable_diffusion_pipeline.hpp
    │   │       ├── stable_diffusion_xl_pipeline.hpp
    │   │       ├── text2image_pipeline.cpp
    │   │       └── threaded_callback.hpp
    │   │   ├── json_container.cpp
    │   │   ├── json_utils.hpp
    │   │   ├── llm
    │   │       ├── pipeline.cpp
    │   │       ├── pipeline_base.hpp
    │   │       ├── pipeline_continuous_batching_adapter.hpp
    │   │       ├── pipeline_stateful.cpp
    │   │       ├── pipeline_stateful.hpp
    │   │       ├── pipeline_static.cpp
    │   │       └── pipeline_static.hpp
    │   │   ├── lm_encoding.cpp
    │   │   ├── lm_encoding.hpp
    │   │   ├── logger.hpp
    │   │   ├── lora
    │   │       ├── adapter.cpp
    │   │       ├── common.hpp
    │   │       ├── helper.cpp
    │   │       ├── helper.hpp
    │   │       ├── names_mapping.cpp
    │   │       ├── names_mapping.hpp
    │   │       └── safetensors.c
    │   │   ├── parsers.cpp
    │   │   ├── perf_metrics.cpp
    │   │   ├── prompt_lookup
    │   │       ├── continuous_batching_for_prompt_lookup.cpp
    │   │       ├── continuous_batching_for_prompt_lookup.hpp
    │   │       ├── prompt_lookup_impl.cpp
    │   │       └── prompt_lookup_impl.hpp
    │   │   ├── rag
    │   │       ├── text_embedding_pipeline.cpp
    │   │       └── text_rerank_pipeline.cpp
    │   │   ├── sampling
    │   │       ├── logit_processor.hpp
    │   │       ├── logit_transformers.hpp
    │   │       ├── sampler.cpp
    │   │       ├── sampler.hpp
    │   │       ├── structured_output
    │   │       │   ├── structured_output_controller.cpp
    │   │       │   ├── structured_output_controller.hpp
    │   │       │   ├── xgrammar_backend.cpp
    │   │       │   └── xgrammar_backend.hpp
    │   │       └── threadpool.hpp
    │   │   ├── sequence_group.cpp
    │   │   ├── sequence_group.hpp
    │   │   ├── speculative_decoding
    │   │       ├── continuous_batching_for_speculative_decoding_impl.cpp
    │   │       ├── continuous_batching_for_speculative_decoding_impl.hpp
    │   │       ├── speculative_decoding_impl.cpp
    │   │       ├── speculative_decoding_impl.hpp
    │   │       ├── speculative_decoding_metrics.cpp
    │   │       ├── speculative_decoding_metrics.hpp
    │   │       ├── speculative_decoding_perf_metrics.cpp
    │   │       ├── speculative_decoding_stateful.cpp
    │   │       ├── speculative_decoding_stateful.hpp
    │   │       └── update_request_structs.hpp
    │   │   ├── speech_generation
    │   │       ├── default_speaker_embedding.hpp
    │   │       ├── speech_generation_config.cpp
    │   │       ├── speech_generation_perf_metrics.cpp
    │   │       ├── speecht5_tts_decoder.cpp
    │   │       ├── speecht5_tts_decoder.hpp
    │   │       ├── speecht5_tts_model.cpp
    │   │       ├── speecht5_tts_model.hpp
    │   │       ├── text2speech_pipeline.cpp
    │   │       ├── text2speech_pipeline_impl.cpp
    │   │       └── text2speech_pipeline_impl.hpp
    │   │   ├── synchronized_queue.hpp
    │   │   ├── text_streamer.cpp
    │   │   ├── tokenizer
    │   │       ├── add_second_input_pass.cpp
    │   │       ├── add_second_input_pass.hpp
    │   │       ├── chat_template_fallback_map.hpp
    │   │       ├── make_tokenizer_stateful.cpp
    │   │       ├── make_tokenizer_stateful.hpp
    │   │       ├── tokenizer.cpp
    │   │       ├── tokenizer_impl.cpp
    │   │       ├── tokenizer_impl.hpp
    │   │       ├── tokenizers_path.cpp
    │   │       └── tokenizers_path.hpp
    │   │   ├── utils.cpp
    │   │   ├── utils.hpp
    │   │   ├── visual_language
    │   │       ├── clip.cpp
    │   │       ├── clip.hpp
    │   │       ├── continuous_batching_adapter.hpp
    │   │       ├── embedding_model.cpp
    │   │       ├── embedding_model.hpp
    │   │       ├── gemma3
    │   │       │   ├── classes.cpp
    │   │       │   └── classes.hpp
    │   │       ├── inputs_embedder.cpp
    │   │       ├── inputs_embedder.hpp
    │   │       ├── internvl_chat
    │   │       │   ├── classes.cpp
    │   │       │   └── classes.hpp
    │   │       ├── llava
    │   │       │   ├── classes.cpp
    │   │       │   └── classes.hpp
    │   │       ├── llava_next
    │   │       │   ├── classes.cpp
    │   │       │   └── classes.hpp
    │   │       ├── llava_next_video
    │   │       │   ├── classes.cpp
    │   │       │   └── classes.hpp
    │   │       ├── minicpm
    │   │       │   ├── classes.cpp
    │   │       │   └── classes.hpp
    │   │       ├── nanollava
    │   │       │   ├── classes.cpp
    │   │       │   └── classes.hpp
    │   │       ├── perf_metrics.cpp
    │   │       ├── phi3_vision
    │   │       │   ├── classes.cpp
    │   │       │   └── classes.hpp
    │   │       ├── phi4mm
    │   │       │   ├── classes.cpp
    │   │       │   └── classes.hpp
    │   │       ├── pipeline.cpp
    │   │       ├── pipeline_base.hpp
    │   │       ├── processor_config.cpp
    │   │       ├── processor_config.hpp
    │   │       ├── qwen2_5_vl
    │   │       │   ├── classes.cpp
    │   │       │   └── classes.hpp
    │   │       ├── qwen2vl
    │   │       │   ├── classes.cpp
    │   │       │   └── classes.hpp
    │   │       ├── vision_encoder.cpp
    │   │       ├── vision_encoder.hpp
    │   │       ├── vl_sdpa_transformations.cpp
    │   │       ├── vl_sdpa_transformations.hpp
    │   │       ├── vlm_config.cpp
    │   │       └── vlm_config.hpp
    │   │   └── whisper
    │   │       ├── config.cpp
    │   │       ├── config.hpp
    │   │       ├── context_tokens.cpp
    │   │       ├── context_tokens.hpp
    │   │       ├── feature_extractor.cpp
    │   │       ├── feature_extractor.hpp
    │   │       ├── generation_config.cpp
    │   │       ├── logit_processor.cpp
    │   │       ├── logit_processor.hpp
    │   │       ├── models.hpp
    │   │       ├── models
    │   │           ├── decoder.cpp
    │   │           ├── decoder.hpp
    │   │           ├── statefull_decoder.cpp
    │   │           ├── statefull_decoder.hpp
    │   │           ├── with_past_decoder.cpp
    │   │           └── with_past_decoder.hpp
    │   │       ├── perf_metrics.cpp
    │   │       ├── pipeline.cpp
    │   │       ├── pipeline_base.hpp
    │   │       ├── pipeline_static.cpp
    │   │       ├── pipeline_static.hpp
    │   │       ├── timestamps.cpp
    │   │       ├── timestamps.hpp
    │   │       ├── whisper.cpp
    │   │       ├── whisper.hpp
    │   │       ├── whisper_utils.cpp
    │   │       └── whisper_utils.hpp
    ├── docs
    │   ├── BUILD.md
    │   ├── DEBUG_LOG.md
    │   ├── DOCKER.md
    │   ├── HOW_IT_WORKS.md
    │   ├── beam_idx-drop.gif
    │   ├── beam_idx-fork.gif
    │   ├── openvino_genai.svg
    │   ├── stateful.jpg
    │   └── stateless.jpg
    ├── js
    │   ├── .gitignore
    │   ├── .npmignore
    │   ├── .prettierrc
    │   ├── BUILD.md
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── eslint.config.cjs
    │   ├── include
    │   │   ├── addon.hpp
    │   │   ├── chat_history.hpp
    │   │   ├── helper.hpp
    │   │   ├── llm_pipeline
    │   │   │   ├── finish_chat_worker.hpp
    │   │   │   ├── init_worker.hpp
    │   │   │   ├── llm_pipeline_wrapper.hpp
    │   │   │   └── start_chat_worker.hpp
    │   │   ├── perf_metrics.hpp
    │   │   ├── text_embedding_pipeline
    │   │   │   ├── embed_documents_worker.hpp
    │   │   │   ├── embed_query_worker.hpp
    │   │   │   ├── init_worker.hpp
    │   │   │   └── pipeline_wrapper.hpp
    │   │   └── tokenizer.hpp
    │   ├── lib
    │   │   ├── addon.ts
    │   │   ├── chatHistory.ts
    │   │   ├── index.ts
    │   │   ├── pipelines
    │   │   │   ├── llmPipeline.ts
    │   │   │   └── textEmbeddingPipeline.ts
    │   │   └── utils.ts
    │   ├── package-lock.json
    │   ├── package.json
    │   ├── scripts
    │   │   └── download-runtime.cjs
    │   ├── src
    │   │   ├── addon.cpp
    │   │   ├── chat_history.cpp
    │   │   ├── helper.cpp
    │   │   ├── llm_pipeline
    │   │   │   ├── finish_chat_worker.cpp
    │   │   │   ├── init_worker.cpp
    │   │   │   ├── llm_pipeline_wrapper.cpp
    │   │   │   └── start_chat_worker.cpp
    │   │   ├── perf_metrics.cpp
    │   │   ├── text_embedding_pipeline
    │   │   │   ├── embed_documents_worker.cpp
    │   │   │   ├── embed_query_worker.cpp
    │   │   │   ├── init_worker.cpp
    │   │   │   └── pipeline_wrapper.cpp
    │   │   └── tokenizer.cpp
    │   ├── tests
    │   │   ├── bindings.test.js
    │   │   ├── chatHistory.test.js
    │   │   ├── models.js
    │   │   ├── module.test.js
    │   │   ├── setup.js
    │   │   ├── structuredOutput.test.js
    │   │   ├── textEmbeddingsPipeline.test.js
    │   │   ├── tokenizer.test.js
    │   │   └── utils.js
    │   ├── thirdparty
    │   │   ├── node-lib.def
    │   │   └── win_delay_load_hook.cc
    │   └── tsconfig.json
    └── python
    │   ├── CMakeLists.txt
    │   ├── clean_version.cmake
    │   ├── compare_pyi.cmake
    │   ├── openvino_genai
    │       ├── __init__.py
    │       ├── __init__.pyi
    │       └── py_openvino_genai.pyi
    │   ├── py_chat_history.cpp
    │   ├── py_continuous_batching_pipeline.cpp
    │   ├── py_generation_config.cpp
    │   ├── py_image_generation_models.cpp
    │   ├── py_image_generation_pipelines.cpp
    │   ├── py_llm_pipeline.cpp
    │   ├── py_lora_adapter.cpp
    │   ├── py_openvino_genai.cpp
    │   ├── py_parsers.cpp
    │   ├── py_perf_metrics.cpp
    │   ├── py_rag.cpp
    │   ├── py_speech_generation.cpp
    │   ├── py_streamers.cpp
    │   ├── py_tokenizer.cpp
    │   ├── py_utils.cpp
    │   ├── py_utils.hpp
    │   ├── py_vlm_pipeline.cpp
    │   ├── py_whisper_pipeline.cpp
    │   └── remove_abi_specific_info.cmake
├── tests
    ├── cpp
    │   ├── CMakeLists.txt
    │   ├── block_allocator.cpp
    │   ├── block_hash_store.cpp
    │   ├── block_manager.cpp
    │   ├── cache_eviction.cpp
    │   ├── cache_manager.cpp
    │   ├── data
    │   │   ├── cache_rotation_poc_ref_coefficients_per_block_0.txt
    │   │   ├── cache_rotation_poc_ref_coefficients_per_block_1.txt
    │   │   ├── cache_rotation_poc_ref_coefficients_per_block_2.txt
    │   │   └── cache_rotation_poc_ref_coefficients_per_block_3.txt
    │   ├── helper.cpp
    │   ├── helper.hpp
    │   ├── kvcrush.cpp
    │   ├── logit_filtering.cpp
    │   ├── parser.cpp
    │   ├── sampler.cpp
    │   ├── scheduler.cpp
    │   ├── sparse_attention.cpp
    │   ├── speculative_decoding.cpp
    │   ├── test_add_second_input_pass.cpp
    │   ├── test_json_container.cpp
    │   └── utils.cpp
    └── python_tests
    │   ├── README.md
    │   ├── conftest.py
    │   ├── data
    │       ├── __init__.py
    │       ├── long_prompts.txt
    │       ├── models.py
    │       ├── short_prompts.txt
    │       ├── test_dataset.py
    │       └── tokenizer_configs.py
    │   ├── models
    │       ├── nightly
    │       ├── precommit
    │       └── real_models
    │   ├── pytest.ini
    │   ├── requirements.txt
    │   ├── samples
    │       ├── conftest.py
    │       ├── test_beam_search_causal_lm.py
    │       ├── test_benchmark_genai.py
    │       ├── test_benchmark_image_gen.py
    │       ├── test_benchmark_vlm.py
    │       ├── test_chat_sample.py
    │       ├── test_compound_grammar_sample.py
    │       ├── test_continuous_batching_tools.py
    │       ├── test_encrypted_model_causal_lm.py
    │       ├── test_encrypted_model_vlm.py
    │       ├── test_greedy_causal_lm.py
    │       ├── test_heterogeneous_stable_diffusion.py
    │       ├── test_image2image.py
    │       ├── test_inpainting.py
    │       ├── test_lora.py
    │       ├── test_lora_text2image.py
    │       ├── test_multinomial_causal_lm.py
    │       ├── test_prompt_lookup_decoding_lm.py
    │       ├── test_rag_sample.py
    │       ├── test_react_sample.py
    │       ├── test_scheduler_config.py
    │       ├── test_speculative_decoding_lm.py
    │       ├── test_structural_tag_generation.py
    │       ├── test_structured_output_sample.py
    │       ├── test_text2image.py
    │       ├── test_text2speech.py
    │       ├── test_tools_llm_benchmark.py
    │       ├── test_utils.py
    │       ├── test_visual_language_chat.py
    │       └── test_whisper_speech_recognition.py
    │   ├── test_continuous_batching.py
    │   ├── test_generation_config.py
    │   ├── test_gguf_reader.py
    │   ├── test_kv_cache_eviction
    │       ├── kv_cache_eviction_utils.py
    │       ├── test_kv_cache_eviction_1.py
    │       └── test_kv_cache_eviction_2.py
    │   ├── test_llm_pipeline.py
    │   ├── test_llm_pipeline_static.py
    │   ├── test_parsers.py
    │   ├── test_rag.py
    │   ├── test_sampling.py
    │   ├── test_stateful_speculative_decoding.py
    │   ├── test_structured_output.py
    │   ├── test_text_streamer.py
    │   ├── test_tokenizer.py
    │   ├── test_vlm_pipeline.py
    │   ├── test_whisper_pipeline.py
    │   ├── test_whisper_pipeline_static.py
    │   └── utils
    │       ├── __init__.py
    │       ├── comparation.py
    │       ├── constants.py
    │       ├── generation_config.py
    │       ├── hugging_face.py
    │       ├── longbench.py
    │       ├── network.py
    │       ├── ov_genai_pipelines.py
    │       ├── qwen3_reranker_utils.py
    │       └── tokenizers.py
├── third-party-programs.txt
├── thirdparty
    └── CMakeLists.txt
└── tools
    ├── __init__.py
    ├── cacheviz
        ├── __init__.py
        ├── cacheviz.py
        └── requirements.txt
    ├── continuous_batching
        ├── CMakeLists.txt
        ├── accuracy
        │   ├── CMakeLists.txt
        │   ├── continuous_batching_accuracy.cpp
        │   └── continuous_batching_speculative_decoding.cpp
        └── benchmark
        │   ├── CMakeLists.txt
        │   └── continuous_batching_benchmark.cpp
    ├── llm_bench
        ├── README.md
        ├── benchmark.py
        ├── doc
        │   ├── NOTES.md
        │   └── PROMPT.md
        ├── llm_bench_utils
        │   ├── config_class.py
        │   ├── gen_output_data.py
        │   ├── get_use_case.py
        │   ├── hook_beam_search.py
        │   ├── hook_common.py
        │   ├── hook_forward.py
        │   ├── hook_forward_whisper.py
        │   ├── hook_greedy_search.py
        │   ├── llm_hook_beam_search
        │   │   ├── __init__.py
        │   │   ├── hook_beam_search_v40.py
        │   │   ├── hook_beam_search_v51.py
        │   │   ├── hook_beam_search_v52.py
        │   │   └── hook_beam_search_v55.py
        │   ├── llm_hook_sample
        │   │   ├── __init__.py
        │   │   ├── hook_sample.py
        │   │   ├── hook_sample_v43.py
        │   │   ├── hook_sample_v45.py
        │   │   ├── hook_sample_v51.py
        │   │   ├── hook_sample_v52.py
        │   │   └── hook_sample_v55.py
        │   ├── memory_monitor.py
        │   ├── metrics_print.py
        │   ├── model_utils.py
        │   ├── output_csv.py
        │   ├── output_file.py
        │   ├── output_json.py
        │   ├── ov_model_classes.py
        │   ├── ov_utils.py
        │   ├── parse_json_data.py
        │   ├── prompt_utils.py
        │   └── pt_utils.py
        ├── prompts
        │   ├── llama-2-7b-chat_l.jsonl
        │   ├── llava-1.5-7b.jsonl
        │   ├── scheduler_config.json
        │   ├── stable-diffusion-i2i.jsonl
        │   ├── stable-diffusion-inpainting.jsonl
        │   ├── stable-diffusion.jsonl
        │   └── texts_for_rerank.jsonl
        ├── requirements.txt
        ├── requirements
        │   ├── requirements_conversion.txt
        │   └── requirements_gptq.txt
        ├── setup.cfg
        └── task
        │   ├── image_generation.py
        │   ├── pipeline_utils.py
        │   ├── speech_to_text_generation.py
        │   ├── super_resolution_generation.py
        │   ├── text_embeddings.py
        │   ├── text_generation.py
        │   ├── text_reranker.py
        │   ├── text_to_speech_generation.py
        │   └── visual_language_generation.py
    └── who_what_benchmark
        ├── README.md
        ├── examples
            ├── gptq_eval.py
            ├── huggingface_eval.py
            ├── openvino_batched_eval.py
            └── openvino_eval.py
        ├── requirements.txt
        ├── setup.cfg
        ├── setup.py
        ├── tests
            ├── test_cli_embeddings.py
            ├── test_cli_image.py
            ├── test_cli_reranking.py
            ├── test_cli_text.py
            └── test_cli_vlm.py
        └── whowhatbench
            ├── __init__.py
            ├── embeddings_evaluator.py
            ├── im2im_evaluator.py
            ├── inpaint_evaluator.py
            ├── model_loaders.py
            ├── prompts
                ├── text_long_prompts.yaml
                └── text_prompts.yaml
            ├── registry.py
            ├── reranking_evaluator.py
            ├── text2image_evaluator.py
            ├── text_evaluator.py
            ├── utils.py
            ├── visualtext_evaluator.py
            ├── whowhat_metrics.py
            └── wwb.py


/.clang-format:
--------------------------------------------------------------------------------
 1 | BasedOnStyle: Google
 2 | IndentWidth: 4
 3 | UseTab: Never
 4 | ColumnLimit: 120
 5 | 
 6 | Language: Cpp
 7 | Standard: Cpp11
 8 | 
 9 | AccessModifierOffset: -4
10 | AlignConsecutiveMacros: true
11 | AllowAllArgumentsOnNextLine: false
12 | AllowAllConstructorInitializersOnNextLine: false
13 | AllowAllParametersOfDeclarationOnNextLine: false
14 | AllowShortFunctionsOnASingleLine: Empty
15 | AllowShortIfStatementsOnASingleLine: Never
16 | AllowShortLambdasOnASingleLine: Empty
17 | AllowShortLoopsOnASingleLine: false
18 | AlwaysBreakBeforeMultilineStrings: false
19 | BinPackArguments: false
20 | BinPackParameters: false
21 | CommentPragmas: '^#'
22 | DerivePointerAlignment: false
23 | FixNamespaceComments: true
24 | IndentCaseLabels: false
25 | IndentPPDirectives: AfterHash
26 | ForEachMacros:
27 |   - foreach
28 |   - FOREACH_CHILD
29 | 


--------------------------------------------------------------------------------
/.github/actions/build_app/action.yml:
--------------------------------------------------------------------------------
 1 | name: 'Build App'
 2 | inputs:
 3 |   ov_dir:
 4 |     description: 'Directory where OpenVINO is installed'
 5 |     default: './ov'
 6 |     required: false
 7 |   build_dir:
 8 |     description: 'Directory where the app is built'
 9 |     default: './build'
10 |     required: false
11 |   build_target:
12 |     description: 'Target to build'
13 |     default: ''
14 |     required: false
15 | runs:
16 |   using: "composite"
17 |   steps:
18 |     - name: Build app
19 |       shell: bash
20 |       run: |
21 |         source ${{ inputs.ov_dir }}/setupvars.sh
22 |         cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ inputs.build_dir }}
23 |         cmake --build ${{ inputs.build_dir }} --config Release ${{ inputs.build_target && format('--target {0}', inputs.build_target) || '' }} -j
24 | 


--------------------------------------------------------------------------------
/.github/actions/install_openvino/action.yml:
--------------------------------------------------------------------------------
 1 | name: 'Install OpenVINO'
 2 | inputs:
 3 |   ov_link:
 4 |     description: 'URL to download OpenVINO'
 5 |     required: true
 6 |   ov_dir:
 7 |     description: 'Directory to install OpenVINO'
 8 |     default: './ov'
 9 |     required: false
10 | runs:
11 |   using: "composite"
12 |   steps:
13 |     - name: 'Install OpenVINO'
14 |       shell: bash
15 |       run: |
16 |         mkdir ${{ inputs.ov_dir }}
17 |         curl ${{ inputs.ov_link }} | tar --directory ${{ inputs.ov_dir }} --strip-components 1 -xz
18 |         sudo ${{ inputs.ov_dir }}/install_dependencies/install_openvino_dependencies.sh
19 | 


--------------------------------------------------------------------------------
/.github/actions/install_python_deps/action.yml:
--------------------------------------------------------------------------------
 1 | name: 'Install Python Dependencies'
 2 | inputs:
 3 |   ov_dir:
 4 |     description: 'Directory where OpenVINO is installed'
 5 |     default: './ov'
 6 |     required: false
 7 | runs:
 8 |   using: "composite"
 9 |   steps:
10 |     - name: Install Python dependencies
11 |       shell: bash
12 |       run: |
13 |         source ${{ inputs.ov_dir }}/setupvars.sh
14 |         python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
15 |         python -m pip install -r ./samples/requirements.txt
16 | 
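Editor's note: the three composite actions above are designed to be chained inside a workflow job — install_openvino downloads and unpacks an OpenVINO archive, install_python_deps sources setupvars.sh and installs the openvino_tokenizers submodule plus the sample requirements, and build_app configures and builds the repository with CMake. A minimal job sketch follows, assuming a checkout with submodules; the runner label and the ov_link URL are illustrative placeholders, not values taken from this repository — only the action paths, input names, and defaults come from the action.yml files above.

# Hypothetical job sketch; ov_link is a placeholder URL.
jobs:
  build_samples:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive   # needed so ./thirdparty/openvino_tokenizers exists
      - name: Install OpenVINO
        uses: ./.github/actions/install_openvino
        with:
          ov_link: https://example.com/openvino_toolkit_ubuntu22.tgz
      - name: Install Python dependencies
        uses: ./.github/actions/install_python_deps   # ov_dir defaults to ./ov, matching install_openvino
      - name: Build
        uses: ./.github/actions/build_app             # ov_dir/build_dir defaults as defined above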


--------------------------------------------------------------------------------
/.github/actions/install_wheel/.node-version:
--------------------------------------------------------------------------------
1 | 20.6.0
2 | 


--------------------------------------------------------------------------------
/.github/actions/install_wheel/.prettierignore:
--------------------------------------------------------------------------------
1 | dist/
2 | node_modules/
3 | coverage/
4 | 


--------------------------------------------------------------------------------
/.github/actions/install_wheel/.prettierrc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "printWidth": 80,
 3 |   "tabWidth": 2,
 4 |   "useTabs": false,
 5 |   "semi": true,
 6 |   "singleQuote": true,
 7 |   "quoteProps": "as-needed",
 8 |   "jsxSingleQuote": false,
 9 |   "trailingComma": "none",
10 |   "bracketSpacing": true,
11 |   "bracketSameLine": true,
12 |   "arrowParens": "avoid",
13 |   "proseWrap": "always",
14 |   "htmlWhitespaceSensitivity": "css",
15 |   "endOfLine": "lf"
16 | }
17 | 


--------------------------------------------------------------------------------
/.github/actions/install_wheel/action.yml:
--------------------------------------------------------------------------------
 1 | name: 'Install Python Packages with Local Wheels and Extras'
 2 | description:
 3 |   'Installs specified Python packages with support for local wheels and optional
 4 |   extras.'
 5 | author: 'OpenVINO Developers'
 6 | inputs:
 7 |   packages:
 8 |     description:
 9 |       "Semicolon-separated list of packages to install, e.g.,
10 |       'openvino;openvino_tokenizers[extra1,extra2]'"
11 |     required: true
12 |   requirements_files:
13 |     description:
14 |       "Semicolon-separated list of requirements.txt to install, e.g.,
15 |       'requirements.txt;requirements-dev.txt'"
16 |     required: false
17 |   local_wheel_dir:
18 |     description: 'Path to the directory containing local wheel files'
19 |     required: true
20 | runs:
21 |   using: 'node20'
22 |   main: 'dist/index.js'
23 | 
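Editor's note: unlike the composite actions above, install_wheel is a Node 20 action whose entry point is the bundled dist/index.js built from src/install_packages.js. A hedged usage sketch follows; the input names (packages, requirements_files, local_wheel_dir) come from the definition above, while the wheel directory path and the chosen extras are illustrative assumptions.

# Hypothetical workflow step; only the input names are taken from action.yml above.
- name: Install wheels with extras
  uses: ./.github/actions/install_wheel
  with:
    packages: 'openvino;openvino_tokenizers[transformers]'
    requirements_files: 'samples/requirements.txt'
    local_wheel_dir: './wheels'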


--------------------------------------------------------------------------------
/.github/actions/install_wheel/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "install-wheel-action",
 3 |   "description": "Action to install local python wheels together with their dependencies",
 4 |   "version": "0.0.1",
 5 |   "author": "OpenVINO Developers",
 6 |   "private": true,
 7 |   "keywords": [
 8 |     "GitHub",
 9 |     "Actions",
10 |     "JavaScript"
11 |   ],
12 |   "engines": {
13 |     "node": ">=20"
14 |   },
15 |   "main": "dist/index.js",
16 |   "scripts": {
17 |     "bundle": "npm run format:write && npm run package",
18 |     "format:write": "npx prettier --write .",
19 |     "format:check": "npx prettier --check .",
20 |     "package": "npx ncc build src/install_packages.js -o dist",
21 |     "package:watch": "npm run package -- --watch",
22 |     "all": "npm run format:write && npm run package"
23 |   },
24 |   "dependencies": {
25 |     "@actions/core": "^1.11.1",
26 |     "glob": "^11.0.1"
27 |   },
28 |   "devDependencies": {
29 |     "@vercel/ncc": "^0.38.3",
30 |     "prettier": "^3.5.0"
31 |   },
32 |   "license": "Apache-2.0"
33 | }
34 | 


--------------------------------------------------------------------------------
/.github/dependency_review.yml:
--------------------------------------------------------------------------------
 1 | fail-on-severity: "low"
 2 | allow-licenses:
 3 |   - "BSD-2-Clause"
 4 |   - "BSD-3-Clause"
 5 |   - "MIT"
 6 |   - "Apache-2.0"
 7 |   - "ISC"
 8 |   - "BlueOak-1.0.0"
 9 |   - "0BSD"
10 |   - "Python-2.0"
11 |   - "MIT-CMU" # Pillow's license
12 |   - "CC-BY-NC-4.0"
13 |   - "GPL-1.0-or-later"
14 |   - "LGPL-2.0-or-later"
15 |   - "LicenseRef-scancode-proprietary-license"
16 | fail-on-scopes:
17 |   - "runtime"
18 |   - "development"
19 |   - "unknown"
20 | license-check: true
21 | vulnerability-check: true
22 | allow-dependencies-licenses:
23 |   - "pkg:npm/thingies" # Docs site (dependency of dependency)
24 | 
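Editor's note: dependency_review.yml reads as a configuration file for GitHub's dependency review check rather than a workflow of its own (it sits outside the workflows directory in the tree above). Assuming it is consumed through the config-file input of actions/dependency-review-action, a pull-request job could reference it roughly as sketched below; the workflow name, job name, and action version tag are illustrative, not taken from this repository.

# Hypothetical sketch of a workflow consuming the config above.
name: Dependency Review
on: [pull_request]
permissions: read-all
jobs:
  dependency-review:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/dependency-review-action@v4
        with:
          config-file: './.github/dependency_review.yml'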


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | <!-- Keep your pull requests (PRs) as atomic as possible. That increases the likelihood that an individual PR won't be stuck because of adjacent problems, merge conflicts, or code review.
 2 | Your merged PR is going to appear in the automatically generated release notes on GitHub. So the clearer the title the better. -->
 3 | ## Description
 4 | <!-- Please include a summary of the change. Also include relevant motivation and context. -->
 5 | 
 6 | <!-- Jira ticket number (e.g., 123). Delete if there's no ticket. -->
 7 | CVS-###
 8 | 
 9 | <!-- Remove if not applicable -->
10 | Fixes #(issue)
11 | 
12 | ## Checklist:
13 | - [ ] Tests have been updated or added to cover the new code. <!-- If the change isn't maintenance related, update the tests at https://github.com/openvinotoolkit/openvino.genai/tree/master/tests or explain in the description why the tests don't need an update. -->
14 | - [ ] This patch fully addresses the ticket. <!--- If follow-up pull requests are needed, specify in description. -->
15 | - [ ] I have made corresponding changes to the documentation. <!-- Run github.com/\<username>/openvino.genai/actions/workflows/deploy_gh_pages.yml on your fork with your branch as a parameter to deploy a test version with the updated content. Replace this comment with the link to the built docs. -->
16 | 


--------------------------------------------------------------------------------
/.github/workflows/assign_issue.yml:
--------------------------------------------------------------------------------
 1 | name: Take Issue
 2 | 
 3 | on:
 4 |   issue_comment:
 5 |     types:
 6 |       - created
 7 |       - edited
 8 | 
 9 | permissions: read-all  # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions
10 | 
11 | jobs:
12 |   take-issue:
13 |     name: Take issue
14 |     runs-on: ubuntu-latest
15 |     permissions:
16 |       issues: write
17 |     timeout-minutes: 10
18 |     steps:
19 |       - name: take an issue
20 |         uses: bdougie/take-action@v1.6.1
21 |         with:
22 |           message: Thank you for looking into this issue! Please let us know if you have any questions or require any help.
23 |           issueCurrentlyAssignedMessage: Thanks for being interested in this issue. It looks like this ticket is already assigned to a contributor. Please communicate with the assigned contributor to confirm the status of the issue.
24 |           trigger: .take
25 |           token: ${{ secrets.GITHUB_TOKEN }}
26 | 


--------------------------------------------------------------------------------
/.github/workflows/cleanup_caches.yml:
--------------------------------------------------------------------------------
 1 | name: Cleanup caches
 2 | on:
 3 |   workflow_dispatch:
 4 |   schedule:
 5 |     # at 00:00 on workdays
 6 |     - cron: '0 0 * * 1,2,3,4,5'
 7 | 
 8 | permissions: read-all
 9 | 
10 | jobs:
11 |   Cleanup_OV_CACHE:
12 |     name: Cleanup OV_CACHE
13 |     runs-on: aks-linux-4-cores-16gb
14 |     if: ${{ github.repository_owner == 'openvinotoolkit' }}
15 |     container:
16 |       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
17 |       volumes:
18 |         - /mount:/mount
19 |     env:
20 |       OV_CACHE: /mount/caches/huggingface/.ov_cache
21 | 
22 |     steps:
23 |       - name: Pre-Collecting Cache Info
24 |         run: |
25 |           echo "Cache info: "
26 |           du -h -d2 ${{ env.OV_CACHE }}
27 |       - name: Cleanup cache
28 |         run: |
29 |           echo "Delete cache files if they have not been used in over 3 days"
30 |           [ ! -z "${{ env.OV_CACHE }}" ] && find ${{ env.OV_CACHE }} ! -type d -atime +3 -delete
31 | 
32 |       - name: Post-Collecting Cache Info
33 |         run: |
34 |           echo "Cache info: "
35 |           du -h -d2 ${{ env.OV_CACHE }}


--------------------------------------------------------------------------------
/.github/workflows/labeler.yml:
--------------------------------------------------------------------------------
 1 | name: "Pull Request Labeler"
 2 | on:
 3 | - pull_request_target
 4 | 
 5 | permissions: read-all  # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions
 6 | 
 7 | jobs:
 8 |   triage:
 9 |     permissions:
10 |       contents: read
11 |       pull-requests: write
12 |       issues: write
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |     - uses: akladiev/labeler@eeac5941e7fb6f980d47e038ac0665168851c874 # v4.3.1
16 |       with:
17 |         repo-token: "${{ secrets.GITHUB_TOKEN }}"
18 |         configuration-path: '.github/labeler.yml'
19 |         sync-labels: 'true'
20 |         dot: 'true'
21 |         non-matching-label: 'no-match-files'
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # build/artifact dirs
 2 | [Bb]uild*/
 3 | 
 4 | # but ensure we don't skip __init__.py and __main__.py
 5 | !__init__.py
 6 | !__main__.py
 7 | 
 8 | # don't skip GitHub Actions files and directories
 9 | !.github/**
10 | 
11 | # developer tools
12 | *.idea
13 | .vscode
14 | .vs/
15 | .vsconan/
16 | .DS_Store
17 | **/tags
18 | compile_commands.json
19 | .local_vimrc
20 | .gdb_history
21 | .vimspector.json
22 | doc/
23 | temp/
24 | .repo/
25 | CMakeLists.txt.user
26 | CMakeUserPresets.json
27 | .env
28 | 
29 | *.project
30 | *.cproject
31 | *.pydevproject
32 | *.settings
33 | */gen/
34 | *.swp
35 | /config.xml
36 | 
37 | # Python-specific
38 | *.?env*
39 | *.pyc
40 | __pycache__
41 | .py-build-cmake_cache
42 | *.egg-info
43 | 
44 | # CodeQL artifacts
45 | _codeql_detected_source_root
46 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "thirdparty/openvino_tokenizers"]
2 |     path = thirdparty/openvino_tokenizers
3 |     url = https://github.com/openvinotoolkit/openvino_tokenizers.git
4 | 


--------------------------------------------------------------------------------
/Jenkinsfile:
--------------------------------------------------------------------------------
 1 | #!groovy
 2 | 
 3 | properties([
 4 |     parameters([
 5 |         booleanParam(defaultValue: false,
 6 |                      description: 'Cancel the rest of parallel stages if one of them fails and return status immediately',
 7 |                      name: 'failFast'),
 8 |         booleanParam(defaultValue: true,
 9 |                      description: 'Whether to propagate commit status to GitHub',
10 |                      name: 'propagateStatus'),
11 |         booleanParam(defaultValue: false,
12 |                description: 'If true, forces running pre-commit scope',
13 |                name: 'forceRunPrecommitScope'),
14 |         string(defaultValue: '',
15 |                description: 'Pipeline shared library version (branch/tag/commit). Determined automatically if empty',
16 |                name: 'library_version')
17 |     ])
18 | ])
19 | 
20 | loadOpenVinoLibrary {
21 |     entrypoint(this)
22 | }
23 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | # Security Policy
 2 | 
 3 | ## Report a Vulnerability
 4 | 
 5 | Please report security issues or vulnerabilities to the [Intel® Security Center].
 6 | 
 7 | For more information on how Intel® works to resolve security issues, see
 8 | [Vulnerability Handling Guidelines].
 9 | 
10 | [Intel® Security Center]:https://www.intel.com/security
11 | 
12 | [Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html
13 | 


--------------------------------------------------------------------------------
/cmake/features.cmake:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2018-2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | 
 5 | option(ENABLE_PYTHON "Enable Python API build" ON)
 6 | option(ENABLE_GIL_PYTHON_API "Build Python API with Global Interpreter Lock" ON)
 7 | option(ENABLE_JS "Enable JS API build" OFF)
 8 | option(ENABLE_SAMPLES "Enable samples build" ON)
 9 | option(ENABLE_TESTS "Enable tests build" ON)
10 | option(ENABLE_TOOLS "Enable tools build" ON)
11 | option(ENABLE_GGUF "Enable support for GGUF format" ON)
12 | option(ENABLE_XGRAMMAR "Enable support for structured output generation with xgrammar backend" ON)
13 | 
14 | # Disable building samples for NPM package
15 | if(CPACK_GENERATOR STREQUAL "NPM")
16 |     set(ENABLE_SAMPLES OFF)
17 |     set(ENABLE_PYTHON OFF)
18 |     set(ENABLE_JS ON)
19 | else()
20 |     set(ENABLE_JS OFF)
21 | endif()
22 | 


--------------------------------------------------------------------------------
/cmake/templates/OpenVINOGenAIConfig.cmake.in:
--------------------------------------------------------------------------------
 1 | @PACKAGE_INIT@
 2 | 
 3 | include(CMakeFindDependencyMacro)
 4 | find_dependency(OpenVINO COMPONENTS Runtime)
 5 | 
 6 | if(NOT TARGET openvino_genai)
 7 |     include("${CMAKE_CURRENT_LIST_DIR}/OpenVINOGenAITargets.cmake")
 8 | endif()
 9 | 
10 | check_required_components(OpenVINOGenAI)
11 | 


--------------------------------------------------------------------------------
/cmake/templates/version.cpp.in:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "openvino/genai/version.hpp"
 5 | 
 6 | namespace ov {
 7 | namespace genai {
 8 | 
 9 | const Version get_version() {
10 |     const static Version version = {
11 |         "@OpenVINOGenAI_FULL_VERSION@",
12 |         "OpenVINO GenAI version",
13 |     };
14 | 
15 |     return version;
16 | }
17 | 
18 | } // namespace genai
19 | } // namespace ov
20 | 


--------------------------------------------------------------------------------
/cmake/templates/version.hpp.in:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "openvino/core/version.hpp"
 7 | #include "openvino/genai/visibility.hpp"
 8 | 
 9 | /**
10 |  * OpenVINO GenAI major version
11 |  */
12 | #define OPENVINO_GENAI_VERSION_MAJOR @OpenVINOGenAI_VERSION_MAJOR@
13 | 
14 | /**
15 |  * OpenVINO GenAI minor version
16 |  */
17 | #define OPENVINO_GENAI_VERSION_MINOR @OpenVINOGenAI_VERSION_MINOR@
18 | 
19 | /**
20 |  * OpenVINO GenAI patch version
21 |  */
22 | #define OPENVINO_GENAI_VERSION_PATCH @OpenVINOGenAI_VERSION_PATCH@
23 | 
24 | namespace ov {
25 | namespace genai {
26 | 
27 | /**
28 |  * Returns the full OpenVINO GenAI version, including git commit number and hash, in the form:
29 |  *   <MAJOR>.<MINOR>.<PATCH>.<REVISION>-<COMMIT NUMBER>-<COMMIT HASH>[-<BRANCH SUFFIX>]
30 |  */
31 | OPENVINO_EXTERN_C OPENVINO_GENAI_EXPORTS const ov::Version OPENVINO_CDECL get_version();
32 | 
33 | } // namespace genai
34 | } // namespace ov
35 | 
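For reference, a consumer of the generated header can query the GenAI version at runtime. A minimal sketch, assuming `ov::Version` (from `openvino/core/version.hpp`) exposes its usual `buildNumber` and `description` fields:

```cpp
// Hypothetical usage of the generated version API; field names are assumptions.
#include <iostream>

#include "openvino/genai/version.hpp"

int main() {
    const ov::Version version = ov::genai::get_version();
    std::cout << version.description << ": " << version.buildNumber << '\n';
    return 0;
}
```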


--------------------------------------------------------------------------------
/cmake/templates/vs_version.rc.in:
--------------------------------------------------------------------------------
 1 | #include <winver.h>
 2 | 
 3 | VS_VERSION_INFO         VERSIONINFO
 4 |   FILEVERSION           @PROJECT_VERSION_MAJOR@,@PROJECT_VERSION_MINOR@,@PROJECT_VERSION_PATCH@,@PROJECT_VERSION_TWEAK@
 5 |   PRODUCTVERSION        @PROJECT_VERSION_MAJOR@,@PROJECT_VERSION_MINOR@,@PROJECT_VERSION_PATCH@,@PROJECT_VERSION_TWEAK@
 6 |   FILEFLAGSMASK         VS_FFI_FILEFLAGSMASK
 7 | #ifdef _DEBUG
 8 |   FILEFLAGS             1
 9 | #else
10 |   FILEFLAGS             0
11 | #endif
12 |   FILEOS                VOS__WINDOWS32
13 |   FILETYPE              VFT_DLL
14 |   FILESUBTYPE           0
15 | BEGIN
16 |   BLOCK "StringFileInfo"
17 |   BEGIN
18 |     BLOCK "040904E4"
19 |     BEGIN
20 |       VALUE "CompanyName", "@PROJECT_COMPANY_NAME@\0"
21 |       VALUE "FileDescription", "@PROJECT_DESCRIPTION@\0"
22 |       VALUE "FileVersion", "@PROJECT_VERSION@\0"
23 |       VALUE "LegalCopyright", "@PROJECT_COPYRIGHT@\0"
24 |       VALUE "ProductName", "@PROJECT_PRODUCT_NAME@\0"
25 |       VALUE "ProductVersion", "@OpenVINOGenAI_FULL_VERSION@\0"
26 |       VALUE "Comments", "@PROJECT_COMMENTS@\0"
27 |     END
28 |   END
29 |   BLOCK "VarFileInfo"
30 |   BEGIN
31 |     VALUE "Translation", 0x0409, 1252
32 |   END
33 | END


--------------------------------------------------------------------------------
/cmake/vs_version.cmake:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | 
 5 | set(PROJECT_COMPANY_NAME "Intel Corporation")
 6 | set(PROJECT_PRODUCT_NAME "OpenVINO GenAI")
 7 | set(PROJECT_COPYRIGHT "Copyright (C) 2018-2025, Intel Corporation")
 8 | set(PROJECT_COMMENTS "https://docs.openvino.ai/")
 9 | 
10 | # This function generates a version resource (.rc) file from a template and adds it to the given target.
11 | function(add_vs_version_resource TARGET_NAME)
12 |     set(VS_VERSION_TEMPLATE "${PROJECT_SOURCE_DIR}/cmake/templates/vs_version.rc.in")
13 |     set(VS_VERSION_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/vs_version.rc")
14 | 
15 |     configure_file("${VS_VERSION_TEMPLATE}" "${VS_VERSION_OUTPUT}" @ONLY)
16 | 
17 |     target_sources(${TARGET_NAME} PRIVATE "${VS_VERSION_OUTPUT}")
18 | endfunction()
19 | 


--------------------------------------------------------------------------------
/requirements-build.txt:
--------------------------------------------------------------------------------
1 | cmake~=3.23.0; platform_system != 'Darwin' or platform_machine == 'x86_64'
2 | cmake~=3.24.0; platform_system == 'Darwin' and platform_machine == 'arm64'
3 | pybind11-stubgen==2.5.5


--------------------------------------------------------------------------------
/samples/c/text_generation/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2023-2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | find_package(OpenVINOGenAI REQUIRED
 5 |     PATHS
 6 |         "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
 7 |         ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
 8 |     NO_CMAKE_FIND_ROOT_PATH
 9 | )
10 | 
11 | function(add_sample_executable target_name)
12 |     add_executable(${target_name} ${target_name}.c)
13 |     # Specifies that the source file should be compiled as a C source file
14 |     set_source_files_properties(${target_name}.c PROPERTIES LANGUAGE C)
15 |     target_link_libraries(${target_name} PRIVATE openvino::genai::c)
16 |     set_target_properties(${target_name} PROPERTIES
17 |         # Ensure out-of-box LC_RPATH on macOS with SIP
18 |         INSTALL_RPATH_USE_LINK_PATH ON)
19 |     install(TARGETS ${target_name}
20 |             RUNTIME DESTINATION samples_bin/
21 |             COMPONENT samples_bin
22 |             EXCLUDE_FROM_ALL)
23 | endfunction()
24 | 
25 | set (SAMPLE_LIST
26 |     greedy_causal_lm_c
27 |     chat_sample_c
28 |     benchmark_genai_c)
29 | 
30 | foreach(sample IN LISTS SAMPLE_LIST)
31 |     add_sample_executable(${sample})
32 | endforeach()
33 | 
34 | 


--------------------------------------------------------------------------------
/samples/c/visual_language_chat/load_image.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOAD_IMAGE_H
 2 | #define LOAD_IMAGE_H
 3 | 
 4 | #include <stddef.h>
 5 | #include <stdint.h>
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | typedef struct ov_tensor ov_tensor_t;
12 | 
13 | ov_tensor_t* load_image(const char* image_path);
14 | 
15 | const ov_tensor_t** load_images(const char* image_path, size_t* tensor_count);
16 | 
17 | void free_tensor(ov_tensor_t* tensor);
18 | 
19 | void free_tensor_array(ov_tensor_t** tensors, size_t count);
20 | 
21 | int file_exists(const char* path);
22 | 
23 | #ifdef __cplusplus
24 | }
25 | #endif
26 | 
27 | #endif // LOAD_IMAGE_H
28 | 


--------------------------------------------------------------------------------
/samples/c/whisper_speech_recognition/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | find_package(OpenVINOGenAI REQUIRED
 5 |     PATHS
 6 |         "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
 7 |         ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
 8 |     NO_CMAKE_FIND_ROOT_PATH
 9 | )
10 | 
11 | # Whisper Speech Recognition Sample
12 | add_executable(whisper_speech_recognition_c whisper_speech_recognition.c whisper_utils.c)
13 | 
14 | # Specifies that the source files should be compiled as C source files
15 | set_source_files_properties(whisper_speech_recognition.c whisper_utils.c PROPERTIES LANGUAGE C)
16 | target_link_libraries(whisper_speech_recognition_c PRIVATE openvino::genai::c)
17 | 
18 | set_target_properties(whisper_speech_recognition_c PROPERTIES
19 |     # Ensure out-of-box LC_RPATH on macOS with SIP
20 |     INSTALL_RPATH_USE_LINK_PATH ON)
21 | 
22 | # Install
23 | install(TARGETS whisper_speech_recognition_c
24 |         RUNTIME DESTINATION samples_bin/
25 |         COMPONENT samples_bin
26 |         EXCLUDE_FROM_ALL)
27 | 


--------------------------------------------------------------------------------
/samples/cpp/README.md:
--------------------------------------------------------------------------------
1 | Please refer to the following blog post for setup instructions:
2 | 
3 | https://medium.com/openvino-toolkit/how-to-build-openvino-genai-app-in-c-32dcbe42fa67
4 | 


--------------------------------------------------------------------------------
/samples/cpp/image_generation/512x512.bmp:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:73fc1a2b80048752350d108852f3598395666b9208d5e0ab34c0613cea9cfd04
3 | size 786486
4 | 


--------------------------------------------------------------------------------
/samples/cpp/image_generation/baseline.bmp:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:bb8491607e8c2cce4394ac0b796350745dde04dba7d754c3fad24d86e1c4d2e1
3 | size 1376310
4 | 


--------------------------------------------------------------------------------
/samples/cpp/image_generation/imageimage.bmp:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:2ecb4783a8f3a0962659ebf80eeaf0c0e48c44995c1e60001f215e0697ab9397
3 | size 2162742
4 | 


--------------------------------------------------------------------------------
/samples/cpp/image_generation/imwrite.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <string>
 7 | 
 8 | #include "openvino/runtime/tensor.hpp"
 9 | 
10 | /**
11 |  * @brief Writes multiple images (depending on the `images` tensor batch size) to BMP file(s)
12 |  * @param name File name or pattern to use to write images
13 |  * @param images Image(s) tensor
14 |  * @param convert_bgr2rgb Convert BGR to RGB
15 |  */
16 | void imwrite(const std::string& name, ov::Tensor images, bool convert_bgr2rgb);
17 | 


--------------------------------------------------------------------------------
/samples/cpp/image_generation/inpainting.bmp:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:527cee8f7d451c7e5004bc58c079d4c853443644eaeb2d84a343016cd25214c1
3 | size 786486
4 | 


--------------------------------------------------------------------------------
/samples/cpp/image_generation/load_image.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | // Copyright (C) 2023-2025 Intel Corporation
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <openvino/runtime/tensor.hpp>
 8 | #include <filesystem>
 9 | 
10 | namespace utils {
11 | ov::Tensor load_image(const std::filesystem::path& image_path);
12 | }
13 | 


--------------------------------------------------------------------------------
/samples/cpp/image_generation/lora.bmp:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:72760b8ae70a02cf318cfb9a08d520bd4800abb22b5eafe57eafb3cfbed7303d
3 | size 1376310
4 | 


--------------------------------------------------------------------------------
/samples/cpp/image_generation/progress_bar.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include <optional>
 5 | 
 6 | #include "indicators/progress_bar.hpp"
 7 | 
 8 | bool progress_bar(size_t step, size_t num_steps, ov::Tensor& /* latent */) {
 9 |     using namespace indicators;
10 | 
11 |     static std::optional<ProgressBar> bar;
12 | 
13 |     if (!bar) {
14 |         bar.emplace(
15 |             option::BarWidth{50},
16 |             option::ForegroundColor{Color::green},
17 |             option::FontStyles{std::vector<FontStyle>{FontStyle::bold}},
18 |             option::ShowElapsedTime{true},
19 |             option::ShowRemainingTime{true}
20 |         );
21 |     }
22 | 
23 |     std::stringstream stream;
24 |     stream << "Image generation step " << (step + 1) << " / " << num_steps;
25 | 
26 |     bar->set_option(option::PostfixText{stream.str()});
27 |     bar->set_progress((100 * (step + 1)) / num_steps);
28 | 
29 |     if (step + 1 == num_steps) {
30 |         bar.reset();  // Required when multiple progress bars are used, without recreation of the object the second progress bar won't be displayed correctly
31 |     }
32 | 
33 |     return false;
34 | }
35 | 


--------------------------------------------------------------------------------
/samples/cpp/image_generation/text2image.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "openvino/genai/image_generation/text2image_pipeline.hpp"
 5 | 
 6 | #include "imwrite.hpp"
 7 | #include "progress_bar.hpp"
 8 | 
 9 | int32_t main(int32_t argc, char* argv[]) try {
10 |     OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " <MODEL_DIR> '<PROMPT>'");
11 | 
12 |     const std::string models_path = argv[1], prompt = argv[2];
13 |     const std::string device = "CPU";  // GPU can be used as well
14 | 
15 |     ov::genai::Text2ImagePipeline pipe(models_path, device);
16 |     ov::Tensor image = pipe.generate(prompt,
17 |         ov::genai::width(512),
18 |         ov::genai::height(512),
19 |         ov::genai::num_inference_steps(20),
20 |         ov::genai::num_images_per_prompt(1),
21 |         ov::genai::callback(progress_bar));
22 | 
23 |     // writes `num_images_per_prompt` images by pattern name
24 |     imwrite("image_%d.bmp", image, true);
25 | 
26 |     return EXIT_SUCCESS;
27 | } catch (const std::exception& error) {
28 |     try {
29 |         std::cerr << error.what() << '\n';
30 |     } catch (const std::ios_base::failure&) {}
31 |     return EXIT_FAILURE;
32 | } catch (...) {
33 |     try {
34 |         std::cerr << "Non-exception object thrown\n";
35 |     } catch (const std::ios_base::failure&) {}
36 |     return EXIT_FAILURE;
37 | }
38 | 


--------------------------------------------------------------------------------
/samples/cpp/rag/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | find_package(OpenVINOGenAI REQUIRED
 5 |     PATHS
 6 |         "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
 7 |         ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
 8 |     NO_CMAKE_FIND_ROOT_PATH
 9 | )
10 | 
11 | function(add_sample_executable target_name)
12 |     add_executable(${target_name} ${target_name}.cpp)
13 |     target_link_libraries(${target_name} PRIVATE openvino::genai)
14 |     set_target_properties(${target_name} PROPERTIES
15 |         COMPILE_PDB_NAME ${target_name}
16 |         # Ensure out-of-box LC_RPATH on macOS with SIP
17 |         INSTALL_RPATH_USE_LINK_PATH ON)
18 |     install(TARGETS ${target_name}
19 |             RUNTIME DESTINATION samples_bin/
20 |             COMPONENT samples_bin
21 |             EXCLUDE_FROM_ALL)
22 | endfunction()
23 | 
24 | set(SAMPLE_LIST text_embeddings text_rerank)
25 | 
26 | foreach(sample ${SAMPLE_LIST})
27 |     add_sample_executable(${sample})
28 | endforeach()
29 | 
30 | 
31 | # benchmark_genai
32 | include(FetchContent)
33 | 
34 | if(POLICY CMP0135)
35 |     cmake_policy(SET CMP0135 NEW)
36 | endif()
37 | 


--------------------------------------------------------------------------------
/samples/cpp/rag/text_embeddings.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "openvino/genai/rag/text_embedding_pipeline.hpp"
 5 | 
 6 | int main(int argc, char* argv[]) try {
 7 |     if (argc < 3) {
 8 |         throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> '<TEXT 1>' ['<TEXT 2>' ...]");
 9 |     }
10 |     auto documents = std::vector<std::string>(argv + 2, argv + argc);
11 |     std::string models_path = argv[1];
12 | 
13 |     std::string device = "CPU";  // GPU can be used as well
14 | 
15 |     ov::genai::TextEmbeddingPipeline::Config config;
16 |     config.pooling_type = ov::genai::TextEmbeddingPipeline::PoolingType::MEAN;
17 | 
18 |     ov::genai::TextEmbeddingPipeline pipeline(models_path, device, config);
19 | 
20 |     ov::genai::EmbeddingResults documents_embeddings = pipeline.embed_documents(documents);
21 |     ov::genai::EmbeddingResult query_embedding = pipeline.embed_query("What is the capital of France?");
22 | } catch (const std::exception& error) {
23 |     try {
24 |         std::cerr << error.what() << '\n';
25 |     } catch (const std::ios_base::failure&) {
26 |     }
27 |     return EXIT_FAILURE;
28 | } catch (...) {
29 |     try {
30 |         std::cerr << "Non-exception object thrown\n";
31 |     } catch (const std::ios_base::failure&) {
32 |     }
33 |     return EXIT_FAILURE;
34 | }
35 | 
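The sample above computes document and query embeddings but does not compare them. A typical next step is to rank documents by cosine similarity against the query embedding; a minimal sketch, assuming the embeddings can be obtained as plain `std::vector<float>` of equal length (the actual `EmbeddingResult`/`EmbeddingResults` types may wrap the data differently):

```cpp
// Illustrative helper only; not part of the sample above.
#include <cmath>
#include <vector>

// Cosine similarity between two dense embeddings of equal length.
float cosine_similarity(const std::vector<float>& a, const std::vector<float>& b) {
    float dot = 0.0f, norm_a = 0.0f, norm_b = 0.0f;
    for (size_t i = 0; i < a.size(); ++i) {
        dot += a[i] * b[i];
        norm_a += a[i] * a[i];
        norm_b += b[i] * b[i];
    }
    return dot / (std::sqrt(norm_a) * std::sqrt(norm_b));
}
```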


--------------------------------------------------------------------------------
/samples/cpp/speech_generation/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2023-2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | find_package(OpenVINOGenAI REQUIRED
 5 |     PATHS
 6 |         "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
 7 |         ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
 8 |     NO_CMAKE_FIND_ROOT_PATH
 9 | )
10 | 
11 | include(FetchContent)
12 | 
13 | if(NOT TARGET dr_libs)
14 |     FetchContent_Declare(dr_libs
15 |         URL https://github.com/mackron/dr_libs/archive/da35f9d6c7374a95353fd1df1d394d44ab66cf01.tar.gz
16 |         URL_HASH SHA256=2704d347f480ca1bc92233fb01747e4550cc8031735b6ea62ca9990ebb8851ae)
17 |     FetchContent_MakeAvailable(dr_libs)
18 | endif()
19 | 
20 | if(POLICY CMP0135)
21 |     cmake_policy(SET CMP0135 NEW)
22 | endif()
23 | 
24 | # create main sample executable
25 | 
26 | add_executable(text2speech text2speech.cpp audio_utils.cpp)
27 | 
28 | target_include_directories(text2speech PRIVATE "$<BUILD_INTERFACE:${dr_libs_SOURCE_DIR}>")
29 | target_link_libraries(text2speech PRIVATE openvino::genai)
30 | 
31 | set_target_properties(text2speech PROPERTIES
32 |     # Ensure out of box LC_RPATH on macOS with SIP
33 |     INSTALL_RPATH_USE_LINK_PATH ON)
34 | target_compile_features(text2speech PRIVATE cxx_std_11)
35 | 
36 | install(TARGETS text2speech
37 |         RUNTIME DESTINATION samples_bin/
38 |         COMPONENT samples_bin
39 |         EXCLUDE_FROM_ALL)
40 | 


--------------------------------------------------------------------------------
/samples/cpp/speech_generation/audio_utils.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <filesystem>
 7 | #include <string>
 8 | #include <vector>
 9 | 
10 | #include "openvino/runtime/tensor.hpp"
11 | 
12 | namespace utils {
13 | namespace audio {
14 | /**
15 |  * This function saves an audio waveform, provided as an array of floating-point samples, to a WAV file.
16 |  *
17 |  * @param waveform_ptr Pointer to the array of float samples representing the audio waveform
18 |  * @param waveform_size The number of samples in the waveform array
19 |  * @param file_path The name (and path) of the WAV file to be created
20 |  * @param bits_per_sample The bit depth used to store each sample in the WAV file
21 |  */
22 | void save_to_wav(const float* waveform_ptr,
23 |                  size_t waveform_size,
24 |                  const std::filesystem::path& file_path,
25 |                  uint32_t bits_per_sample);
26 | 
27 | /**
28 |  * This function reads a binary file containing a speaker embedding stored as 32-bit floating-point values and
29 |  * returns it as an ov::Tensor
30 |  *
31 |  * @param file_path The path to the binary file to be read
32 |  * @returns an ov::Tensor containing the float values read from the binary file
33 |  */
34 | ov::Tensor read_speaker_embedding(const std::filesystem::path& file_path);
35 | }  // namespace audio
36 | }  // namespace utils
37 | 


--------------------------------------------------------------------------------
/samples/cpp/text_generation/beam_search_causal_lm.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include <openvino/genai/llm_pipeline.hpp>
 5 | 
 6 | int main(int argc, char* argv[]) try {
 7 |     if (argc < 3) {
 8 |         throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> '<PROMPT 1>' ['<PROMPT 2>' ...]");
 9 |     }
10 |     auto prompts = std::vector<std::string>(argv + 2, argv + argc);
11 |     std::string models_path = argv[1];
12 | 
13 |     std::string device = "CPU";  // GPU can be used as well
14 |     ov::genai::LLMPipeline pipe(models_path, device);
15 | 
16 |     ov::genai::GenerationConfig config;
17 |     config.max_new_tokens = 20;
18 |     config.num_beam_groups = 3;
19 |     config.num_beams = 15;
20 |     config.diversity_penalty = 1.0f;
21 |     config.num_return_sequences = config.num_beams;
22 | 
23 |     auto beams = pipe.generate(prompts, config);
24 |     std::cout << beams << '\n';
25 | } catch (const std::exception& error) {
26 |     try {
27 |         std::cerr << error.what() << '\n';
28 |     } catch (const std::ios_base::failure&) {}
29 |     return EXIT_FAILURE;
30 | } catch (...) {
31 |     try {
32 |         std::cerr << "Non-exception object thrown\n";
33 |     } catch (const std::ios_base::failure&) {}
34 |     return EXIT_FAILURE;
35 | }
36 | 


--------------------------------------------------------------------------------
/samples/cpp/text_generation/greedy_causal_lm.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "openvino/genai/llm_pipeline.hpp"
 5 | 
 6 | int main(int argc, char* argv[]) try {
 7 |     if (3 > argc)
 8 |         throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> \"<PROMPT>\"");
 9 | 
10 |     std::string models_path = argv[1];
11 |     std::string prompt = argv[2];
12 |     std::string device = "CPU";  // GPU can be used as well
13 | 
14 |     ov::genai::LLMPipeline pipe(models_path, device);
15 |     ov::genai::GenerationConfig config;
16 |     config.max_new_tokens = 100;
17 |     std::string result = pipe.generate(prompt, config);
18 |     std::cout << result << std::endl;
19 | } catch (const std::exception& error) {
20 |     try {
21 |         std::cerr << error.what() << '\n';
22 |     } catch (const std::ios_base::failure&) {}
23 |     return EXIT_FAILURE;
24 | } catch (...) {
25 |     try {
26 |         std::cerr << "Non-exception object thrown\n";
27 |     } catch (const std::ios_base::failure&) {}
28 |     return EXIT_FAILURE;
29 | }
30 | 


--------------------------------------------------------------------------------
/samples/cpp/text_generation/read_prompt_from_file.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include <iostream>
 5 | #include <fstream>
 6 | #include "read_prompt_from_file.h"
 7 | 
 8 | std::string utils::read_prompt(const std::string& file_path) {
 9 |     std::ifstream file(file_path);
10 |     if (file.is_open()) {
11 |         std::stringstream buffer;
12 |         buffer << file.rdbuf();
13 |         return buffer.str();
14 |     } else {
15 |         std::stringstream error_message;
16 |         error_message << "Error opening prompt file: '" << file_path << "'";
17 |         throw std::runtime_error{error_message.str()};
18 |     }
19 | }


--------------------------------------------------------------------------------
/samples/cpp/text_generation/read_prompt_from_file.h:
--------------------------------------------------------------------------------
 1 | 
 2 | // Copyright (C) 2023-2025 Intel Corporation
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <sstream>
 8 | 
 9 | namespace utils {
10 | std::string read_prompt(const std::string& file_path);
11 | }


--------------------------------------------------------------------------------
/samples/cpp/visual_language_chat/load_image.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | // Copyright (C) 2023-2025 Intel Corporation
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <openvino/runtime/tensor.hpp>
 8 | #include <filesystem>
 9 | 
10 | namespace utils {
11 | ov::Tensor load_image(const std::filesystem::path& image_path);
12 | std::vector<ov::Tensor> load_images(const std::filesystem::path& image_path);
13 | }
14 | 


--------------------------------------------------------------------------------
/samples/cpp/whisper_speech_recognition/audio_utils.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "openvino/genai/whisper_pipeline.hpp"
 7 | 
 8 | namespace utils {
 9 | namespace audio {
10 | ov::genai::RawSpeechInput read_wav(const std::string& filename);
11 | }  // namespace audio
12 | }  // namespace utils
13 | 
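`read_wav` produces the `RawSpeechInput` consumed by the Whisper pipeline. A minimal transcription sketch, assuming a `WhisperPipeline` constructed from a models directory and device name whose `generate()` accepts the raw speech directly (paths below are placeholders, and generation options are omitted):

```cpp
// Hedged sketch; "whisper-base" and "sample.wav" are placeholder paths.
#include <iostream>

#include "audio_utils.hpp"
#include "openvino/genai/whisper_pipeline.hpp"

int main() {
    ov::genai::WhisperPipeline pipeline("whisper-base", "CPU");
    ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav("sample.wav");
    auto result = pipeline.generate(raw_speech);
    std::cout << result << '\n';
    return 0;
}
```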


--------------------------------------------------------------------------------
/samples/deployment-requirements.txt:
--------------------------------------------------------------------------------
1 | --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
2 | openvino_genai~=2026.0.0.0.dev
3 | librosa==0.11.0  # For Whisper
4 | pillow==12.0.0  # Image processing for VLMs
5 | json5==0.12.1  # For ReAct
6 | pydantic==2.12.4  # For Structured output json schema
7 | 


--------------------------------------------------------------------------------
/samples/export-requirements.txt:
--------------------------------------------------------------------------------
 1 | --extra-index-url https://download.pytorch.org/whl/cpu
 2 | --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
 3 | openvino-tokenizers[transformers]~=2026.0.0.0.dev
 4 | optimum-intel[nncf]==1.26.0
 5 | numpy==1.26.4; platform_system == "Darwin" and platform_machine == "x86_64"
 6 | safetensors==0.6.2; platform_system == "Darwin" and platform_machine == "x86_64"
 7 | einops==0.8.1  # For Qwen
 8 | transformers_stream_generator==0.0.5  # For Qwen
 9 | diffusers==0.35.2 # For image generation pipelines
10 | timm==1.0.22  # For exporting InternVL2
11 | # torchvision for visual language models
12 | torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64"
13 | torchvision==0.23.0; platform_system != "Darwin" or platform_machine != "x86_64"
14 | transformers==4.55.4 # For Whisper
15 | hf_transfer==0.1.9  # for faster model downloads; should be used with env var HF_HUB_ENABLE_HF_TRANSFER=1
16 | backoff==2.2.1  # for microsoft/Phi-3.5-vision-instruct
17 | peft==0.17.1  # For microsoft/Phi-4-multimodal-instruct
18 | 


--------------------------------------------------------------------------------
/samples/generation.gif:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:8b3ea717def68df6493c629551b80e74f58d03be02d837e6a16541b3d95787df
3 | size 5550657
4 | 


--------------------------------------------------------------------------------
/samples/js/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | 


--------------------------------------------------------------------------------
/samples/js/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "openvino-genai-node-demo",
 3 |   "version": "1.0.0",
 4 |   "license": "Apache-2.0",
 5 |   "type": "module",
 6 |   "devDependencies": {
 7 |     "openvino-genai-node": "^2025.4.0",
 8 |     "yargs": "^18.0.0",
 9 |     "zod": "^4.1.12"
10 |   },
11 |   "engines": {
12 |     "node": ">=21.0.0"
13 |   },
14 |   "scripts": {
15 |     "test": "node tests/usage.test.js"
16 |   }
17 | }
18 | 


--------------------------------------------------------------------------------
/samples/js/rag/text_embeddings.js:
--------------------------------------------------------------------------------
 1 | import { TextEmbeddingPipeline, PoolingType } from 'openvino-genai-node';
 2 | import { basename } from 'node:path';
 3 | 
 4 | main();
 5 | 
 6 | async function main() {
 7 |     const modelPath = process.argv[2];
 8 |     const texts = process.argv.slice(3);
 9 | 
10 |     const usageCommand = `Usage: node ${basename(process.argv[1])} <MODEL_DIR> '<TEXT 1>' ['<TEXT 2>' ...]`;
11 |     if (!modelPath) {
12 |         console.error('Please specify path to model directory');
13 |         console.error(usageCommand);
14 |         process.exit(1);
15 |     }
16 |     if (!texts.length) {
17 |         console.error('Please specify prompt');
18 |         console.error(usageCommand);
19 |         process.exit(1);
20 |     }
21 | 
22 |     const device = 'CPU'; // GPU can be used as well
23 |     const config = {
24 |         'pooling_type': PoolingType.MEAN
25 |     };
26 | 
27 |     const pipeline = await TextEmbeddingPipeline(modelPath, device, config);
28 | 
29 |     await pipeline.embedDocuments(texts);
30 | }
31 | 


--------------------------------------------------------------------------------
/samples/js/text_generation/beam_search_causal_lm.js:
--------------------------------------------------------------------------------
 1 | import { LLMPipeline } from 'openvino-genai-node';
 2 | import { basename } from 'node:path';
 3 | 
 4 | main();
 5 | 
 6 | async function main() {
 7 |     const modelPath = process.argv[2];
 8 |     const prompts = process.argv.slice(3);
 9 |     
10 |     if (!modelPath) {
11 |         console.error('Please specify path to model directory\n'
12 |                     + `Run command must be: 'node ${basename(process.argv[1])} *path_to_model_dir* *prompts*'`);
13 |         process.exit(1);
14 |     }
15 |     if (!prompts.length) {
16 |         console.error('Please specify prompts\n'
17 |                       + `Run command must be: 'node ${basename(process.argv[1])} *path_to_model_dir* *prompts*'`);
18 |         process.exit(1);
19 |     }
20 | 
21 |     const device = 'CPU'; // GPU can be used as well
22 |     const pipe = await LLMPipeline(modelPath, device);
23 | 
24 |     const numBeams = 15;
25 |     const config = {
26 |         'max_new_tokens': 20,
27 |         'num_beam_groups': 3,
28 |         'num_beams': numBeams,
29 |         'diversity_penalty': 1,
30 |         'num_return_sequences': numBeams,
31 |         'return_decoded_results': true,
32 | 
33 |     };
34 |     const beams = await pipe.generate(prompts, config);
35 |     console.log(beams.toString());
36 | }
37 | 


--------------------------------------------------------------------------------
/samples/js/text_generation/greedy_causal_lm.js:
--------------------------------------------------------------------------------
 1 | import { LLMPipeline } from 'openvino-genai-node';
 2 | import { basename } from 'node:path';
 3 | 
 4 | main();
 5 | 
 6 | async function main() {
 7 |     const modelPath = process.argv[2];
 8 |     const prompt = process.argv[3];
 9 |     
10 |     if (process.argv.length > 4) {
11 |         console.error(`Run command must be: 'node ${basename(process.argv[1])} *path_to_model_dir* *prompt*'`);
12 |         process.exit(1);
13 |     }
14 |     if (!modelPath) {
15 |         console.error('Please specify path to model directory\n'
16 |                     + `Run command must be: 'node ${basename(process.argv[1])} *path_to_model_dir* *prompt*'`);
17 |         process.exit(1);
18 |     }
19 |     if (!prompt) {
20 |         console.error('Please specify prompt\n'
21 |                       + `Run command must be: 'node ${basename(process.argv[1])} *path_to_model_dir* *prompt*'`);
22 |         process.exit(1);
23 |     }
24 | 
25 |     const device = 'CPU'; // GPU can be used as well
26 |     const pipe = await LLMPipeline(modelPath, device);
27 | 
28 |     const config = {
29 |         'max_new_tokens': 100,
30 |         'return_decoded_results': true,
31 |     };
32 |     const result = await pipe.generate(prompt, config);
33 | 
34 |     console.log(result.toString());
35 | }


--------------------------------------------------------------------------------
/samples/js/text_generation/helper.js:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | import { z } from 'zod';
 5 | 
 6 | /** Serialize a JavaScript object to a JSON string
 7 |  * with specific formatting to align with Python. */
 8 | export function serialize_json(object) {
 9 |     return JSON.stringify(object)
10 |         // Add a space after every colon or comma not already followed by a space
11 |         .replace(/(:|,)(?! )/g, '$1 ');
12 | }
13 | 
14 | /** Convert a Zod schema to a JSON Schema
15 |  * with specific formatting to align with Python */
16 | export function toJSONSchema(zodSchema, params) {
17 |     const jsonSchema = z.toJSONSchema(
18 |         zodSchema,
19 |         {
20 |             override: (ctx) => {
21 |                 if (params && params.override) {
22 |                     params.override(ctx);
23 |                 }
24 |                 const keys = Object.keys(ctx.jsonSchema).sort();
25 |                 for (const key of keys) {
26 |                     const value = ctx.jsonSchema[key];
27 |                     delete ctx.jsonSchema[key];
28 |                     ctx.jsonSchema[key] = value;
29 |                 }
30 |             }
31 |         });
32 |     delete jsonSchema.$schema;
33 |     delete jsonSchema.additionalProperties;
34 |     return jsonSchema;
35 | }


--------------------------------------------------------------------------------
/samples/python/image_generation/image2image.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (C) 2024 Intel Corporation
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import argparse
 6 | import openvino
 7 | import openvino_genai
 8 | import numpy as np
 9 | 
10 | from PIL import Image
11 | 
12 | def read_image(path: str) -> openvino.Tensor:
13 |     pic = Image.open(path).convert("RGB")
14 |     image_data = np.array(pic)[None]
15 |     return openvino.Tensor(image_data)
16 | 
17 | def main():
18 |     parser = argparse.ArgumentParser()
19 |     parser.add_argument('model_dir')
20 |     parser.add_argument('prompt')
21 |     parser.add_argument('image')
22 |     args = parser.parse_args()
23 | 
24 |     device = 'CPU'  # GPU can be used as well
25 |     pipe = openvino_genai.Image2ImagePipeline(args.model_dir, device)
26 | 
27 |     image = read_image(args.image)
28 | 
29 |     image_tensor = pipe.generate(args.prompt, image,
30 |         strength=0.8  # controls how much the initial image is noised after being converted to latent space; `1` means the initial image is fully noised
31 |     )
32 | 
33 |     image = Image.fromarray(image_tensor.data[0])
34 |     image.save("image.bmp")
35 | 
36 | 
37 | if '__main__' == __name__:
38 |     main()
39 | 


--------------------------------------------------------------------------------
/samples/python/image_generation/inpainting.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (C) 2024 Intel Corporation
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import argparse
 6 | import openvino
 7 | import openvino_genai
 8 | import numpy as np
 9 | 
10 | from PIL import Image
11 | 
12 | def read_image(path: str) -> openvino.Tensor:
13 |     pic = Image.open(path).convert("RGB")
14 |     image_data = np.array(pic)[None]
15 |     return openvino.Tensor(image_data)
16 | 
17 | def main():
18 |     parser = argparse.ArgumentParser()
19 |     parser.add_argument('model_dir')
20 |     parser.add_argument('prompt')
21 |     parser.add_argument('image')
22 |     parser.add_argument('mask')
23 |     args = parser.parse_args()
24 | 
25 |     device = 'CPU'  # GPU can be used as well
26 |     pipe = openvino_genai.InpaintingPipeline(args.model_dir, device)
27 | 
28 |     image = read_image(args.image)
29 |     mask_image = read_image(args.mask)
30 | 
31 |     image_tensor = pipe.generate(args.prompt, image, mask_image)
32 | 
33 |     image = Image.fromarray(image_tensor.data[0])
34 |     image.save("image.bmp")
35 | 
36 | 
37 | if '__main__' == __name__:
38 |     main()
39 | 


--------------------------------------------------------------------------------
/samples/python/image_generation/text2image.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (C) 2024 Intel Corporation
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import argparse
 6 | 
 7 | import openvino_genai
 8 | from PIL import Image
 9 | 
10 | 
11 | def main():
12 |     parser = argparse.ArgumentParser()
13 |     parser.add_argument('model_dir')
14 |     parser.add_argument('prompt')
15 |     args = parser.parse_args()
16 | 
17 |     device = 'CPU'  # GPU can be used as well
18 |     pipe = openvino_genai.Text2ImagePipeline(args.model_dir, device)
19 | 
20 |     image_tensor = pipe.generate(
21 |         args.prompt,
22 |         width=512,
23 |         height=512,
24 |         num_inference_steps=20,
25 |         num_images_per_prompt=1)
26 | 
27 |     image = Image.fromarray(image_tensor.data[0])
28 |     image.save("image.bmp")
29 | 
30 | 
31 | if '__main__' == __name__:
32 |     main()


--------------------------------------------------------------------------------
/samples/python/rag/text_embeddings.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (C) 2025 Intel Corporation
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import argparse
 6 | import openvino_genai
 7 | 
 8 | 
 9 | def main():
10 |     parser = argparse.ArgumentParser()
11 |     parser.add_argument("model_dir")
12 |     parser.add_argument("texts", nargs="+")
13 |     args = parser.parse_args()
14 | 
15 |     device = "CPU"  # GPU can be used as well
16 | 
17 |     config = openvino_genai.TextEmbeddingPipeline.Config()
18 |     config.pooling_type = openvino_genai.TextEmbeddingPipeline.PoolingType.MEAN
19 | 
20 |     pipeline = openvino_genai.TextEmbeddingPipeline(args.model_dir, device, config)
21 | 
22 |     text_embeddings = pipeline.embed_documents(args.texts)
23 |     query_embeddings = pipeline.embed_query("What is the capital of France?")
24 | 
25 | 
26 | if "__main__" == __name__:
27 |     main()
28 | 


--------------------------------------------------------------------------------
/samples/python/rag/text_rerank.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (C) 2025 Intel Corporation
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import argparse
 6 | import openvino_genai
 7 | 
 8 | 
 9 | def main():
10 |     parser = argparse.ArgumentParser()
11 |     parser.add_argument("model_dir")
12 |     parser.add_argument("query")
13 |     parser.add_argument("texts", nargs="+")
14 |     args = parser.parse_args()
15 | 
16 |     device = "CPU"  # GPU can be used as well
17 | 
18 |     config = openvino_genai.TextRerankPipeline.Config()
19 |     config.top_n = 3
20 | 
21 |     pipeline = openvino_genai.TextRerankPipeline(args.model_dir, device, config)
22 | 
23 |     rerank_result = pipeline.rerank(args.query, args.texts)
24 | 
25 |     print("Reranked documents:")
26 |     for index, score in rerank_result:
27 |         print(f"Document {index} (score: {score:.4f}): {args.texts[index]}")
28 | 
29 | 
30 | if __name__ == "__main__":
31 |     main()
32 | 


--------------------------------------------------------------------------------
/samples/python/text_generation/beam_search_causal_lm.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (C) 2024 Intel Corporation
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import argparse
 6 | import openvino_genai
 7 | 
 8 | 
 9 | def main():
10 |     parser = argparse.ArgumentParser()
11 |     parser.add_argument('model_dir')
12 |     parser.add_argument('prompts', nargs='+')
13 |     args = parser.parse_args()
14 | 
15 |     device = 'CPU'  # GPU can be used as well
16 |     pipe = openvino_genai.LLMPipeline(args.model_dir, device)
17 | 
18 |     config = openvino_genai.GenerationConfig()
19 |     config.max_new_tokens = 20
20 |     config.num_beam_groups = 3
21 |     config.num_beams = 15
22 |     config.diversity_penalty = 1
23 |     config.num_return_sequences = config.num_beams
24 | 
25 |     beams = pipe.generate(args.prompts, config)
26 |     print(beams)
27 | 
28 | 
29 | if '__main__' == __name__:
30 |     main()
31 | 


--------------------------------------------------------------------------------
/samples/python/text_generation/greedy_causal_lm.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (C) 2024 Intel Corporation
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import argparse
 6 | import openvino_genai
 7 | 
 8 | 
 9 | def main():
10 |     parser = argparse.ArgumentParser()
11 |     parser.add_argument('model_dir')
12 |     parser.add_argument('prompt')
13 |     args = parser.parse_args()
14 | 
15 |     device = 'CPU'  # GPU can be used as well
16 |     pipe = openvino_genai.LLMPipeline(args.model_dir, device)
17 | 
18 |     config = openvino_genai.GenerationConfig()
19 |     config.max_new_tokens = 100
20 | 
21 |     print(pipe.generate(args.prompt, config))
22 | 
23 | 
24 | if '__main__' == __name__:
25 |     main()
26 | 


--------------------------------------------------------------------------------
/samples/python/text_generation/lora_greedy_causal_lm.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (C) 2024 Intel Corporation
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import argparse
 6 | import openvino_genai
 7 | 
 8 | 
 9 | def main():
10 |     parser = argparse.ArgumentParser()
11 |     parser.add_argument('models_path')
12 |     parser.add_argument('adapter_path')
13 |     parser.add_argument('prompt')
14 |     args = parser.parse_args()
15 | 
16 |     device = 'CPU'  # GPU can be used as well
17 |     adapter = openvino_genai.Adapter(args.adapter_path)
18 |     adapter_config = openvino_genai.AdapterConfig(adapter)
19 |     pipe = openvino_genai.LLMPipeline(args.models_path, device, adapters=adapter_config)  # register all required adapters here
20 | 
21 |     print("Generate with LoRA adapter and alpha set to 0.75:")
22 |     print(pipe.generate(args.prompt, max_new_tokens=100, adapters=openvino_genai.AdapterConfig(adapter, 0.75)))
23 | 
24 |     print("\n-----------------------------")
25 |     print("Generate without LoRA adapter:")
26 |     print(pipe.generate(args.prompt, max_new_tokens=100, adapters=openvino_genai.AdapterConfig()))
27 | 
28 | if '__main__' == __name__:
29 |     main()
30 | 


--------------------------------------------------------------------------------
/samples/python/text_generation/prompt_lookup_decoding_lm.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (C) 2024 Intel Corporation
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | import argparse
 6 | import openvino_genai
 7 | 
 8 | def streamer(subword):
 9 |     print(subword, end='', flush=True)
10 |     # The returned status indicates whether generation should continue or be stopped.
11 |     return openvino_genai.StreamingStatus.RUNNING
12 | 
13 | def main():
14 |     parser = argparse.ArgumentParser()
15 |     parser.add_argument('model_dir')
16 |     parser.add_argument('prompt')
17 |     args = parser.parse_args()
18 | 
19 |     device = 'CPU'
20 | 
21 |     pipe = openvino_genai.LLMPipeline(args.model_dir, device, prompt_lookup=True)
22 |     
23 |     config = openvino_genai.GenerationConfig()
24 |     config.max_new_tokens = 100
25 |     # Enable prompt lookup decoding by generating `num_assistant_tokens` candidate tokens per iteration
26 |     config.num_assistant_tokens = 5
27 |     # Maximum n-gram size used to match the prompt when searching for candidate tokens
28 |     config.max_ngram_size = 3
29 | 
30 |     # Since the streamer is set, the results will be printed 
31 |     # every time a new token is generated and put into the streamer queue.
32 |     pipe.generate(args.prompt, config, streamer)
33 |     print()
34 | 
35 | if '__main__' == __name__:
36 |     main()
37 | 


--------------------------------------------------------------------------------
/samples/requirements.txt:
--------------------------------------------------------------------------------
1 | -r ./deployment-requirements.txt
2 | -r ./export-requirements.txt
3 | pydantic


--------------------------------------------------------------------------------
/site/.editorconfig:
--------------------------------------------------------------------------------
 1 | # http://editorconfig.org
 2 | 
 3 | root = false
 4 | 
 5 | [*]
 6 | charset = utf-8
 7 | end_of_line = lf
 8 | insert_final_newline = true
 9 | indent_style = space
10 | indent_size = 2
11 | max_line_length = 100
12 | trim_trailing_whitespace = true
13 | 
14 | [*.md]
15 | insert_final_newline = true
16 | trim_trailing_whitespace = false
17 | 


--------------------------------------------------------------------------------
/site/.gitignore:
--------------------------------------------------------------------------------
 1 | # Dependencies
 2 | /node_modules
 3 | 
 4 | # Production
 5 | /build
 6 | 
 7 | # Generated files
 8 | .docusaurus
 9 | .cache-loader
10 | 
11 | # Misc
12 | .DS_Store
13 | .env.local
14 | .env.development.local
15 | .env.test.local
16 | .env.production.local
17 | 
18 | npm-debug.log*
19 | yarn-debug.log*
20 | yarn-error.log*
21 | 
22 | # Generated docs files for samples
23 | /docs/samples/*/
24 | !/docs/samples/_*/
25 | # !/docs/samples/index.mdx
26 | # !/docs/samples/_category_.json
27 | 


--------------------------------------------------------------------------------
/site/.prettierignore:
--------------------------------------------------------------------------------
 1 | dist
 2 | node_modules
 3 | .yarn
 4 | build
 5 | coverage
 6 | .docusaurus
 7 | .idea
 8 | 
 9 | .svg
10 | *.svg
11 | 
12 | *.mdx
13 | 


--------------------------------------------------------------------------------
/site/.prettierrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "printWidth": 100,
 3 |   "trailingComma": "es5",
 4 |   "useTabs": false,
 5 |   "tabWidth": 2,
 6 |   "semi": true,
 7 |   "bracketSpacing": true,
 8 |   "singleQuote": true,
 9 |   "arrowParens": "always"
10 | }
11 | 


--------------------------------------------------------------------------------
/site/README.md:
--------------------------------------------------------------------------------
 1 | # Website
 2 | 
 3 | This website is built using [Docusaurus](https://docusaurus.io/), a modern static website generator.
 4 | 
 5 | ### Installation
 6 | 
 7 | ```
 8 | $ npm i
 9 | ```
10 | 
11 | ### Local Development
12 | 
13 | ```
14 | $ npm run start
15 | ```
16 | 
17 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server.
18 | 
19 | ### Build
20 | 
21 | ```
22 | $ npm run build
23 | ```
24 | 
25 | This command generates static content into the `build` directory and can be served using any static contents hosting service.
26 | 
27 | ### Deployment
28 | 
29 | Using SSH:
30 | 
31 | ```
32 | $ USE_SSH=true npm run deploy
33 | ```
34 | 
35 | Not using SSH:
36 | 
37 | ```
38 | $ GIT_USER=<Your GitHub username> npm run deploy
39 | ```
40 | 
41 | If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch.
42 | 


--------------------------------------------------------------------------------
/site/docs/concepts/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Concepts",
3 |   "position": 6,
4 |   "link": {
5 |     "type": "generated-index",
6 |     "description": "Concepts to OpenVINO GenAI."
7 |   }
8 | }


--------------------------------------------------------------------------------
/site/docs/concepts/beam-search.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 3
3 | ---
4 | 
5 | # Beam Search
6 | 
7 | > **Note:** This page is a work in progress.
8 | 


--------------------------------------------------------------------------------
/site/docs/concepts/optimization-techniques/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Optimization techniques",
3 |   "position": 4,
4 |   "link": {
5 |     "type": "generated-index",
6 |     "description": "Optimization techniques to OpenVINO GenAI."
7 |   }
8 | }


--------------------------------------------------------------------------------
/site/docs/concepts/optimization-techniques/continuous-batching.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 3
3 | ---
4 | 
5 | # Continuous Batching
6 | 
7 | > **Note:** This page is a work in progress.
8 | 


--------------------------------------------------------------------------------
/site/docs/concepts/optimization-techniques/prefix-caching.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 4
3 | ---
4 | 
5 | # Prefix Caching
6 | 
7 | > **Note:** This page is a work in progress.
8 | 


--------------------------------------------------------------------------------
/site/docs/concepts/optimization-techniques/speculative-decoding.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 1
3 | ---
4 | 
5 | # Speculative Decoding
6 | 
7 | > **Note:** This page is a work in progress.
8 | 


--------------------------------------------------------------------------------
/site/docs/getting-started/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Getting Started",
3 |   "position": 1,
4 |   "link": {
5 |     "type": "generated-index",
6 |     "description": "Getting started guide for OpenVINO GenAI"
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/site/docs/guides/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Guides",
3 |   "position": 3,
4 |   "link": null
5 | }
6 | 


--------------------------------------------------------------------------------
/site/docs/guides/model-preparation/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Model Preparation",
3 |   "position": 1,
4 |   "link": {
5 |     "type": "generated-index",
6 |     "description": "Prepare generative models for inference with OpenVINO GenAI."
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/site/docs/guides/model-preparation/_use_cases_note.mdx:
--------------------------------------------------------------------------------
1 | :::info
2 | 
3 | Refer to the [Use Cases](/docs/category/use-cases) for detailed instructions on using models with OpenVINO GenAI.
4 | 
5 | :::
6 | 


--------------------------------------------------------------------------------
/site/docs/samples/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Samples",
3 |   "position": 5,
4 |   "link": {
5 |     "type": "doc",
6 |     "id": "samples/index"
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/site/docs/samples/_components/samples-list/index.tsx:
--------------------------------------------------------------------------------
 1 | import Link from '@docusaurus/Link';
 2 | import { usePluginData } from '@docusaurus/useGlobalData';
 3 | import { type GenAISamples } from '@site/src/plugins/genai-samples-docs-plugin';
 4 | import Heading from '@theme/Heading';
 5 | import React from 'react';
 6 | 
 7 | function SamplesListItem({
 8 |   item: { language, name, githubLink },
 9 | }: {
10 |   item: GenAISamples[string][number];
11 | }): React.JSX.Element {
12 |   return (
13 |     <li>
14 |       <Link href={`./${language}/${name}`}>{name}</Link> (<Link href={githubLink}>GitHub</Link>)
15 |     </li>
16 |   );
17 | }
18 | 
19 | export default function SamplesList(): React.JSX.Element {
20 |   const samplesMap = usePluginData('genai-samples-docs-plugin') as GenAISamples;
21 | 
22 |   return (
23 |     <>
24 |       {Object.entries(samplesMap)
25 |         .sort(([a], [b]) => a.localeCompare(b))
26 |         .map(([language, samples]) => (
27 |           <div key={language}>
28 |             <Heading as="h2">{samples[0]?.languageTitle}</Heading>
29 |             <ul>
30 |               {samples
31 |                 .sort((a, b) => a.name.localeCompare(b.name))
32 |                 .map((sample) => (
33 |                   <SamplesListItem key={`${language}-${sample.name}`} item={sample} />
34 |                 ))}
35 |             </ul>
36 |           </div>
37 |         ))}
38 |     </>
39 |   );
40 | }
41 | 


--------------------------------------------------------------------------------
/site/docs/samples/index.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 1
 3 | ---
 4 | 
 5 | import SamplesList from './_components/samples-list';
 6 | 
 7 | # OpenVINO GenAI Samples
 8 | 
 9 | <SamplesList />
10 | 


--------------------------------------------------------------------------------
/site/docs/supported-models/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Supported Models",
3 |   "position": 4,
4 |   "link": {
5 |     "type": "doc",
6 |     "id": "supported-models/index"
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/site/docs/supported-models/_components/base-models-table/index.tsx:
--------------------------------------------------------------------------------
 1 | import Link from '@docusaurus/Link';
 2 | import { Children } from 'react';
 3 | 
 4 | type BaseModelsTableProps = {
 5 |   headers: string[];
 6 |   rows: React.JSX.Element[];
 7 | };
 8 | 
 9 | export function BaseModelsTable({ headers, rows }: BaseModelsTableProps): React.JSX.Element {
10 |   return (
11 |     <table>
12 |       <thead>
13 |         <tr>
14 |           {headers.map((v) => (
15 |             <th key={v}>{v}</th>
16 |           ))}
17 |         </tr>
18 |       </thead>
19 |       <tbody style={{ verticalAlign: 'baseline' }}>{Children.map(rows, (row) => row)}</tbody>
20 |     </table>
21 |   );
22 | }
23 | 
24 | export const LinksCell = ({ links }: { links: string[] }) => (
25 |   <td>
26 |     <ul>
27 |       {links.map((link) => (
28 |         <li key={link}>
29 |           <Link href={link}>{new URL(link).pathname.slice(1)}</Link>
30 |         </li>
31 |       ))}
32 |     </ul>
33 |   </td>
34 | );
35 | 
36 | export const StatusCell = ({ value }: { value: boolean }) => (
37 |   <td style={{ textAlign: 'center' }}>{value ? '✅' : '❌'}</td>
38 | );
39 | 


--------------------------------------------------------------------------------
/site/docs/supported-models/_components/image-generation-models-table/index.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { BaseModelsTable, LinksCell, StatusCell } from '../base-models-table';
 3 | import { IMAGE_GENERATION_MODELS } from './models';
 4 | 
 5 | export default function ImageGenerationModelsTable(): React.JSX.Element {
 6 |   const headers = [
 7 |     'Architecture',
 8 |     'Text to Image',
 9 |     'Image to Image',
10 |     'Inpainting',
11 |     'LoRA Support',
12 |     'Example HuggingFace Models',
13 |   ];
14 | 
15 |   const rows = IMAGE_GENERATION_MODELS.map(
16 |     ({ architecture, textToImage, imageToImage, inpainting, loraSupport, links }) => (
17 |       <tr key={architecture}>
18 |         <td>
19 |           <code style={{ whiteSpace: 'pre' }}>{architecture}</code>
20 |         </td>
21 |         <StatusCell value={textToImage} />
22 |         <StatusCell value={imageToImage} />
23 |         <StatusCell value={inpainting} />
24 |         <StatusCell value={loraSupport} />
25 |         <LinksCell links={links} />
26 |       </tr>
27 |     )
28 |   );
29 | 
30 |   return <BaseModelsTable headers={headers} rows={rows} />;
31 | }
32 | 


--------------------------------------------------------------------------------
/site/docs/supported-models/_components/llm-models-table/index.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { BaseModelsTable, LinksCell } from '../base-models-table';
 3 | import { LLM_MODELS } from './models';
 4 | 
 5 | export default function LLMModelsTable(): React.JSX.Element {
 6 |   const headers = ['Architecture', 'Models', 'Example HuggingFace Models'];
 7 | 
 8 |   const rows = LLM_MODELS.map(({ architecture, models }) => (
 9 |     <>
10 |       <tr key={architecture}>
11 |         <td rowSpan={models.length}>
12 |           <code>{architecture}</code>
13 |         </td>
14 |         <td>{models[0].name}</td>
15 |         <LinksCell links={models[0].links} />
16 |       </tr>
17 |       {models.slice(1).map(({ name, links }) => (
18 |         <tr key={name}>
19 |           <td>{name}</td>
20 |           <LinksCell links={links} />
21 |         </tr>
22 |       ))}
23 |     </>
24 |   ));
25 | 
26 |   return <BaseModelsTable headers={headers} rows={rows} />;
27 | }
28 | 


--------------------------------------------------------------------------------
/site/docs/supported-models/_components/speech-generation-models-table/index.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { BaseModelsTable, LinksCell, StatusCell } from '../base-models-table';
 3 | import { SPEECH_GENERATION_MODELS } from './models';
 4 | 
 5 | export default function SpeechGenerationModelsTable(): React.JSX.Element {
 6 |   const headers = ['Architecture', 'Models', 'LoRA Support', 'Example HuggingFace Models'];
 7 | 
 8 |   const rows = SPEECH_GENERATION_MODELS.map(({ architecture, models }) => (
 9 |     <>
10 |       <tr key={architecture}>
11 |         <td rowSpan={models.length}>
12 |           <code>{architecture}</code>
13 |         </td>
14 |         <td>{models[0].name}</td>
15 |         <StatusCell value={models[0].loraSupport} />
16 |         <LinksCell links={models[0].links} />
17 |       </tr>
18 |       {models.slice(1).map(({ name, loraSupport, links }) => (
19 |         <tr key={name}>
20 |           <td>{name}</td>
21 |           <StatusCell value={loraSupport} />
22 |           <LinksCell links={links} />
23 |         </tr>
24 |       ))}
25 |     </>
26 |   ));
27 | 
28 |   return <BaseModelsTable headers={headers} rows={rows} />;
29 | }
30 | 


--------------------------------------------------------------------------------
/site/docs/supported-models/_components/speech-generation-models-table/models.ts:
--------------------------------------------------------------------------------
 1 | type SpeechGenerationModelType = {
 2 |   architecture: string;
 3 |   models: Array<{
 4 |     name: string;
 5 |     loraSupport: boolean;
 6 |     links: string[];
 7 |   }>;
 8 | };
 9 | 
10 | export const SPEECH_GENERATION_MODELS: SpeechGenerationModelType[] = [
11 |   {
12 |     architecture: 'SpeechT5ForTextToSpeech',
13 |     models: [
14 |       {
15 |         name: 'SpeechT5 TTS',
16 |         loraSupport: false,
17 |         links: ['https://huggingface.co/microsoft/speecht5_tts'],
18 |       },
19 |     ],
20 |   },
21 | ];
22 | 


--------------------------------------------------------------------------------
/site/docs/supported-models/_components/text-embeddings-models-table/index.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { BaseModelsTable, LinksCell } from '../base-models-table';
 3 | import { TEXT_EMBEDDINGS_MODELS } from './models';
 4 | 
 5 | export default function TextEmbeddingsModelsTable(): React.JSX.Element {
 6 |   const headers = ['Architecture', 'Example HuggingFace Models'];
 7 | 
 8 |   const rows = TEXT_EMBEDDINGS_MODELS.map(({ architecture, models }) => (
 9 |     <tr key={architecture}>
10 |       <td>
11 |         <code>{architecture}</code>
12 |       </td>
13 |       <LinksCell links={models[0].links} />
14 |     </tr>
15 |   ));
16 | 
17 |   return <BaseModelsTable headers={headers} rows={rows} />;
18 | }
19 | 


--------------------------------------------------------------------------------
/site/docs/supported-models/_components/text-rerank-models-table/index.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { TEXT_RERANK_MODELS } from './models';
 3 | import { BaseModelsTable, LinksCell } from '../base-models-table';
 4 | 
 5 | export default function TextRerankModelsTable(): React.JSX.Element {
 6 |   const headers = ['Architecture', '`optimum-cli` task', 'Example HuggingFace Models'];
 7 | 
 8 |   const rows = TEXT_RERANK_MODELS.map(({ architecture, optimumIntelTask, models }) => (
 9 |     <>
10 |       <tr key={architecture}>
11 |         <td rowSpan={models.length}>
12 |           <code>{architecture}</code>
13 |         </td>
14 |         <td rowSpan={models.length}>
15 |           <code>{optimumIntelTask}</code>
16 |         </td>
17 |         <LinksCell links={models[0].links} />
18 |       </tr>
19 |     </>
20 |   ));
21 | 
22 |   return <BaseModelsTable headers={headers} rows={rows} />;
23 | }
24 | 


--------------------------------------------------------------------------------
/site/docs/supported-models/_components/vlm-models-table/index.tsx:
--------------------------------------------------------------------------------
 1 | import Link from '@docusaurus/Link';
 2 | import React from 'react';
 3 | import { BaseModelsTable, LinksCell, StatusCell } from '../base-models-table';
 4 | import { VLM_MODELS } from './models';
 5 | 
 6 | export default function VLMModelsTable(): React.JSX.Element {
 7 |   const headers = ['Architecture', 'Models', 'LoRA Support', 'Example HuggingFace Models'];
 8 | 
 9 |   const rows = VLM_MODELS.map(({ architecture, models }) => (
10 |     <>
11 |       <tr key={architecture}>
12 |         <td rowSpan={models.length}>
13 |           <code>{architecture}</code>
14 |         </td>
15 |         <td>
16 |           {models[0].name}
17 |           {models[0].notesLink && (
18 |             <>
19 |               &nbsp;(<Link href={models[0].notesLink}>Notes</Link>)
20 |             </>
21 |           )}
22 |         </td>
23 |         <StatusCell value={models[0].loraSupport} />
24 |         <LinksCell links={models[0].links} />
25 |       </tr>
26 |       {models.slice(1).map(({ name, loraSupport, links }) => (
27 |         <tr key={name}>
28 |           <td>{name}</td>
29 |           <StatusCell value={loraSupport} />
30 |           <LinksCell links={links} />
31 |         </tr>
32 |       ))}
33 |     </>
34 |   ));
35 | 
36 |   return <BaseModelsTable headers={headers} rows={rows} />;
37 | }
38 | 


--------------------------------------------------------------------------------
/site/docs/supported-models/_components/whisper-models-table/index.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { BaseModelsTable, LinksCell, StatusCell } from '../base-models-table';
 3 | import { WHISPER_MODELS } from './models';
 4 | 
 5 | export default function WhisperModelsTable(): React.JSX.Element {
 6 |   const headers = ['Architecture', 'Models', 'LoRA Support', 'Example HuggingFace Models'];
 7 | 
 8 |   const rows = WHISPER_MODELS.map(({ architecture, models }) => (
 9 |     <>
10 |       <tr key={architecture}>
11 |         <td rowSpan={models.length}>
12 |           <code>{architecture}</code>
13 |         </td>
14 |         <td>{models[0].name}</td>
15 |         <StatusCell value={models[0].loraSupport} />
16 |         <LinksCell links={models[0].links} />
17 |       </tr>
18 |       {models.slice(1).map(({ name, loraSupport, links }) => (
19 |         <tr key={name}>
20 |           <td>{name}</td>
21 |           <StatusCell value={loraSupport} />
22 |           <LinksCell links={links} />
23 |         </tr>
24 |       ))}
25 |     </>
26 |   ));
27 | 
28 |   return <BaseModelsTable headers={headers} rows={rows} />;
29 | }
30 | 


--------------------------------------------------------------------------------
/site/docs/supported-models/_components/whisper-models-table/models.ts:
--------------------------------------------------------------------------------
 1 | type WhisperModelType = {
 2 |   architecture: string;
 3 |   models: Array<{
 4 |     name: string;
 5 |     loraSupport: boolean;
 6 |     links: string[];
 7 |   }>;
 8 | };
 9 | 
10 | export const WHISPER_MODELS: WhisperModelType[] = [
11 |   {
12 |     architecture: 'WhisperForConditionalGeneration',
13 |     models: [
14 |       {
15 |         name: 'Whisper',
16 |         loraSupport: false,
17 |         links: [
18 |           'https://huggingface.co/openai/whisper-tiny',
19 |           'https://huggingface.co/openai/whisper-tiny.en',
20 |           'https://huggingface.co/openai/whisper-base',
21 |           'https://huggingface.co/openai/whisper-base.en',
22 |           'https://huggingface.co/openai/whisper-small',
23 |           'https://huggingface.co/openai/whisper-small.en',
24 |           'https://huggingface.co/openai/whisper-medium',
25 |           'https://huggingface.co/openai/whisper-medium.en',
26 |           'https://huggingface.co/openai/whisper-large-v3',
27 |         ],
28 |       },
29 |       {
30 |         name: 'Distil-Whisper',
31 |         loraSupport: false,
32 |         links: [
33 |           'https://huggingface.co/distil-whisper/distil-small.en',
34 |           'https://huggingface.co/distil-whisper/distil-medium.en',
35 |           'https://huggingface.co/distil-whisper/distil-large-v3',
36 |         ],
37 |       },
38 |     ],
39 |   },
40 | ];
41 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "Use Cases",
3 |   "position": 2,
4 |   "link": {
5 |     "type": "generated-index",
6 |     "description": "OpenVINO GenAI provides support for following use cases"
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/_shared/_beam_search_generation.mdx:
--------------------------------------------------------------------------------
 1 | #### Optimizing Generation with Grouped Beam Search
 2 | 
 3 | Beam search helps explore multiple possible text completions simultaneously, often leading to higher quality outputs.
 4 | 
 5 | {/* Python and C++ code examples */}
 6 | {props.children}
 7 | 
 8 | :::info Understanding Beam Search Generation Parameters
 9 | 
10 | - `max_new_tokens`: The maximum number of tokens to generate, excluding the number of tokens in the prompt. `max_new_tokens` has priority over `max_length`.
11 | - `num_beams`: The number of beams for beam search. 1 disables beam search.
12 | - `num_beam_groups`: The number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
13 | - `diversity_penalty`: This value is subtracted from a beam's score if it generates the same token as any beam from another group at a particular time step.
14 | 
15 | For the full list of generation parameters, refer to the [Generation Config API](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.GenerationConfig.html#openvino-genai-generationconfig).
16 | 
17 | :::
18 | 
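19 | As a minimal illustration in Python (parameter values are arbitrary, and `model_path` and `prompt` are assumed to be defined as in the examples above), these options map directly onto the generation config:
20 | 
21 | ```python
22 | import openvino_genai as ov_genai
23 | 
24 | pipe = ov_genai.LLMPipeline(model_path, "CPU")
25 | 
26 | config = pipe.get_generation_config()
27 | config.max_new_tokens = 100
28 | config.num_beams = 4            # total number of beams
29 | config.num_beam_groups = 2      # split the beams into 2 groups for diversity
30 | config.diversity_penalty = 1.0  # penalize tokens repeated across groups
31 | 
32 | print(pipe.generate(prompt, config))
33 | ```
34 | 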


--------------------------------------------------------------------------------
/site/docs/use-cases/_shared/_chat_scenario.mdx:
--------------------------------------------------------------------------------
1 | ### Use OpenVINO GenAI in Chat Scenario
2 | 
3 | Refer to the [Chat Scenario](/docs/guides/chat-scenario) guide for more information on using OpenVINO GenAI in chat applications.
4 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/_shared/_convert_model.mdx:
--------------------------------------------------------------------------------
1 | ## Convert and Optimize Model
2 | 
3 | {/* optimum-cli export code examples */}
4 | {props.children}
5 | 
6 | :::info
7 | Refer to the [Model Preparation](/docs/category/model-preparation) guide for detailed instructions on how to download, convert and optimize models for OpenVINO GenAI.
8 | :::
9 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/_shared/_generation_configuration_workflow.mdx:
--------------------------------------------------------------------------------
1 | #### Generation Configuration Workflow
2 | 
3 | 1. Get the model default config with `get_generation_config()`
4 | 2. Modify parameters
5 | 3. Apply the updated config using one of the following methods:
6 |     - Use `set_generation_config(config)`
7 |     - Pass config directly to `generate()` (e.g. `generate(prompt, config)`)
8 |     - Specify options as inputs in the `generate()` method (e.g. `generate(prompt, max_new_tokens=100)`)
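9 | 
10 | A minimal Python sketch of this workflow, using `LLMPipeline` as an example (assuming `model_path` and `prompt` are already defined):
11 | 
12 | ```python
13 | import openvino_genai as ov_genai
14 | 
15 | pipe = ov_genai.LLMPipeline(model_path, "CPU")
16 | 
17 | # 1. Get the model default config
18 | config = pipe.get_generation_config()
19 | 
20 | # 2. Modify parameters
21 | config.max_new_tokens = 100
22 | 
23 | # 3. Apply the updated config
24 | pipe.set_generation_config(config)
25 | print(pipe.generate(prompt))
26 | 
27 | # ...or pass the config (or individual options) directly to generate()
28 | print(pipe.generate(prompt, config))
29 | print(pipe.generate(prompt, max_new_tokens=100))
30 | ```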
9 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/_shared/_streaming.mdx:
--------------------------------------------------------------------------------
1 | ### Streaming the Output
2 | 
3 | Refer to the [Streaming](/docs/guides/streaming) guide for more information on streaming the output with OpenVINO GenAI.
4 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_cpp.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="cpp" showLineNumbers>
 4 | {`#include "openvino/genai/image_generation/image2image_pipeline.hpp"
 5 | #include "load_image.hpp"
 6 | #include "imwrite.hpp"
 7 | 
 8 | int main(int argc, char* argv[]) {
 9 |     const std::string models_path = argv[1], prompt = argv[2], image_path = argv[3];
10 | 
11 |     ov::Tensor input_image = utils::load_image(image_path);
12 | 
13 |     ov::genai::Image2ImagePipeline pipe(models_path, "${props.device || 'CPU'}");
14 |     ov::Tensor generated_image = pipe.generate(prompt, input_image, ov::genai::strength(0.8f));
15 | 
16 |     imwrite("image.bmp", generated_image, true);
17 | }
18 | `}
19 | </CodeBlock>
20 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_python.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="python" showLineNumbers>
 4 | {`import openvino_genai as ov_genai
 5 | import openvino as ov
 6 | from PIL import Image
 7 | import numpy as np
 8 | 
 9 | def read_image(path: str) -> ov.Tensor:
10 |     pic = Image.open(path).convert("RGB")
11 |     image_data = np.array(pic)[None]
12 |     return ov.Tensor(image_data)
13 | 
14 | input_image_data = read_image("input_image.jpg")
15 | 
16 | pipe = ov_genai.Image2ImagePipeline(model_path, "${props.device || 'CPU'}")
17 | image_tensor = pipe.generate(prompt, image=input_image_data, strength=0.8)
18 | 
19 | image = Image.fromarray(image_tensor.data[0])
20 | image.save("image.bmp")
21 | `}
22 | </CodeBlock>
23 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_cpp.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="cpp" showLineNumbers>
 4 | {`#include "openvino/genai/image_generation/inpainting_pipeline.hpp"
 5 | #include "load_image.hpp"
 6 | #include "imwrite.hpp"
 7 | 
 8 | int main(int argc, char* argv[]) {
 9 |     const std::string models_path = argv[1], prompt = argv[2];
10 | 
11 |     ov::Tensor input_image = utils::load_image(argv[3]);
12 |     ov::Tensor mask_image = utils::load_image(argv[4]);
13 | 
14 |     ov::genai::InpaintingPipeline pipe(models_path, "${props.device || 'CPU'}");
15 |     ov::Tensor generated_image = pipe.generate(prompt, input_image, mask_image);
16 | 
17 |     imwrite("image.bmp", generated_image, true);
18 | }
19 | `}
20 | </CodeBlock>
21 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_python.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="python" showLineNumbers>
 4 | {`import openvino_genai as ov_genai
 5 | import openvino as ov
 6 | from PIL import Image
 7 | import numpy as np
 8 | 
 9 | def read_image(path: str) -> ov.Tensor:
10 |     pic = Image.open(path).convert("RGB")
11 |     image_data = np.array(pic)[None]
12 |     return ov.Tensor(image_data)
13 | 
14 | input_image_data = read_image("input_image.jpg")
15 | mask_image = read_image("mask.jpg")
16 | 
17 | pipe = ov_genai.InpaintingPipeline(model_path, "${props.device || 'CPU'}")
18 | image_tensor = pipe.generate(prompt, image=input_image_data, mask_image=mask_image)
19 | 
20 | image = Image.fromarray(image_tensor.data[0])
21 | image.save("image.bmp")
22 | `}
23 | </CodeBlock>
24 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_cpp.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="cpp" showLineNumbers>
 4 | {`#include "openvino/genai/image_generation/text2image_pipeline.hpp"
 5 | #include "imwrite.hpp"
 6 | 
 7 | int main(int argc, char* argv[]) {
 8 |     const std::string models_path = argv[1], prompt = argv[2];
 9 | 
10 |     ov::genai::Text2ImagePipeline pipe(models_path, "${props.device || 'CPU'}");
11 |     ov::Tensor image = pipe.generate(prompt);
12 | 
13 |     imwrite("image.bmp", image, true);
14 | }
15 | `}
16 | </CodeBlock>
17 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_python.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="python" showLineNumbers>
 4 | {`import openvino_genai as ov_genai
 5 | from PIL import Image
 6 | 
 7 | pipe = ov_genai.Text2ImagePipeline(model_path, "${props.device || 'CPU'}")
 8 | image_tensor = pipe.generate(prompt)
 9 | 
10 | image = Image.fromarray(image_tensor.data[0])
11 | image.save("image.bmp")
12 | `}
13 | </CodeBlock>
14 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/index.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 2
 3 | ---
 4 | import OptimumCLI from '@site/src/components/OptimumCLI';
 5 | import ConvertModelSection from '../_shared/_convert_model.mdx';
 6 | import RunModelSection from './_sections/_run_model/index.mdx';
 7 | import UsageOptionsSection from './_sections/_usage_options/index.mdx';
 8 | 
 9 | # Image Generation Using Diffusers
10 | 
11 | <ConvertModelSection>
12 |     Download and convert a model (e.g. [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)) to OpenVINO format from Hugging Face:
13 | 
14 |     <OptimumCLI model='stabilityai/stable-diffusion-xl-base-1.0' outputDir='stable_diffusion_xl_base_1_0_ov' weightFormat='int4' trustRemoteCode />
15 | 
16 |     See all supported [Image Generation Models](/docs/supported-models/#image-generation-models).
17 | </ConvertModelSection>
18 | 
19 | <RunModelSection />
20 | 
21 | <UsageOptionsSection />
22 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="cpp" showLineNumbers>
 4 | {`#include "openvino/genai/visual_language/pipeline.hpp"
 5 | #include "load_image.hpp"
 6 | #include <iostream>
 7 | 
 8 | int main(int argc, char* argv[]) {
 9 |     std::string models_path = argv[1], images_path = argv[2], prompt = "Describe the images";
10 |     std::vector<ov::Tensor> images = utils::load_images(images_path);
11 | 
12 |     ov::genai::VLMPipeline pipe(models_path, "${props.device || 'CPU'}");
13 |     ov::genai::VLMDecodedResults result = pipe.generate(
14 |         prompt,
15 |         ov::genai::images(images),
16 |         ov::genai::max_new_tokens(100)
17 |     );
18 |     std::cout << result.texts[0] << std::endl;
19 | }
20 | `}
21 | </CodeBlock>
22 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="python" showLineNumbers>
 4 | {`import openvino_genai as ov_genai
 5 | import openvino as ov
 6 | from PIL import Image
 7 | import numpy as np
 8 | from pathlib import Path
 9 | 
10 | def read_image(path: str) -> ov.Tensor:
11 |     pic = Image.open(path).convert("RGB")
12 |     image_data = np.array(pic)[None]
13 |     return ov.Tensor(image_data)
14 | 
15 | def read_images(path: str) -> list[ov.Tensor]:
16 |     entry = Path(path)
17 |     if entry.is_dir():
18 |         return [read_image(str(file)) for file in sorted(entry.iterdir())]
19 |     return [read_image(path)]
20 | 
21 | images = read_images("./images")
22 | 
23 | pipe = ov_genai.VLMPipeline(model_path, "${props.device || 'CPU'}")
24 | result = pipe.generate(prompt, images=images, max_new_tokens=100)
25 | print(result.texts[0])
26 | `}
27 | </CodeBlock>
28 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx:
--------------------------------------------------------------------------------
 1 | import CodeExampleCPP from './_code_example_cpp.mdx';
 2 | import CodeExamplePython from './_code_example_python.mdx';
 3 | 
 4 | ## Run Model Using OpenVINO GenAI
 5 | 
 6 | OpenVINO GenAI introduces the [`VLMPipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.VLMPipeline.html) pipeline for inference of multimodal text-generation Vision Language Models (VLMs).
 7 | It generates text from a text prompt and one or more input images.
 8 | 
 9 | <LanguageTabs>
10 |     <TabItemPython>
11 |         <Tabs groupId="device">
12 |             <TabItem label="CPU" value="cpu">
13 |                 <CodeExamplePython device="CPU" />
14 |             </TabItem>
15 |             <TabItem label="GPU" value="gpu">
16 |                 <CodeExamplePython device="GPU" />
17 |             </TabItem>
18 |         </Tabs>
19 |     </TabItemPython>
20 |     <TabItemCpp>
21 |         <Tabs groupId="device">
22 |             <TabItem label="CPU" value="cpu">
23 |                 <CodeExampleCPP device="CPU" />
24 |             </TabItem>
25 |             <TabItem label="GPU" value="gpu">
26 |                 <CodeExampleCPP device="GPU" />
27 |             </TabItem>
28 |         </Tabs>
29 |     </TabItemCpp>
30 | </LanguageTabs>
31 | 
32 | :::tip
33 | 
34 | Use CPU or GPU as the device without any other code changes.
35 | 
36 | :::
37 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/image-processing/index.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 4
 3 | ---
 4 | import OptimumCLI from '@site/src/components/OptimumCLI';
 5 | import ConvertModelSection from '../_shared/_convert_model.mdx';
 6 | import RunModelSection from './_sections/_run_model/index.mdx';
 7 | import UsageOptionsSection from './_sections/_usage_options/index.mdx';
 8 | 
 9 | # Image Processing Using VLMs
10 | 
11 | <ConvertModelSection>
12 |     Download and convert a model (e.g. [openbmb/MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6)) to OpenVINO format from Hugging Face:
13 | 
14 |     <OptimumCLI model='openbmb/MiniCPM-V-2_6' outputDir='MiniCPM_V_2_6_ov' weightFormat='int4' trustRemoteCode />
15 | 
16 |     See all supported [Visual Language Models](/docs/supported-models/#visual-language-models-vlms).
17 | </ConvertModelSection>
18 | 
19 | <RunModelSection />
20 | 
21 | <UsageOptionsSection />
22 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/speech-recognition/_sections/_run_model/_code_example_cpp.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="cpp" showLineNumbers>
 4 | {`#include "openvino/genai/whisper_pipeline.hpp"
 5 | #include "audio_utils.hpp"
 6 | #include <iostream>
 7 | 
 8 | int main(int argc, char* argv[]) {
 9 |     std::filesystem::path models_path = argv[1];
10 |     std::string wav_file_path = argv[2];
11 | 
12 |     ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path);
13 | 
14 |     ov::genai::WhisperPipeline pipe(models_path, "${props.device || 'CPU'}");
15 |     auto result = pipe.generate(raw_speech, ov::genai::max_new_tokens(100));
16 |     std::cout << result << std::endl;
17 | }
18 | `}
19 | </CodeBlock>
20 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/speech-recognition/_sections/_run_model/_code_example_python.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="python" showLineNumbers>
 4 | {`import openvino_genai as ov_genai
 5 | import librosa
 6 | 
 7 | def read_wav(filepath):
 8 |     raw_speech, samplerate = librosa.load(filepath, sr=16000)
 9 |     return raw_speech.tolist()
10 | 
11 | raw_speech = read_wav('sample.wav')
12 | 
13 | pipe = ov_genai.WhisperPipeline(model_path, "${props.device || 'CPU'}")
14 | result = pipe.generate(raw_speech, max_new_tokens=100)
15 | print(result)
16 | `}
17 | </CodeBlock>
18 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/speech-recognition/index.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 3
 3 | ---
 4 | import OptimumCLI from '@site/src/components/OptimumCLI';
 5 | import ConvertModelSection from '../_shared/_convert_model.mdx';
 6 | import RunModelSection from './_sections/_run_model/index.mdx';
 7 | import UsageOptionsSection from './_sections/_usage_options/index.mdx';
 8 | 
 9 | # Speech Recognition Using Whisper
10 | 
11 | <ConvertModelSection>
12 |     Download and convert a model (e.g. [openai/whisper-base](https://huggingface.co/openai/whisper-base)) to OpenVINO format from Hugging Face:
13 | 
14 |     <OptimumCLI model='openai/whisper-base' outputDir='whisper_ov' trustRemoteCode />
15 | 
16 |     See all supported [Speech Recognition Models](/docs/supported-models/#speech-recognition-models-whisper-based).
17 | </ConvertModelSection>
18 | 
19 | <RunModelSection />
20 | 
21 | <UsageOptionsSection />
22 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/text-embedding/_sections/_run_model/_code_example_cpp.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="cpp" showLineNumbers>
 4 | {`#include "openvino/genai/rag/text_embedding_pipeline.hpp"
 5 | 
 6 | int main(int argc, char* argv[]) {
 7 |     auto documents = std::vector<std::string>(argv + 2, argv + argc);
 8 |     std::string models_path = argv[1];
 9 | 
10 |     ov::genai::TextEmbeddingPipeline pipeline(
11 |         models_path,
12 |         "${props.device || 'CPU'}",
13 |         ov::genai::pooling_type(ov::genai::TextEmbeddingPipeline::PoolingType::MEAN),
14 |         ov::genai::normalize(true)
15 |     );
16 | 
17 |     ov::genai::EmbeddingResults documents_embeddings = pipeline.embed_documents(documents);
18 |     ov::genai::EmbeddingResult query_embedding = pipeline.embed_query("What is the capital of France?");
19 | }
20 | `}
21 | </CodeBlock>
22 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/text-embedding/_sections/_run_model/_code_example_python.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="python" showLineNumbers>
 4 | {`import openvino_genai as ov_genai
 5 | 
 6 | pipeline = ov_genai.TextEmbeddingPipeline(
 7 |     models_path,
 8 |     "${props.device || 'CPU'}",
 9 |     pooling_type = ov_genai.TextEmbeddingPipeline.PoolingType.MEAN,
10 |     normalize = True
11 | )
12 | 
13 | documents_embeddings = pipeline.embed_documents(documents)
14 | query_embeddings = pipeline.embed_query("What is the capital of France?")
15 | `}
16 | </CodeBlock>
17 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/text-embedding/_sections/_run_model/index.mdx:
--------------------------------------------------------------------------------
 1 | import CodeExampleCPP from './_code_example_cpp.mdx';
 2 | import CodeExamplePython from './_code_example_python.mdx';
 3 | 
 4 | ## Run Model Using OpenVINO GenAI
 5 | 
 6 | [`TextEmbeddingPipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.TextEmbeddingPipeline.html) generates vector representations for text using embedding models.
 7 | 
 8 | <LanguageTabs>
 9 |     <TabItemPython>
10 |         <Tabs groupId="device">
11 |             <TabItem label="CPU" value="cpu">
12 |                 <CodeExamplePython device="CPU" />
13 |             </TabItem>
14 |             <TabItem label="GPU" value="gpu">
15 |                 <CodeExamplePython device="GPU" />
16 |             </TabItem>
17 |         </Tabs>
18 |     </TabItemPython>
19 |     <TabItemCpp>
20 |         <Tabs groupId="device">
21 |             <TabItem label="CPU" value="cpu">
22 |                 <CodeExampleCPP device="CPU" />
23 |             </TabItem>
24 |             <TabItem label="GPU" value="gpu">
25 |                 <CodeExampleCPP device="GPU" />
26 |             </TabItem>
27 |         </Tabs>
28 |     </TabItemCpp>
29 | </LanguageTabs>
30 | 
31 | :::tip
32 | Use CPU or GPU as the device without any other code changes.
33 | :::
34 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/text-embedding/index.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 4
 3 | ---
 4 | import OptimumCLI from '@site/src/components/OptimumCLI';
 5 | import ConvertModelSection from '../_shared/_convert_model.mdx';
 6 | import RunModelSection from './_sections/_run_model/index.mdx';
 7 | import UsageOptionsSection from './_sections/_usage_options/index.mdx';
 8 | 
 9 | # Semantic Search using Text Embedding
10 | 
11 | <ConvertModelSection>
12 |     Download and convert a text embedding model (e.g. [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5)) to OpenVINO format from Hugging Face:
13 | 
14 |     <OptimumCLI model='BAAI/bge-small-en-v1.5' outputDir='bge-small-en-v1_5_ov' trustRemoteCode />
15 | 
16 |     See all supported [Text Embedding Models](/docs/supported-models/#text-embeddings-models).
17 | </ConvertModelSection>
18 | 
19 | <RunModelSection />
20 | 
21 | <UsageOptionsSection />
22 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/text-generation/_sections/_run_model/_code_example_cpp.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="cpp" showLineNumbers>
 4 | {`#include "openvino/genai/llm_pipeline.hpp"
 5 | #include <iostream>
 6 | 
 7 | int main(int argc, char* argv[]) {
 8 |     std::string models_path = argv[1];
 9 |     ov::genai::LLMPipeline pipe(models_path, "${props.device || 'CPU'}");
10 |     std::cout << pipe.generate("What is OpenVINO?", ov::genai::max_new_tokens(100)) << '\\n';
11 | }
12 | `}
13 | </CodeBlock>
14 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/text-generation/_sections/_run_model/_code_example_python.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="python" showLineNumbers>
 4 | {`import openvino_genai as ov_genai
 5 | 
 6 | pipe = ov_genai.LLMPipeline(model_path, "${props.device || 'CPU'}")
 7 | print(pipe.generate("What is OpenVINO?", max_new_tokens=100))
 8 | `}
 9 | </CodeBlock>
10 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/text-generation/_sections/_run_model/index.mdx:
--------------------------------------------------------------------------------
 1 | import CodeExampleCPP from './_code_example_cpp.mdx';
 2 | import CodeExamplePython from './_code_example_python.mdx';
 3 | 
 4 | ## Run Model Using OpenVINO GenAI
 5 | 
 6 | [`LLMPipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.LLMPipeline.html) is the main object used for decoding. You can construct it directly from the folder containing the converted model.
 7 | It automatically loads the main model, tokenizer, detokenizer, and default generation configuration.
 8 | 
 9 | <LanguageTabs>
10 |     <TabItemPython>
11 |         <Tabs groupId="device">
12 |             <TabItem label="CPU" value="cpu">
13 |                 <CodeExamplePython device="CPU" />
14 |             </TabItem>
15 |             <TabItem label="GPU" value="gpu">
16 |                 <CodeExamplePython device="GPU" />
17 |             </TabItem>
18 |         </Tabs>
19 |     </TabItemPython>
20 |     <TabItemCpp>
21 |         <Tabs groupId="device">
22 |             <TabItem label="CPU" value="cpu">
23 |                 <CodeExampleCPP device="CPU" />
24 |             </TabItem>
25 |             <TabItem label="GPU" value="gpu">
26 |                 <CodeExampleCPP device="GPU" />
27 |             </TabItem>
28 |         </Tabs>
29 |     </TabItemCpp>
30 | </LanguageTabs>
31 | 
32 | :::tip
33 | 
34 | Use CPU or GPU as the device without any other code changes.
35 | 
36 | :::
37 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/text-generation/_sections/_usage_options/_lora_adapters.mdx:
--------------------------------------------------------------------------------
1 | ### Working with LoRA Adapters
2 | 
3 | LoRA adapters can be used to customize LLM outputs for specific tasks or styles.
4 | In text generation, adapters can help models perform better at particular activities like coding, creative writing, or domain-specific knowledge.
5 | 
6 | Refer to the [LoRA Adapters](/docs/guides/lora-adapters.mdx) guide for more details on working with LoRA adapters.
7 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/text-generation/_sections/_usage_options/index.mdx:
--------------------------------------------------------------------------------
 1 | import ChatScenario from '@site/docs/use-cases/_shared/_chat_scenario.mdx';
 2 | import Streaming from '@site/docs/use-cases/_shared/_streaming.mdx';
 3 | import GenerationParameters from './_generation_parameters.mdx';
 4 | import LoraAdapters from './_lora_adapters.mdx';
 5 | import SpeculativeDecoding from './_speculative_decoding.mdx';
 6 | 
 7 | ## Additional Usage Options
 8 | 
 9 | :::tip
10 | Check out [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/text_generation) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/text_generation) text generation samples.
11 | :::
12 | 
13 | <GenerationParameters />
14 | 
15 | <ChatScenario />
16 | 
17 | <Streaming />
18 | 
19 | <LoraAdapters />
20 | 
21 | <SpeculativeDecoding />
22 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/text-generation/index.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 1
 3 | ---
 4 | import OptimumCLI from '@site/src/components/OptimumCLI';
 5 | import ConvertModelSection from '../_shared/_convert_model.mdx';
 6 | import RunModelSection from './_sections/_run_model/index.mdx';
 7 | import UsageOptionsSection from './_sections/_usage_options/index.mdx';
 8 | 
 9 | # Text Generation Using LLMs
10 | 
11 | <ConvertModelSection>
12 |     Download and convert a model (e.g. [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0)) to OpenVINO format from Hugging Face:
13 | 
14 |     <OptimumCLI model='TinyLlama/TinyLlama-1.1B-Chat-v1.0' outputDir='TinyLlama_1_1b_v1_ov' weightFormat='int4' trustRemoteCode />
15 | 
16 |     See all supported [Large Language Models](/docs/supported-models/#large-language-models-llms).
17 | </ConvertModelSection>
18 | 
19 | <RunModelSection />
20 | 
21 | <UsageOptionsSection />
22 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/text-rerank/_sections/_run_model/_code_example_cpp.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="cpp" showLineNumbers>
 4 | {`#include "openvino/genai/rag/text_rerank_pipeline.hpp"
 5 | 
 6 | int main(int argc, char* argv[]) {
 7 |     std::vector<std::string> documents(argv + 3, argv + argc);
 8 |     std::string models_path = argv[1], query = argv[2];
 9 |     
10 |     ov::genai::TextRerankPipeline pipeline(models_path, "${props.device || 'CPU'}", ov::genai::top_n(3));
11 |     
12 |     auto rerank_result = pipeline.rerank(query, documents);
13 |     
14 |     std::cout << "Reranked documents:\\n";
15 |     for (const auto& [index, score] : rerank_result) {
16 |         std::cout << "Document " << index << " (score: " << score << "): " << documents[index] << '\\n';
17 |     }
18 | }
19 | `}
20 | </CodeBlock>
21 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/text-rerank/_sections/_run_model/_code_example_python.mdx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | <CodeBlock language="python" showLineNumbers>
 4 | {`import openvino_genai
 5 | 
 6 | pipeline = openvino_genai.TextRerankPipeline(model_path, "${props.device || 'CPU'}", top_n=3)
 7 | 
 8 | rerank_result = pipeline.rerank(query, documents)
 9 | 
10 | print("Reranked documents:")
11 | for index, score in rerank_result:
12 |     print(f"Document {index} (score: {score:.4f}): {documents[index]}")
13 | `}
14 | </CodeBlock>
15 | 


--------------------------------------------------------------------------------
/site/docs/use-cases/text-rerank/_sections/_run_model/index.mdx:
--------------------------------------------------------------------------------
 1 | import CodeExampleCPP from './_code_example_cpp.mdx';
 2 | import CodeExamplePython from './_code_example_python.mdx';
 3 | 
 4 | ## Run Model Using OpenVINO GenAI
 5 | 
 6 | The `TextRerankPipeline` enables you to reorder candidate documents or passages by semantic relevance to a query using a cross-encoder or reranker model. You can control how many top results are returned using the `top_n` parameter.
 7 | 
 8 | <LanguageTabs>
 9 |     <TabItemPython>
10 |         <Tabs groupId="device">
11 |             <TabItem label="CPU" value="cpu">
12 |                 <CodeExamplePython device="CPU" />
13 |             </TabItem>
14 |             <TabItem label="GPU" value="gpu">
15 |                 <CodeExamplePython device="GPU" />
16 |             </TabItem>
17 |         </Tabs>
18 |     </TabItemPython>
19 |     <TabItemCpp>
20 |         <Tabs groupId="device">
21 |             <TabItem label="CPU" value="cpu">
22 |                 <CodeExampleCPP device="CPU" />
23 |             </TabItem>
24 |             <TabItem label="GPU" value="gpu">
25 |                 <CodeExampleCPP device="GPU" />
26 |             </TabItem>
27 |         </Tabs>
28 |     </TabItemCpp>
29 | </LanguageTabs>


--------------------------------------------------------------------------------
/site/docs/use-cases/text-rerank/index.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 2
 3 | ---
 4 | import OptimumCLI from '@site/src/components/OptimumCLI';
 5 | import ConvertModelSection from '../_shared/_convert_model.mdx';
 6 | import RunModelSection from './_sections/_run_model/index.mdx';
 7 | 
 8 | # Text Reranking
 9 | 
10 | <ConvertModelSection>
11 |     Download and convert a reranker model (e.g. [cross-encoder/ms-marco-MiniLM-L6-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L6-v2)) to OpenVINO format from Hugging Face:
12 | 
13 |     <OptimumCLI model='cross-encoder/ms-marco-MiniLM-L6-v2' outputDir='cross-encoder/ms-marco-MiniLM-L6-v2' trustRemoteCode />
14 | 
15 |     See all supported [Reranker Models](/docs/supported-models/#text-rerank-models).
16 | </ConvertModelSection>
17 | 
18 | <RunModelSection />
19 | 


--------------------------------------------------------------------------------
/site/eslint.config.mjs:
--------------------------------------------------------------------------------
 1 | import pluginJs from '@eslint/js';
 2 | import pluginReact from 'eslint-plugin-react';
 3 | import globals from 'globals';
 4 | import tsEslint from 'typescript-eslint';
 5 | 
 6 | import { FlatCompat } from '@eslint/eslintrc';
 7 | import path from 'path';
 8 | import { fileURLToPath } from 'url';
 9 | 
10 | // mimic CommonJS variables -- not needed if using CommonJS
11 | const __filename = fileURLToPath(import.meta.url);
12 | const __dirname = path.dirname(__filename);
13 | 
14 | const compat = new FlatCompat({
15 |   baseDirectory: __dirname,
16 | });
17 | 
18 | /** @type {import('eslint').Linter.Config[]} */
19 | export default [
20 |   { files: ['**/*.{js,mjs,cjs,ts,jsx,tsx}'] },
21 |   { ignores: ['node_modules/', '.docusaurus/'] },
22 |   {
23 |     languageOptions: {
24 |       ...pluginReact.configs.flat.recommended.languageOptions,
25 |       globals: { ...globals.browser, ...globals.node },
26 |     },
27 |   },
28 |   pluginJs.configs.recommended,
29 |   ...tsEslint.configs.recommended,
30 |   pluginReact.configs.flat.recommended,
31 |   pluginReact.configs.flat['jsx-runtime'],
32 |   ...compat.extends('plugin:@docusaurus/recommended'),
33 |   { settings: { react: { version: 'detect' } } },
34 | ];
35 | 


--------------------------------------------------------------------------------
/site/sidebars.ts:
--------------------------------------------------------------------------------
 1 | import type { SidebarsConfig } from '@docusaurus/plugin-content-docs';
 2 | 
 3 | // This runs in Node.js - Don't use client-side code here (browser APIs, JSX...)
 4 | 
 5 | /**
 6 |  * Creating a sidebar enables you to:
 7 |  - create an ordered group of docs
 8 |  - render a sidebar for each doc of that group
 9 |  - provide next/previous navigation
10 | 
11 |  The sidebars can be generated from the filesystem, or explicitly defined here.
12 | 
13 |  Create as many sidebars as you want.
14 |  */
15 | const sidebars: SidebarsConfig = {
16 |   // By default, Docusaurus generates a sidebar from the docs folder structure
17 |   genaiDocsSidebar: [
18 |     {
19 |       type: 'autogenerated',
20 |       dirName: '.',
21 |     },
22 |   ],
23 | };
24 | 
25 | export default sidebars;
26 | 


--------------------------------------------------------------------------------
/site/src/components/Button/styles.module.css:
--------------------------------------------------------------------------------
 1 | :global(.button) {
 2 |   display: inline-flex;
 3 |   align-items: center;
 4 |   justify-content: center;
 5 | 
 6 |   --button-icon-size: calc(1.5rem * var(--ifm-button-size-multiplier));
 7 | 
 8 |   .buttonIcon {
 9 |     display: inline-flex;
10 |     height: var(--button-icon-size);
11 |     width: var(--button-icon-size);
12 |     margin-right: 1rem;
13 | 
14 |     svg {
15 |       height: 100%;
16 |       width: 100%;
17 |     }
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/site/src/components/Carousel/styles.module.css:
--------------------------------------------------------------------------------
 1 | .carousel {
 2 |   position: relative;
 3 |   padding: 0 2rem;
 4 | 
 5 |   .slidesWrapper {
 6 |     overflow: hidden;
 7 |   }
 8 | 
 9 |   .slidesContainer {
10 |     display: flex;
11 |     transition: transform 0.5s ease-in-out;
12 | 
13 |     .slide {
14 |       display: flex;
15 |       align-items: center;
16 |       justify-content: center;
17 |       padding: 0 1rem;
18 |     }
19 |   }
20 | }
21 | 
22 | .chevron {
23 |   position: absolute;
24 |   top: 50%;
25 |   transform: translateY(-50%);
26 |   background-color: transparent;
27 |   border: none;
28 |   width: 30px;
29 |   height: 30px;
30 |   display: flex;
31 |   align-items: center;
32 |   justify-content: center;
33 |   cursor: pointer;
34 |   transition: all 0.3s ease;
35 |   z-index: 2;
36 |   color: white;
37 |   padding: 0;
38 | }
39 | 
40 | .chevronLeft {
41 |   left: 0px;
42 | }
43 | 
44 | .chevronRight {
45 |   right: 0px;
46 | }
47 | 
48 | .pagination {
49 |   display: flex;
50 |   justify-content: center;
51 |   gap: 8px;
52 |   padding-bottom: 5px;
53 | 
54 |   .dot {
55 |     padding: 0;
56 |     width: 8px;
57 |     height: 8px;
58 |     border-radius: 50%;
59 |     border: none;
60 |     cursor: pointer;
61 |     transition: all 0.3s ease;
62 |     opacity: 0.5;
63 |   }
64 | 
65 |   .dotActive {
66 |     opacity: 1;
67 |     transform: scale(1.3);
68 |   }
69 | }
70 | 


--------------------------------------------------------------------------------
/site/src/components/OptimumCLI/index.tsx:
--------------------------------------------------------------------------------
 1 | import CodeBlock from '@theme/CodeBlock';
 2 | 
 3 | type OptimumCLIProps = {
 4 |   model?: string;
 5 |   outputDir?: string;
 6 |   weightFormat?: 'fp32' | 'fp16' | 'int8' | 'int4';
 7 |   task?: string;
 8 |   trustRemoteCode?: boolean;
 9 | };
10 | 
11 | export default function OptimumCLI({
12 |   model = '<model_id_or_path>',
13 |   outputDir = '<output_dir>',
14 |   weightFormat,
15 |   task,
16 |   trustRemoteCode,
17 | }: OptimumCLIProps): React.JSX.Element {
18 |   const args = [`--model ${model}`];
19 |   if (weightFormat) {
20 |     args.push(`--weight-format ${weightFormat}`);
21 |   }
22 |   if (task) {
23 |     args.push(`--task ${task}`);
24 |   }
25 |   if (trustRemoteCode) {
26 |     args.push('--trust-remote-code');
27 |   }
28 |   return (
29 |     <CodeBlock language="bash">{`optimum-cli export openvino ${args.join(
30 |       ' '
31 |     )} ${outputDir}`}</CodeBlock>
32 |   );
33 | }
34 | 


--------------------------------------------------------------------------------
/site/src/css/breadcrumbs.css:
--------------------------------------------------------------------------------
 1 | .breadcrumbs > .breadcrumbs__item:first-child {
 2 |   display: none;
 3 | }
 4 | 
 5 | .breadcrumbs__link {
 6 |   font-size: 14px;
 7 | }
 8 | 
 9 | .breadcrumbs__item:not(:last-child)::after {
10 |   background: none;
11 |   content: '/';
12 |   margin: 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/site/src/css/footer.css:
--------------------------------------------------------------------------------
 1 | .footer {
 2 |   --ifm-footer-background-color: #f9f9f9;
 3 |   --ifm-footer-title-color: var(--genai-color-text-black);
 4 |   --ifm-footer-link-color: var(--genai-color-classic-blue);
 5 |   --ifm-footer-color: var(--genai-color-text-black);
 6 |   --ifm-footer-link-hover-color: var(--genai-color-darker-classic-blue);
 7 | }
 8 | 
 9 | .footer__links {
10 |   padding: 0 80px;
11 |   margin-bottom: 2rem;
12 | }
13 | 
14 | .footer__col {
15 |   display: grid;
16 |   justify-content: center;
17 | }
18 | 
19 | .footer__copyright {
20 |   white-space: pre-wrap;
21 |   font-size: 12px;
22 | }
23 | 


--------------------------------------------------------------------------------
/site/src/css/menu.css:
--------------------------------------------------------------------------------
 1 | .menu__link {
 2 |   font-size: 0.875rem;
 3 |   position: relative;
 4 | }
 5 | 
 6 | .menu__link--active:not(.menu__link--sublist) {
 7 |   --ifm-menu-color-active: var(--genai-color-classic-blue);
 8 | }
 9 | 
10 | .menu__link--sublist-caret:after,
11 | .menu__caret:before {
12 |   --ifm-menu-link-sublist-icon: url('../../static/img/chevron-up.svg');
13 |   background: var(--ifm-menu-link-sublist-icon);
14 |   background-position: center;
15 |   background-repeat: no-repeat;
16 | }
17 | 


--------------------------------------------------------------------------------
/site/src/css/navbar.css:
--------------------------------------------------------------------------------
 1 | :root {
 2 |   --search-local-input-active-border-color: var(--genai-color-white);
 3 | 
 4 |   --ifm-navbar-link-color: var(--genai-color-text-white);
 5 |   --ifm-navbar-link-hover-color: var(--genai-text-color-white-hover);
 6 |   --ifm-navbar-link-active-color: var(--genai-color-blue-energy);
 7 | }
 8 | 
 9 | .navbar {
10 |   background: linear-gradient(90deg, #27317f 6.98%, #1a3483 46.6%, #02227c 79.11%);
11 |   width: 100%;
12 | }
13 | 
14 | .navbar__items {
15 |   margin-left: var(--ifm-navbar-height);
16 | }
17 | 
18 | .navbar__inner {
19 |   align-items: center;
20 | }
21 | 
22 | .navbar__logo {
23 |   top: 0;
24 |   left: 0;
25 |   height: 100%;
26 |   position: absolute;
27 | }
28 | 
29 | .navbar__link--active:not(:has(~ .navbar__link--active)) {
30 |   --ifm-navbar-link-hover-color: var(--genai-color-blue-energy);
31 | }
32 | 
33 | .navbar__search-input {
34 |   border-radius: 3px;
35 |   font-size: 14px;
36 |   font-family: var(--genai-font-family);
37 |   color: var(--genai-color-text-white);
38 |   width: 238px;
39 |   background: #122f84 url('../../static/img/magnifying-glass.svg') no-repeat 0.75rem center / 1rem
40 |     1rem;
41 | 
42 |   &::placeholder {
43 |     color: var(--genai-color-text-white);
44 |   }
45 | }
46 | 


--------------------------------------------------------------------------------
/site/src/css/toc.css:
--------------------------------------------------------------------------------
 1 | .table-of-contents__link {
 2 |   position: relative;
 3 |   padding: var(--ifm-menu-link-padding-vertical) var(--ifm-menu-link-padding-horizontal);
 4 | }
 5 | 
 6 | .table-of-contents__link--active {
 7 |   background-color: var(--ifm-menu-color-background-active);
 8 |   color: var(--genai-color-classic-blue);
 9 | }
10 | 


--------------------------------------------------------------------------------
/site/src/css/typography.css:
--------------------------------------------------------------------------------
 1 | :root {
 2 |   --global-font-apple-system: -apple-system;
 3 |   --global-font-blink-mac-system-font: BlinkMacSystemFont;
 4 |   --global-font-segoe-ui: 'Segoe UI';
 5 |   --global-font-roboto: Roboto;
 6 |   --global-font-helvetica: Helvetica;
 7 |   --global-font-arial: Arial;
 8 |   --global-font-sans-serif: sans-serif;
 9 |   --global-font-apple-color-emoji: 'Apple Color Emoji';
10 |   --global-font-segoe-ui-emoji: 'Segoe UI Emoji';
11 |   --global-font-segoe-ui-symbol: 'Segoe UI Symbol';
12 | 
13 |   --genai-font-family: var(--global-font-apple-system), var(--global-font-blink-mac-system-font),
14 |     var(--global-font-segoe-ui), var(--global-font-roboto), var(--global-font-helvetica),
15 |     var(--global-font-arial), var(--global-font-sans-serif), var(--global-font-apple-color-emoji),
16 |     var(--global-font-segoe-ui-emoji), var(--global-font-segoe-ui-symbol);
17 | }
18 | 


--------------------------------------------------------------------------------
/site/src/hooks/use-screen-size.ts:
--------------------------------------------------------------------------------
 1 | import ExecutionEnvironment from '@docusaurus/ExecutionEnvironment';
 2 | import { useEffect, useState } from 'react';
 3 | 
 4 | const useScreenSize = () => {
 5 |   // Hooks must be called unconditionally; fall back to 0x0 when the viewport
 6 |   // is unavailable (e.g. during server-side rendering).
 7 |   const [screenSize, setScreenSize] = useState(() =>
 8 |     ExecutionEnvironment.canUseViewport
 9 |       ? { width: window.innerWidth, height: window.innerHeight }
10 |       : { width: 0, height: 0 }
11 |   );
12 | 
13 |   useEffect(() => {
14 |     const handleResize = () => {
15 |       setScreenSize({
16 |         width: window.innerWidth,
17 |         height: window.innerHeight,
18 |       });
19 |     };
20 | 
21 |     window.addEventListener('resize', handleResize);
22 | 
23 |     return () => {
24 |       window.removeEventListener('resize', handleResize);
25 |     };
26 |   }, []);
27 | 
28 |   return screenSize;
29 | };
30 | 
31 | export default useScreenSize;
32 | 


--------------------------------------------------------------------------------
/site/src/pages/_sections/FeaturesSection/FeatureItem/index.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import styles from './styles.module.css';
 3 | 
 4 | import Heading from '@theme/Heading';
 5 | 
 6 | type FeatureItemProps = {
 7 |   icon: string;
 8 |   title: string;
 9 |   children: React.ReactNode;
10 | };
11 | 
12 | export const FeatureItem: React.FC<FeatureItemProps> = ({ icon, title, children }) => (
13 |   <div className={styles.benefitItem}>
14 |     <span className={styles.icon}>{icon}</span>
15 |     <Heading as="h3" className={styles.title}>
16 |       {title}
17 |     </Heading>
18 |     <p className={styles.description}>{children}</p>
19 |   </div>
20 | );
21 | 


--------------------------------------------------------------------------------
/site/src/pages/_sections/FeaturesSection/FeatureItem/styles.module.css:
--------------------------------------------------------------------------------
 1 | .benefitItem {
 2 |   display: flex;
 3 |   flex-direction: column;
 4 |   align-items: center;
 5 |   padding: 0rem 2rem;
 6 | 
 7 |   .icon {
 8 |     font-size: 3rem;
 9 |   }
10 | 
11 |   .title {
12 |     margin-bottom: 0.75rem;
13 |   }
14 | 
15 |   .description {
16 |     margin: 0;
17 |     text-align: center;
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/site/src/pages/_sections/FeaturesSection/styles.module.css:
--------------------------------------------------------------------------------
 1 | .featuresSection {
 2 |   composes: section from '../section-styles.module.css';
 3 | }
 4 | 
 5 | .sectionTitle {
 6 |   composes: sectionTitle from '../section-styles.module.css';
 7 | }
 8 | 
 9 | .sectionContent {
10 |   composes: sectionContent from '../section-styles.module.css';
11 | 
12 |   display: grid;
13 |   grid-template-columns: repeat(auto-fit, minmax(340px, 1fr));
14 |   width: 100%;
15 |   justify-content: center;
16 |   gap: 1rem;
17 | }
18 | 


--------------------------------------------------------------------------------
/site/src/pages/_sections/HeroSection/PipelinesCarousel/styles.module.css:
--------------------------------------------------------------------------------
 1 | .pipelineExample {
 2 |   display: flex;
 3 |   flex-direction: column;
 4 |   flex-grow: 1;
 5 | 
 6 |   .pipelineHeader {
 7 |     display: flex;
 8 |     align-items: center;
 9 |     gap: 0.5rem;
10 |     margin-bottom: 0.75rem;
11 | 
12 |     .pipelineTitle {
13 |       margin: 0;
14 |       color: var(--genai-color-text-white);
15 |       font-weight: 500;
16 |       font-size: 14px;
17 |     }
18 |   }
19 | 
20 |   .pipelineCode {
21 |     font-size: 0.75rem;
22 | 
23 |     pre code {
24 |       padding: 0.5rem;
25 |     }
26 |   }
27 | }
28 | 


--------------------------------------------------------------------------------
/site/src/pages/_sections/HeroSection/index.tsx:
--------------------------------------------------------------------------------
 1 | import Heading from '@theme/Heading';
 2 | 
 3 | import Button from '@site/src/components/Button';
 4 | import OpenVINOLogo from '@site/static/img/openvino.svg';
 5 | 
 6 | import PipelinesCarousel from './PipelinesCarousel';
 7 | import styles from './styles.module.css';
 8 | 
 9 | export const HeroSection = () => (
10 |   <section className={styles.heroSection}>
11 |     <Heading as="h1" className={styles.sectionTitle}>
12 |       <OpenVINOLogo role="img" title="OpenVINO" />
13 |       <span className={styles.genAITitle}>GenAI</span>
14 |     </Heading>
15 |     <div className={styles.sectionContent}>
16 |       <p className={styles.subtitle}>Run Generative AI with ease</p>
17 |       <p className={styles.description}>
18 |         OpenVINO™ GenAI provides optimized pipelines for running generative AI models with maximum
19 |         performance and minimal dependencies
20 |       </p>
21 |       <Button
22 |         label="Get Started"
23 |         link="/docs/getting-started/introduction"
24 |         size="lg"
25 |         variant="secondary"
26 |         className={styles.getStartedButton}
27 |       />
28 |       <PipelinesCarousel className={styles.pipelinesCarousel} />
29 |     </div>
30 |   </section>
31 | );
32 | 


--------------------------------------------------------------------------------
/site/src/pages/_sections/InstallSection/styles.module.css:
--------------------------------------------------------------------------------
 1 | .installSection {
 2 |   composes: section from '../section-styles.module.css';
 3 | }
 4 | 
 5 | .sectionTitle {
 6 |   composes: sectionTitle from '../section-styles.module.css';
 7 | }
 8 | 
 9 | .sectionContent {
10 |   composes: sectionContent from '../section-styles.module.css';
11 | 
12 |   display: flex;
13 |   flex-direction: column;
14 |   gap: 2rem;
15 |   align-items: center;
16 | 
17 |   .sectionDescription {
18 |     margin-bottom: 0;
19 |     text-align: center;
20 |   }
21 | 
22 |   .quickInstall,
23 |   .os {
24 |     display: flex;
25 |     flex-direction: column;
26 |     align-items: center;
27 | 
28 |     h3 {
29 |       font-weight: 500;
30 |     }
31 |   }
32 | 
33 |   .quickInstallCommand {
34 |     margin-top: 0.5rem;
35 |     margin-bottom: 0;
36 |     min-width: 365px;
37 |   }
38 | 
39 |   .osList {
40 |     display: flex;
41 |     flex-direction: row;
42 |     gap: 2.5rem;
43 | 
44 |     .osItem {
45 |       display: flex;
46 |       flex-direction: column;
47 |       align-items: center;
48 |       padding: 1rem;
49 |       gap: 0.5rem;
50 | 
51 |       .osItemIcon {
52 |         height: 2.5rem;
53 |       }
54 | 
55 |       .osItemTitle {
56 |         font-weight: 500;
57 |       }
58 |     }
59 |   }
60 | }
61 | 


--------------------------------------------------------------------------------
/site/src/pages/_sections/UseCasesSection/index.tsx:
--------------------------------------------------------------------------------
 1 | import styles from './styles.module.css';
 2 | 
 3 | import Heading from '@theme/Heading';
 4 | 
 5 | import Link from '@docusaurus/Link';
 6 | import { ImageGeneration } from './components/image-generation';
 7 | import { ImageProcessing } from './components/image-processing';
 8 | import { SpeechRecognition } from './components/speech-recognition';
 9 | import { TextGeneration } from './components/text-generation';
10 | import { TextRerank } from './components/text-rerank';
11 | import { TextEmbedding } from './components/text-embedding';
12 | 
13 | export const UseCasesSection = () => (
14 |   <section className={styles.useCasesSection}>
15 |     <Heading as="h2" className={styles.sectionTitle}>
16 |       Use Cases
17 |     </Heading>
18 |     <div className={styles.sectionContent}>
19 |       <TextGeneration />
20 |       <ImageGeneration />
21 |       <SpeechRecognition />
22 |       <ImageProcessing />
23 |       <TextEmbedding />
24 |       <TextRerank />
25 |     </div>
26 |     <div className={styles.useCasesFooter}>
27 |       <strong>Looking for more?</strong>&nbsp;See all{' '}
28 |       <Link to="docs/category/use-cases">supported use cases</Link>.
29 |     </div>
30 |   </section>
31 | );
32 | 


--------------------------------------------------------------------------------
/site/src/pages/_sections/UseCasesSection/styles.module.css:
--------------------------------------------------------------------------------
 1 | .useCasesSection {
 2 |   composes: section from '../section-styles.module.css';
 3 | 
 4 |   background-color: var(--genai-color-section-bg);
 5 | }
 6 | 
 7 | .sectionTitle {
 8 |   composes: sectionTitle from '../section-styles.module.css';
 9 | }
10 | 
11 | .sectionContent {
12 |   composes: sectionContent from '../section-styles.module.css';
13 | 
14 |   display: flex;
15 |   flex-direction: column;
16 |   gap: 2rem;
17 | }
18 | 
19 | .useCasesFooter {
20 |   margin-top: 1.5rem;
21 |   font-size: 1rem;
22 | 
23 |   strong {
24 |     font-weight: 600;
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/site/src/pages/_sections/section-styles.module.css:
--------------------------------------------------------------------------------
 1 | .section {
 2 |   display: flex;
 3 |   flex-direction: column;
 4 |   align-items: center;
 5 |   padding: 2.5rem 2rem;
 6 | }
 7 | 
 8 | .sectionTitle {
 9 |   margin-bottom: 2rem;
10 | }
11 | 
12 | .sectionContent {
13 |   max-width: 1200px;
14 |   margin: 0 auto;
15 | }
16 | 


--------------------------------------------------------------------------------
/site/src/pages/index.tsx:
--------------------------------------------------------------------------------
 1 | import Layout from '@theme/Layout';
 2 | 
 3 | import { FeaturesSection } from './_sections/FeaturesSection';
 4 | import { HeroSection } from './_sections/HeroSection';
 5 | import { InstallSection } from './_sections/InstallSection';
 6 | import { UseCasesSection } from './_sections/UseCasesSection';
 7 | 
 8 | export default function Home() {
 9 |   return (
10 |     <Layout description="Run Generative AI models with simple C++/Python API and using OpenVINO Runtime">
11 |       <HeroSection />
12 |       <FeaturesSection />
13 |       <UseCasesSection />
14 |       <InstallSection />
15 |     </Layout>
16 |   );
17 | }
18 | 


--------------------------------------------------------------------------------
/site/src/theme/MDXComponents.tsx:
--------------------------------------------------------------------------------
 1 | import Button from '@site/src/components/Button';
 2 | import { LanguageTabs, TabItemCpp, TabItemPython } from '@site/src/components/LanguageTabs';
 3 | import MDXComponents from '@theme-original/MDXComponents';
 4 | import TabItem from '@theme/TabItem';
 5 | import Tabs from '@theme/Tabs';
 6 | 
 7 | export default {
 8 |   // Reusing the default mapping
 9 |   ...MDXComponents,
10 |   // Theme components
11 |   Tabs,
12 |   TabItem,
13 |   // Custom components
14 |   Button,
15 |   LanguageTabs,
16 |   TabItemPython,
17 |   TabItemCpp,
18 | };
19 | 


--------------------------------------------------------------------------------
/site/src/types/images.d.ts:
--------------------------------------------------------------------------------
1 | declare module '*.webp' {
2 |   const content: string;
3 |   export default content;
4 | }
5 | 


--------------------------------------------------------------------------------
/site/static/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvinotoolkit/openvino.genai/91dc71e0ded5a4624289723266f7b3811369f0e9/site/static/.nojekyll


--------------------------------------------------------------------------------
/site/static/img/background.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvinotoolkit/openvino.genai/91dc71e0ded5a4624289723266f7b3811369f0e9/site/static/img/background.webp


--------------------------------------------------------------------------------
/site/static/img/beam_idx-drop.gif:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:701839c28ac1e05c1c9e23823c74a10149a343210192e51df36e563ff6e257e4
3 | size 5700875
4 | 


--------------------------------------------------------------------------------
/site/static/img/beam_idx-fork.gif:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:292753b30a2153c92cedf16672ba182a851ec30c95c309cdaca13173f00fe700
3 | size 6062552
4 | 


--------------------------------------------------------------------------------
/site/static/img/chevron-right.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:4fa971a830569f87b477186e7c96b36a2bb66cf76431e10027e2dcf92a2307e9
3 | size 513
4 | 


--------------------------------------------------------------------------------
/site/static/img/chevron-up.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:ad006a8bb6ea8f42b9c3bf6b73bf44f35379f5b03ccd2fd356caf1de8cb14b94
3 | size 697
4 | 


--------------------------------------------------------------------------------
/site/static/img/favicon.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:eef9d4468b26933cb3101f3ddcde43a05686152e286e38ecb986ae8755e589ae
3 | size 570
4 | 


--------------------------------------------------------------------------------
/site/static/img/image.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:f4d041afa0c579aa2274a785edd7cc89fc936f5b805aefae5e450c14295a250f
3 | size 954
4 | 


--------------------------------------------------------------------------------
/site/static/img/intel-logo.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:53536031483e61bc6fd7024cfdcc59e24bb352ef28ce308af75a53b677fabf37
3 | size 4633
4 | 


--------------------------------------------------------------------------------
/site/static/img/kv-cache-areas-diagram.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e2fa45a69b4db6e8293fd8e1da712c2970237ac98aab99d4b0d729379bbe49c6
3 | size 7143
4 | 


--------------------------------------------------------------------------------
/site/static/img/linux-logo.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:6ad66b9369475567aac9ef50d3abf06fa3b9ae2d3ef7a392167862fbb3985068
3 | size 112029
4 | 


--------------------------------------------------------------------------------
/site/static/img/lora.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:64a1113c00b6f37d78ce0e32713170b04a9367ca0a2a74b280d1d6f7ea9122e1
3 | size 18575
4 | 


--------------------------------------------------------------------------------
/site/static/img/mac-os-logo.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:66717fe401ce5b14b7f0723088612c9c246e868948cec2c56fcf748e5dad7387
3 | size 2901
4 | 


--------------------------------------------------------------------------------
/site/static/img/magnifying-glass.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:4b37f714caeaf026cef1548bbd51c099419f617211a72bd1cb95b2c1c2fb2fca
3 | size 399
4 | 


--------------------------------------------------------------------------------
/site/static/img/openvino-genai-workflow.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:703732cd6a85f2cbcfd0915d63c10483114f05b71b834d2228501700074d0053
3 | size 1053573
4 | 


--------------------------------------------------------------------------------
/site/static/img/openvino.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:80a77851039175ddd926eebbe2a8b88ca7380a7bd0ee8b4028f103b873789425
3 | size 97028
4 | 


--------------------------------------------------------------------------------
/site/static/img/sound-on.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:7b5e5ec8be590d7b11399903a27eee1f94617f5c7c756b76f16f6dfc083768b5
3 | size 1391
4 | 


--------------------------------------------------------------------------------
/site/static/img/stateful.jpg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:a6fb5ab9990c845eef8847bdf76799fcaefe0a9afa10fb9d07f6df4394a9e2ad
3 | size 129471
4 | 


--------------------------------------------------------------------------------
/site/static/img/stateless.jpg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:20904ff7a8793359b978cfcdc85c482e0764291af17b572936955f586e202ea9
3 | size 113440
4 | 


--------------------------------------------------------------------------------
/site/static/img/structured_output_work_example.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:2f97b3b73753dbe2849035b49fb20630131b6fae06972ecfb6b1b80680d4eeb6
3 | size 58388
4 | 


--------------------------------------------------------------------------------
/site/static/img/text.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:f5a1350d89c71005b91f06a78d7209891b41be871fcf83b6d574f76132a25870
3 | size 1449
4 | 


--------------------------------------------------------------------------------
/site/static/img/trishape.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:fd3ccb1d8489537c03832ad4f4f8d6ccacb29656787f3983e1f14663d1c0272a
3 | size 55058
4 | 


--------------------------------------------------------------------------------
/site/static/img/windows-logo.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:0ec8b8d5efed5d7f0b7f19271ecf40ecd5fe14bb5284dfc36ea340b6381206eb
3 | size 422
4 | 


--------------------------------------------------------------------------------
/site/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   // This file is not used in compilation. It is here just for a nice editor experience.
 3 |   "extends": "@docusaurus/tsconfig",
 4 |   "compilerOptions": {
 5 |     "baseUrl": "."
 6 |   },
 7 |   "exclude": [
 8 |     ".docusaurus",
 9 |     "build"
10 |   ]
11 | }


--------------------------------------------------------------------------------
/src/c/include/openvino/genai/c/visibility.h:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #ifndef OPENVINO_GENAI_EXTERN_C
 7 | #    ifdef __cplusplus
 8 | #        define OPENVINO_GENAI_EXTERN_C extern "C"
 9 | #    else
10 | #        define OPENVINO_GENAI_EXTERN_C
11 | #    endif
12 | #endif
13 | 
14 | #if defined(_WIN32) || defined(__CYGWIN__)
15 | #    ifdef openvino_genai_c_EXPORTS
16 | #        define OPENVINO_GENAI_C_EXPORTS OPENVINO_GENAI_EXTERN_C __declspec(dllexport)
17 | #    else
18 | #        define OPENVINO_GENAI_C_EXPORTS OPENVINO_GENAI_EXTERN_C __declspec(dllimport)
19 | #    endif
20 | #elif defined(__GNUC__) && (__GNUC__ >= 4) || defined(__clang__)
21 | #    ifdef openvino_genai_c_EXPORTS
22 | #        define OPENVINO_GENAI_C_EXPORTS OPENVINO_GENAI_EXTERN_C __attribute__((visibility("default")))
23 | #    else
24 | #        define OPENVINO_GENAI_C_EXPORTS OPENVINO_GENAI_EXTERN_C __attribute__((visibility("default")))
25 | #    endif
26 | #else
27 | #    define OPENVINO_GENAI_C_EXPORTS OPENVINO_GENAI_EXTERN_C
28 | #endif
29 | 


--------------------------------------------------------------------------------
/src/cpp/include/openvino/genai/common_types.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <filesystem>
 7 | #include <map>
 8 | #include "openvino/core/core.hpp"
 9 | #include <openvino/runtime/properties.hpp>
10 | 
11 | namespace ov {
12 | namespace genai {
13 | 
14 | /**
15 |  * @brief A map of models for VLMPipeline constructor.
16 |  * Key is model name (e.g. "vision_embeddings", "text_embeddings", "language", "resampler")
17 |  * and value is a pair of model IR as string and weights as tensor.
18 |  */
19 | using ModelsMap = std::map<std::string, std::pair<std::string, ov::Tensor>>;
20 | 
21 | /**
22 |  * @brief blob_path property defines a path to a directory containing compiled blobs previously exported with
23 |  * `pipeline.export_model` method.
24 |  *
25 |  * Use of compiled blobs can significantly reduce model load time, especially for large models.
26 |  */
27 | static constexpr ov::Property<std::filesystem::path> blob_path{"blob_path"};
28 | 
29 | }  // namespace genai
30 | }  // namespace ov
31 | 
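// Illustrative sketch, not from the repository: packing the `blob_path`
// property declared in common_types.hpp into an ov::AnyMap of pipeline
// properties. The directory name below and the idea of forwarding the map to
// a GenAI pipeline constructor are assumptions made only for illustration.
#include "openvino/genai/common_types.hpp"

int main() {
    ov::AnyMap properties = {
        // Points to blobs previously produced by `pipeline.export_model`.
        ov::genai::blob_path("exported_blobs")  // hypothetical directory
    };
    // `properties` would then typically be passed to a pipeline constructor.
    return properties.empty() ? 1 : 0;
}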


--------------------------------------------------------------------------------
/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "openvino/genai/image_generation/clip_text_model.hpp"
 7 | 
 8 | namespace ov {
 9 | namespace genai {
10 | 
11 | class CLIPTextModelWithProjection : public CLIPTextModel {
12 | public:
13 |     using CLIPTextModel::CLIPTextModel;
14 | 
15 |     std::shared_ptr<CLIPTextModel> clone() {
16 |         OPENVINO_ASSERT((m_model != nullptr) ^ static_cast<bool>(m_request), "CLIPTextModelWithProjection must have exactly one of m_model or m_request initialized");
17 | 
18 |         std::shared_ptr<CLIPTextModelWithProjection> cloned = std::make_shared<CLIPTextModelWithProjection>(*this);
19 | 
20 |         if (m_model) {
21 |             cloned->m_model = m_model->clone();
22 |         } else {
23 |             cloned->m_request = m_request.get_compiled_model().create_infer_request();
24 |         }
25 | 
26 |         return cloned;
27 |     }
28 | 
29 | };
30 | 
31 | } // namespace genai
32 | } // namespace ov
33 | 


--------------------------------------------------------------------------------
/src/cpp/include/openvino/genai/image_generation/scheduler.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <filesystem>
 7 | #include <memory>
 8 | #include "openvino/genai/visibility.hpp"
 9 | #include "openvino/core/deprecated.hpp"
10 | 
11 | namespace ov {
12 | namespace genai {
13 | 
14 | class OPENVINO_GENAI_EXPORTS Scheduler {
15 | public:
16 |     enum Type {
17 |         AUTO,
18 |         LCM,
19 |         DDIM,
20 |         LMS_DISCRETE OPENVINO_ENUM_DEPRECATED("LMS_DISCRETE is deprecated. Please, select different scheduler type") = DDIM,
21 |         EULER_DISCRETE,
22 |         FLOW_MATCH_EULER_DISCRETE,
23 |         PNDM,
24 |         EULER_ANCESTRAL_DISCRETE
25 |     };
26 | 
27 |     static std::shared_ptr<Scheduler> from_config(const std::filesystem::path& scheduler_config_path,
28 |                                                   Type scheduler_type = AUTO);
29 | 
30 |     virtual ~Scheduler();
31 | };
32 | 
33 | } // namespace genai
34 | } // namespace ov
35 | 
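// Illustrative sketch, not from the repository: constructing a scheduler via
// the factory declared in scheduler.hpp. The model directory and the
// "scheduler/scheduler_config.json" location are assumptions for illustration.
#include <filesystem>
#include <memory>
#include "openvino/genai/image_generation/scheduler.hpp"

int main() {
    std::filesystem::path models_path = "stable_diffusion_model";  // hypothetical
    std::shared_ptr<ov::genai::Scheduler> scheduler = ov::genai::Scheduler::from_config(
        models_path / "scheduler" / "scheduler_config.json",
        ov::genai::Scheduler::EULER_DISCRETE);
    return scheduler ? 0 : 1;
}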


--------------------------------------------------------------------------------
/src/cpp/include/openvino/genai/speech_generation/speech_generation_perf_metrics.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <chrono>
 7 | #include <map>
 8 | #include <string>
 9 | #include <vector>
10 | 
11 | #include "openvino/genai/perf_metrics.hpp"
12 | #include "openvino/genai/visibility.hpp"
13 | 
14 | namespace ov::genai {
15 | 
16 | struct OPENVINO_GENAI_EXPORTS SpeechGenerationPerfMetrics : public PerfMetrics {
17 |     size_t num_generated_samples = 0;
18 | 
19 |     void evaluate_statistics(std::optional<TimePoint> start_time = std::nullopt) override;
20 | };
21 | }  // namespace ov::genai
22 | 


--------------------------------------------------------------------------------
/src/cpp/include/openvino/genai/visibility.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "openvino/core/visibility.hpp"
 7 | 
 8 | #ifdef openvino_genai_EXPORTS
 9 | #    define OPENVINO_GENAI_EXPORTS OPENVINO_CORE_EXPORTS
10 | #else
11 | #    define OPENVINO_GENAI_EXPORTS OPENVINO_CORE_IMPORTS
12 | #endif  // openvino_genai_EXPORTS
13 | 


--------------------------------------------------------------------------------
/src/cpp/include/openvino/genai/visual_language/perf_metrics.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "openvino/genai/perf_metrics.hpp"
 7 | #include "openvino/genai/visibility.hpp"
 8 | 
 9 | 
10 | namespace ov::genai {
11 | 
12 | struct OPENVINO_GENAI_EXPORTS VLMRawPerfMetrics {
13 |     /** @brief Duration of preparation of embeddings */
14 |     std::vector<MicroSeconds> prepare_embeddings_durations;
15 | };
16 | 
17 | struct OPENVINO_GENAI_EXPORTS VLMPerfMetrics : public PerfMetrics {
18 |     /** @brief Mean and standard deviation of preparation of embeddings in milliseconds */
19 |     MeanStdPair prepare_embeddings_duration;
20 | 
21 |     MeanStdPair get_prepare_embeddings_duration();
22 | 
23 |     VLMPerfMetrics() = default;
24 | 
25 |     VLMPerfMetrics(PerfMetrics& perf_metrics) : PerfMetrics(perf_metrics), prepare_embeddings_duration() {}
26 | 
27 |     void evaluate_statistics(std::optional<TimePoint> start_time = std::nullopt) override;
28 | 
29 |     VLMPerfMetrics operator+(const VLMPerfMetrics& metrics) const;
30 | 
31 |     VLMRawPerfMetrics vlm_raw_metrics;
32 | };
33 | 
34 | }  // namespace ov::genai


--------------------------------------------------------------------------------
/src/cpp/src/continuous_batching/attention_output.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 | 
4 | #pragma once
5 | #include "openvino/openvino.hpp"
6 | using AttentionScoresForCacheOfSubsequence = ov::Tensor;
7 | using AttentionScoresForEachDecoderLayer = std::vector<AttentionScoresForCacheOfSubsequence>;
8 | using AttentionScoresForEachSubsequence = std::map<size_t, AttentionScoresForEachDecoderLayer>;
9 | 


--------------------------------------------------------------------------------
/src/cpp/src/gguf_utils/gguf_modeling.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <memory>
 7 | #include <string>
 8 | #include "openvino/openvino.hpp"
 9 | 
10 | std::shared_ptr<ov::Model> create_from_gguf(const std::string& model_path, const bool enable_save_ov_model);
11 | 


--------------------------------------------------------------------------------
/src/cpp/src/lm_encoding.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <optional>
 4 | #include "openvino/genai/llm_pipeline.hpp"
 5 | #include "visual_language/embedding_model.hpp"
 6 | #include "sampling/sampler.hpp"
 7 | 
 8 | namespace ov {
 9 | namespace genai {
10 | 
11 | ov::genai::utils::GenerationFinishInfo get_lm_encoded_results(ov::InferRequest& m_llm, const ov::Tensor& input_ids, const ov::Tensor& attention_mask,
12 |                                                               const std::shared_ptr<StreamerBase>& streamer_ptr, Sampler& sampler, std::vector<SequenceGroup::Ptr> sequence_groups,
13 |                                                               std::optional<ov::Tensor> position_ids, std::optional<ov::Tensor> token_type_ids, utils::KVCacheState& m_kv_cache_state, EmbeddingsModel::Ptr m_embedding,
14 |                                                               std::optional<int64_t> rope_delta = std::nullopt, const size_t max_kv_cache_size = std::numeric_limits<size_t>::max());
15 | 
16 | 
17 | void align_kv_cache_and_history(const ov::Tensor& new_chat_tokens, utils::KVCacheState& kv_cache_state);
18 | 
19 | 
20 | TokenizedInputs get_chat_encoded_input(const ov::Tensor& new_chat_tokens, utils::KVCacheState& kv_cache_state);
21 | 
22 | }  // namespace genai
23 | }  // namespace ov
24 | 


--------------------------------------------------------------------------------
/src/cpp/src/logger.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | #include <iostream>
 6 | #include <string>
 7 | 
 8 | namespace ov::genai {
 9 | 
10 | class Logger {
11 | public:
12 |     static void warn(const std::string& message) {
13 |         std::cout << "[WARN] " << message << '\n';
14 |     }
15 | };
16 | 
17 | }  // namespace ov::genai
18 | 


--------------------------------------------------------------------------------
/src/cpp/src/lora/common.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <map>
 7 | #include <memory>
 8 | #include <string>
 9 | #include <optional>
10 | #include <vector>
11 | 
12 | #include "openvino/op/constant.hpp"
13 | 
14 | namespace ov {
15 | namespace genai {
16 | namespace utils {
17 | 
18 | template <typename T>
19 | struct LoRAParts {
20 |     T alpha, A, B;
21 | 
22 |     LoRAParts() = default;
23 |     LoRAParts(const T& alpha, const T& A, const T& B) : alpha(alpha), A(A), B(B) {}
24 | 
25 |     template <typename Other>
26 |     LoRAParts(const LoRAParts<Other>& other) : alpha(other.alpha), A(other.A), B(other.B) {}
27 | };
28 | 
29 | 
30 | using LoRAWeight = LoRAParts<std::shared_ptr<ov::op::v0::Constant>>;
31 | using LoRATensors = std::map<std::string, LoRAWeight>;
32 | 
33 | }  // namespace utils
34 | }  // namespace genai
35 | }  // namespace ov
36 | 


--------------------------------------------------------------------------------
/src/cpp/src/lora/safetensors.c:
--------------------------------------------------------------------------------
1 | #define SAFETENSORS_IMPLEMENTATION
2 | #include "safetensors.h"


--------------------------------------------------------------------------------
/src/cpp/src/speculative_decoding/update_request_structs.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <vector>
 7 | #include <map>
 8 | 
 9 | namespace ov::genai {
10 | struct GeneratedSequence {
11 |     std::vector<int64_t> token_ids;
12 |     std::vector<float> log_probs;
13 | 
14 |     GeneratedSequence(const std::vector<int64_t>& generated_token_ids,
15 |                     const std::vector<float>& generated_log_probs) :
16 |         token_ids(generated_token_ids),
17 |         log_probs(generated_log_probs) {};
18 | };
19 | 
20 | struct UpdateRequestResult {
21 |     size_t inserted_tokens_cnt, removed_tokens_cnt;
22 | 
23 |     UpdateRequestResult(size_t to_insert = 0, size_t to_remove = 0) :
24 |         inserted_tokens_cnt(to_insert),
25 |         removed_tokens_cnt(to_remove) {};
26 | };
27 | 
28 | // { sequence_id : generated_tokens_and_log_probs }
29 | using GeneratedSequences = std::map<uint64_t, GeneratedSequence>;
30 | 
31 | // { request_id : generated_sequence }
32 | using GeneratedRequests = std::map<uint64_t, GeneratedSequences>;
33 | }  // namespace ov::genai
34 | 
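// Illustrative sketch, not from the repository: assembling the request and
// sequence maps defined in the internal header update_request_structs.hpp.
// The include path, ids, tokens, and log-probs are made up for illustration.
#include "speculative_decoding/update_request_structs.hpp"

int main() {
    ov::genai::GeneratedSequence sequence({101, 7592, 2088}, {-0.11f, -0.42f, -0.07f});

    ov::genai::GeneratedSequences sequences;  // { sequence_id : tokens + log_probs }
    sequences.emplace(0, sequence);

    ov::genai::GeneratedRequests requests;    // { request_id : sequences }
    requests.emplace(42, sequences);

    // Bookkeeping for one request update: 3 tokens inserted, none removed.
    ov::genai::UpdateRequestResult result(3, 0);
    return static_cast<int>(result.removed_tokens_cnt);
}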


--------------------------------------------------------------------------------
/src/cpp/src/speech_generation/speech_generation_perf_metrics.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "openvino/genai/speech_generation/speech_generation_perf_metrics.hpp"
 5 | 
 6 | #include <cmath>
 7 | #include <numeric>
 8 | 
 9 | namespace ov {
10 | namespace genai {
11 | 
12 | MeanStdPair calc_mean_and_std(const std::vector<MicroSeconds>& durations);
13 | 
14 | void SpeechGenerationPerfMetrics::evaluate_statistics(std::optional<TimePoint> start_time) {
15 |     if (m_evaluated) {
16 |         return;
17 |     }
18 | 
19 |     generate_duration = calc_mean_and_std(raw_metrics.generate_durations);
20 |     tokenization_duration = calc_mean_and_std(raw_metrics.tokenization_durations);
21 | 
22 |     // throughput in generated audio samples per second; the std is obtained by
23 |     // propagating the uncertainty of the mean generate duration
24 |     float throughput_mean = static_cast<float>(num_generated_samples) * 1000.0f / generate_duration.mean;
25 |     float throughput_std = (generate_duration.std * 1000.0f * static_cast<float>(num_generated_samples)) /
26 |                            (generate_duration.mean * generate_duration.mean);
27 |     throughput = {throughput_mean, throughput_std};
28 |     m_evaluated = true;
29 | }
30 | 
31 | }  // namespace genai
32 | }  // namespace ov
33 | 


--------------------------------------------------------------------------------
/src/cpp/src/speech_generation/speecht5_tts_decoder.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <filesystem>
 7 | 
 8 | #include "openvino/runtime/core.hpp"
 9 | 
10 | namespace ov::genai {
11 | 
12 | class SpeechT5TTSDecoder {
13 | public:
14 |     static std::shared_ptr<SpeechT5TTSDecoder> from_path(const std::filesystem::path& models_path,
15 |                                                          const std::string& device,
16 |                                                          const ov::AnyMap& properties);
17 | 
18 |     SpeechT5TTSDecoder(const std::filesystem::path& models_path,
19 |                        const std::string& device,
20 |                        const ov::AnyMap& properties);
21 | 
22 |     void start_async(const Tensor& inputs_embeds,
23 |                      const Tensor& speaker_embeddings,
24 |                      const Tensor& encoder_hidden_states,
25 |                      const Tensor& encoder_attention_mask,
26 |                      const Tensor& spectrogram);
27 | 
28 |     std::tuple<Tensor, Tensor, Tensor, Tensor> wait();
29 | 
30 |     void reset_state();
31 | 
32 |     ov::Tensor create_host_tensor(const element::Type element_type, const Shape& shape);
33 | 
34 | private:
35 |     ov::InferRequest m_request;
36 |     Tensor m_beam_idx_tensor;
37 | };
38 | }  // namespace ov::genai
39 | 


--------------------------------------------------------------------------------
/src/cpp/src/speech_generation/text2speech_pipeline_impl.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "text2speech_pipeline_impl.hpp"
 5 | 
 6 | namespace ov {
 7 | namespace genai {
 8 | 
 9 | SpeechGenerationPerfMetrics Text2SpeechPipelineImpl::get_performance_metrics() {
10 |     m_perf_metrics.load_time = m_load_time_ms;
11 |     return m_perf_metrics;
12 | }
13 | 
14 | void Text2SpeechPipelineImpl::save_load_time(std::chrono::steady_clock::time_point start_time) {
15 |     auto stop_time = std::chrono::steady_clock::now();
16 |     m_load_time_ms += std::chrono::duration_cast<std::chrono::milliseconds>(stop_time - start_time).count();
17 | }
18 | }  // namespace genai
19 | }  // namespace ov
20 | 


--------------------------------------------------------------------------------
/src/cpp/src/synchronized_queue.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <queue>
 7 | #include <mutex>
 8 | #include <condition_variable>
 9 | 
10 | template <typename T>
11 | class SynchronizedQueue
12 | {
13 |     std::queue<T> m_queue;
14 |     std::mutex m_mutex;
15 |     std::condition_variable m_cv;
16 | 
17 | public:
18 |     SynchronizedQueue() = default;
19 |     SynchronizedQueue(const SynchronizedQueue&) = delete;
20 |     SynchronizedQueue(const SynchronizedQueue&&) = delete;
21 |     SynchronizedQueue& operator=(const SynchronizedQueue&) = delete;
22 | 
23 |     T back() {
24 |         std::unique_lock<std::mutex> lock(m_mutex);
25 |         m_cv.wait(lock, [this]{return !m_queue.empty(); });
26 |         return m_queue.back();
27 |     }
28 | 
29 |     T pull() {
30 |         std::unique_lock<std::mutex> lock(m_mutex);
31 |         m_cv.wait(lock, [this]{return !m_queue.empty();});
32 |         auto val = m_queue.front();
33 |         m_queue.pop();
34 |         return val;
35 |     }
36 | 
37 |     void push(const T& item) {
38 |         std::unique_lock<std::mutex> lock(m_mutex);
39 |         m_queue.push(item);
40 |         m_cv.notify_one();
41 |     }
42 | 
43 |     bool empty() {
44 |         std::unique_lock<std::mutex> lock(m_mutex);
45 |         return m_queue.empty();
46 |     }
47 | };
48 | 
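// Illustrative sketch, not from the repository: SynchronizedQueue is a minimal
// blocking producer/consumer queue; pull() waits until an item is available.
// The include path is the internal header shown above.
#include <string>
#include <thread>
#include "synchronized_queue.hpp"

int main() {
    SynchronizedQueue<std::string> queue;

    std::thread producer([&queue] {
        queue.push("first chunk");
        queue.push("second chunk");
    });

    std::string a = queue.pull();  // blocks until the producer has pushed an item
    std::string b = queue.pull();
    producer.join();

    return (a == "first chunk" && b == "second chunk") ? 0 : 1;
}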


--------------------------------------------------------------------------------
/src/cpp/src/visual_language/vl_sdpa_transformations.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "visual_language/vl_sdpa_transformations.hpp"
 5 | 
 6 | #include "utils.hpp"
 7 | 
 8 | namespace ov {
 9 | namespace genai {
10 | namespace utils {
11 | 
12 | void request_vl_sdpa_transformations(std::shared_ptr<ov::Model> model) {
13 |     model->set_rt_info("QWenVL", "model_type_hint");
14 | }
15 | 
16 | bool check_vl_sdpa_transformations(const ov::CompiledModel& compiled_model) {
17 |     const std::vector<std::string> target_names {"cu_seq_lens", "cu_window_seqlens"};
18 | 
19 |     bool exists = false;
20 |     for (auto &input : compiled_model.inputs()) {
21 |         const auto& names = input.get_names();
22 | 
23 |         for (const auto& target : target_names) {
24 |             exists |= (names.find(target) != names.end());
25 |         }
26 |     }
27 | 
28 |     return exists;
29 | }
30 | 
31 | }  // namespace utils
32 | }  // namespace genai
33 | }  // namespace ov
34 | 


--------------------------------------------------------------------------------
/src/cpp/src/visual_language/vl_sdpa_transformations.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <vector>
 7 | 
 8 | #include "openvino/core/any.hpp"
 9 | #include "openvino/core/model.hpp"
10 | #include "openvino/runtime/compiled_model.hpp"
11 | 
12 | namespace ov {
13 | namespace genai {
14 | 
15 | namespace utils {
16 | 
17 | /** Requests transforming SDPA ov::Model to VLSDPA. It's up to a plugin to apply the transformation.
18 |  * @param model Pointer to the ov::Model representing one of the supported VLM architectures.
19 |  */
20 | void request_vl_sdpa_transformations(std::shared_ptr<ov::Model> model);
21 | 
22 | bool check_vl_sdpa_transformations(const ov::CompiledModel& compiled_model);
23 | 
24 | }  // namespace utils
25 | }  // namespace genai
26 | }  // namespace ov
27 | 
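// Illustrative sketch, not from the repository: requesting the VL-SDPA
// transformation on a model before compilation and then checking whether the
// plugin applied it (detected via the extra cu_seq_lens / cu_window_seqlens
// inputs). The model file name and device are assumptions for illustration.
#include "openvino/runtime/core.hpp"
#include "visual_language/vl_sdpa_transformations.hpp"  // internal header

int main() {
    ov::Core core;
    auto model = core.read_model("vision_encoder.xml");

    // Hint the plugin that the SDPA subgraphs may be rewritten to VLSDPA.
    ov::genai::utils::request_vl_sdpa_transformations(model);
    auto compiled = core.compile_model(model, "GPU");

    // True only if the plugin honoured the hint during compilation.
    bool applied = ov::genai::utils::check_vl_sdpa_transformations(compiled);
    return applied ? 0 : 1;
}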


--------------------------------------------------------------------------------
/src/cpp/src/whisper/config.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "whisper/config.hpp"
 5 | 
 6 | #include <fstream>
 7 | #include <nlohmann/json.hpp>
 8 | 
 9 | #include "openvino/core/except.hpp"
10 | 
11 | #include "json_utils.hpp"
12 | 
13 | namespace ov {
14 | namespace genai {
15 | 
16 | WhisperConfig::WhisperConfig(const std::filesystem::path& json_path) {
17 |     // preprocessor_config.json not found. Skip parameters initialization from file, use defaults.
18 |     if (!std::filesystem::exists(json_path)) {
19 |         return;
20 |     }
21 | 
22 |     using ov::genai::utils::read_json_param;
23 | 
24 |     std::ifstream f(json_path);
25 |     OPENVINO_ASSERT(f.is_open(), "Failed to open '", json_path, "' with config");
26 | 
27 |     nlohmann::json data = nlohmann::json::parse(f);
28 | 
29 |     read_json_param(data, "max_source_positions", max_source_positions);
30 | }
31 | 
32 | }  // namespace genai
33 | }  // namespace ov
34 | 


--------------------------------------------------------------------------------
/src/cpp/src/whisper/config.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <filesystem>
 7 | 
 8 | namespace ov {
 9 | namespace genai {
10 | 
11 | /**
12 |  * @brief Structure to keep whisper config parameters.
13 |  */
14 | class WhisperConfig {
15 | public:
16 |     explicit WhisperConfig(const std::filesystem::path& json_path);
17 | 
18 |     size_t max_source_positions = 1500;
19 | };
20 | 
21 | }  // namespace genai
22 | }  // namespace ov
23 | 


--------------------------------------------------------------------------------
/src/cpp/src/whisper/context_tokens.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "openvino/genai/perf_metrics.hpp"
 7 | #include "openvino/genai/whisper_generation_config.hpp"
 8 | 
 9 | namespace ov {
10 | namespace genai {
11 | 
12 | struct WhisperContextTokens {
13 |     std::vector<int64_t> initial_prompt;
14 |     std::vector<int64_t> hotwords;
15 | };
16 | 
17 | std::pair<WhisperContextTokens, float> prepare_context_tokens(const WhisperGenerationConfig& config,
18 |                                                               Tokenizer& tokenizer);
19 | 
20 | std::vector<int64_t> get_prompt_tokens(const WhisperContextTokens& context_tokens,
21 |                                        const WhisperGenerationConfig& config,
22 |                                        size_t chunk_offset);
23 | 
24 | }  // namespace genai
25 | }  // namespace ov
26 | 


--------------------------------------------------------------------------------
/src/cpp/src/whisper/logit_processor.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <openvino/openvino.hpp>
 7 | 
 8 | #include "openvino/genai/whisper_generation_config.hpp"
 9 | 
10 | namespace ov {
11 | namespace genai {
12 | 
13 | void do_suppress_tokens(ov::Tensor& logits, const size_t batch_idx, const std::vector<int64_t>& suppress_tokens);
14 | 
15 | void process_whisper_timestamp_logits(ov::Tensor& logits,
16 |                                       const size_t batch_idx,
17 |                                       const ov::genai::WhisperGenerationConfig& config,
18 |                                       const std::vector<int64_t>& generated_tokens,
19 |                                       bool initial_step = false);
20 | 
21 | }  // namespace genai
22 | }  // namespace ov
23 | 


--------------------------------------------------------------------------------
/src/cpp/src/whisper/models.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <openvino/runtime/core.hpp>
 7 | 
 8 | namespace ov {
 9 | namespace genai {
10 | 
11 | struct WhisperInitializedModels {
12 |     ov::InferRequest encoder;
13 |     ov::InferRequest decoder;
14 |     ov::InferRequest decoder_with_past;
15 | };
16 | }  // namespace genai
17 | }  // namespace ov
18 | 


--------------------------------------------------------------------------------
/src/cpp/src/whisper/models/statefull_decoder.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2024-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "decoder.hpp"
 7 | #include "openvino/runtime/core.hpp"
 8 | 
 9 | namespace ov::genai {
10 | 
11 | class WhisperStatefullDecoder : public WhisperDecoder {
12 | public:
13 |     WhisperStatefullDecoder(const std::filesystem::path& models_path,
14 |                             const std::string& device,
15 |                             const ov::AnyMap& properties,
16 |                             const ov::PartialShape& lhs_shape);
17 | 
18 |     void start_async(const Tensor& encoder_hidden_state, const Tensor& input_ids, const Tensor& beam_idx) override;
19 | 
20 |     Tensor wait() override;
21 | 
22 |     void reset_state() override;
23 | 
24 |     ov::Tensor create_host_tensor(const element::Type element_type, const Shape& shape) override;
25 | 
26 | private:
27 |     void _set_cache_position_tensor(const size_t seq_len);
28 | 
29 | private:
30 |     ov::InferRequest m_request;
31 |     bool m_has_cache_position = true;
32 | };
33 | }  // namespace ov::genai
34 | 


--------------------------------------------------------------------------------
/src/cpp/src/whisper/models/with_past_decoder.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "decoder.hpp"
 7 | #include "openvino/runtime/core.hpp"
 8 | 
 9 | namespace ov::genai {
10 | 
11 | class WhisperWithPastDecoder : public WhisperDecoder {
12 | public:
13 |     WhisperWithPastDecoder(const std::filesystem::path& models_path,
14 |                            const std::string& device,
15 |                            const ov::AnyMap& properties);
16 | 
17 |     void start_async(const Tensor& encoder_hidden_state, const Tensor& input_ids, const Tensor& beam_idx) override;
18 | 
19 |     Tensor wait() override;
20 | 
21 |     void reset_state() override;
22 | 
23 | private:
24 |     ov::InferRequest m_request_decoder;
25 |     ov::InferRequest m_request_decoder_with_past;
26 |     size_t m_cache_position = 0;
27 |     bool m_initial_past_key_value_set = false;
28 |     bool m_past_key_value_linked = false;
29 |     bool m_past_decoder_has_cache_position = true;
30 | 
31 |     void _set_past_key_value(const Tensor& beam_idx);
32 | };
33 | 
34 | }  // namespace ov::genai
35 | 


--------------------------------------------------------------------------------
/src/cpp/src/whisper/pipeline_base.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "openvino/genai/whisper_pipeline.hpp"
 7 | #include "utils.hpp"
 8 | #include "whisper/config.hpp"
 9 | #include "whisper/feature_extractor.hpp"
10 | 
11 | namespace ov {
12 | namespace genai {
13 | 
14 | class WhisperPipeline::WhisperPipelineImplBase {
15 | public:
16 |     WhisperGenerationConfig m_generation_config;
17 |     Tokenizer m_tokenizer;
18 |     WhisperFeatureExtractor m_feature_extractor;
19 |     WhisperConfig m_model_config;
20 | 
21 |     float m_load_time_ms = 0;
22 | 
23 |     WhisperPipelineImplBase(const std::filesystem::path& models_path)
24 |         : m_generation_config(utils::from_config_json_if_exists<WhisperGenerationConfig>(models_path)),
25 |           m_tokenizer{models_path},
26 |           m_feature_extractor{models_path / "preprocessor_config.json"},
27 |           m_model_config{models_path / "config.json"} {}
28 | 
29 |     virtual WhisperDecodedResults generate(const RawSpeechInput& raw_speech_input,
30 |                                            OptionalWhisperGenerationConfig generation_config,
31 |                                            const std::shared_ptr<StreamerBase> streamer) = 0;
32 | 
33 |     virtual ~WhisperPipelineImplBase() = default;
34 | };
35 | 
36 | }  // namespace genai
37 | }  // namespace ov
38 | 


--------------------------------------------------------------------------------
/src/cpp/src/whisper/pipeline_static.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <filesystem>
 7 | #include <string>
 8 | 
 9 | #include "openvino/genai/streamer_base.hpp"
10 | #include "openvino/genai/tokenizer.hpp"
11 | #include "openvino/genai/whisper_pipeline.hpp"
12 | #include "whisper/models.hpp"
13 | #include "whisper/pipeline_base.hpp"
14 | #include "sampling/sampler.hpp"
15 | 
16 | namespace ov {
17 | namespace genai {
18 | 
19 | class WhisperPipeline::StaticWhisperPipeline : public WhisperPipeline::WhisperPipelineImplBase {
20 | public:
21 |     StaticWhisperPipeline(const std::filesystem::path& model_path, const ov::AnyMap& properties);
22 | 
23 |     WhisperDecodedResults generate(const RawSpeechInput& raw_speech_input,
24 |                                    OptionalWhisperGenerationConfig generation_config,
25 |                                    const std::shared_ptr<StreamerBase> streamer) override;
26 | 
27 | private:
28 |     WhisperInitializedModels m_models;
29 |     Sampler m_sampler;
30 | };
31 | 
32 | }  // namespace genai
33 | }  // namespace ov
34 | 


--------------------------------------------------------------------------------
/src/cpp/src/whisper/timestamps.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2023-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <openvino/openvino.hpp>
 7 | 
 8 | #include "whisper.hpp"
 9 | 
10 | namespace ov {
11 | namespace genai {
12 | 
13 | struct ExtractedSegments {
14 |     std::vector<ov::genai::Segment> segments;
15 |     size_t last_offset = 0;
16 |     std::vector<int64_t> non_timestamp_tokens;
17 |     std::vector<std::pair<size_t, size_t>> segment_ranges;
18 | };
19 | 
20 | ExtractedSegments extract_segments(const std::vector<int64_t>& tokens,
21 |                                    const ov::genai::WhisperGenerationConfig& config,
22 |                                    const size_t nb_max_frames,
23 |                                    const float time_precision,
24 |                                    const float time_offset = 0.f);
25 | 
26 | }  // namespace genai
27 | }  // namespace ov
28 | 


--------------------------------------------------------------------------------
/src/cpp/src/whisper/whisper_utils.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <openvino/openvino.hpp>
 7 | 
 8 | #include "openvino/genai/perf_metrics.hpp"
 9 | 
10 | namespace ov {
11 | namespace genai {
12 | namespace utils {
13 | 
14 | void infer_with_perf_metrics(ov::InferRequest& request, ov::genai::RawPerfMetrics& raw_metrics);
15 | 
16 | void filter_non_segment_metrics(ov::genai::RawPerfMetrics& raw_metrics,
17 |                                 size_t offset,
18 |                                 std::vector<std::pair<size_t, size_t>>& ranges);
19 | 
20 | int64_t argmax(const ov::Tensor& logits, const size_t batch_idx);
21 | 
22 | }  // namespace utils
23 | }  // namespace genai
24 | }  // namespace ov
25 | 


--------------------------------------------------------------------------------
/src/docs/beam_idx-drop.gif:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:701839c28ac1e05c1c9e23823c74a10149a343210192e51df36e563ff6e257e4
3 | size 5700875
4 | 


--------------------------------------------------------------------------------
/src/docs/beam_idx-fork.gif:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:292753b30a2153c92cedf16672ba182a851ec30c95c309cdaca13173f00fe700
3 | size 6062552
4 | 


--------------------------------------------------------------------------------
/src/docs/openvino_genai.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:703732cd6a85f2cbcfd0915d63c10483114f05b71b834d2228501700074d0053
3 | size 1053573
4 | 


--------------------------------------------------------------------------------
/src/docs/stateful.jpg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:a6fb5ab9990c845eef8847bdf76799fcaefe0a9afa10fb9d07f6df4394a9e2ad
3 | size 129471
4 | 


--------------------------------------------------------------------------------
/src/docs/stateless.jpg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:20904ff7a8793359b978cfcdc85c482e0764291af17b572936955f586e202ea9
3 | size 113440
4 | 


--------------------------------------------------------------------------------
/src/js/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | bin
3 | bin.*
4 | build
5 | dist
6 | node_modules
7 | tests/models
8 | types
9 | 


--------------------------------------------------------------------------------
/src/js/.npmignore:
--------------------------------------------------------------------------------
 1 | bin
 2 | include
 3 | lib
 4 | src
 5 | tests
 6 | thirdparty
 7 | 
 8 | CMakeLists.txt
 9 | eslint.config.cjs
10 | .prettierrc
11 | tsconfig.json
12 | *.md
13 | !README.md
14 | 
15 | **/*.tsbuildinfo
16 | *.tgz
17 | 


--------------------------------------------------------------------------------
/src/js/.prettierrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://json.schemastore.org/prettierrc",
 3 |   "semi": true,
 4 |   "printWidth": 100,
 5 |   "endOfLine": "lf",
 6 |   "tabWidth": 2,
 7 |   "singleQuote": false,
 8 |   "trailingComma": "all",
 9 |   "bracketSpacing": true,
10 |   "proseWrap": "always"
11 | }
12 | 


--------------------------------------------------------------------------------
/src/js/include/addon.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2018-2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <napi.h>
 7 | 
 8 | typedef Napi::Function (*Prototype)(Napi::Env);
 9 | 
10 | struct AddonData {
11 |     Napi::FunctionReference core;
12 |     Napi::FunctionReference tokenizer;
13 |     Napi::FunctionReference perf_metrics;
14 |     Napi::FunctionReference chat_history;
15 | };
16 | 
17 | void init_class(Napi::Env env,
18 |                 Napi::Object exports,
19 |                 std::string class_name,
20 |                 Prototype func,
21 |                 Napi::FunctionReference& reference);
22 | 
23 | Napi::Object init_module(Napi::Env env, Napi::Object exports);
24 | 


--------------------------------------------------------------------------------
/src/js/include/chat_history.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <napi.h>
 4 | #include "openvino/genai/chat_history.hpp"
 5 | 
 6 | class ChatHistoryWrap : public Napi::ObjectWrap<ChatHistoryWrap> {
 7 | public:
 8 |     static Napi::Function get_class(Napi::Env env);
 9 |     
10 |     ChatHistoryWrap(const Napi::CallbackInfo& info);
11 |     
12 |     ov::genai::ChatHistory& get_value();
13 | 
14 | private:
15 |     Napi::Value push_back(const Napi::CallbackInfo& info);
16 |     void pop_back(const Napi::CallbackInfo& info);
17 |     Napi::Value get_messages(const Napi::CallbackInfo& info);
18 |     Napi::Value set_messages(const Napi::CallbackInfo& info);
19 |     void clear(const Napi::CallbackInfo& info);
20 |     Napi::Value size(const Napi::CallbackInfo& info);
21 |     Napi::Value empty(const Napi::CallbackInfo& info);
22 |     Napi::Value set_tools(const Napi::CallbackInfo& info);
23 |     Napi::Value get_tools(const Napi::CallbackInfo& info);
24 |     Napi::Value set_extra_context(const Napi::CallbackInfo& info);
25 |     Napi::Value get_extra_context(const Napi::CallbackInfo& info);
26 |     
27 |     ov::genai::ChatHistory m_chat_history;
28 | };
29 | 


--------------------------------------------------------------------------------
/src/js/include/llm_pipeline/finish_chat_worker.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <napi.h>
 4 | #include "openvino/genai/llm_pipeline.hpp"
 5 | 
 6 | using namespace Napi;
 7 | 
 8 | class FinishChatWorker : public AsyncWorker {
 9 |  public:
10 |   FinishChatWorker(Function& callback, std::shared_ptr<ov::genai::LLMPipeline>& pipe);
11 |   virtual ~FinishChatWorker(){}
12 | 
13 |   void Execute() override;
14 |   void OnOK() override;
15 | 
16 |  private:
17 |   std::shared_ptr<ov::genai::LLMPipeline>& pipe;
18 | };
19 | 


--------------------------------------------------------------------------------
/src/js/include/llm_pipeline/init_worker.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <napi.h>
 4 | #include "openvino/genai/llm_pipeline.hpp"
 5 | 
 6 | using namespace Napi;
 7 | 
 8 | class InitWorker : public AsyncWorker {
 9 | public:
10 |     InitWorker(Function& callback,
11 |                std::shared_ptr<ov::genai::LLMPipeline>& pipe,
12 |                const std::string model_path,
13 |                std::string device,
14 |                ov::AnyMap properties);
15 |     virtual ~InitWorker() {}
16 | 
17 |     void Execute() override;
18 |     void OnOK() override;
19 | 
20 | private:
21 |     std::shared_ptr<ov::genai::LLMPipeline>& pipe;
22 |     std::string model_path;
23 |     std::string device;
24 |     ov::AnyMap properties;
25 | };
26 | 


--------------------------------------------------------------------------------
/src/js/include/llm_pipeline/llm_pipeline_wrapper.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <thread>
 4 | #include <napi.h>
 5 | #include "openvino/genai/llm_pipeline.hpp"
 6 | 
 7 | class LLMPipelineWrapper : public Napi::ObjectWrap<LLMPipelineWrapper> {
 8 | public:
 9 |     LLMPipelineWrapper(const Napi::CallbackInfo& info);
10 | 
11 |     static Napi::Function get_class(Napi::Env env);
12 | 
13 |     Napi::Value init(const Napi::CallbackInfo& info);
14 |     Napi::Value generate(const Napi::CallbackInfo& info);
15 |     Napi::Value start_chat(const Napi::CallbackInfo& info);
16 |     Napi::Value finish_chat(const Napi::CallbackInfo& info);
17 |     Napi::Value get_tokenizer(const Napi::CallbackInfo& info);
18 | private:
19 |     bool is_loaded = false;
20 |     bool is_initialized = false;
21 |     bool is_running = false;
22 | 
23 |     std::string model_path;
24 |     std::string device;
25 | 
26 |     std::shared_ptr<ov::genai::LLMPipeline> pipe = nullptr;
27 |     std::function<bool(std::string)> streamer;
28 | };
29 | 


--------------------------------------------------------------------------------
/src/js/include/llm_pipeline/start_chat_worker.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <napi.h>
 4 | #include "openvino/genai/llm_pipeline.hpp"
 5 | 
 6 | using namespace Napi;
 7 | 
 8 | class StartChatWorker : public AsyncWorker {
 9 |  public:
10 |   StartChatWorker(Function& callback, std::shared_ptr<ov::genai::LLMPipeline>& pipe, std::string system_message);
11 |   virtual ~StartChatWorker(){}
12 | 
13 |   void Execute() override;
14 |   void OnOK() override;
15 | 
16 |  private:
17 |   std::shared_ptr<ov::genai::LLMPipeline>& pipe;
18 |   std::string system_message;
19 | };
20 | 


--------------------------------------------------------------------------------
/src/js/include/text_embedding_pipeline/embed_documents_worker.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <napi.h>
 4 | #include "openvino/genai/rag/text_embedding_pipeline.hpp"
 5 | 
 6 | using namespace Napi;
 7 | 
 8 | class EmbedDocumentsWorker : public AsyncWorker {
 9 |     public:
10 |         EmbedDocumentsWorker(
11 |             Function& callback,
12 |             std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe,
13 |             Array documents
14 |         );
15 |         virtual ~EmbedDocumentsWorker(){}
16 | 
17 |         void Execute() override;
18 |         void OnOK() override;
19 |     private:
20 |         std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe;
21 |         std::vector<std::string> documents;
22 |         ov::genai::EmbeddingResults embed_results;
23 | };
24 | 


--------------------------------------------------------------------------------
/src/js/include/text_embedding_pipeline/embed_query_worker.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <napi.h>
 4 | #include "openvino/genai/rag/text_embedding_pipeline.hpp"
 5 | 
 6 | using namespace Napi;
 7 | 
 8 | class EmbedQueryWorker : public AsyncWorker {
 9 |     public:
10 |         EmbedQueryWorker(
11 |             Function& callback,
12 |             std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe,
13 |             String text);
14 |         virtual ~EmbedQueryWorker(){}
15 | 
16 |         void Execute() override;
17 |         void OnOK() override;
18 |     private:
19 |         std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe;
20 |         std::string text;
21 |         ov::genai::EmbeddingResult embed_result;
22 | };
23 | 


--------------------------------------------------------------------------------
/src/js/include/text_embedding_pipeline/init_worker.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <napi.h>
 4 | #include "openvino/genai/rag/text_embedding_pipeline.hpp"
 5 | 
 6 | using namespace Napi;
 7 | 
 8 | class EmbeddingInitWorker : public AsyncWorker {
 9 |     public:
10 |         EmbeddingInitWorker(
11 |             Function& callback,
12 |             std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe,
13 |             const std::string model_path,
14 |             std::string device,
15 |             Object config,
16 |             Object properties
17 |         );
18 |         virtual ~EmbeddingInitWorker(){}
19 |         void Execute() override;
20 |         void OnOK() override;
21 |     private:
22 |         std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe;
23 |         std::string model_path;
24 |         std::string device;
25 |         ov::AnyMap config;
26 |         ov::AnyMap properties;
27 | };
28 | 


--------------------------------------------------------------------------------
/src/js/include/text_embedding_pipeline/pipeline_wrapper.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <napi.h>
 4 | #include "openvino/genai/rag/text_embedding_pipeline.hpp"
 5 | 
 6 | class TextEmbeddingPipelineWrapper : public Napi::ObjectWrap<TextEmbeddingPipelineWrapper> {
 7 |     public:
 8 |         TextEmbeddingPipelineWrapper(const Napi::CallbackInfo& info);
 9 |         static Napi::Function get_class(Napi::Env env);
10 |         Napi::Value init(const Napi::CallbackInfo& info);
11 |         Napi::Value embed_documents(const Napi::CallbackInfo& info);
12 |         Napi::Value embed_documents_async(const Napi::CallbackInfo& info);
13 |         Napi::Value embed_query(const Napi::CallbackInfo& info);
14 |         Napi::Value embed_query_async(const Napi::CallbackInfo& info);
15 |     private:
16 |         std::shared_ptr<ov::genai::TextEmbeddingPipeline> pipe = nullptr;
17 | };
18 | 


--------------------------------------------------------------------------------
/src/js/include/tokenizer.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <napi.h>
 4 | #include "openvino/genai/tokenizer.hpp"
 5 | 
 6 | class TokenizerWrapper : public Napi::ObjectWrap<TokenizerWrapper> {
 7 | public:
 8 |     TokenizerWrapper(const Napi::CallbackInfo& info);
 9 |     static Napi::Function get_class(Napi::Env env);
10 |     static Napi::Object wrap(Napi::Env env, ov::genai::Tokenizer tokenizer);
11 |     Napi::Value apply_chat_template(const Napi::CallbackInfo& info);
12 |     Napi::Value get_bos_token(const Napi::CallbackInfo& info);
13 |     Napi::Value get_bos_token_id(const Napi::CallbackInfo& info);
14 |     Napi::Value get_eos_token(const Napi::CallbackInfo& info);
15 |     Napi::Value get_eos_token_id(const Napi::CallbackInfo& info);
16 |     Napi::Value get_pad_token(const Napi::CallbackInfo& info);
17 |     Napi::Value get_pad_token_id(const Napi::CallbackInfo& info);
18 | private:
19 |     ov::genai::Tokenizer _tokenizer;
20 | };
21 | 


--------------------------------------------------------------------------------
/src/js/scripts/download-runtime.cjs:
--------------------------------------------------------------------------------
 1 | const { join } = require("node:path");
 2 | const BinaryManager = require("openvino-node/scripts/lib/binary-manager");
 3 | 
 4 | const packageJson = require("../package.json");
 5 | 
 6 | if (require.main === module) main();
 7 | 
 8 | async function main() {
 9 |   if (!BinaryManager.isCompatible()) process.exit(1);
10 | 
11 |   const force = process.argv.includes("-f");
12 |   const ignoreIfExists = process.argv.includes("--ignore-if-exists");
13 | 
14 |   const { env } = process;
15 |   const proxy = env.http_proxy || env.HTTP_PROXY || env.npm_config_proxy;
16 | 
17 |   await BinaryManager.prepareBinary(
18 |     join(__dirname, ".."),
19 |     packageJson.binary.version || packageJson.version,
20 |     packageJson.binary,
21 |     { force, ignoreIfExists, proxy },
22 |   );
23 | }
24 | 


--------------------------------------------------------------------------------
/src/js/src/llm_pipeline/finish_chat_worker.cpp:
--------------------------------------------------------------------------------
 1 | #include "include/llm_pipeline/finish_chat_worker.hpp"
 2 | #include <chrono>
 3 | #include <thread>
 4 | 
 5 | FinishChatWorker::FinishChatWorker(Function& callback, std::shared_ptr<ov::genai::LLMPipeline>& pipe)
 6 |     : AsyncWorker(callback), pipe(pipe) {};
 7 | 
 8 | void FinishChatWorker::Execute() {
 9 |   this->pipe->finish_chat();
10 | };
11 | 
12 | void FinishChatWorker::OnOK() {
13 |   Callback().Call({ Env().Null() });
14 | };
15 | 


--------------------------------------------------------------------------------
/src/js/src/llm_pipeline/init_worker.cpp:
--------------------------------------------------------------------------------
 1 | #include "include/llm_pipeline/init_worker.hpp"
 2 | #include <chrono>
 3 | #include <thread>
 4 | 
 5 | InitWorker::InitWorker(
 6 |   Function& callback,
 7 |   std::shared_ptr<ov::genai::LLMPipeline>& pipe,
 8 |   const std::string model_path,
 9 |   const std::string device,
10 |   const ov::AnyMap properties
11 | ) : AsyncWorker(callback), pipe(pipe), model_path(model_path), device(device), properties(properties) {};
12 | 
13 | void InitWorker::Execute() {
14 |   this->pipe = std::make_shared<ov::genai::LLMPipeline>(this->model_path, this->device, this->properties);
15 | };
16 | 
17 | void InitWorker::OnOK() {
18 |   Callback().Call({ Env().Null() });
19 | };
20 | 


--------------------------------------------------------------------------------
/src/js/src/llm_pipeline/start_chat_worker.cpp:
--------------------------------------------------------------------------------
 1 | #include "include/llm_pipeline/start_chat_worker.hpp"
 2 | #include <chrono>
 3 | #include <thread>
 4 | 
 5 | StartChatWorker::StartChatWorker(Function& callback, std::shared_ptr<ov::genai::LLMPipeline>& pipe, std::string system_message)
 6 |     : AsyncWorker(callback), pipe(pipe), system_message(system_message) {};
 7 | 
 8 | void StartChatWorker::Execute() {
 9 |   this->pipe->start_chat(this->system_message);
10 | };
11 | 
12 | void StartChatWorker::OnOK() {
13 |   Callback().Call({ Env().Null() });
14 | };
15 | 


--------------------------------------------------------------------------------
/src/js/src/text_embedding_pipeline/embed_documents_worker.cpp:
--------------------------------------------------------------------------------
 1 | #include "include/helper.hpp"
 2 | #include "include/text_embedding_pipeline/embed_documents_worker.hpp"
 3 | 
 4 | EmbedDocumentsWorker::EmbedDocumentsWorker(
 5 |     Function& callback,
 6 |     std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe,
 7 |     Array documents
 8 | ) : AsyncWorker(callback), pipe(pipe), documents(js_to_cpp<std::vector<std::string>>(Env(), documents)) {};
 9 | 
10 | void EmbedDocumentsWorker::Execute() {
11 |     try {
12 |         this->embed_results = this->pipe->embed_documents(this->documents);
13 |     } catch(const std::exception& ex) {
14 |         SetError(ex.what());
15 |     }
16 | };
17 | 
18 | void EmbedDocumentsWorker::OnOK() {
19 |     Callback().Call({
20 |         Env().Null(),                                                                   // Error result
21 |         cpp_to_js<ov::genai::EmbeddingResults, Napi::Value>(Env(), this->embed_results) // Ok result
22 |     });
23 | };
24 | 


--------------------------------------------------------------------------------
/src/js/src/text_embedding_pipeline/embed_query_worker.cpp:
--------------------------------------------------------------------------------
 1 | #include "include/helper.hpp"
 2 | #include "include/text_embedding_pipeline/embed_query_worker.hpp"
 3 | 
 4 | EmbedQueryWorker::EmbedQueryWorker(
 5 |     Function& callback,
 6 |     std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe,
 7 |     String text
 8 | ) : AsyncWorker(callback), pipe(pipe), text(text.ToString()) {};
 9 | 
10 | void EmbedQueryWorker::Execute() {
11 |     try {
12 |         this->embed_result = this->pipe->embed_query(this->text);
13 |     } catch(const std::exception& ex) {
14 |         SetError(ex.what());
15 |     }
16 | };
17 | 
18 | void EmbedQueryWorker::OnOK() {
19 |     Callback().Call({ 
20 |         Env().Null(),                                                                   // Error result
21 |         cpp_to_js<ov::genai::EmbeddingResult, Napi::Value>(Env(), this->embed_result)   // Ok result
22 |     });
23 | };


--------------------------------------------------------------------------------
/src/js/src/text_embedding_pipeline/init_worker.cpp:
--------------------------------------------------------------------------------
 1 | #include "include/text_embedding_pipeline/init_worker.hpp"
 2 | #include "include/helper.hpp"
 3 | #include <chrono>
 4 | #include <thread>
 5 | 
 6 | EmbeddingInitWorker::EmbeddingInitWorker(
 7 |     Function& callback,
 8 |     std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe,
 9 |     const std::string model_path,
10 |     const std::string device,
11 |     Object config,
12 |     Object properties
13 | ) : AsyncWorker(callback),
14 |     pipe(pipe),
15 |     model_path(model_path),
16 |     device(device),
17 |     config(js_to_cpp<ov::AnyMap>(Env(), config)),
18 |     properties(js_to_cpp<ov::AnyMap>(Env(), properties)) {};
19 | 
20 | void EmbeddingInitWorker::Execute() {
21 |     try {
22 |         ov::genai::TextEmbeddingPipeline::Config config(this->config);
23 |         this->pipe = std::make_shared<ov::genai::TextEmbeddingPipeline>(this->model_path, this->device, config, this->properties);
24 |     } catch(const std::exception& ex) {
25 |         SetError(ex.what());
26 |     }
27 | };
28 | 
29 | void EmbeddingInitWorker::OnOK() {
30 |     Callback().Call({
31 |         Env().Null()      // Error result
32 |     });
33 | };
34 | 


--------------------------------------------------------------------------------
/src/js/tests/models.js:
--------------------------------------------------------------------------------
1 | export const models = {
2 |   LLM: "OpenVINO/Llama-3.1-8B-Instruct-FastDraft-150M-int8-ov",
3 |   InstructLLM: "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov",
4 |   Embedding: "OpenVINO/bge-base-en-v1.5-fp16-ov",
5 | };
6 | 


--------------------------------------------------------------------------------
/src/js/tests/setup.js:
--------------------------------------------------------------------------------
1 | import { downloadModel } from "./utils.js";
2 | import { models } from "./models.js";
3 | 
4 | for (const model of Object.values(models)) {
5 |   await downloadModel(model);
6 | }
7 | 


--------------------------------------------------------------------------------
/src/js/tests/utils.js:
--------------------------------------------------------------------------------
 1 | import { bootstrap } from "global-agent";
 2 | import { promises as fs } from "node:fs";
 3 | import { listFiles, downloadFile } from "@huggingface/hub";
 4 | 
 5 | const BASE_DIR = "./tests/models/";
 6 | 
 7 | bootstrap();
 8 | 
 9 | export async function downloadModel(repo) {
10 |   console.log(`Downloading model '${repo}'`);
11 | 
12 |   const fetch = await import("node-fetch");
13 |   const modelName = repo.split("/")[1];
14 |   const destDir = `${BASE_DIR}${modelName}`;
15 | 
16 |   await fs.mkdir(destDir, { recursive: true });
17 | 
18 |   const fileList = await listFiles({
19 |     repo,
20 |     fetch: fetch.default,
21 |   });
22 |   const fileNames = [];
23 |   for await (const file of fileList) {
24 |     fileNames.push(file.path);
25 |   }
26 | 
27 |   for (const path of fileNames) {
28 |     console.log(`Downloading file '${path}'`);
29 |     const response = await downloadFile({
30 |       repo,
31 |       path,
32 |       fetch: fetch.default,
33 |     });
34 |     const filename = `${destDir}/${path}`;
35 | 
36 |     await saveFile(filename, response);
37 |     console.log(`File '${path}' downloaded`);
38 |   }
39 | 
40 |   console.log(`Model '${repo}' downloaded`);
41 | }
42 | 
43 | async function saveFile(file, response) {
44 |   const arrayBuffer = await response.arrayBuffer();
45 | 
46 |   await fs.writeFile(file, Buffer.from(arrayBuffer));
47 | }
48 | 


--------------------------------------------------------------------------------
/src/python/clean_version.cmake:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2024 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | 
 5 | foreach(var IN ITEMS init_pyi_file)
 6 |     if(NOT DEFINED ${var})
 7 |         message(FATAL_ERROR "Variable ${var} is not defined")
 8 |     endif()
 9 | endforeach()
10 | 
11 | file(STRINGS ${init_pyi_file} file_lines)
12 | 
13 | foreach(file_line IN LISTS file_lines)
14 |     if(file_line MATCHES "^__version__.*")
15 |         set(file_line "__version__: str")
16 |     endif()
17 | 
18 |     set(file_content "${file_content}${file_line}\n")
19 | endforeach()
20 | 
21 | file(WRITE ${init_pyi_file} ${file_content})
22 | 


--------------------------------------------------------------------------------
/src/python/remove_abi_specific_info.cmake:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | foreach(var IN ITEMS init_pyi_file)
 5 |     if(NOT DEFINED ${var})
 6 |         message(FATAL_ERROR "Variable ${var} is not defined")
 7 |     endif()
 8 | endforeach()
 9 | 
10 | file(STRINGS ${init_pyi_file} file_lines)
11 | 
12 | foreach(file_line IN LISTS file_lines)
13 |     if(file_line MATCHES "^from openvino_genai\\.py_openvino_genai\\..* import draft_model$")
14 |         set(file_line "from openvino_genai.py_openvino_genai import draft_model")
15 |     endif()
16 |     if(file_line MATCHES "^from openvino_genai\\.py_openvino_genai\\..* import get_version$")
17 |         set(file_line "from openvino_genai.py_openvino_genai import get_version")
18 |     endif()
19 | 
20 |     set(file_content "${file_content}${file_line}\n")
21 | endforeach()
22 | 
23 | file(WRITE ${init_pyi_file} ${file_content})
24 | 


--------------------------------------------------------------------------------
/tests/cpp/helper.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2024 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 | 
4 | #pragma once
5 | 
6 | #include "openvino/runtime/core.hpp"
7 | 
8 | std::shared_ptr<ov::Model> get_dummy_model(ov::Core core, size_t num_layers);


--------------------------------------------------------------------------------
/tests/cpp/utils.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2018-2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | //
 4 | 
 5 | #include <gtest/gtest.h>
 6 | #include "utils.hpp"
 7 | 
 8 | 
 9 | using namespace ov::genai::utils;
10 | using map_type = std::map<std::string, int64_t>;
11 | 
12 | TEST(TestIsContainer, test_is_container) {
13 |     EXPECT_EQ(is_container<int>, false);
14 |     EXPECT_EQ(is_container<int64_t>, false);
15 |     EXPECT_EQ(is_container<float>, false);
16 |     EXPECT_EQ(is_container<size_t>, false);
17 |     EXPECT_EQ(is_container<std::string>, true);
18 |     EXPECT_EQ(is_container<std::vector<float>>, true);
19 |     EXPECT_EQ(is_container<map_type>, true);
20 |     EXPECT_EQ(is_container<std::set<int64_t>>, true);
21 | }


--------------------------------------------------------------------------------
/tests/python_tests/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2018-2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0


--------------------------------------------------------------------------------
/tests/python_tests/data/short_prompts.txt:
--------------------------------------------------------------------------------
 1 | The Earth revolves around the Sun.
 2 | Water is essential for all known forms of life.
 3 | The human body is composed of about 60% water.
 4 | Photosynthesis allows plants to convert sunlight into energy.
 5 | The speed of light is approximately 299,792 kilometers per second.
 6 | Ice is less dense than liquid water.
 7 | The brain contains around 86 billion neurons.
 8 | Honey never spoils due to its low moisture content.
 9 | A group of lions is called a pride.
10 | The Great Wall of China is visible from space.
11 | Humans share 99.9% of their DNA with chimpanzees.
12 | The average adult has 206 bones in their body.
13 | Bananas are berries, while strawberries are not.
14 | The Pacific Ocean is the largest ocean on Earth.
15 | Sound travels faster in water than in air.
16 | The Eiffel Tower can be 15 cm taller during the summer.
17 | 


--------------------------------------------------------------------------------
/tests/python_tests/data/test_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2024 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | from openvino_genai import GenerationConfig
 5 | from utils.generation_config import get_greedy, get_beam_search, get_multinomial_temperature
 6 | 
 7 | def get_test_dataset() -> tuple[list[str], list[GenerationConfig]]:
 8 |     prompts = [
 9 |         "What is OpenVINO?",
10 |         "How are you?",
11 |         "What is your name?",
12 |         "Tell me something about Canada"
13 |     ]
14 |     generation_configs = [
15 |         get_greedy(),
16 |         get_beam_search(),
17 |         get_greedy(),
18 |         get_beam_search(),
19 |     ]
20 |     return (prompts, generation_configs)
21 | 


--------------------------------------------------------------------------------
/tests/python_tests/models/precommit:
--------------------------------------------------------------------------------
1 | hf-tiny-model-private/tiny-random-CodeGenForCausalLM
2 | hf-tiny-model-private/tiny-random-GPT2LMHeadModel
3 | hf-tiny-model-private/tiny-random-OPTForCausalLM


--------------------------------------------------------------------------------
/tests/python_tests/pytest.ini:
--------------------------------------------------------------------------------
 1 | [pytest]
 2 | 
 3 | markers =
 4 |     ; The following markers are defined for categorizing tests:
 5 |     ; precommit   - Tests that should be run before committing code.
 6 |     ; real_models - Tests that involve execution of the models from models/real_models file
 7 |     ; samples     - Tests related to the sample models.
 8 |     ; llm         - Tests related to large language models.
 9 |     ; whisper     - Tests related to the Whisper model.
10 |     ; dreamlike_anime_1_0 - Image generation tests subset with dreamlike-anime-1.0.
11 |     ; LCM_Dreamshaper_v7_int8_ov - Image generation tests subset with LCM_Dreamshaper_v7-int8-ov.
12 |     ; vlm         - Tests related to the VLM model.
13 |     ; rag         - Tests related to the RAG components.
14 |     ; speech_generation - Tests related to text-to-speech generation
15 |     precommit
16 |     real_models
17 |     samples
18 |     llm
19 |     whisper
20 |     dreamlike_anime_1_0
21 |     LCM_Dreamshaper_v7_int8_ov
22 |     vlm
23 |     agent
24 |     rag
25 |     speech_generation
26 | 
27 | addopts = -m precommit
28 | 


--------------------------------------------------------------------------------
/tests/python_tests/requirements.txt:
--------------------------------------------------------------------------------
 1 | --extra-index-url https://download.pytorch.org/whl/cpu
 2 | diffusers==0.35.2
 3 | optimum-intel==1.26.0
 4 | numpy==1.26.4; platform_system == "Darwin" and platform_machine == "x86_64"
 5 | safetensors==0.6.2; platform_system == "Darwin" and platform_machine == "x86_64"
 6 | pytest==8.4.2
 7 | transformers==4.55.4
 8 | hf_transfer==0.1.9
 9 | gguf==0.17.1
10 | 
11 | # rag requirements
12 | langchain_community==0.4
13 | langchain-core==1.0.3
14 | 
15 | # requirements for specific models
16 | # - Qwen/Qwen-7B
17 | # - Qwen/Qwen-7B-Chat
18 | einops==0.8.1
19 | # - openbmb/MiniCPM-V-2
20 | torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64"
21 | torchvision==0.23.0; platform_system != "Darwin" or platform_machine != "x86_64"
22 | # - openbmb/MiniCPM-V-2
23 | timm==1.0.22
24 | # - openai/whisper-base
25 | librosa==0.11.0
26 | soundfile==0.13.1
27 | datasets==4.1.1; sys_platform == "linux"
28 | datasets==3.6.0; sys_platform != "linux"
29 | torchcodec==0.7.0; sys_platform == "linux"
30 | rouge==1.0.1
31 | # - microsoft/Phi-4-multimodal-instruct
32 | peft==0.17.1
33 | 


--------------------------------------------------------------------------------
/tests/python_tests/samples/test_benchmark_vlm.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | import os
 5 | import pytest
 6 | import sys
 7 | 
 8 | from conftest import SAMPLES_PY_DIR, SAMPLES_CPP_DIR
 9 | from test_utils import run_sample
10 | 
11 | class TestBenchmarkVLM:
12 |     @pytest.mark.vlm
13 |     @pytest.mark.samples
14 |     @pytest.mark.parametrize(
15 |         "convert_model, download_test_content",
16 |         [
17 |             pytest.param("tiny-random-minicpmv-2_6", "images/image.png"),
18 |         ],
19 |         indirect=["convert_model", "download_test_content"],
20 |     )
21 |     def test_sample_benchmark_vlm(self, convert_model, download_test_content):
22 |         num_iter = "3"
23 |         # Run C++ benchmark sample
24 |         benchmark_sample = os.path.join(SAMPLES_CPP_DIR, 'benchmark_vlm')
25 |         benchmark_cpp_command = [benchmark_sample, "-m" , convert_model, "-i", download_test_content, "-n", num_iter]
26 |         run_sample(benchmark_cpp_command)
27 |         
28 |         # Run Python benchmark sample
29 |         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'visual_language_chat/benchmark_vlm.py')
30 |         benchmark_py_command = [sys.executable, benchmark_script, "-m" , convert_model, "-i", download_test_content, "-n", num_iter]
31 |         run_sample(benchmark_py_command)


--------------------------------------------------------------------------------
/tests/python_tests/samples/test_compound_grammar_sample.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | import pytest
 5 | import sys
 6 | 
 7 | from conftest import SAMPLES_PY_DIR, SAMPLES_JS_DIR
 8 | from test_utils import run_sample
 9 | 
10 | 
11 | @pytest.mark.llm
12 | @pytest.mark.samples
13 | @pytest.mark.parametrize("convert_model", ["TinyLlama-1.1B-Chat-v1.0"], indirect=True)
14 | def test_structured_output_sample(convert_model):
15 |     # Test PY sample
16 |     py_script = SAMPLES_PY_DIR / "text_generation" / "compound_grammar_generation.py"
17 |     py_command = [sys.executable, py_script, convert_model]
18 |     py_result = run_sample(py_command)
19 |     py_predictions = py_result.stdout
20 | 
21 |     # Test JS sample
22 |     js_sample = SAMPLES_JS_DIR / "text_generation" / "compound_grammar_generation.js"
23 |     js_command = ["node", js_sample, convert_model]
24 |     js_result = run_sample(js_command)
25 |     js_predictions = js_result.stdout
26 | 
27 |     # Compare results
28 |     assert py_predictions == js_predictions, "Python and JS results should match"
29 | 
30 | 


--------------------------------------------------------------------------------
/tests/python_tests/samples/test_encrypted_model_causal_lm.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | import os
 5 | import pytest
 6 | import sys
 7 | 
 8 | from conftest import SAMPLES_PY_DIR, SAMPLES_CPP_DIR
 9 | from test_utils import run_sample
10 | 
11 | class TestEncryptedLM:
12 |     @pytest.mark.llm
13 |     @pytest.mark.samples
14 |     @pytest.mark.parametrize("convert_model", ["Qwen2.5-0.5B-Instruct"], indirect=True)
15 |     @pytest.mark.parametrize("prompt", ["Why is the sun yellow?"])
16 | 
17 |     def test_sample_encrypted_lm(self, convert_model, prompt):
18 |         # Test CPP sample
19 |         cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'encrypted_model_causal_lm')
20 |         cpp_command =[cpp_sample, convert_model, prompt]
21 |         cpp_result = run_sample(cpp_command)
22 | 
23 |         # Test Python sample
24 |         py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/encrypted_model_causal_lm.py")
25 |         py_command = [sys.executable, py_script, convert_model, prompt]
26 |         py_result = run_sample(py_command)
27 | 
28 |         # Compare results
29 |         assert py_result.stdout == cpp_result.stdout, "Results should match"
30 | 


--------------------------------------------------------------------------------
/tests/python_tests/samples/test_heterogeneous_stable_diffusion.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | import pytest
 5 | import sys
 6 | 
 7 | from conftest import SAMPLES_PY_DIR, SAMPLES_CPP_DIR
 8 | from test_utils import run_sample
 9 | 
10 | class TestHeterogeneousStableDiffusion:
11 |     @pytest.mark.samples
12 |     @pytest.mark.LCM_Dreamshaper_v7_int8_ov
13 |     @pytest.mark.parametrize("executable", [
14 |         [SAMPLES_CPP_DIR / "heterogeneous_stable_diffusion"],
15 |         [sys.executable, SAMPLES_PY_DIR / "image_generation/heterogeneous_stable_diffusion.py"],
16 |     ])
17 |     @pytest.mark.parametrize(
18 |         "download_model, prompt",
19 |         [
20 |             pytest.param("LCM_Dreamshaper_v7-int8-ov", "cyberpunk cityscape like Tokyo New York  with tall buildings at dusk golden hour cinematic lighting"),
21 |         ],
22 |         indirect=["download_model"],
23 |     )
24 |     def test_sample_heterogeneous_stable_diffusion(self, executable, download_model, prompt):
25 |         run_sample(executable + [download_model, '"' + prompt + '"'])
26 | 


--------------------------------------------------------------------------------
/tests/python_tests/samples/test_lora.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2024 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | import os
 5 | import pytest
 6 | import sys
 7 | 
 8 | from conftest import SAMPLES_PY_DIR
 9 | from test_utils import run_sample
10 | 
11 | class TestLora:
12 |     @pytest.mark.llm
13 |     @pytest.mark.samples
14 |     @pytest.mark.parametrize("convert_model", ["TinyStories-1M"], indirect=True)
15 |     @pytest.mark.parametrize("sample_args", ["How to create a table with two columns, one of them has type float, another one has type int?"])
16 |     @pytest.mark.parametrize("download_test_content", ["adapter_model.safetensors"], indirect=True)
17 |     def test_python_sample_lora(self, convert_model, download_test_content, sample_args):      
18 |         py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/lora_greedy_causal_lm.py")
19 |         py_command = [sys.executable, py_script, convert_model, download_test_content, sample_args]
20 |         run_sample(py_command)


--------------------------------------------------------------------------------
/tests/python_tests/samples/test_lora_text2image.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | import pytest
 5 | import sys
 6 | 
 7 | from conftest import SAMPLES_PY_DIR, SAMPLES_CPP_DIR
 8 | from test_utils import run_sample
 9 | 
10 | class TestLoraText2Image:
11 |     @pytest.mark.samples
12 |     @pytest.mark.dreamlike_anime_1_0
13 |     @pytest.mark.parametrize(
14 |         "convert_model, prompt, sample_args",
15 |         [
16 |             pytest.param("dreamlike-anime-1.0", "curly-haired unicorn in the forest, anime, line", "0.7"),
17 |         ],
18 |         indirect=["convert_model"],
19 |     )
20 |     @pytest.mark.parametrize("download_test_content", ["soulcard.safetensors"], indirect=True)
21 |     @pytest.mark.parametrize("executable", [
22 |         [SAMPLES_CPP_DIR / 'lora_text2image'],
23 |         [sys.executable, SAMPLES_PY_DIR / "image_generation/lora_text2image.py"],
24 |     ])
25 |     def test_sample_lora_text2image(self, convert_model, prompt, download_test_content, sample_args, executable):
26 |         run_sample(executable + [convert_model, prompt, download_test_content, sample_args])
27 | 


--------------------------------------------------------------------------------
/tests/python_tests/samples/test_react_sample.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 |  
 4 | import os
 5 | import pytest
 6 | import sys
 7 | 
 8 | from conftest import SAMPLES_PY_DIR, SAMPLES_JS_DIR
 9 | from test_utils import run_sample
10 |     
11 | class TestReactSample:
12 |     @pytest.mark.llm
13 |     @pytest.mark.agent
14 |     @pytest.mark.samples
15 |     @pytest.mark.parametrize("convert_model", ["TinyLlama-1.1B-Chat-v1.0"], indirect=True)
16 |     def test_react_sample_refs(self, request, convert_model):
17 |         if sys.platform == 'darwin':
18 |             pytest.xfail("Ticket 173586")
19 |         # Python test
20 |         py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/react_sample.py")
21 |         py_command = [sys.executable, py_script, convert_model]
22 |         py_result = run_sample(py_command)
23 | 
24 |         # Test JS sample
25 |         js_sample = os.path.join(SAMPLES_JS_DIR, "text_generation/react_sample.js")
26 |         js_command =['node', js_sample, convert_model]
27 |         js_result = run_sample(js_command)
28 | 
29 |         assert py_result.stdout == js_result.stdout, "Results should match"
30 | 
31 | 


--------------------------------------------------------------------------------
/tests/python_tests/samples/test_structural_tag_generation.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | import pytest
 5 | import sys
 6 | 
 7 | from conftest import SAMPLES_PY_DIR, SAMPLES_JS_DIR
 8 | from test_utils import run_sample
 9 | 
10 | 
11 | @pytest.mark.llm
12 | @pytest.mark.samples
13 | @pytest.mark.parametrize("convert_model", ["TinyLlama-1.1B-Chat-v1.0"], indirect=True)
14 | def test_structured_output_sample(convert_model):
15 |     # Python test
16 |     py_script = SAMPLES_PY_DIR / "text_generation" / "structural_tags_generation.py"
17 |     py_command = [sys.executable, py_script, convert_model]
18 |     py_result = run_sample(py_command)
19 |     py_predictions = py_result.stdout
20 | 
21 |     # JS test
22 |     js_script = SAMPLES_JS_DIR / "text_generation" / "structural_tags_generation.js"
23 |     js_command = ["node", js_script, convert_model]
24 |     js_result = run_sample(js_command)
25 |     js_predictions = js_result.stdout
26 | 
27 |     # Compare results
28 |     assert py_predictions == js_predictions, "Python and JS results should match"
29 | 


--------------------------------------------------------------------------------
/tests/python_tests/samples/test_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2025 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | from conftest import logger
 4 | import os
 5 | import subprocess # nosec B404
 6 | 
 7 | def run_sample(command, input_data=None, env=os.environ):
 8 |     logger.info(f"Running sample command: {' '.join(map(str, command))}")
 9 |     if input_data:
10 |         logger.info(f"Input data: {input_data}")
11 |     try:
12 |         result = subprocess.run(command, text=True, check=True, encoding='utf-8', env=env, input=input_data, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
13 |     except subprocess.CalledProcessError as error:
14 |         logger.error(f"Sample returned {error.returncode}. Output:\n{error.output}")
15 |         raise
16 |     logger.info(f"Sample output: {result.stdout}")
17 |     return result
18 | 
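
For reference, a minimal sketch of how run_sample is typically driven by the sample tests, including the input_data and env parameters that the listed tests do not exercise. The script path, model directory, and OMP_NUM_THREADS override are illustrative assumptions, not values taken from this repository's fixtures.

import os
import sys

from test_utils import run_sample

# Hypothetical paths; in the real tests these come from conftest fixtures.
chat_script = "samples/python/text_generation/chat_sample.py"
model_dir = "./models/TinyLlama-1.1B-Chat-v1.0"

# Drive an interactive sample through stdin and pass a tweaked environment.
env = dict(os.environ, OMP_NUM_THREADS="1")
result = run_sample(
    [sys.executable, chat_script, model_dir],
    input_data="Why is the sky blue?\n",
    env=env,
)
print(result.stdout)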


--------------------------------------------------------------------------------
/tests/python_tests/test_kv_cache_eviction/kv_cache_eviction_utils.py:
--------------------------------------------------------------------------------
 1 | from openvino_genai import SchedulerConfig
 2 | 
 3 | def get_scheduler_config(num_kv_blocks: int) -> SchedulerConfig:
 4 |     scheduler_config = SchedulerConfig()
 5 |     scheduler_config.num_kv_blocks = num_kv_blocks
 6 |     scheduler_config.dynamic_split_fuse = True
 7 |     scheduler_config.max_num_batched_tokens = 256
 8 |     scheduler_config.max_num_seqs = 256
 9 |     scheduler_config.use_cache_eviction = False
10 |     return scheduler_config
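
Configs produced by this helper are meant to be plugged into a pipeline rather than used on their own. Below is a hedged sketch of that flow, assuming the helper module is importable from the working directory and that passing scheduler_config as an LLMPipeline property routes generation through the continuous-batching backend; the model path is a placeholder.

import openvino_genai as ov_genai

from kv_cache_eviction_utils import get_scheduler_config

# Hypothetical path to an exported OpenVINO model directory.
MODELS_PATH = "./models/TinyLlama-1.1B-Chat-v1.0"

# 300 KV-cache blocks, dynamic split-fuse on, eviction off (see get_scheduler_config above).
scheduler_config = get_scheduler_config(num_kv_blocks=300)

# Assumption: scheduler_config is accepted as a pipeline property.
pipe = ov_genai.LLMPipeline(MODELS_PATH, "CPU", scheduler_config=scheduler_config)
print(pipe.generate("What is OpenVINO?", max_new_tokens=32))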


--------------------------------------------------------------------------------
/tests/python_tests/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2018-2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0


--------------------------------------------------------------------------------
/tests/python_tests/utils/qwen3_reranker_utils.py:
--------------------------------------------------------------------------------
 1 | def qwen3_reranker_format_queries(query, instruction=None):
 2 |     prefix = '<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>\n<|im_start|>user\n'
 3 |     if instruction is None:
 4 |         instruction = "Given a web search query, retrieve relevant passages that answer the query"
 5 |     return f"{prefix}<Instruct>: {instruction}\n<Query>: {query}\n"
 6 | 
 7 | 
 8 | def qwen3_reranker_format_document(document):
 9 |     suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
10 |     return f"<Document>: {document}{suffix}"
11 | 
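
The two helpers above are meant to be combined: the formatted query acts as the prefix and each formatted document completes one scoring prompt for the reranker. A small illustrative pairing follows; the query and documents are made up.

from qwen3_reranker_utils import qwen3_reranker_format_queries, qwen3_reranker_format_document

query = "What does OpenVINO GenAI provide?"
documents = [
    "OpenVINO GenAI offers optimized pipelines for text generation.",
    "Bananas are botanically classified as berries.",
]

# One full reranker prompt per (query, document) pair.
query_part = qwen3_reranker_format_queries(query)
prompts = [query_part + qwen3_reranker_format_document(doc) for doc in documents]

for prompt in prompts:
    print(prompt)
    print("-" * 40)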


--------------------------------------------------------------------------------
/thirdparty/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2024 Intel Corporation
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | 
 5 | option(BUILD_TOKENIZERS "Build OpenVINO Tokenizers together with OpenVINO GenAI" ON)
 6 | 
 7 | if(BUILD_TOKENIZERS)
 8 |     add_subdirectory(./openvino_tokenizers/ "${CMAKE_BINARY_DIR}/openvino_tokenizers/")
 9 |     # Put binaries to a single dir to mimic package structure.
10 |     set_target_properties(openvino_tokenizers PROPERTIES
11 |         # Generator expressions to disable appending a per-configuration subdirectory (Release, Debug).
12 |         # ARCHIVE_OUTPUT is irrelevant. It's here just to keep all the artifacts in one place.
13 |         ARCHIVE_OUTPUT_DIRECTORY "
lt;1:${CMAKE_BINARY_DIR}/openvino_genai/>"
14 |         LIBRARY_OUTPUT_DIRECTORY "
lt;1:${CMAKE_BINARY_DIR}/openvino_genai/>"
15 |         RUNTIME_OUTPUT_DIRECTORY "
lt;1:${CMAKE_BINARY_DIR}/openvino_genai/>"
16 |     )
17 | endif()
18 | 


--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvinotoolkit/openvino.genai/91dc71e0ded5a4624289723266f7b3811369f0e9/tools/__init__.py


--------------------------------------------------------------------------------
/tools/cacheviz/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023-2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 | 
4 | 


--------------------------------------------------------------------------------
/tools/cacheviz/requirements.txt:
--------------------------------------------------------------------------------
1 | argparse
2 | matplotlib


--------------------------------------------------------------------------------
/tools/continuous_batching/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2018-2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 | #
4 | 
5 | add_subdirectory(accuracy)
6 | add_subdirectory(benchmark)
7 | 


--------------------------------------------------------------------------------
/tools/llm_bench/llm_bench_utils/llm_hook_beam_search/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvinotoolkit/openvino.genai/91dc71e0ded5a4624289723266f7b3811369f0e9/tools/llm_bench/llm_bench_utils/llm_hook_beam_search/__init__.py


--------------------------------------------------------------------------------
/tools/llm_bench/llm_bench_utils/llm_hook_sample/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvinotoolkit/openvino.genai/91dc71e0ded5a4624289723266f7b3811369f0e9/tools/llm_bench/llm_bench_utils/llm_hook_sample/__init__.py


--------------------------------------------------------------------------------
/tools/llm_bench/llm_bench_utils/prompt_utils.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (C) 2023-2025 Intel Corporation
 3 | # SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | from .model_utils import get_param_from_file
 6 | from .parse_json_data import parse_text_json_data
 7 | 
 8 | 
 9 | def get_text_prompt(args):
10 |     text_list = []
11 |     output_data_list, is_json_data = get_param_from_file(args, 'prompt')
12 |     if is_json_data is True:
13 |         text_param_list = parse_text_json_data(output_data_list)
14 |         if len(text_param_list) > 0:
15 |             for text in text_param_list:
16 |                 text_list.append(text)
17 |     else:
18 |         text_list.append(output_data_list[0])
19 |     return text_list
20 | 


--------------------------------------------------------------------------------
/tools/llm_bench/prompts/llava-1.5-7b.jsonl:
--------------------------------------------------------------------------------
1 | {"prompt": "Describe this image in details", "media": "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11"}


--------------------------------------------------------------------------------
/tools/llm_bench/prompts/scheduler_config.json:
--------------------------------------------------------------------------------
 1 | {   
 2 |     "num_kv_blocks": 300,
 3 |     "dynamic_split_fuse": true,
 4 |     "max_num_batched_tokens": 256,
 5 |     "max_num_seqs": 256,
 6 |     "use_cache_eviction": true,
 7 |     "enable_prefix_caching": false,
 8 |     "cache_eviction_config": {
 9 |         "start_size": 32,
10 |         "recent_size": 32,
11 |         "max_cache_size": 128,
12 |         "aggregation_mode": "NORM_SUM",
13 |         "apply_rotation": false,
14 |         "snapkv_window_size": 8,
15 |         "kvcrush_config": {"budget": 0, "anchor_point_mode": "RANDOM", "rng_seed": 0}
16 |     },
17 |     "sparse_attention_config": {
18 |         "mode": "TRISHAPE",
19 |         "num_last_dense_tokens_in_prefill": 100, 
20 |         "num_retained_start_tokens_in_cache": 128, 
21 |         "num_retained_recent_tokens_in_cache": 1920, 
22 |         "xattention_threshold": 0.8, 
23 |         "xattention_block_size": 64, 
24 |         "xattention_stride": 8
25 |     }
26 | }


--------------------------------------------------------------------------------
/tools/llm_bench/prompts/stable-diffusion-i2i.jsonl:
--------------------------------------------------------------------------------
1 | {"steps":"30", "width":"256", "height":"256", "guidance_scale":"0.8", "prompt": "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k", "media": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png"}
2 | {"prompt": "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k", "media": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png"}
3 | 


--------------------------------------------------------------------------------
/tools/llm_bench/prompts/stable-diffusion-inpainting.jsonl:
--------------------------------------------------------------------------------
1 | {"steps":"30", "width":"256", "height":"256", "guidance_scale":"0.8", "prompt": "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k", "media": "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png", "mask_image": "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"}
2 | {"prompt": "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k", "media": "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png", "mask_image": "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"}


--------------------------------------------------------------------------------
/tools/llm_bench/prompts/stable-diffusion.jsonl:
--------------------------------------------------------------------------------
1 | {"steps":"30", "width":"256", "height":"256", "guidance_scale":"1.0", "prompt": "side profile centered painted portrait, Gandhi rolling a blunt, Gloomhaven, matte painting concept art, art nouveau, 8K HD Resolution, beautifully background"}
2 | {"prompt": "side profile centered painted portrait, Gandhi rolling a blunt, Gloomhaven, matte painting concept art, art nouveau, 8K HD Resolution, beautifully background"}
3 | 


--------------------------------------------------------------------------------
/tools/llm_bench/requirements.txt:
--------------------------------------------------------------------------------
 1 | --extra-index-url https://download.pytorch.org/whl/cpu
 2 | numpy
 3 | --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
 4 | openvino
 5 | openvino-tokenizers
 6 | openvino_genai
 7 | pillow
 8 | torch
 9 | transformers[sentencepiece]>=4.40.0
10 | diffusers>=0.22.0
11 | #optimum is in dependency list of optimum-intel 
12 | optimum-intel[nncf]>=1.25.0
13 | packaging
14 | psutil
15 | timm
16 | tiktoken
17 | librosa # For Whisper
18 | matplotlib
19 | jinja2>=3.1.0
20 | scipy
21 | gguf_parser
22 | gguf>=0.10
23 | num2words
24 | 


--------------------------------------------------------------------------------
/tools/llm_bench/requirements/requirements_conversion.txt:
--------------------------------------------------------------------------------
1 | einops
2 | transformers_stream_generator
3 | backoff
4 | bitsandbytes
5 | -r ../requirements.txt
6 | 


--------------------------------------------------------------------------------
/tools/llm_bench/requirements/requirements_gptq.txt:
--------------------------------------------------------------------------------
1 | auto-gptq>=0.5.1 # for gptq


--------------------------------------------------------------------------------
/tools/llm_bench/setup.cfg:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | # ignore:
 3 | # D100 - Missing docstring in public module
 4 | # D101 - Missing docstring in public class
 5 | # D103 - Missing docstring in public function
 6 | # VNE001 - Single letter variable names are not allowed
 7 | # W503 - https://www.flake8rules.com/rules/W503.html conflicts with W504
 8 | filename = *.py
 9 | max-line-length = 160
10 | ignore = E203,D100,D101,D103,VNE001,W503
11 | max-parameters-amount = 8
12 | show_source = True
13 | docstring-convention = google
14 | enable-extensions = G
15 | 
16 | [pydocstyle]
17 | convention = google
18 | 
19 | [mypy]
20 | ignore_missing_imports = True
21 | disable_error_code = attr-defined
22 | show_column_numbers = True
23 | show_error_context = True
24 | show_absolute_path = True
25 | pretty = True
26 | 


--------------------------------------------------------------------------------
/tools/who_what_benchmark/examples/gptq_eval.py:
--------------------------------------------------------------------------------
 1 | import whowhatbench
 2 | from transformers import AutoModelForCausalLM, AutoTokenizer
 3 | 
 4 | model_id = "meta-llama/Llama-2-7b-chat-hf"
 5 | model_gptq_id = "TheBloke/Llama-2-7B-Chat-GPTQ"
 6 | 
 7 | model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
 8 | tokenizer = AutoTokenizer.from_pretrained(model_id)
 9 | 
10 | 
11 | evaluator = whowhatbench.Evaluator(base_model=model, tokenizer=tokenizer)
12 | 
13 | model_int4 = AutoModelForCausalLM.from_pretrained(model_gptq_id, device_map="auto")
14 | all_metrics_per_question, all_metrics = evaluator.score(model_int4)
15 | 
16 | print(all_metrics_per_question)
17 | print(all_metrics)
18 | 
19 | metrics = ["similarity", "SDT norm"]
20 | 
21 | for metric in metrics:
22 |     worst_examples = evaluator.worst_examples(top_k=5, metric=metric)
23 |     print("Metric: ", metric)
24 |     for e in worst_examples:
25 |         print("\t=========================")
26 |         print(f"\t{metric}: ", e[metric])
27 |         print("\tPrompt: ", e["prompt"])
28 |         print("\tSource Model:\n ", "\t" + e["source_model"])
29 |         print("\tOptimized Model:\n ", "\t" + e["optimized_model"])
30 | 


--------------------------------------------------------------------------------
/tools/who_what_benchmark/examples/huggingface_eval.py:
--------------------------------------------------------------------------------
 1 | import whowhatbench
 2 | from transformers import AutoModelForCausalLM, AutoTokenizer
 3 | 
 4 | model_id = "meta-llama/Llama-2-7b-chat-hf"
 5 | 
 6 | model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
 7 | tokenizer = AutoTokenizer.from_pretrained(model_id)
 8 | 
 9 | 
10 | evaluator = whowhatbench.Evaluator(base_model=model, tokenizer=tokenizer)
11 | 
12 | model_int4 = AutoModelForCausalLM.from_pretrained(
13 |     model_id, load_in_4bit=True, device_map="auto"
14 | )
15 | all_metrics_per_question, all_metrics = evaluator.score(model_int4)
16 | 
17 | print(all_metrics_per_question)
18 | print(all_metrics)
19 | 
20 | metrics = ["similarity", "SDT norm"]
21 | 
22 | for metric in metrics:
23 |     worst_examples = evaluator.worst_examples(top_k=5, metric=metric)
24 |     print("Metric: ", metric)
25 |     for e in worst_examples:
26 |         print("\t=========================")
27 |         print(f"\t{metric}: ", e[metric])
28 |         print("\tPrompt: ", e["prompt"])
29 |         print("\tSource Model:\n ", "\t" + e["source_model"])
30 |         print("\tOptimized Model:\n ", "\t" + e["optimized_model"])
31 | 


--------------------------------------------------------------------------------
/tools/who_what_benchmark/examples/openvino_eval.py:
--------------------------------------------------------------------------------
 1 | import whowhatbench
 2 | from optimum.intel.openvino import OVModelForCausalLM
 3 | from transformers import AutoTokenizer
 4 | 
 5 | model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 6 | 
 7 | model = OVModelForCausalLM.from_pretrained(model_id, load_in_8bit=False, export=True)
 8 | tokenizer = AutoTokenizer.from_pretrained(model_id)
 9 | 
10 | 
11 | evaluator = whowhatbench.Evaluator(base_model=model, tokenizer=tokenizer)
12 | 
13 | model_int8 = OVModelForCausalLM.from_pretrained(
14 |     model_id, load_in_8bit=True, export=True
15 | )
16 | all_metrics_per_question, all_metrics = evaluator.score(model_int8)
17 | 
18 | print(all_metrics_per_question)
19 | print(all_metrics)
20 | 
21 | metrics = ["similarity", "SDT norm"]
22 | 
23 | for metric in metrics:
24 |     worst_examples = evaluator.worst_examples(top_k=5, metric=metric)
25 |     print("Metric: ", metric)
26 |     for e in worst_examples:
27 |         print("\t=========================")
28 |         print(f"\t{metric}: ", e[metric])
29 |         print("\tPrompt: ", e["prompt"])
30 |         print("\tSource Model:\n ", "\t" + e["source_model"])
31 |         print("\tOptimized Model:\n ", "\t" + e["optimized_model"])
32 | 


--------------------------------------------------------------------------------
/tools/who_what_benchmark/requirements.txt:
--------------------------------------------------------------------------------
 1 | accelerate>=0.26.0
 2 | transformers>=4.35.2
 3 | sentence-transformers>=2.2.2
 4 | openvino-genai
 5 | optimum-intel[nncf]>=1.19.0
 6 | pandas>=2.0.3
 7 | numpy>=1.23.5
 8 | tqdm>=4.66.1
 9 | diffusers
10 | datasets>=3.6.0
11 | auto-gptq; sys_platform == "linux"
12 | autoawq<0.2.8; sys_platform == "linux"
13 | sentencepiece
14 | jinja2>=3.1.0
15 | scipy


--------------------------------------------------------------------------------
/tools/who_what_benchmark/setup.cfg:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | # ignore:
 3 | # D100 - Missing docstring in public module
 4 | # D101 - Missing docstring in public class
 5 | # D103 - Missing docstring in public function
 6 | # VNE001 - Single letter variable names are not allowed
 7 | # W503 - https://www.flake8rules.com/rules/W503.html conflicts with W504
 8 | filename = *.py
 9 | max-line-length = 160
10 | ignore = E203,D100,D101,D103,VNE001,W503
11 | max-parameters-amount = 8
12 | show_source = True
13 | docstring-convention = google
14 | enable-extensions = G
15 | per-file-ignores =
16 |     # imports order
17 |     tools/who_what_benchmark/whowhatbench/wwb.py: E402
18 | 
19 | [pydocstyle]
20 | convention = google
21 | 
22 | [mypy]
23 | ignore_missing_imports = True
24 | disable_error_code = attr-defined
25 | show_column_numbers = True
26 | show_error_context = True
27 | show_absolute_path = True
28 | pretty = True
29 | 


--------------------------------------------------------------------------------
/tools/who_what_benchmark/whowhatbench/__init__.py:
--------------------------------------------------------------------------------
 1 | from .registry import register_evaluator, EVALUATOR_REGISTRY
 2 | from .text_evaluator import TextEvaluator
 3 | from .text_evaluator import TextEvaluator as Evaluator
 4 | from .text2image_evaluator import Text2ImageEvaluator
 5 | from .visualtext_evaluator import VisualTextEvaluator
 6 | from .im2im_evaluator import Image2ImageEvaluator
 7 | from .inpaint_evaluator import InpaintingEvaluator
 8 | from .embeddings_evaluator import EmbeddingsEvaluator
 9 | from .reranking_evaluator import RerankingEvaluator
10 | 
11 | 
12 | __all__ = [
13 |     "Evaluator",
14 |     "register_evaluator",
15 |     "TextEvaluator",
16 |     "Text2ImageEvaluator",
17 |     "VisualTextEvaluator",
18 |     "Image2ImageEvaluator",
19 |     "InpaintingEvaluator",
20 |     "EmbeddingsEvaluator",
21 |     "RerankingEvaluator",
22 |     "EVALUATOR_REGISTRY",
23 | ]
24 | 


--------------------------------------------------------------------------------
/tools/who_what_benchmark/whowhatbench/registry.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from abc import ABC, abstractmethod
 3 | 
 4 | 
 5 | # Registry for evaluators
 6 | EVALUATOR_REGISTRY = {}
 7 | 
 8 | 
 9 | def register_evaluator(*names):
10 |     def decorate(cls):
11 |         for name in names:
12 |             assert (
13 |                 name not in EVALUATOR_REGISTRY
14 |             ), f"Evaluator named '{name}' conflicts with existing evaluators! Please register with a non-conflicting alias instead."
15 | 
16 |             EVALUATOR_REGISTRY[name] = cls
17 |         return cls
18 | 
19 |     return decorate
20 | 
21 | 
22 | class Evaluator(ABC):
23 |     @abstractmethod
24 |     def dump_gt(self, csv_name: str):
25 |         pass
26 | 
27 |     @abstractmethod
28 |     def dump_predictions(self, csv_name: str):
29 |         pass
30 | 
31 |     @abstractmethod
32 |     def score(self, model_or_data, **kwargs):
33 |         pass
34 | 
35 |     @abstractmethod
36 |     def worst_examples(self, top_k: int = 5, metric="similarity"):
37 |         pass
38 | 
39 |     @abstractmethod
40 |     def get_generation_fn(self):
41 |         raise NotImplementedError("generation_fn should be returned")
42 | 
43 | 
44 | class BaseEvaluator(Evaluator):
45 |     def dump_gt(self, csv_name: str):
46 |         self.gt_data.to_csv(csv_name)
47 | 
48 |     def dump_predictions(self, csv_name: str):
49 |         self.predictions.to_csv(csv_name)
50 | 
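
To make the registry concrete: a new evaluator subclasses BaseEvaluator, implements the remaining abstract methods, and registers itself with the decorator. The sketch below uses only the API defined in this file; the evaluator itself is a hypothetical stub rather than one shipped with whowhatbench.

from whowhatbench.registry import BaseEvaluator, register_evaluator, EVALUATOR_REGISTRY


@register_evaluator("dummy-text")
class DummyEvaluator(BaseEvaluator):
    """Toy evaluator: scores every prompt with a constant similarity."""

    def __init__(self, prompts):
        self.prompts = prompts

    def score(self, model_or_data, **kwargs):
        per_question = [{"prompt": p, "similarity": 1.0} for p in self.prompts]
        return per_question, {"similarity": 1.0}

    def worst_examples(self, top_k=5, metric="similarity"):
        return []

    def get_generation_fn(self):
        return lambda prompt: prompt


# The decorator stores the class under its alias for later lookup.
assert EVALUATOR_REGISTRY["dummy-text"] is DummyEvaluator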


--------------------------------------------------------------------------------