├── .github ├── actions │ └── poetry_setup │ │ └── action.yml ├── scripts │ ├── check_diff.py │ └── get_min_versions.py └── workflows │ ├── _all_ci.yml │ ├── _codespell.yml │ ├── _compile_integration_test.yml │ ├── _lint.yml │ ├── _release.yml │ ├── _scheduled_test.yml │ ├── _test.yml │ ├── _test_release.yml │ ├── check_diffs.yml │ └── extract_ignored_words_list.py ├── .gitignore ├── LICENSE ├── README.md ├── cookbook ├── img │ └── structured_report_generation_arch.png ├── langgraph_rag_agent_llama3_nvidia_nim.ipynb ├── nvidia_nim_agents_llama3.1.ipynb ├── structured_report_generation.ipynb └── structured_report_generation_elastic │ ├── setup.sh │ └── structured_report_generation_elastic.ipynb ├── libs ├── ai-endpoints │ ├── .gitignore │ ├── LICENSE │ ├── Makefile │ ├── README.md │ ├── docs │ │ ├── chat │ │ │ └── nvidia_ai_endpoints.ipynb │ │ ├── llms │ │ │ └── nvidia_ai_endpoints.ipynb │ │ ├── providers │ │ │ └── nvidia.mdx │ │ ├── retrievers │ │ │ └── nvidia_rerank.ipynb │ │ └── text_embedding │ │ │ └── nvidia_ai_endpoints.ipynb │ ├── langchain_nvidia.py │ ├── langchain_nvidia_ai_endpoints │ │ ├── __init__.py │ │ ├── _common.py │ │ ├── _statics.py │ │ ├── _utils.py │ │ ├── callbacks.py │ │ ├── chat_models.py │ │ ├── embeddings.py │ │ ├── llm.py │ │ ├── py.typed │ │ └── reranking.py │ ├── poetry.lock │ ├── pyproject.toml │ ├── scripts │ │ ├── check_imports.py │ │ └── lint_imports.sh │ └── tests │ │ ├── __init__.py │ │ ├── data │ │ ├── nvidia-picasso-large.png │ │ ├── nvidia-picasso.gif │ │ ├── nvidia-picasso.jpg │ │ ├── nvidia-picasso.png │ │ └── nvidia-picasso.webp │ │ ├── integration_tests │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_api_key.py │ │ ├── test_available_models.py │ │ ├── test_base_url.py │ │ ├── test_bind_tools.py │ │ ├── test_chat_models.py │ │ ├── test_compile.py │ │ ├── test_completions_models.py │ │ ├── test_embeddings.py │ │ ├── test_other_models.py │ │ ├── test_ranking.py │ │ ├── test_register_model.py │ │ ├── test_standard.py │ │ ├── test_streaming.py │ │ ├── test_structured_output.py │ │ └── test_vlm_models.py │ │ └── unit_tests │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_202_polling.py │ │ ├── test_api_key.py │ │ ├── test_available_models.py │ │ ├── test_base_url.py │ │ ├── test_bind_tools.py │ │ ├── test_chat_models.py │ │ ├── test_completions_models.py │ │ ├── test_embeddings.py │ │ ├── test_imports.py │ │ ├── test_messages.py │ │ ├── test_metadata.py │ │ ├── test_model.py │ │ ├── test_parallel_tool_calls.py │ │ ├── test_ranking.py │ │ ├── test_register_model.py │ │ ├── test_serialization.py │ │ ├── test_standard.py │ │ ├── test_statics.py │ │ ├── test_stop.py │ │ ├── test_structured_output.py │ │ └── test_vlm_models.py └── trt │ ├── .gitignore │ ├── LICENSE │ ├── Makefile │ ├── README.md │ ├── docs │ └── llms.ipynb │ ├── langchain_nvidia_trt │ ├── __init__.py │ ├── llms.py │ └── py.typed │ ├── mypy.ini │ ├── poetry.lock │ ├── pyproject.toml │ ├── scripts │ ├── check_imports.py │ ├── check_pydantic.sh │ └── lint_imports.sh │ └── tests │ ├── __init__.py │ ├── integration_tests │ ├── __init__.py │ ├── test_compile.py │ └── test_llms.py │ └── unit_tests │ ├── __init__.py │ ├── test_imports.py │ └── test_llms.py └── studio ├── .env.example ├── agentic_rag_nvidia.py ├── langgraph.json └── requirements.txt /.github/actions/poetry_setup/action.yml: -------------------------------------------------------------------------------- 1 | # An action for setting up poetry install with caching. 
2 | # Using a custom action since the default action does not 3 | # take poetry install groups into account. 4 | # Action code from: 5 | # https://github.com/actions/setup-python/issues/505#issuecomment-1273013236 6 | name: poetry-install-with-caching 7 | description: Poetry install with support for caching of dependency groups. 8 | 9 | inputs: 10 | python-version: 11 | description: Python version, supporting MAJOR.MINOR only 12 | required: true 13 | 14 | poetry-version: 15 | description: Poetry version 16 | required: true 17 | 18 | cache-key: 19 | description: Cache key to use for manual handling of caching 20 | required: true 21 | 22 | working-directory: 23 | description: Directory whose poetry.lock file should be cached 24 | required: true 25 | 26 | runs: 27 | using: composite 28 | steps: 29 | - uses: actions/setup-python@v5 30 | name: Setup python ${{ inputs.python-version }} 31 | id: setup-python 32 | with: 33 | python-version: ${{ inputs.python-version }} 34 | 35 | - uses: actions/cache@v4 36 | id: cache-bin-poetry 37 | name: Cache Poetry binary - Python ${{ inputs.python-version }} 38 | env: 39 | SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1" 40 | with: 41 | path: | 42 | /opt/pipx/venvs/poetry 43 | # This step caches the poetry installation, so make sure it's keyed on the poetry version as well. 44 | key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }} 45 | 46 | - name: Refresh shell hashtable and fixup softlinks 47 | if: steps.cache-bin-poetry.outputs.cache-hit == 'true' 48 | shell: bash 49 | env: 50 | POETRY_VERSION: ${{ inputs.poetry-version }} 51 | PYTHON_VERSION: ${{ inputs.python-version }} 52 | run: | 53 | set -eux 54 | 55 | # Refresh the shell hashtable, to ensure correct `which` output. 56 | hash -r 57 | 58 | # `actions/cache@v3` doesn't always seem able to correctly unpack softlinks. 59 | # Delete and recreate the softlinks pipx expects to have. 60 | rm /opt/pipx/venvs/poetry/bin/python 61 | cd /opt/pipx/venvs/poetry/bin 62 | ln -s "$(which "python$PYTHON_VERSION")" python 63 | chmod +x python 64 | cd /opt/pipx_bin/ 65 | ln -s /opt/pipx/venvs/poetry/bin/poetry poetry 66 | chmod +x poetry 67 | 68 | # Ensure everything got set up correctly. 69 | /opt/pipx/venvs/poetry/bin/python --version 70 | /opt/pipx_bin/poetry --version 71 | 72 | - name: Install poetry 73 | if: steps.cache-bin-poetry.outputs.cache-hit != 'true' 74 | shell: bash 75 | env: 76 | POETRY_VERSION: ${{ inputs.poetry-version }} 77 | PYTHON_VERSION: ${{ inputs.python-version }} 78 | # Install poetry using the python version installed by setup-python step. 79 | run: pipx install "poetry==$POETRY_VERSION" --python '${{ steps.setup-python.outputs.python-path }}' --verbose 80 | 81 | - name: Restore pip and poetry cached dependencies 82 | uses: actions/cache@v4 83 | env: 84 | SEGMENT_DOWNLOAD_TIMEOUT_MIN: "4" 85 | WORKDIR: ${{ inputs.working-directory == '' && '.' 
|| inputs.working-directory }} 86 | with: 87 | path: | 88 | ~/.cache/pip 89 | ~/.cache/pypoetry/virtualenvs 90 | ~/.cache/pypoetry/cache 91 | ~/.cache/pypoetry/artifacts 92 | ${{ env.WORKDIR }}/.venv 93 | key: py-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/**/poetry.lock', env.WORKDIR)) }} 94 | -------------------------------------------------------------------------------- /.github/scripts/check_diff.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import os 4 | from typing import Dict 5 | 6 | NVIDIA_DIRS = [ 7 | "libs/ai-endpoints", 8 | "libs/trt", 9 | ] 10 | 11 | if __name__ == "__main__": 12 | files = sys.argv[1:] 13 | 14 | dirs_to_run: Dict[str, set] = { 15 | "lint": set(), 16 | "test": set(), 17 | } 18 | 19 | if len(files) == 300: 20 | # max diff length is 300 files - there are likely files missing 21 | raise ValueError("Max diff reached. Please manually run CI on changed libs.") 22 | 23 | for file in files: 24 | if any( 25 | file.startswith(dir_) 26 | for dir_ in ( 27 | ".github/workflows", 28 | ".github/tools", 29 | ".github/actions", 30 | ".github/scripts/check_diff.py", 31 | ) 32 | ): 33 | # add all LANGCHAIN_DIRS for infra changes 34 | # dirs_to_run["lint"].add(".") 35 | pass 36 | 37 | if any(file.startswith(dir_) for dir_ in NVIDIA_DIRS): 38 | for dir_ in NVIDIA_DIRS: 39 | if file.startswith(dir_): 40 | # add that dir and all dirs after in LANGCHAIN_DIRS 41 | # for extended testing 42 | dirs_to_run["test"].add(dir_) 43 | elif file.startswith("libs/"): 44 | raise ValueError( 45 | f"Unknown lib: {file}. check_diff.py likely needs " 46 | "an update for this new library!" 
47 | ) 48 | # elif any(file.startswith(p) for p in ["docs/", "templates/", "cookbook/"]): 49 | # dirs_to_run["lint"].add(".") 50 | 51 | outputs = { 52 | "dirs-to-lint": list( 53 | dirs_to_run["lint"] | dirs_to_run["test"] 54 | ), 55 | "dirs-to-test": list(dirs_to_run["test"]), 56 | } 57 | for key, value in outputs.items(): 58 | json_output = json.dumps(value) 59 | print(f"{key}={json_output}") # noqa: T201 60 | -------------------------------------------------------------------------------- /.github/scripts/get_min_versions.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import tomllib 4 | from packaging.version import parse as parse_version 5 | import re 6 | 7 | MIN_VERSION_LIBS = ["langchain-core"] 8 | 9 | 10 | def get_min_version(version: str) -> str: 11 | # case ^x.x.x 12 | _match = re.match(r"^\^(\d+(?:\.\d+){0,2})$", version) 13 | if _match: 14 | return _match.group(1) 15 | 16 | # case >=x.x.x,<y.y.y 17 | _match = re.match(r"^>=(\d+(?:\.\d+){0,2}),<(\d+(?:\.\d+){0,2})$", version) 18 | if _match: 19 | _min = _match.group(1) 20 | _max = _match.group(2) 21 | assert parse_version(_min) < parse_version(_max) 22 | return _min 23 | 24 | # case x.x.x 25 | _match = re.match(r"^(\d+(?:\.\d+){0,2})$", version) 26 | if _match: 27 | return _match.group(1) 28 | 29 | raise ValueError(f"Unrecognized version format: {version}") 30 | 31 | 32 | def get_min_version_from_toml(toml_path: str): 33 | # Parse the TOML file 34 | with open(toml_path, "rb") as file: 35 | toml_data = tomllib.load(file) 36 | 37 | # Get the dependencies from tool.poetry.dependencies 38 | dependencies = toml_data["tool"]["poetry"]["dependencies"] 39 | 40 | # Initialize a dictionary to store the minimum versions 41 | min_versions = {} 42 | 43 | # Iterate over the libs in MIN_VERSION_LIBS 44 | for lib in MIN_VERSION_LIBS: 45 | # Check if the lib is present in the dependencies 46 | if lib in dependencies: 47 | # Get the version string 48 | version_string = dependencies[lib] 49 | 50 | # Use parse_version to get the minimum supported version from version_string 51 | min_version = get_min_version(version_string) 52 | 53 | # Store the minimum version in the min_versions dictionary 54 | min_versions[lib] = min_version 55 | 56 | return min_versions 57 | 58 | 59 | # Get the TOML file path from the command line argument 60 | toml_file = sys.argv[1] 61 | 62 | # Call the function to get the minimum versions 63 | min_versions = get_min_version_from_toml(toml_file) 64 | 65 | print(" ".join([f"{lib}=={version}" for lib, version in min_versions.items()])) 66 | -------------------------------------------------------------------------------- /.github/workflows/_all_ci.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: langchain CI 3 | 4 | on: 5 | workflow_call: 6 | inputs: 7 | working-directory: 8 | required: true 9 | type: string 10 | description: "From which folder this pipeline executes" 11 | workflow_dispatch: 12 | inputs: 13 | working-directory: 14 | required: true 15 | type: choice 16 | default: 'libs/ai-endpoints' 17 | options: 18 | - libs/ai-endpoints 19 | - libs/trt 20 | 21 | 22 | # If another push to the same PR or branch happens while this workflow is still running, 23 | # cancel the earlier run in favor of the next run. 24 | # 25 | # There's no point in testing an outdated version of the code.
GitHub only allows 26 | # a limited number of job runners to be active at the same time, so it's better to cancel 27 | # pointless jobs early so that more useful jobs can run sooner. 28 | concurrency: 29 | group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.working-directory }} 30 | cancel-in-progress: true 31 | 32 | env: 33 | POETRY_VERSION: "1.7.1" 34 | 35 | jobs: 36 | lint: 37 | name: "-" 38 | uses: ./.github/workflows/_lint.yml 39 | with: 40 | working-directory: ${{ inputs.working-directory }} 41 | secrets: inherit 42 | 43 | test: 44 | name: "-" 45 | uses: ./.github/workflows/_test.yml 46 | with: 47 | working-directory: ${{ inputs.working-directory }} 48 | secrets: inherit 49 | 50 | compile-integration-tests: 51 | name: "-" 52 | uses: ./.github/workflows/_compile_integration_test.yml 53 | with: 54 | working-directory: ${{ inputs.working-directory }} 55 | secrets: inherit -------------------------------------------------------------------------------- /.github/workflows/_codespell.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: make spell_check 3 | 4 | on: 5 | workflow_call: 6 | inputs: 7 | working-directory: 8 | required: true 9 | type: string 10 | description: "From which folder this pipeline executes" 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | codespell: 17 | name: (Check for spelling errors) 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - name: Checkout 22 | uses: actions/checkout@v4 23 | 24 | - name: Install Dependencies 25 | run: | 26 | pip install toml 27 | 28 | - name: Extract Ignore Words List 29 | working-directory: ${{ inputs.working-directory }} 30 | run: | 31 | # Use a Python script to extract the ignore words list from pyproject.toml 32 | python ../../.github/workflows/extract_ignored_words_list.py 33 | id: extract_ignore_words 34 | 35 | - name: Codespell 36 | uses: codespell-project/actions-codespell@v2 37 | with: 38 | skip: guide_imports.json 39 | ignore_words_list: ${{ steps.extract_ignore_words.outputs.ignore_words_list }} 40 | -------------------------------------------------------------------------------- /.github/workflows/_compile_integration_test.yml: -------------------------------------------------------------------------------- 1 | name: compile-integration-test 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | working-directory: 7 | required: true 8 | type: string 9 | description: "From which folder this pipeline executes" 10 | 11 | env: 12 | POETRY_VERSION: "1.7.1" 13 | 14 | jobs: 15 | build: 16 | defaults: 17 | run: 18 | working-directory: ${{ inputs.working-directory }} 19 | runs-on: ubuntu-latest 20 | strategy: 21 | matrix: 22 | python-version: 23 | - "3.8" 24 | - "3.9" 25 | - "3.10" 26 | - "3.11" 27 | name: "poetry run pytest -m compile tests/integration_tests #${{ matrix.python-version }}" 28 | steps: 29 | - uses: actions/checkout@v4 30 | 31 | - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }} 32 | uses: "./.github/actions/poetry_setup" 33 | with: 34 | python-version: ${{ matrix.python-version }} 35 | poetry-version: ${{ env.POETRY_VERSION }} 36 | working-directory: ${{ inputs.working-directory }} 37 | cache-key: compile-integration 38 | 39 | - name: Install integration dependencies 40 | shell: bash 41 | run: poetry install --with=test_integration,test 42 | 43 | - name: Check integration tests compile 44 | shell: bash 45 | env: 46 | NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} 47 | run: poetry run pytest -m compile 
tests/integration_tests 48 | 49 | - name: Ensure the tests did not create any additional files 50 | shell: bash 51 | run: | 52 | set -eu 53 | 54 | STATUS="$(git status)" 55 | echo "$STATUS" 56 | 57 | # grep will exit non-zero if the target message isn't found, 58 | # and `set -e` above will cause the step to fail. 59 | echo "$STATUS" | grep 'nothing to commit, working tree clean' 60 | -------------------------------------------------------------------------------- /.github/workflows/_lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | working-directory: 7 | required: true 8 | type: string 9 | description: "From which folder this pipeline executes" 10 | langchain-location: 11 | required: false 12 | type: string 13 | description: "Relative path to the langchain library folder" 14 | 15 | env: 16 | POETRY_VERSION: "1.7.1" 17 | WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }} 18 | 19 | # This env var allows us to get inline annotations when ruff has complaints. 20 | RUFF_OUTPUT_FORMAT: github 21 | 22 | jobs: 23 | build: 24 | name: "make lint #${{ matrix.python-version }}" 25 | runs-on: ubuntu-latest 26 | strategy: 27 | matrix: 28 | # Only lint on the min and max supported Python versions. 29 | # It's extremely unlikely that there's a lint issue on any version in between 30 | # that doesn't show up on the min or max versions. 31 | # 32 | # GitHub rate-limits how many jobs can be running at any one time. 33 | # Starting new jobs is also relatively slow, 34 | # so linting on fewer versions makes CI faster. 35 | python-version: 36 | - "3.8" 37 | - "3.11" 38 | steps: 39 | - uses: actions/checkout@v4 40 | 41 | - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }} 42 | uses: "./.github/actions/poetry_setup" 43 | with: 44 | python-version: ${{ matrix.python-version }} 45 | poetry-version: ${{ env.POETRY_VERSION }} 46 | working-directory: ${{ inputs.working-directory }} 47 | cache-key: lint-with-extras 48 | 49 | - name: Check Poetry File 50 | shell: bash 51 | working-directory: ${{ inputs.working-directory }} 52 | run: | 53 | poetry check 54 | 55 | - name: Check lock file 56 | shell: bash 57 | working-directory: ${{ inputs.working-directory }} 58 | run: | 59 | poetry lock --check 60 | 61 | - name: Install dependencies 62 | # Also installs dev/lint/test/typing dependencies, to ensure we have 63 | # type hints for as many of our libraries as possible. 64 | # This helps catch errors that require dependencies to be spotted, for example: 65 | # https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341 66 | # 67 | # If you change this configuration, make sure to change the `cache-key` 68 | # in the `poetry_setup` action above to stop using the old cache. 69 | # It doesn't matter how you change it, any change will cause a cache-bust. 
70 | working-directory: ${{ inputs.working-directory }} 71 | run: | 72 | poetry install --with lint,typing 73 | 74 | - name: Install langchain editable 75 | working-directory: ${{ inputs.working-directory }} 76 | if: ${{ inputs.langchain-location }} 77 | env: 78 | LANGCHAIN_LOCATION: ${{ inputs.langchain-location }} 79 | run: | 80 | poetry run pip install -e "$LANGCHAIN_LOCATION" 81 | 82 | - name: Get .mypy_cache to speed up mypy 83 | uses: actions/cache@v4 84 | env: 85 | SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2" 86 | with: 87 | path: | 88 | ${{ env.WORKDIR }}/.mypy_cache 89 | key: mypy-lint-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', inputs.working-directory)) }} 90 | 91 | - name: Install unit test dependencies 92 | # Also installs dev/lint/test/typing dependencies, to ensure we have 93 | # type hints for as many of our libraries as possible. 94 | # This helps catch errors that require dependencies to be spotted, for example: 95 | # https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341 96 | # 97 | # If you change this configuration, make sure to change the `cache-key` 98 | # in the `poetry_setup` action above to stop using the old cache. 99 | # It doesn't matter how you change it, any change will cause a cache-bust. 100 | if: ${{ ! startsWith(inputs.working-directory, 'libs/partners/') }} 101 | working-directory: ${{ inputs.working-directory }} 102 | run: | 103 | poetry install --with test 104 | 105 | - name: Analysing the code with our lint 106 | working-directory: ${{ inputs.working-directory }} 107 | run: | 108 | make lint_package 109 | 110 | - name: Install unit+integration test dependencies 111 | if: ${{ startsWith(inputs.working-directory, 'libs/partners/') }} 112 | working-directory: ${{ inputs.working-directory }} 113 | run: | 114 | poetry install --with test,test_integration 115 | 116 | - name: Get .mypy_cache_test to speed up mypy 117 | uses: actions/cache@v4 118 | env: 119 | SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2" 120 | with: 121 | path: | 122 | ${{ env.WORKDIR }}/.mypy_cache_test 123 | key: mypy-test-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', inputs.working-directory)) }} 124 | 125 | - name: Analysing the code with our lint 126 | working-directory: ${{ inputs.working-directory }} 127 | run: | 128 | make lint_tests 129 | -------------------------------------------------------------------------------- /.github/workflows/_scheduled_test.yml: -------------------------------------------------------------------------------- 1 | name: Scheduled tests 2 | run-name: langchain-nvidia Scheduled tests 3 | 4 | on: 5 | workflow_dispatch: 6 | schedule: 7 | - cron: '0 8 * * *' 8 | 9 | env: 10 | POETRY_VERSION: "1.7.1" 11 | 12 | jobs: 13 | build: 14 | name: Python ${{ matrix.python-version }} - ${{ matrix.working-directory }} 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: 20 | - "3.8" 21 | - "3.11" 22 | working-directory: 23 | - "libs/ai-endpoints" 24 | - "libs/trt" 25 | 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - name: Set up Python + Poetry ${{ env.POETRY_VERSION }} 30 | uses: "./.github/actions/poetry_setup" 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | poetry-version: ${{ env.POETRY_VERSION }} 34 | working-directory: ${{ matrix.working-directory }} 35 | 
cache-key: scheduled 36 | 37 | - name: Install dependencies 38 | run: poetry install --with test,test_integration 39 | working-directory: ${{ matrix.working-directory }} 40 | 41 | - name: Run unit tests 42 | run: make tests 43 | working-directory: ${{ matrix.working-directory }} 44 | 45 | - name: Run integration tests 46 | env: 47 | NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} 48 | run: make integration_tests 49 | working-directory: ${{ matrix.working-directory }} 50 | 51 | 52 | - name: Ensure the tests did not create any additional files 53 | working-directory: ${{ matrix.working-directory }} 54 | run: | 55 | set -eu 56 | 57 | STATUS="$(git status)" 58 | echo "$STATUS" 59 | 60 | # grep will exit non-zero if the target message isn't found, 61 | # and `set -e` above will cause the step to fail. 62 | echo "$STATUS" | grep 'nothing to commit, working tree clean' 63 | -------------------------------------------------------------------------------- /.github/workflows/_test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | working-directory: 7 | required: true 8 | type: string 9 | description: "From which folder this pipeline executes" 10 | langchain-location: 11 | required: false 12 | type: string 13 | description: "Relative path to the langchain library folder" 14 | 15 | env: 16 | POETRY_VERSION: "1.7.1" 17 | 18 | jobs: 19 | build: 20 | defaults: 21 | run: 22 | working-directory: ${{ inputs.working-directory }} 23 | runs-on: ubuntu-latest 24 | strategy: 25 | matrix: 26 | python-version: 27 | - "3.8" 28 | - "3.9" 29 | - "3.10" 30 | - "3.11" 31 | name: "make test #${{ matrix.python-version }}" 32 | steps: 33 | - uses: actions/checkout@v4 34 | 35 | - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }} 36 | uses: "./.github/actions/poetry_setup" 37 | with: 38 | python-version: ${{ matrix.python-version }} 39 | poetry-version: ${{ env.POETRY_VERSION }} 40 | working-directory: ${{ inputs.working-directory }} 41 | cache-key: core 42 | 43 | - name: Install dependencies 44 | shell: bash 45 | run: poetry install --with test 46 | 47 | - name: Install langchain editable 48 | working-directory: ${{ inputs.working-directory }} 49 | if: ${{ inputs.langchain-location }} 50 | env: 51 | LANGCHAIN_LOCATION: ${{ inputs.langchain-location }} 52 | run: | 53 | poetry run pip install -e "$LANGCHAIN_LOCATION" 54 | 55 | - name: Run core tests 56 | shell: bash 57 | run: | 58 | make test 59 | 60 | - name: Ensure the tests did not create any additional files 61 | shell: bash 62 | run: | 63 | set -eu 64 | 65 | STATUS="$(git status)" 66 | echo "$STATUS" 67 | 68 | # grep will exit non-zero if the target message isn't found, 69 | # and `set -e` above will cause the step to fail. 
70 | echo "$STATUS" | grep 'nothing to commit, working tree clean' 71 | -------------------------------------------------------------------------------- /.github/workflows/_test_release.yml: -------------------------------------------------------------------------------- 1 | name: test-release 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | working-directory: 7 | required: true 8 | type: string 9 | description: "From which folder this pipeline executes" 10 | 11 | env: 12 | POETRY_VERSION: "1.7.1" 13 | PYTHON_VERSION: "3.10" 14 | 15 | jobs: 16 | build: 17 | if: github.ref == 'refs/heads/main' 18 | runs-on: ubuntu-latest 19 | 20 | outputs: 21 | pkg-name: ${{ steps.check-version.outputs.pkg-name }} 22 | version: ${{ steps.check-version.outputs.version }} 23 | 24 | steps: 25 | - uses: actions/checkout@v4 26 | 27 | - name: Set up Python + Poetry ${{ env.POETRY_VERSION }} 28 | uses: "./.github/actions/poetry_setup" 29 | with: 30 | python-version: ${{ env.PYTHON_VERSION }} 31 | poetry-version: ${{ env.POETRY_VERSION }} 32 | working-directory: ${{ inputs.working-directory }} 33 | cache-key: release 34 | 35 | # We want to keep this build stage *separate* from the release stage, 36 | # so that there's no sharing of permissions between them. 37 | # The release stage has trusted publishing and GitHub repo contents write access, 38 | # and we want to keep the scope of that access limited just to the release job. 39 | # Otherwise, a malicious `build` step (e.g. via a compromised dependency) 40 | # could get access to our GitHub or PyPI credentials. 41 | # 42 | # Per the trusted publishing GitHub Action: 43 | # > It is strongly advised to separate jobs for building [...] 44 | # > from the publish job. 45 | # https://github.com/pypa/gh-action-pypi-publish#non-goals 46 | - name: Build project for distribution 47 | run: poetry build 48 | working-directory: ${{ inputs.working-directory }} 49 | 50 | - name: Upload build 51 | uses: actions/upload-artifact@v4 52 | with: 53 | name: test-dist 54 | path: ${{ inputs.working-directory }}/dist/ 55 | 56 | - name: Check Version 57 | id: check-version 58 | shell: bash 59 | working-directory: ${{ inputs.working-directory }} 60 | run: | 61 | echo pkg-name="$(poetry version | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT 62 | echo version="$(poetry version --short)" >> $GITHUB_OUTPUT 63 | 64 | publish: 65 | needs: 66 | - build 67 | runs-on: ubuntu-latest 68 | permissions: 69 | # This permission is used for trusted publishing: 70 | # https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/ 71 | # 72 | # Trusted publishing has to also be configured on PyPI for each package: 73 | # https://docs.pypi.org/trusted-publishers/adding-a-publisher/ 74 | id-token: write 75 | 76 | steps: 77 | - uses: actions/checkout@v4 78 | 79 | - uses: actions/download-artifact@v4 80 | with: 81 | name: test-dist 82 | path: ${{ inputs.working-directory }}/dist/ 83 | 84 | - name: Publish to test PyPI 85 | uses: pypa/gh-action-pypi-publish@release/v1 86 | with: 87 | packages-dir: ${{ inputs.working-directory }}/dist/ 88 | verbose: true 89 | print-hash: true 90 | repository-url: https://test.pypi.org/legacy/ 91 | 92 | # We overwrite any existing distributions with the same name and version. 93 | # This is *only for CI use* and is *extremely dangerous* otherwise! 
94 | # https://github.com/pypa/gh-action-pypi-publish#tolerating-release-package-file-duplicates 95 | skip-existing: true 96 | # Temp workaround since attestations are on by default as of gh-action-pypi-publish v1.11.0 97 | attestations: false 98 | -------------------------------------------------------------------------------- /.github/workflows/check_diffs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: CI 3 | 4 | on: 5 | push: 6 | branches: [main] 7 | pull_request: 8 | 9 | # If another push to the same PR or branch happens while this workflow is still running, 10 | # cancel the earlier run in favor of the next run. 11 | # 12 | # There's no point in testing an outdated version of the code. GitHub only allows 13 | # a limited number of job runners to be active at the same time, so it's better to cancel 14 | # pointless jobs early so that more useful jobs can run sooner. 15 | concurrency: 16 | group: ${{ github.workflow }}-${{ github.ref }} 17 | cancel-in-progress: true 18 | 19 | env: 20 | POETRY_VERSION: "1.7.1" 21 | 22 | jobs: 23 | build: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v4 27 | - uses: actions/setup-python@v5 28 | with: 29 | python-version: '3.10' 30 | - id: files 31 | uses: Ana06/get-changed-files@v2.2.0 32 | - id: set-matrix 33 | run: | 34 | python .github/scripts/check_diff.py ${{ steps.files.outputs.all }} >> $GITHUB_OUTPUT 35 | outputs: 36 | dirs-to-lint: ${{ steps.set-matrix.outputs.dirs-to-lint }} 37 | dirs-to-test: ${{ steps.set-matrix.outputs.dirs-to-test }} 38 | lint: 39 | name: cd ${{ matrix.working-directory }} 40 | needs: [ build ] 41 | if: ${{ needs.build.outputs.dirs-to-lint != '[]' }} 42 | strategy: 43 | matrix: 44 | working-directory: ${{ fromJson(needs.build.outputs.dirs-to-lint) }} 45 | uses: ./.github/workflows/_lint.yml 46 | with: 47 | working-directory: ${{ matrix.working-directory }} 48 | secrets: inherit 49 | 50 | test: 51 | name: cd ${{ matrix.working-directory }} 52 | needs: [ build ] 53 | if: ${{ needs.build.outputs.dirs-to-test != '[]' }} 54 | strategy: 55 | matrix: 56 | working-directory: ${{ fromJson(needs.build.outputs.dirs-to-test) }} 57 | uses: ./.github/workflows/_test.yml 58 | with: 59 | working-directory: ${{ matrix.working-directory }} 60 | secrets: inherit 61 | 62 | compile-integration-tests: 63 | name: cd ${{ matrix.working-directory }} 64 | needs: [ build ] 65 | if: ${{ needs.build.outputs.dirs-to-test != '[]' }} 66 | strategy: 67 | matrix: 68 | working-directory: ${{ fromJson(needs.build.outputs.dirs-to-test) }} 69 | uses: ./.github/workflows/_compile_integration_test.yml 70 | with: 71 | working-directory: ${{ matrix.working-directory }} 72 | secrets: inherit 73 | ci_success: 74 | name: "CI Success" 75 | needs: [build, lint, test, compile-integration-tests] 76 | if: | 77 | always() 78 | runs-on: ubuntu-latest 79 | env: 80 | JOBS_JSON: ${{ toJSON(needs) }} 81 | RESULTS_JSON: ${{ toJSON(needs.*.result) }} 82 | EXIT_CODE: ${{!contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && '0' || '1'}} 83 | steps: 84 | - name: "CI Success" 85 | run: | 86 | echo $JOBS_JSON 87 | echo $RESULTS_JSON 88 | echo "Exiting with $EXIT_CODE" 89 | exit $EXIT_CODE 90 | -------------------------------------------------------------------------------- /.github/workflows/extract_ignored_words_list.py: -------------------------------------------------------------------------------- 1 | import toml 2 | 3 | pyproject_toml = toml.load("pyproject.toml") 4 | 5 | # 
Extract the ignore words list (adjust the key as per your TOML structure) 6 | ignore_words_list = ( 7 | pyproject_toml.get("tool", {}).get("codespell", {}).get("ignore-words-list") 8 | ) 9 | 10 | print(f"::set-output name=ignore_words_list::{ignore_words_list}") 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vs/ 2 | .vscode/ 3 | .idea/ 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | docs/docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | notebooks/ 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .envrc 112 | .venv 113 | .venvs 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | # macOS display setting files 139 | .DS_Store 140 | 141 | # Wandb directory 142 | wandb/ 143 | 144 | # asdf tool versions 145 | .tool-versions 146 | /.ruff_cache/ 147 | 148 | *.pkl 149 | *.bin 150 | 151 | # integration test artifacts 152 | data_map* 153 | \[('_type', 'fake'), ('stop', None)] 154 | 155 | # Replit files 156 | *replit* 157 | 158 | node_modules 159 | docs/.yarn/ 160 | docs/node_modules/ 161 | docs/.docusaurus/ 162 | docs/.cache-loader/ 163 | docs/_dist 164 | docs/api_reference/*api_reference.rst 165 | docs/api_reference/_build 166 | docs/api_reference/*/ 167 | !docs/api_reference/_static/ 168 | !docs/api_reference/templates/ 169 | !docs/api_reference/themes/ 170 | docs/docs/build 171 | docs/docs/node_modules 172 | docs/docs/yarn.lock 173 | _dist 174 | docs/docs/templates -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 LangChain 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🦜️🔗 LangChain NVIDIA 2 | 3 | ## Packages 4 | 5 | This repository contains two packages with NVIDIA integrations with LangChain: 6 | - [langchain-nvidia-ai-endpoints](https://pypi.org/project/langchain-nvidia-ai-endpoints/) integrates [NVIDIA AI Foundation Models and Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/). 7 | - [langchain-nvidia-trt](https://pypi.org/project/langchain-nvidia-trt/) implements integrations of NVIDIA [TensorRT](https://developer.nvidia.com/tensorrt) models. 
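For orientation, here is a minimal sketch of how the `langchain-nvidia-ai-endpoints` package is typically used. It mirrors the provider documentation further down in this repository; the model name is just an example taken from those docs, and the snippet assumes an `NVIDIA_API_KEY` for the hosted API catalog.

```python
import os
from getpass import getpass

from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings

# Hosted API catalog endpoints expect an NVIDIA_API_KEY that starts with "nvapi-".
if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    os.environ["NVIDIA_API_KEY"] = getpass("NVAPI Key (starts with nvapi-): ")

# Chat completions against a catalog-hosted model (example model name from the docs below).
llm = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1")
print(llm.invoke("Write a haiku about GPUs.").content)

# Embeddings for retrieval workflows such as the agentic RAG cookbook and studio examples.
embedder = NVIDIAEmbeddings()
print(len(embedder.embed_query("What is retrieval-augmented generation?")))
```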
8 | 9 | ## Testing 10 | 11 | ### Cookbooks 12 | 13 | See the notebooks in the [cookbook](./cookbook) directory for examples of using `ChatNVIDIA` and `NVIDIAEmbeddings` with LangGraph for agentic RAG and tool-calling agents. 14 | 15 | ### Studio 16 | 17 | See the [studio](./studio) directory to test the agentic RAG workflow in LangGraph Studio. 18 | 19 | Simply load the `studio` directory in [LangGraph Studio](https://github.com/langchain-ai/langgraph-studio?tab=readme-ov-file#download) and click the "Run" button with an input question. 20 | 21 | This will run agentic RAG where it first reflects on the question to decide whether to use web search or vectorstore retrieval. It also grades retrieved documents as well as generated answers. 22 | 23 | ![Screenshot 2024-12-04 at 11 19 54 AM](https://github.com/user-attachments/assets/736544ff-6597-4eb4-89d1-e1e5863baad4) 24 | -------------------------------------------------------------------------------- /cookbook/img/structured_report_generation_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/cookbook/img/structured_report_generation_arch.png -------------------------------------------------------------------------------- /cookbook/structured_report_generation_elastic/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Function to print messages 5 | log() { 6 | echo "[`date +"%Y-%m-%d %H:%M:%S"`] $1" 7 | } 8 | 9 | # Check if container exists and remove it if it does 10 | log "Checking for existing es_test container..." 11 | if docker ps -a | grep -q es_test; then 12 | log "Found existing es_test container. Removing it..." 13 | docker rm -f es_test 14 | fi 15 | 16 | # 1. Start Elasticsearch server in Docker (single-node for testing) with security disabled 17 | log "Starting Elasticsearch Docker container..." 18 | docker run -d --name es_test -p 9200:9200 \ 19 | -e "discovery.type=single-node" \ 20 | -e "xpack.security.enabled=false" \ 21 | docker.elastic.co/elasticsearch/elasticsearch:8.9.0 22 | 23 | # Wait until Elasticsearch is responsive 24 | log "Waiting for Elasticsearch to be available on http://localhost:9200 ..." 25 | until curl -s http://localhost:9200 >/dev/null; do 26 | sleep 1 27 | done 28 | log "Elasticsearch is up and running." 29 | 30 | # 2. Create an index named "weather" with mappings and populate with sample data 31 | 32 | log "Creating the 'weather' index with mappings..." 33 | curl -s -X PUT "http://localhost:9200/weather?pretty" -H 'Content-Type: application/json' -d' 34 | { 35 | "mappings": { 36 | "properties": { 37 | "city": { "type": "keyword" }, 38 | "country": { "type": "keyword" }, 39 | "temperature": { "type": "float" }, 40 | "condition": { "type": "text" }, 41 | "timestamp": { "type": "date" } 42 | } 43 | } 44 | } 45 | ' 46 | log "'weather' index created." 47 | 48 | log "Indexing dummy weather data..." 
49 | 50 | # New York, USA 51 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 52 | { 53 | "city": "New York", 54 | "country": "USA", 55 | "temperature": 22.5, 56 | "condition": "Sunny", 57 | "timestamp": "2025-02-02T12:00:00Z" 58 | } 59 | ' 60 | 61 | # London, UK 62 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 63 | { 64 | "city": "London", 65 | "country": "UK", 66 | "temperature": 16.0, 67 | "condition": "Cloudy", 68 | "timestamp": "2025-02-02T12:05:00Z" 69 | } 70 | ' 71 | 72 | # Paris, France 73 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 74 | { 75 | "city": "Paris", 76 | "country": "France", 77 | "temperature": 18.3, 78 | "condition": "Rainy", 79 | "timestamp": "2025-02-02T12:10:00Z" 80 | } 81 | ' 82 | 83 | # Tokyo, Japan 84 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 85 | { 86 | "city": "Tokyo", 87 | "country": "Japan", 88 | "temperature": 24.0, 89 | "condition": "Clear", 90 | "timestamp": "2025-02-02T12:15:00Z" 91 | } 92 | ' 93 | 94 | # Berlin, Germany 95 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 96 | { 97 | "city": "Berlin", 98 | "country": "Germany", 99 | "temperature": 14.7, 100 | "condition": "Overcast", 101 | "timestamp": "2025-02-02T12:20:00Z" 102 | } 103 | ' 104 | 105 | # Sydney, Australia 106 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 107 | { 108 | "city": "Sydney", 109 | "country": "Australia", 110 | "temperature": 26.4, 111 | "condition": "Sunny", 112 | "timestamp": "2025-02-02T12:25:00Z" 113 | } 114 | ' 115 | 116 | # Moscow, Russia 117 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 118 | { 119 | "city": "Moscow", 120 | "country": "Russia", 121 | "temperature": -5.0, 122 | "condition": "Snowy", 123 | "timestamp": "2025-02-02T12:30:00Z" 124 | } 125 | ' 126 | 127 | # Beijing, China 128 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 129 | { 130 | "city": "Beijing", 131 | "country": "China", 132 | "temperature": 10.2, 133 | "condition": "Smoggy", 134 | "timestamp": "2025-02-02T12:35:00Z" 135 | } 136 | ' 137 | 138 | # Rio de Janeiro, Brazil 139 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 140 | { 141 | "city": "Rio de Janeiro", 142 | "country": "Brazil", 143 | "temperature": 28.1, 144 | "condition": "Humid", 145 | "timestamp": "2025-02-02T12:40:00Z" 146 | } 147 | ' 148 | 149 | # Cape Town, South Africa 150 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 151 | { 152 | "city": "Cape Town", 153 | "country": "South Africa", 154 | "temperature": 20.3, 155 | "condition": "Windy", 156 | "timestamp": "2025-02-02T12:45:00Z" 157 | } 158 | ' 159 | 160 | # Mumbai, India 161 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 162 | { 163 | "city": "Mumbai", 164 | "country": "India", 165 | "temperature": 30.5, 166 | "condition": "Humid", 167 | "timestamp": "2025-02-02T12:50:00Z" 168 | } 169 | ' 170 | 171 | # San Francisco, USA 172 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 173 | { 174 | "city": "San Francisco", 175 | "country": 
"USA", 176 | "temperature": 17.8, 177 | "condition": "Foggy", 178 | "timestamp": "2025-02-02T12:55:00Z" 179 | } 180 | ' 181 | 182 | # Refresh the index to make sure documents are searchable immediately. 183 | log "Refreshing the index..." 184 | curl -s -X POST "http://localhost:9200/weather/_refresh?pretty" 185 | 186 | # 3. Test: List all indices to verify the "weather" index is up 187 | log "Testing: Listing all indices..." 188 | curl -X GET "http://localhost:9200/_cat/indices?v&pretty" 189 | 190 | log "Elasticsearch setup complete. The 'weather' index is populated with expanded dummy weather data." 191 | 192 | # Uncomment the following line to stop and remove the container after testing 193 | # docker rm -f es_test 194 | -------------------------------------------------------------------------------- /libs/ai-endpoints/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | -------------------------------------------------------------------------------- /libs/ai-endpoints/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 LangChain, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /libs/ai-endpoints/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all format lint test tests integration_tests help 2 | 3 | # Default target executed when no arguments are given to make. 4 | all: help 5 | 6 | # Define a variable for the test file path. 7 | TEST_FILE ?= tests/unit_tests/ 8 | 9 | test: 10 | poetry run pytest $(PYTEST_ARGS) $(TEST_FILE) 11 | 12 | tests: 13 | poetry run pytest $(PYTEST_ARGS) $(TEST_FILE) 14 | 15 | check_imports: $(shell find langchain_nvidia_ai_endpoints -name '*.py') 16 | poetry run python ./scripts/check_imports.py $^ 17 | 18 | integration_tests: 19 | poetry run pytest tests/integration_tests $(PYTEST_ARGS) 20 | 21 | 22 | ###################### 23 | # LINTING AND FORMATTING 24 | ###################### 25 | 26 | # Define a variable for Python and notebook files. 27 | PYTHON_FILES=. 28 | MYPY_CACHE=.mypy_cache 29 | lint format: PYTHON_FILES=. 
30 | lint_diff format_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$') 31 | lint_package: PYTHON_FILES=langchain_nvidia_ai_endpoints 32 | lint_tests: PYTHON_FILES=tests 33 | lint_tests: MYPY_CACHE=.mypy_cache_test 34 | 35 | lint lint_diff lint_package lint_tests: 36 | ./scripts/lint_imports.sh 37 | poetry run ruff . 38 | [ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff 39 | [ "$(PYTHON_FILES)" = "" ] || poetry run mypy $(PYTHON_FILES) 40 | 41 | format format_diff: 42 | poetry run ruff format $(PYTHON_FILES) 43 | poetry run ruff --select I --fix $(PYTHON_FILES) 44 | 45 | spell_check: 46 | poetry run codespell --toml pyproject.toml 47 | 48 | spell_fix: 49 | poetry run codespell --toml pyproject.toml -w 50 | 51 | ###################### 52 | # HELP 53 | ###################### 54 | 55 | help: 56 | @echo '----' 57 | @echo 'format - run code formatters' 58 | @echo 'lint - run linters' 59 | @echo 'test - run unit tests' 60 | @echo 'tests - run unit tests' 61 | @echo 'test TEST_FILE= - run all tests in file' 62 | -------------------------------------------------------------------------------- /libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NVIDIA NIMs\n", 8 | "\n", 9 | ":::caution\n", 10 | "You are currently on a page documenting the use of models as [text completion models](/docs/concepts/#llms).\n", 11 | "Many popular models are [chat completion models](/docs/concepts/#chat-models).\n", 12 | "\n", 13 | "To use chat completion models, use [ChatNVIDIA](/docs/integrations/chat/nvidia_ai_endpoints/) instead.\n", 14 | ":::\n", 15 | "\n", 16 | "The `langchain-nvidia-ai-endpoints` package contains LangChain integrations building applications with models on \n", 17 | "NVIDIA NIM inference microservice. NIM supports models across domains like chat, completion, embedding, and re-ranking models \n", 18 | "from the community as well as NVIDIA. These models are optimized by NVIDIA to deliver the best performance on NVIDIA \n", 19 | "accelerated infrastructure and deployed as a NIM, an easy-to-use, prebuilt containers that deploy anywhere using a single \n", 20 | "command on NVIDIA accelerated infrastructure.\n", 21 | "\n", 22 | "NVIDIA hosted deployments of NIMs are available to test on the [NVIDIA API catalog](https://build.nvidia.com/). After testing, \n", 23 | "NIMs can be exported from NVIDIA’s API catalog using the NVIDIA AI Enterprise license and run on-premises or in the cloud, \n", 24 | "giving enterprises ownership and full control of their IP and AI application.\n", 25 | "\n", 26 | "NIMs are packaged as container images on a per model basis and are distributed as NGC container images through the NVIDIA NGC Catalog. 
\n", 27 | "At their core, NIMs provide easy, consistent, and familiar APIs for running inference on an AI model.\n", 28 | "\n", 29 | "This example goes over how to use LangChain to interact with NVIDIA supported via the `NVIDIA` class.\n", 30 | "\n", 31 | "For more information on accessing the completion models through this api, check out the [NVIDIA](https://python.langchain.com/docs/integrations/llms/nvidia_ai_endpoints/) documentation.\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Installation" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "#%pip install -qU langchain-nvidia-ai-endpoints" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "## Setup\n", 55 | "\n", 56 | "**To get started:**\n", 57 | "\n", 58 | "1. Create a free account with [NVIDIA](https://build.nvidia.com/), which hosts NVIDIA AI Foundation models.\n", 59 | "\n", 60 | "2. Click on your model of choice.\n", 61 | "\n", 62 | "3. Under `Input` select the `Python` tab, and click `Get API Key`. Then click `Generate Key`.\n", 63 | "\n", 64 | "4. Copy and save the generated key as `NVIDIA_API_KEY`. From there, you should have access to the endpoints." 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "import os\n", 74 | "from getpass import getpass\n", 75 | "\n", 76 | "# del os.environ['NVIDIA_API_KEY'] ## delete key and reset\n", 77 | "if os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n", 78 | " print(\"Valid NVIDIA_API_KEY already in environment. Delete to reset\")\n", 79 | "else:\n", 80 | " candidate_api_key = getpass(\"NVAPI Key (starts with nvapi-): \")\n", 81 | " assert candidate_api_key.startswith(\"nvapi-\"), f\"{candidate_api_key[:5]}... is not a valid key\"\n", 82 | " os.environ[\"NVIDIA_API_KEY\"] = candidate_api_key" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "## Usage\n", 90 | "\n", 91 | "See [LLM](/docs/how_to#llms) for full functionality." 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "from langchain_nvidia_ai_endpoints import NVIDIA" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "llm = NVIDIA().bind(max_tokens=256)\n", 110 | "llm" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "prompt = \"# Function that does quicksort written in Rust without comments:\"" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "print(llm.invoke(prompt))" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## Stream, Batch, and Async\n", 136 | "\n", 137 | "These models natively support streaming, and as is the case with all LangChain LLMs they expose a batch method to handle concurrent requests, as well as async methods for invoke, stream, and batch. Below are a few examples." 
138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "for chunk in llm.stream(prompt):\n", 147 | " print(chunk, end=\"\", flush=True)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "llm.batch([prompt])" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "await llm.ainvoke(prompt)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "async for chunk in llm.astream(prompt):\n", 175 | " print(chunk, end=\"\", flush=True)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "await llm.abatch([prompt])" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "async for chunk in llm.astream_log(prompt):\n", 194 | " print(chunk)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "response = llm.invoke(\n", 204 | " \"X_train, y_train, X_test, y_test = train_test_split(X, y, test_size=0.1) #Train a logistic regression model, predict the labels on the test set and compute the accuracy score\"\n", 205 | ")\n", 206 | "print(response)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "## Supported models\n", 214 | "\n", 215 | "Querying `available_models` will still give you all of the other models offered by your API credentials." 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "NVIDIA.get_available_models()\n", 225 | "# llm.get_available_models()" 226 | ] 227 | } 228 | ], 229 | "metadata": { 230 | "kernelspec": { 231 | "display_name": "langchain-nvidia-ai-endpoints-m0-Y4aGr-py3.10", 232 | "language": "python", 233 | "name": "python3" 234 | }, 235 | "language_info": { 236 | "codemirror_mode": { 237 | "name": "ipython", 238 | "version": 3 239 | }, 240 | "file_extension": ".py", 241 | "mimetype": "text/x-python", 242 | "name": "python", 243 | "nbconvert_exporter": "python", 244 | "pygments_lexer": "ipython3", 245 | "version": "3.10.14" 246 | } 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 2 250 | } 251 | -------------------------------------------------------------------------------- /libs/ai-endpoints/docs/providers/nvidia.mdx: -------------------------------------------------------------------------------- 1 | # NVIDIA 2 | The `langchain-nvidia-ai-endpoints` package contains LangChain integrations building applications with models on 3 | NVIDIA NIM inference microservice. NIM supports models across domains like chat, embedding, and re-ranking models 4 | from the community as well as NVIDIA. These models are optimized by NVIDIA to deliver the best performance on NVIDIA 5 | accelerated infrastructure and deployed as a NIM, an easy-to-use, prebuilt containers that deploy anywhere using a single 6 | command on NVIDIA accelerated infrastructure. 7 | 8 | NVIDIA hosted deployments of NIMs are available to test on the [NVIDIA API catalog](https://build.nvidia.com/). 
After testing, 9 | NIMs can be exported from NVIDIA’s API catalog using the NVIDIA AI Enterprise license and run on-premises or in the cloud, 10 | giving enterprises ownership and full control of their IP and AI applications. 11 | 12 | NIMs are packaged as container images on a per model basis and are distributed as NGC container images through the NVIDIA NGC Catalog. 13 | At their core, NIMs provide easy, consistent, and familiar APIs for running inference on an AI model. 14 | 15 | Below is an example of how to use some common functionality surrounding text-generative and embedding models. 16 | 17 | ## Installation 18 | 19 | ```python 20 | pip install -U --quiet langchain-nvidia-ai-endpoints 21 | ``` 22 | 23 | ## Setup 24 | 25 | **To get started:** 26 | 27 | 1. Create a free account with [NVIDIA](https://build.nvidia.com/), which hosts NVIDIA AI Foundation models. 28 | 29 | 2. Click on your model of choice. 30 | 31 | 3. Under Input select the Python tab, and click `Get API Key`. Then click `Generate Key`. 32 | 33 | 4. Copy and save the generated key as NVIDIA_API_KEY. From there, you should have access to the endpoints. 34 | 35 | ```python 36 | import getpass 37 | import os 38 | 39 | if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"): 40 | nvidia_api_key = getpass.getpass("Enter your NVIDIA API key: ") 41 | assert nvidia_api_key.startswith("nvapi-"), f"{nvidia_api_key[:5]}... is not a valid key" 42 | os.environ["NVIDIA_API_KEY"] = nvidia_api_key 43 | ``` 44 | ## Working with NVIDIA API Catalog 45 | 46 | ```python 47 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 48 | 49 | llm = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1") 50 | result = llm.invoke("Write a ballad about LangChain.") 51 | print(result.content) 52 | ``` 53 | 54 | Using the API, you can query live endpoints available on the NVIDIA API Catalog to get quick results from a DGX-hosted cloud compute environment. All models are source-accessible and can be deployed on your own compute cluster using NVIDIA NIM, which is part of NVIDIA AI Enterprise, shown in the next section [Working with NVIDIA NIMs](#working-with-nvidia-nims). 55 | 56 | ## Working with NVIDIA NIMs 57 | When ready to deploy, you can self-host models with NVIDIA NIM—which is included with the NVIDIA AI Enterprise software license—and run them anywhere, giving you ownership of your customizations and full control of your intellectual property (IP) and AI applications. 58 | 59 | [Learn more about NIMs](https://developer.nvidia.com/blog/nvidia-nim-offers-optimized-inference-microservices-for-deploying-ai-models-at-scale/) 60 | 61 | ```python 62 | from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank 63 | 64 | # connect to a chat NIM running at localhost:8000, specifying a specific model 65 | llm = ChatNVIDIA(base_url="http://localhost:8000/v1", model="meta/llama3-8b-instruct") 66 | 67 | # connect to an embedding NIM running at localhost:8080 68 | embedder = NVIDIAEmbeddings(base_url="http://localhost:8080/v1") 69 | 70 | # connect to a reranking NIM running at localhost:2016 71 | ranker = NVIDIARerank(base_url="http://localhost:2016/v1") 72 | ``` 73 | 74 | ## Using NVIDIA AI Foundation Endpoints 75 | 76 | A selection of NVIDIA AI Foundation models are supported directly in LangChain with familiar APIs. 77 | 78 | The active models which are supported can be found [in the API Catalog](https://build.nvidia.com/).
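You can also check programmatically which models your credentials can reach. A minimal sketch using the `get_available_models` helper (the exact set of models returned depends on your API key and base URL):

```python
from langchain_nvidia_ai_endpoints import ChatNVIDIA

# list the chat models available to the current NVIDIA_API_KEY / base_url
for model in ChatNVIDIA.get_available_models():
    print(model.id, model.model_type)
```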
79 | 80 | **The following may be useful examples to help you get started:** 81 | - **[`ChatNVIDIA` Model](https://github.com/langchain-ai/langchain/blob/master/docs/docs/integrations/chat/nvidia_ai_endpoints.ipynb).** 82 | - **[`NVIDIAEmbeddings` Model for retrieval in RAG Workflows](https://github.com/langchain-ai/langchain/blob/master/docs/docs/integrations/text_embedding/nvidia_ai_endpoints.ipynb).** -------------------------------------------------------------------------------- /libs/ai-endpoints/langchain_nvidia.py: -------------------------------------------------------------------------------- 1 | """ 2 | **LangChain NVIDIA AI Endpoints** 3 | 4 | This comprehensive module integrates NVIDIA's state-of-the-art NIM endpoints, 5 | featuring advanced models for conversational AI and semantic embeddings, 6 | into the LangChain framework. It provides robust classes for seamless interaction 7 | with AI models, particularly tailored for enriching conversational experiences 8 | and enhancing semantic understanding in various applications. 9 | 10 | **Features:** 11 | 12 | 1. **`ChatNVIDIA`:** This class serves as the primary interface for interacting 13 | with chat models. Users can effortlessly utilize advanced models like 'Nemotron' 14 | to engage in rich, context-aware conversations, applicable across diverse 15 | domains from customer support to interactive storytelling. 16 | 17 | 2. **`NVIDIAEmbeddings`:** The class offers capabilities to generate sophisticated 18 | embeddings using AI models. These embeddings are instrumental for tasks like 19 | semantic analysis, text similarity assessments, and contextual understanding, 20 | significantly enhancing the depth of NLP applications. 21 | 22 | 3. **`NVIDIARerank`:** This class provides an interface for reranking search results 23 | using AI models. Users can leverage this functionality to enhance search 24 | relevance and improve user experience in information retrieval systems. 25 | 26 | 4. **`NVIDIA`:** This class enables users to interact with large language models 27 | through a completions, or prompting, interface. Users can generate text 28 | completions, summaries, and other language model outputs using this class. 29 | This class is particularly useful for code generation tasks. 
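For the reranking interface described in (3), a minimal sketch (the query and documents below are illustrative, and the default hosted reranking model is assumed):

```python
from langchain_core.documents import Document
from langchain_nvidia import NVIDIARerank

ranker = NVIDIARerank()  # defaults to the hosted reranking model
docs = [
    Document(page_content="NIMs are prebuilt inference containers."),
    Document(page_content="GPUs accelerate matrix math."),
]
ranked = ranker.compress_documents(documents=docs, query="What is a NIM?")
```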
30 | 31 | **Installation:** 32 | 33 | Install this module easily using pip: 34 | 35 | ```python 36 | pip install langchain-nvidia-ai-endpoints 37 | ``` 38 | 39 | After setting up the environment, interact with NIM endpoints - 40 | 41 | ## Utilizing chat models: 42 | 43 | ```python 44 | from langchain_nvidia import ChatNVIDIA 45 | 46 | llm = ChatNVIDIA(model="nvidia/llama-3.1-nemotron-51b-instruct") 47 | response = llm.invoke("Tell me about the LangChain integration.") 48 | ``` 49 | 50 | ## Generating semantic embeddings: 51 | 52 | Create embeddings useful in various NLP tasks: 53 | 54 | ```python 55 | from langchain_nvidia import NVIDIAEmbeddings 56 | 57 | embedder = NVIDIAEmbeddings(model="nvidia/nv-embedqa-e5-v5") 58 | embedding = embedder.embed_query("Exploring AI capabilities.") 59 | ``` 60 | 61 | ## Code completion using large language models: 62 | 63 | ```python 64 | from langchain_nvidia import NVIDIA 65 | 66 | llm = NVIDIA(model="meta/codellama-70b") 67 | completion = llm.invoke("def hello_world():") 68 | ``` 69 | """ # noqa: E501 70 | 71 | from langchain_nvidia_ai_endpoints import * # noqa: F403 72 | from langchain_nvidia_ai_endpoints import __all__ # noqa: F401 73 | -------------------------------------------------------------------------------- /libs/ai-endpoints/langchain_nvidia_ai_endpoints/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | **NOTE: You can `import langchain_nvidia` instead.** 3 | 4 | **LangChain NVIDIA AI Foundation Model Playground Integration** 5 | 6 | This comprehensive module integrates NVIDIA's state-of-the-art AI Foundation Models, featuring advanced models for conversational AI and semantic embeddings, into the LangChain framework. It provides robust classes for seamless interaction with NVIDIA's AI models, particularly tailored for enriching conversational experiences and enhancing semantic understanding in various applications. 7 | 8 | **Features:** 9 | 10 | 1. **Chat Models (`ChatNVIDIA`):** This class serves as the primary interface for interacting with NVIDIA's Foundation chat models. Users can effortlessly utilize NVIDIA's advanced models like 'Mistral' to engage in rich, context-aware conversations, applicable across diverse domains from customer support to interactive storytelling. 11 | 12 | 2. **Semantic Embeddings (`NVIDIAEmbeddings`):** The module offers capabilities to generate sophisticated embeddings using NVIDIA's AI models. These embeddings are instrumental for tasks like semantic analysis, text similarity assessments, and contextual understanding, significantly enhancing the depth of NLP applications. 
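In addition to single-query embedding, `NVIDIAEmbeddings.embed_documents` embeds passages in batches; a minimal sketch (the texts are illustrative, and the default hosted embedding model is assumed):

```python
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

embedder = NVIDIAEmbeddings()  # defaults to the hosted embedding model
vectors = embedder.embed_documents(
    ["NIMs are prebuilt inference containers.", "GPUs accelerate inference."]
)
```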
13 | 14 | **Installation:** 15 | 16 | Install this module easily using pip: 17 | 18 | ```python 19 | pip install langchain-nvidia-ai-endpoints 20 | ``` 21 | 22 | ## Utilizing Chat Models: 23 | 24 | After setting up the environment, interact with NVIDIA AI Foundation models: 25 | ```python 26 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 27 | 28 | ai_chat_model = ChatNVIDIA(model="meta/llama2-70b") 29 | response = ai_chat_model.invoke("Tell me about the LangChain integration.") 30 | ``` 31 | 32 | # Generating Semantic Embeddings: 33 | 34 | Use NVIDIA's models for creating embeddings, useful in various NLP tasks: 35 | 36 | ```python 37 | from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings 38 | 39 | embed_model = NVIDIAEmbeddings(model="nvolveqa_40k") 40 | embedding_output = embed_model.embed_query("Exploring AI capabilities.") 41 | ``` 42 | """ # noqa: E501 43 | 44 | from langchain_nvidia_ai_endpoints._statics import Model, register_model 45 | from langchain_nvidia_ai_endpoints.chat_models import ChatNVIDIA 46 | from langchain_nvidia_ai_endpoints.embeddings import NVIDIAEmbeddings 47 | from langchain_nvidia_ai_endpoints.llm import NVIDIA 48 | from langchain_nvidia_ai_endpoints.reranking import NVIDIARerank 49 | 50 | __all__ = [ 51 | "ChatNVIDIA", 52 | "NVIDIA", 53 | "NVIDIAEmbeddings", 54 | "NVIDIARerank", 55 | "register_model", 56 | "Model", 57 | ] 58 | -------------------------------------------------------------------------------- /libs/ai-endpoints/langchain_nvidia_ai_endpoints/_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import ( 4 | Any, 5 | Dict, 6 | ) 7 | 8 | from langchain_core.messages import ( 9 | AIMessage, 10 | BaseMessage, 11 | ChatMessage, 12 | FunctionMessage, 13 | HumanMessage, 14 | SystemMessage, 15 | ToolMessage, 16 | ) 17 | 18 | 19 | def convert_message_to_dict(message: BaseMessage) -> dict: 20 | """Convert a LangChain message to a dictionary. 21 | 22 | Args: 23 | message: The LangChain message. 24 | 25 | Returns: 26 | The dictionary. 
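Example (illustrative): a `HumanMessage` maps to the OpenAI-style "user" role, and the other message types map analogously per the branches below:

        convert_message_to_dict(HumanMessage(content="hello"))
        # -> {"role": "user", "content": "hello"}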
27 | """ 28 | message_dict: Dict[str, Any] 29 | if isinstance(message, ChatMessage): 30 | message_dict = {"role": message.role, "content": message.content} 31 | elif isinstance(message, HumanMessage): 32 | message_dict = {"role": "user", "content": message.content} 33 | elif isinstance(message, AIMessage): 34 | message_dict = {"role": "assistant", "content": message.content} 35 | if "function_call" in message.additional_kwargs: 36 | message_dict["function_call"] = message.additional_kwargs["function_call"] 37 | # If function call only, content is None not empty string 38 | if message_dict["content"] == "": 39 | message_dict["content"] = None 40 | if "tool_calls" in message.additional_kwargs: 41 | message_dict["tool_calls"] = message.additional_kwargs["tool_calls"] 42 | # If tool calls only, content is None not empty string 43 | if message_dict["content"] == "": 44 | message_dict["content"] = None 45 | elif isinstance(message, SystemMessage): 46 | message_dict = {"role": "system", "content": message.content} 47 | elif isinstance(message, FunctionMessage): 48 | message_dict = { 49 | "role": "function", 50 | "content": message.content, 51 | "name": message.name, 52 | } 53 | elif isinstance(message, ToolMessage): 54 | message_dict = { 55 | "role": "tool", 56 | "content": message.content, 57 | "tool_call_id": message.tool_call_id, 58 | } 59 | else: 60 | raise TypeError(f"Got unknown type {message}") 61 | if "name" in message.additional_kwargs: 62 | message_dict["name"] = message.additional_kwargs["name"] 63 | return message_dict 64 | -------------------------------------------------------------------------------- /libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Literal, Optional 2 | 3 | from langchain_core.embeddings import Embeddings 4 | from langchain_core.outputs.llm_result import LLMResult 5 | from pydantic import ( 6 | BaseModel, 7 | ConfigDict, 8 | Field, 9 | PrivateAttr, 10 | ) 11 | 12 | from langchain_nvidia_ai_endpoints._common import _NVIDIAClient 13 | from langchain_nvidia_ai_endpoints._statics import Model 14 | from langchain_nvidia_ai_endpoints.callbacks import usage_callback_var 15 | 16 | _DEFAULT_MODEL_NAME: str = "nvidia/nv-embedqa-e5-v5" 17 | _DEFAULT_BATCH_SIZE: int = 50 18 | 19 | 20 | class NVIDIAEmbeddings(BaseModel, Embeddings): 21 | """ 22 | Client to NVIDIA embeddings models. 23 | 24 | Fields: 25 | - model: str, the name of the model to use 26 | - truncate: "NONE", "START", "END", truncate input text if it exceeds the model's 27 | maximum token length. Default is "NONE", which raises an error if an input is 28 | too long. 29 | - dimensions: int, the number of dimensions for the embeddings. This parameter is 30 | not supported by all models. 31 | """ 32 | 33 | model_config = ConfigDict( 34 | validate_assignment=True, 35 | ) 36 | 37 | _client: _NVIDIAClient = PrivateAttr() 38 | base_url: Optional[str] = Field( 39 | default=None, 40 | description="Base url for model listing an invocation", 41 | ) 42 | model: Optional[str] = Field(None, description="Name of the model to invoke") 43 | truncate: Literal["NONE", "START", "END"] = Field( 44 | default="NONE", 45 | description=( 46 | "Truncate input text if it exceeds the model's maximum token length. " 47 | "Default is 'NONE', which raises an error if an input is too long." 
48 | ), 49 | ) 50 | dimensions: Optional[int] = Field( 51 | default=None, 52 | description=( 53 | "The number of dimensions for the embeddings. This parameter is not " 54 | "supported by all models." 55 | ), 56 | ) 57 | max_batch_size: int = Field(default=_DEFAULT_BATCH_SIZE) 58 | 59 | def __init__(self, **kwargs: Any): 60 | """ 61 | Create a new NVIDIAEmbeddings embedder. 62 | 63 | This class provides access to a NVIDIA NIM for embedding. By default, it 64 | connects to a hosted NIM, but can be configured to connect to a local NIM 65 | using the `base_url` parameter. An API key is required to connect to the 66 | hosted NIM. 67 | 68 | Args: 69 | model (str): The model to use for embedding. 70 | nvidia_api_key (str): The API key to use for connecting to the hosted NIM. 71 | api_key (str): Alternative to nvidia_api_key. 72 | base_url (str): The base URL of the NIM to connect to. 73 | Format for base URL is http://host:port 74 | trucate (str): "NONE", "START", "END", truncate input text if it exceeds 75 | the model's context length. Default is "NONE", which raises 76 | an error if an input is too long. 77 | dimensions (int): The number of dimensions for the embeddings. This 78 | parameter is not supported by all models. 79 | 80 | API Key: 81 | - The recommended way to provide the API key is through the `NVIDIA_API_KEY` 82 | environment variable. 83 | 84 | Base URL: 85 | - Connect to a self-hosted model with NVIDIA NIM using the `base_url` arg to 86 | link to the local host at localhost:8000: 87 | embedder = NVIDIAEmbeddings(base_url="http://localhost:8080/v1") 88 | """ 89 | super().__init__(**kwargs) 90 | # allow nvidia_base_url as an alternative for base_url 91 | base_url = kwargs.pop("nvidia_base_url", self.base_url) 92 | # allow nvidia_api_key as an alternative for api_key 93 | api_key = kwargs.pop("nvidia_api_key", kwargs.pop("api_key", None)) 94 | self._client = _NVIDIAClient( 95 | **({"base_url": base_url} if base_url else {}), # only pass if set 96 | mdl_name=self.model, 97 | default_hosted_model_name=_DEFAULT_MODEL_NAME, 98 | **({"api_key": api_key} if api_key else {}), # only pass if set 99 | infer_path="{base_url}/embeddings", 100 | cls=self.__class__.__name__, 101 | ) 102 | # todo: only store the model in one place 103 | # the model may be updated to a newer name during initialization 104 | self.model = self._client.mdl_name 105 | # same for base_url 106 | self.base_url = self._client.base_url 107 | 108 | @property 109 | def available_models(self) -> List[Model]: 110 | """ 111 | Get a list of available models that work with NVIDIAEmbeddings. 112 | """ 113 | return self._client.get_available_models(self.__class__.__name__) 114 | 115 | @classmethod 116 | def get_available_models( 117 | cls, 118 | **kwargs: Any, 119 | ) -> List[Model]: 120 | """ 121 | Get a list of available models that work with NVIDIAEmbeddings. 122 | """ 123 | return cls(**kwargs).available_models 124 | 125 | def _embed( 126 | self, texts: List[str], model_type: Literal["passage", "query"] 127 | ) -> List[List[float]]: 128 | """Embed a single text entry to either passage or query type""" 129 | # API Catalog API - 130 | # input: str | list[str] -- char limit depends on model 131 | # model: str -- model name, e.g. 
NV-Embed-QA 132 | # encoding_format: "float" | "base64" 133 | # input_type: "query" | "passage" 134 | # user: str -- ignored 135 | # truncate: "NONE" | "START" | "END" -- default "NONE", error raised if 136 | # an input is too long 137 | # dimensions: int -- not supported by all models 138 | payload: Dict[str, Any] = { 139 | "input": texts, 140 | "model": self.model, 141 | "encoding_format": "float", 142 | "input_type": model_type, 143 | } 144 | if self.truncate: 145 | payload["truncate"] = self.truncate 146 | if self.dimensions: 147 | payload["dimensions"] = self.dimensions 148 | 149 | response = self._client.get_req( 150 | payload=payload, 151 | ) 152 | response.raise_for_status() 153 | result = response.json() 154 | data = result.get("data", result) 155 | if not isinstance(data, list): 156 | raise ValueError(f"Expected data with a list of embeddings. Got: {data}") 157 | embedding_list = [(res["embedding"], res["index"]) for res in data] 158 | self._invoke_callback_vars(result) 159 | return [x[0] for x in sorted(embedding_list, key=lambda x: x[1])] 160 | 161 | def embed_query(self, text: str) -> List[float]: 162 | """Input pathway for query embeddings.""" 163 | return self._embed([text], model_type="query")[0] 164 | 165 | def embed_documents(self, texts: List[str]) -> List[List[float]]: 166 | """Input pathway for document embeddings.""" 167 | if not isinstance(texts, list) or not all( 168 | isinstance(text, str) for text in texts 169 | ): 170 | raise ValueError(f"`texts` must be a list of strings, given: {repr(texts)}") 171 | 172 | all_embeddings = [] 173 | for i in range(0, len(texts), self.max_batch_size): 174 | batch = texts[i : i + self.max_batch_size] 175 | all_embeddings.extend(self._embed(batch, model_type="passage")) 176 | return all_embeddings 177 | 178 | def _invoke_callback_vars(self, response: dict) -> None: 179 | """Invoke the callback context variables if there are any.""" 180 | callback_vars = [ 181 | usage_callback_var.get(), 182 | ] 183 | llm_output = {**response, "model_name": self.model} 184 | result = LLMResult(generations=[[]], llm_output=llm_output) 185 | for cb_var in callback_vars: 186 | if cb_var: 187 | cb_var.on_llm_end(result) 188 | -------------------------------------------------------------------------------- /libs/ai-endpoints/langchain_nvidia_ai_endpoints/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/langchain_nvidia_ai_endpoints/py.typed -------------------------------------------------------------------------------- /libs/ai-endpoints/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "langchain-nvidia-ai-endpoints" 3 | version = "0.3.10" 4 | description = "An integration package connecting NVIDIA AI Endpoints and LangChain" 5 | authors = [] 6 | readme = "README.md" 7 | repository = "https://github.com/langchain-ai/langchain-nvidia" 8 | license = "MIT" 9 | packages = [ 10 | { include = "langchain_nvidia.py" }, 11 | { include = "langchain_nvidia_ai_endpoints" }, 12 | ] 13 | 14 | [tool.poetry.urls] 15 | "Source Code" = "https://github.com/langchain-ai/langchain-nvidia/tree/main/libs/ai-endpoints" 16 | 17 | [tool.poetry.dependencies] 18 | python = ">=3.9,<4.0" 19 | langchain-core = ">=0.3.51,<0.4" 20 | aiohttp = "^3.9.1" 21 | filetype = "^1.2.0" 22 | 23 | [tool.poetry.group.test] 24 | optional = true 25 | 26 | 
[tool.poetry.group.test.dependencies] 27 | pytest = "^7.3.0" 28 | freezegun = "^1.2.2" 29 | pytest-mock = "^3.10.0" 30 | syrupy = "^4.0.2" 31 | pytest-watcher = "^0.3.4" 32 | pytest-asyncio = "^0.21.1" 33 | requests-mock = "^1.11.0" 34 | langchain-tests = "^0.3.17" 35 | faker = "^24.4.0" 36 | 37 | [tool.poetry.group.codespell] 38 | optional = true 39 | 40 | [tool.poetry.group.codespell.dependencies] 41 | codespell = "^2.2.0" 42 | 43 | [tool.poetry.group.test_integration] 44 | optional = true 45 | 46 | [tool.poetry.group.test_integration.dependencies] 47 | requests-mock = "^1.11.0" 48 | 49 | [tool.poetry.group.lint] 50 | optional = true 51 | 52 | [tool.poetry.group.lint.dependencies] 53 | ruff = "^0.1.5" 54 | 55 | [tool.poetry.group.typing.dependencies] 56 | mypy = "^0.991" 57 | types-requests = "^2.31.0.10" 58 | types-pillow = "^10.2.0.20240125" 59 | 60 | [tool.ruff.lint] 61 | select = [ 62 | "E", # pycodestyle 63 | "F", # pyflakes 64 | "I", # isort 65 | "T201", # print 66 | ] 67 | 68 | [tool.mypy] 69 | disallow_untyped_defs = "True" 70 | exclude = ["notebooks", "examples", "example_data", "langchain_core/pydantic"] 71 | 72 | [[tool.mypy.overrides]] 73 | # conditional dependencies introduced by langsmith-sdk 74 | module = ["numpy", "pytest"] 75 | ignore_missing_imports = true 76 | 77 | [tool.coverage.run] 78 | omit = ["tests/*"] 79 | 80 | [build-system] 81 | requires = ["poetry-core>=1.0.0"] 82 | build-backend = "poetry.core.masonry.api" 83 | 84 | [tool.pytest.ini_options] 85 | # --strict-markers will raise errors on unknown marks. 86 | # https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks 87 | # 88 | # https://docs.pytest.org/en/7.1.x/reference/reference.html 89 | # --strict-config any warnings encountered while parsing the `pytest` 90 | # section of the configuration file raise errors. 91 | # 92 | # https://github.com/tophat/syrupy 93 | # --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite. 94 | addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5" 95 | # Registering custom markers. 96 | # https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers 97 | markers = [ 98 | "requires: mark tests as requiring a specific library", 99 | "asyncio: mark tests as requiring asyncio", 100 | "compile: mark placeholder test used to compile integration tests without running them", 101 | ] 102 | asyncio_mode = "auto" 103 | -------------------------------------------------------------------------------- /libs/ai-endpoints/scripts/check_imports.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import traceback 3 | from importlib.machinery import SourceFileLoader 4 | 5 | if __name__ == "__main__": 6 | files = sys.argv[1:] 7 | has_failure = False 8 | for file in files: 9 | try: 10 | SourceFileLoader("x", file).load_module() 11 | except Exception: 12 | has_failure = True 13 | print(file) # noqa: T201 14 | traceback.print_exc() 15 | print() # noqa: T201 16 | 17 | sys.exit(1 if has_failure else 0) 18 | -------------------------------------------------------------------------------- /libs/ai-endpoints/scripts/lint_imports.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | # Initialize a variable to keep track of errors 6 | errors=0 7 | 8 | # make sure not importing from langchain or langchain_experimental 9 | git --no-pager grep '^from langchain\.' . 
&& errors=$((errors+1)) 10 | git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1)) 11 | 12 | # Decide on an exit status based on the errors 13 | if [ "$errors" -gt 0 ]; then 14 | exit 1 15 | else 16 | exit 0 17 | fi 18 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/__init__.py -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/data/nvidia-picasso-large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/data/nvidia-picasso-large.png -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/data/nvidia-picasso.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/data/nvidia-picasso.gif -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/data/nvidia-picasso.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/data/nvidia-picasso.jpg -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/data/nvidia-picasso.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/data/nvidia-picasso.png -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/data/nvidia-picasso.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/data/nvidia-picasso.webp -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/integration_tests/__init__.py -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/conftest.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List 2 | 3 | import pytest 4 | from langchain_core.documents import Document 5 | 6 | from langchain_nvidia_ai_endpoints import ( 7 | NVIDIA, 8 | ChatNVIDIA, 9 | NVIDIAEmbeddings, 10 | NVIDIARerank, 11 | ) 12 | from langchain_nvidia_ai_endpoints._statics import MODEL_TABLE, Model 13 | from langchain_nvidia_ai_endpoints.chat_models import ( 14 | _DEFAULT_MODEL_NAME as DEFAULT_CHAT_MODEL, 15 | ) 16 | from langchain_nvidia_ai_endpoints.embeddings import ( 17 | _DEFAULT_MODEL_NAME as 
DEFAULT_EMBEDDINGS_MODEL, 18 | ) 19 | from langchain_nvidia_ai_endpoints.llm import ( 20 | _DEFAULT_MODEL_NAME as DEFAULT_COMPLETIONS_MODEL, 21 | ) 22 | from langchain_nvidia_ai_endpoints.reranking import ( 23 | _DEFAULT_MODEL_NAME as DEFAULT_RERANKING_MODEL, 24 | ) 25 | 26 | 27 | def get_mode(config: pytest.Config) -> dict: 28 | nim_endpoint = config.getoption("--nim-endpoint") 29 | if nim_endpoint: 30 | return dict(base_url=nim_endpoint) 31 | return {} 32 | 33 | 34 | def pytest_addoption(parser: pytest.Parser) -> None: 35 | parser.addoption( 36 | "--chat-model-id", 37 | action="store", 38 | nargs="+", 39 | help="Run tests for a specific chat model or list of models", 40 | ) 41 | parser.addoption( 42 | "--tool-model-id", 43 | action="store", 44 | nargs="+", 45 | help="Run tests for a specific chat models that support tool calling", 46 | ) 47 | parser.addoption( 48 | "--structured-model-id", 49 | action="store", 50 | nargs="+", 51 | help="Run tests for a specific models that support structured output", 52 | ) 53 | parser.addoption( 54 | "--qa-model-id", 55 | action="store", 56 | nargs="+", 57 | help="Run tests for a specific qa model or list of models", 58 | ) 59 | parser.addoption( 60 | "--completions-model-id", 61 | action="store", 62 | nargs="+", 63 | help="Run tests for a specific completions model or list of models", 64 | ) 65 | parser.addoption( 66 | "--embedding-model-id", 67 | action="store", 68 | nargs="+", 69 | help="Run tests for a specific embedding model or list of models", 70 | ) 71 | parser.addoption( 72 | "--rerank-model-id", 73 | action="store", 74 | nargs="+", 75 | help="Run tests for a specific rerank model or list of models", 76 | ) 77 | parser.addoption( 78 | "--vlm-model-id", 79 | action="store", 80 | nargs="+", 81 | help="Run tests for a specific vlm model or list of models", 82 | ) 83 | parser.addoption( 84 | "--all-models", 85 | action="store_true", 86 | help="Run tests across all models", 87 | ) 88 | parser.addoption( 89 | "--nim-endpoint", 90 | type=str, 91 | help="Run tests using NIM mode", 92 | ) 93 | 94 | 95 | def pytest_generate_tests(metafunc: pytest.Metafunc) -> None: 96 | mode = get_mode(metafunc.config) 97 | 98 | def get_all_known_models() -> List[Model]: 99 | return list(MODEL_TABLE.values()) 100 | 101 | if "chat_model" in metafunc.fixturenames: 102 | models = [DEFAULT_CHAT_MODEL] 103 | if model_list := metafunc.config.getoption("chat_model_id"): 104 | models = model_list 105 | if metafunc.config.getoption("all_models"): 106 | models = [ 107 | model.id 108 | for model in ChatNVIDIA(**mode).available_models 109 | if model.model_type == "chat" 110 | ] 111 | metafunc.parametrize("chat_model", models, ids=models) 112 | 113 | if "tool_model" in metafunc.fixturenames: 114 | models = ["meta/llama-3.3-70b-instruct"] 115 | if model_list := metafunc.config.getoption("tool_model_id"): 116 | models = model_list 117 | if metafunc.config.getoption("all_models"): 118 | models = [ 119 | model.id 120 | for model in ChatNVIDIA(**mode).available_models 121 | if model.model_type == "chat" and model.supports_tools 122 | ] 123 | metafunc.parametrize("tool_model", models, ids=models) 124 | 125 | if "completions_model" in metafunc.fixturenames: 126 | models = [DEFAULT_COMPLETIONS_MODEL] 127 | if model_list := metafunc.config.getoption("completions_model_id"): 128 | models = model_list 129 | if metafunc.config.getoption("all_models"): 130 | models = [ 131 | model.id 132 | for model in NVIDIA(**mode).available_models 133 | if model.model_type == "completions" 134 | ] 135 | 
metafunc.parametrize("completions_model", models, ids=models) 136 | 137 | if "structured_model" in metafunc.fixturenames: 138 | models = ["meta/llama-3.3-70b-instruct"] 139 | if model_list := metafunc.config.getoption("structured_model_id"): 140 | models = model_list 141 | if metafunc.config.getoption("all_models"): 142 | models = [ 143 | model.id 144 | for model in ChatNVIDIA(**mode).available_models 145 | if model.supports_structured_output 146 | ] 147 | metafunc.parametrize("structured_model", models, ids=models) 148 | 149 | if "rerank_model" in metafunc.fixturenames: 150 | models = [DEFAULT_RERANKING_MODEL] 151 | if model_list := metafunc.config.getoption("rerank_model_id"): 152 | models = model_list 153 | if metafunc.config.getoption("all_models"): 154 | models = [model.id for model in NVIDIARerank(**mode).available_models] 155 | metafunc.parametrize("rerank_model", models, ids=models) 156 | 157 | if "vlm_model" in metafunc.fixturenames: 158 | models = ["meta/llama-3.2-11b-vision-instruct"] 159 | if model_list := metafunc.config.getoption("vlm_model_id"): 160 | models = model_list 161 | if metafunc.config.getoption("all_models"): 162 | models = [ 163 | model.id 164 | for model in get_all_known_models() 165 | if model.model_type in {"vlm", "nv-vlm"} 166 | ] 167 | metafunc.parametrize("vlm_model", models, ids=models) 168 | 169 | if "qa_model" in metafunc.fixturenames: 170 | models = [] 171 | if model_list := metafunc.config.getoption("qa_model_id"): 172 | models = model_list 173 | if metafunc.config.getoption("all_models"): 174 | models = [ 175 | model.id 176 | for model in ChatNVIDIA(**mode).available_models 177 | if model.model_type == "qa" 178 | ] 179 | metafunc.parametrize("qa_model", models, ids=models) 180 | 181 | if "embedding_model" in metafunc.fixturenames: 182 | models = [DEFAULT_EMBEDDINGS_MODEL] 183 | if metafunc.config.getoption("all_models"): 184 | models = [model.id for model in NVIDIAEmbeddings(**mode).available_models] 185 | if model_list := metafunc.config.getoption("embedding_model_id"): 186 | models = model_list 187 | if metafunc.config.getoption("all_models"): 188 | models = [model.id for model in NVIDIAEmbeddings(**mode).available_models] 189 | metafunc.parametrize("embedding_model", models, ids=models) 190 | 191 | 192 | @pytest.fixture 193 | def mode(request: pytest.FixtureRequest) -> dict: 194 | return get_mode(request.config) 195 | 196 | 197 | @pytest.fixture( 198 | params=[ 199 | ChatNVIDIA, 200 | NVIDIAEmbeddings, 201 | NVIDIARerank, 202 | NVIDIA, 203 | ] 204 | ) 205 | def public_class(request: pytest.FixtureRequest) -> type: 206 | return request.param 207 | 208 | 209 | @pytest.fixture 210 | def contact_service() -> Any: 211 | def _contact_service(instance: Any) -> None: 212 | if isinstance(instance, ChatNVIDIA): 213 | instance.invoke("Hello") 214 | elif isinstance(instance, NVIDIAEmbeddings): 215 | instance.embed_documents(["Hello"]) 216 | elif isinstance(instance, NVIDIARerank): 217 | instance.compress_documents( 218 | documents=[Document(page_content="World")], query="Hello" 219 | ) 220 | elif isinstance(instance, NVIDIA): 221 | instance.invoke("Hello") 222 | 223 | return _contact_service 224 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_api_key.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Any 3 | 4 | import pytest 5 | from langchain_core.messages import HumanMessage 6 | 7 | from 
langchain_nvidia_ai_endpoints import ChatNVIDIA 8 | 9 | from ..unit_tests.test_api_key import no_env_var 10 | 11 | 12 | def test_missing_api_key_error(public_class: type, contact_service: Any) -> None: 13 | with no_env_var("NVIDIA_API_KEY"): 14 | with pytest.warns(UserWarning) as record: 15 | client = public_class() 16 | assert len(record) == 1 17 | assert "API key is required for the hosted" in str(record[0].message) 18 | with pytest.raises(Exception) as exc_info: 19 | contact_service(client) 20 | message = str(exc_info.value) 21 | assert "401" in message 22 | assert "Unauthorized" in message 23 | assert "API key" in message 24 | 25 | 26 | def test_bogus_api_key_error(public_class: type, contact_service: Any) -> None: 27 | with no_env_var("NVIDIA_API_KEY"): 28 | client = public_class(nvidia_api_key="BOGUS") 29 | with pytest.raises(Exception) as exc_info: 30 | contact_service(client) 31 | message = str(exc_info.value) 32 | assert "401" in message 33 | assert "Unauthorized" in message 34 | assert "API key" in message 35 | 36 | 37 | @pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"]) 38 | def test_api_key(public_class: type, param: str, contact_service: Any) -> None: 39 | api_key = os.environ.get("NVIDIA_API_KEY") 40 | with no_env_var("NVIDIA_API_KEY"): 41 | client = public_class(**{param: api_key}) 42 | contact_service(client) 43 | 44 | 45 | def test_api_key_leakage(chat_model: str, mode: dict) -> None: 46 | """Test ChatNVIDIA wrapper.""" 47 | chat = ChatNVIDIA(model=chat_model, temperature=0.7, **mode) 48 | message = HumanMessage(content="Hello") 49 | chat.invoke([message]) 50 | 51 | # check last_input post request 52 | last_inputs = chat._client.last_inputs 53 | assert last_inputs 54 | 55 | authorization_header = last_inputs.get("headers", {}).get("Authorization") 56 | 57 | if authorization_header: 58 | key = authorization_header.split("Bearer ")[1] 59 | 60 | assert not key.startswith("nvapi-") 61 | assert key == "**********" 62 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_available_models.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import pytest 4 | import requests_mock 5 | 6 | from langchain_nvidia_ai_endpoints._statics import MODEL_TABLE 7 | 8 | 9 | def test_available_models(public_class: type, mode: dict) -> None: 10 | models = public_class(**mode).available_models 11 | assert models 12 | assert isinstance(models, list) 13 | assert len(models) >= 1 14 | assert all(isinstance(model.id, str) for model in models) 15 | assert all(model.model_type is not None for model in models) 16 | assert all(model.client == public_class.__name__ for model in models) 17 | 18 | 19 | def test_get_available_models(public_class: Any, mode: dict) -> None: 20 | models = public_class.get_available_models(**mode) 21 | assert isinstance(models, list) 22 | assert len(models) >= 1 23 | assert all(isinstance(model.id, str) for model in models) 24 | assert all(model.model_type is not None for model in models) 25 | assert all(model.client == public_class.__name__ for model in models) 26 | 27 | 28 | # todo: turn this into a unit test 29 | def test_available_models_cached(public_class: type, mode: dict) -> None: 30 | if public_class.__name__ == "NVIDIARerank" and "base_url" not in mode: 31 | pytest.skip("There is no listing service for hosted ranking NIMs") 32 | with requests_mock.Mocker(real_http=True) as mock: 33 | client = public_class() 34 | 
assert not mock.called 35 | client.available_models 36 | assert mock.called 37 | client.available_models 38 | assert mock.call_count == 1 39 | 40 | 41 | def test_known_models_are_available(public_class: type, mode: dict) -> None: 42 | known_models = set( 43 | model.id 44 | for model in MODEL_TABLE.values() 45 | if model.client == public_class.__name__ 46 | ) 47 | available_models = set( 48 | model.id 49 | for model in public_class.get_available_models(**mode) # type: ignore 50 | ) 51 | 52 | assert known_models - available_models == set() 53 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_base_url.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Any 3 | 4 | import pytest 5 | from requests.exceptions import ConnectionError 6 | from requests_mock import Mocker 7 | 8 | 9 | # Fixture setup /v1/chat/completions endpoints 10 | @pytest.fixture() 11 | def mock_endpoints(requests_mock: Mocker) -> None: 12 | for endpoint in [ 13 | "/v1/embeddings", 14 | "/v1/chat/completions", 15 | "/v1/ranking", 16 | "/v1/completions", 17 | ]: 18 | requests_mock.post( 19 | re.compile(f".*{endpoint}"), 20 | exc=ConnectionError(f"Mocked ConnectionError for {endpoint}"), 21 | ) 22 | requests_mock.get( 23 | re.compile(".*/v1/models"), 24 | json={ 25 | "data": [ 26 | { 27 | "id": "not-a-model", 28 | "object": "model", 29 | "created": 1234567890, 30 | "owned_by": "OWNER", 31 | }, 32 | ] 33 | }, 34 | ) 35 | 36 | 37 | # Test function using the mock_endpoints fixture 38 | @pytest.mark.parametrize( 39 | "base_url", 40 | [ 41 | "http://localhost:12321", 42 | "http://localhost:12321/v1", 43 | ], 44 | ) 45 | def test_endpoint_unavailable( 46 | public_class: type, 47 | base_url: str, 48 | contact_service: Any, 49 | mock_endpoints: None, # Inject the mock_endpoints fixture 50 | ) -> None: 51 | # we test this with a bogus model because users should supply 52 | # a model when using their own base_url 53 | client = public_class(model="not-a-model", base_url=base_url) 54 | with pytest.raises(ConnectionError) as e: 55 | contact_service(client) 56 | assert "Mocked ConnectionError for" in str(e.value) 57 | 58 | 59 | # todo: move this to be a unit test 60 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_compile.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.mark.compile 5 | def test_placeholder() -> None: 6 | """Used for compiling integration tests without running any real tests.""" 7 | pass 8 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_completions_models.py: -------------------------------------------------------------------------------- 1 | # https://platform.openai.com/docs/api-reference/completions/create 2 | # POST https://.../v1/completions 3 | # model: str -- The ID of the model to use for completion. 4 | # prompt: str | Array[str] -- The prompt(s) to generate completions for. 5 | # best_of: Optional[int] (default: 1) -- An integer representing the number 6 | # of completions to generate and score. 7 | # The API will return the best completion 8 | # of the group. 9 | # echo: Optional[bool] (default: False) -- Whether to echo the prompt in addition 10 | # to the completion. 
11 | # frequency_penalty: Optional[float] (default: 0.0) -- Float that penalizes new 12 | # tokens. Range -2.0 to 2.0. 13 | # logit_bias: Optional[Dict[str, float]] -- Dict containing token to logit bias. 14 | # logprobs: Optional[int] (default: None) -- Integer representing the number of 15 | # logprobs to return. 0 means no logprobs. 16 | # Max value is 5. 17 | # max_tokens: Optional[int] (default: 16) -- Integer representing the maximum number 18 | # of tokens to generate. 19 | # n: Optional[int] (default: 1) -- Integer representing the number of completions 20 | # to generate. 21 | # presence_penalty: Optional[float] (default: 0.0) -- Float that penalizes new tokens 22 | # based on whether they appear in 23 | # the text so far. Range -2.0 to 24 | # 2.0. 25 | # seed: Optional[int] (default: None) -- Integer seed that attempts to make the 26 | # completions deterministic. 27 | # stop: Optional[str|Array[str]] -- Token at which to stop generating completions. 28 | # Up to 4 sequences. 29 | # stream: Optional[bool] (default: False) -- Whether to stream back partial progress. 30 | # stream_options: Optional[Dict["include_usage": bool]] -- Dict containing stream 31 | # options. 32 | # suffix: Optional[str] -- Suffix to add to the completion. 33 | # temperature: Optional[float] (default: 1.0) -- Sampling temperature, between 0 and 2. 34 | # top_p: Optional[float] (default: 1.0) -- Alternative to temperature sampling. 35 | # user: Optional[str] -- User ID to associate with the request. 36 | # 37 | # Returns: 38 | # id: str -- The ID of the completion. 39 | # object: str -- Always "text_completion". 40 | # created: int -- Unix timestamp of when the completion was created. 41 | # model: str -- The ID of the model used to generate the completion. 42 | # choices: List[{"finish_reason": "stop"|"length"|"content_filter", 43 | # "index": int, 44 | # "text": str, 45 | # "logprobs": Optional[{"text_offset": array, 46 | # "token_logprobs": array, 47 | # "tokens": array, 48 | # "top_logprobs": array}]}] -- 49 | # List of completions generated by the model. 50 | # usage: {"completion_tokens": int, 51 | # "prompt_tokens": int, 52 | # "total_tokens": int} -- Usage statistics for the model. 53 | # system_fingerprint: str -- System fingerprint of the model used to generate 54 | # the completion. 
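#
# For example (illustrative values only; field names follow the spec above),
# a minimal request/response pair might look like:
#   request:  {"model": "<completions-model-id>", "prompt": "Hello, my name is",
#              "max_tokens": 16}
#   response: {"id": "cmpl-123", "object": "text_completion", "created": 1700000000,
#              "model": "<completions-model-id>",
#              "choices": [{"index": 0, "text": " Alice.", "logprobs": null,
#                           "finish_reason": "stop"}],
#              "usage": {"prompt_tokens": 5, "completion_tokens": 3, "total_tokens": 8}}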
55 | 56 | 57 | from typing import Any, Callable, Tuple 58 | 59 | import pytest 60 | 61 | from langchain_nvidia_ai_endpoints import NVIDIA 62 | 63 | 64 | def invoke(llm: NVIDIA, prompt: str, **kwargs: Any) -> Tuple[str, int]: 65 | return llm.invoke(prompt, **kwargs), 1 66 | 67 | 68 | def stream(llm: NVIDIA, prompt: str, **kwargs: Any) -> Tuple[str, int]: 69 | response = "" 70 | count = 0 71 | for chunk in llm.stream(prompt, **kwargs): 72 | response += chunk 73 | count += 1 74 | return response, count 75 | 76 | 77 | async def ainvoke(llm: NVIDIA, prompt: str, **kwargs: Any) -> Tuple[str, int]: 78 | return await llm.ainvoke(prompt, **kwargs), 1 79 | 80 | 81 | async def astream(llm: NVIDIA, prompt: str, **kwargs: Any) -> Tuple[str, int]: 82 | response = "" 83 | count = 0 84 | async for chunk in llm.astream(prompt, **kwargs): 85 | response += chunk 86 | count += 1 87 | return response, count 88 | 89 | 90 | @pytest.mark.parametrize( 91 | "func, count", [(invoke, 0), (stream, 1)], ids=["invoke", "stream"] 92 | ) 93 | def test_basic(completions_model: str, mode: dict, func: Callable, count: int) -> None: 94 | llm = NVIDIA(model=completions_model, **mode) 95 | response, cnt = func(llm, "Hello, my name is") 96 | assert isinstance(response, str) 97 | assert cnt > count, "Should have received more chunks" 98 | 99 | 100 | @pytest.mark.parametrize( 101 | "func, count", [(ainvoke, 0), (astream, 1)], ids=["ainvoke", "astream"] 102 | ) 103 | async def test_abasic( 104 | completions_model: str, mode: dict, func: Callable, count: int 105 | ) -> None: 106 | llm = NVIDIA(model=completions_model, **mode) 107 | response, cnt = await func(llm, "Hello, my name is") 108 | assert isinstance(response, str) 109 | assert cnt > count, "Should have received more chunks" 110 | 111 | 112 | @pytest.mark.parametrize( 113 | "param, value", 114 | [ 115 | ("frequency_penalty", 0.5), 116 | ("max_tokens", 32), 117 | ("presence_penalty", 0.5), 118 | ("seed", 1234), 119 | ("stop", "Hello"), 120 | ("temperature", 0.5), 121 | ("top_p", 0.5), 122 | ], 123 | ) 124 | @pytest.mark.parametrize("func", [invoke, stream], ids=["invoke", "stream"]) 125 | def test_params( 126 | completions_model: str, mode: dict, param: str, value: Any, func: Callable 127 | ) -> None: 128 | llm = NVIDIA(model=completions_model, **mode) 129 | response, _ = func(llm, "Hello, my name is", **{param: value}) 130 | assert isinstance(response, str) 131 | 132 | 133 | @pytest.mark.parametrize( 134 | "param, value", 135 | [ 136 | ("best_of", 5), 137 | ("echo", True), 138 | ("logit_bias", {"hello": 1.0}), 139 | ("logprobs", 2), 140 | ("n", 2), 141 | ("suffix", "Hello"), 142 | ("user", "1234"), 143 | ], 144 | ) 145 | @pytest.mark.parametrize("func", [invoke, stream], ids=["invoke", "stream"]) 146 | @pytest.mark.xfail(reason="Not consistently implemented") 147 | def test_params_incomplete( 148 | completions_model: str, mode: dict, param: str, value: Any, func: Callable 149 | ) -> None: 150 | llm = NVIDIA(model=completions_model, **mode) 151 | response, _ = func(llm, "Hello, my name is", **{param: value}) 152 | assert isinstance(response, str) 153 | 154 | 155 | def test_invoke_with_stream_true(completions_model: str, mode: dict) -> None: 156 | llm = NVIDIA(model=completions_model, **mode) 157 | with pytest.warns(UserWarning) as record: 158 | response = llm.invoke("Hello, my name is", stream=True) 159 | assert isinstance(response, str) 160 | assert len(record) == 1 161 | assert "stream set to true" in str(record[0].message) 162 | assert "ignoring" in str(record[0].message) 
163 | 164 | 165 | def test_stream_with_stream_false(completions_model: str, mode: dict) -> None: 166 | llm = NVIDIA(model=completions_model, **mode) 167 | with pytest.warns(UserWarning) as record: 168 | response = next(llm.stream("Hello, my name is", stream=False)) 169 | assert isinstance(response, str) 170 | assert len(record) == 1 171 | assert "stream set to false" in str(record[0].message) 172 | assert "ignoring" in str(record[0].message) 173 | 174 | 175 | # todo: check stream_options 176 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_embeddings.py: -------------------------------------------------------------------------------- 1 | """Test NVIDIA AI Foundation Model Embeddings. 2 | 3 | Note: These tests are designed to validate the functionality of NVIDIAEmbeddings. 4 | """ 5 | 6 | import pytest 7 | 8 | from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings 9 | from langchain_nvidia_ai_endpoints.embeddings import _DEFAULT_BATCH_SIZE 10 | 11 | 12 | def test_embed_query(embedding_model: str, mode: dict) -> None: 13 | """Test NVIDIA embeddings for a single query.""" 14 | query = "foo bar" 15 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 16 | output = embedding.embed_query(query) 17 | assert len(output) > 3 18 | 19 | 20 | async def test_embed_query_async(embedding_model: str, mode: dict) -> None: 21 | """Test NVIDIA async embeddings for a single query.""" 22 | query = "foo bar" 23 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 24 | output = await embedding.aembed_query(query) 25 | assert len(output) > 3 26 | 27 | 28 | def test_embed_documents_single(embedding_model: str, mode: dict) -> None: 29 | """Test NVIDIA embeddings for documents.""" 30 | documents = ["foo bar"] 31 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 32 | output = embedding.embed_documents(documents) 33 | assert len(output) == 1 34 | assert len(output[0]) > 3 35 | 36 | 37 | def test_embed_documents_multiple(embedding_model: str, mode: dict) -> None: 38 | """Test NVIDIA embeddings for multiple documents.""" 39 | documents = ["foo bar", "bar foo", "foo"] 40 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 41 | output = embedding.embed_documents(documents) 42 | assert len(output) == 3 43 | assert all(len(doc) > 4 for doc in output) 44 | 45 | 46 | async def test_embed_documents_multiple_async(embedding_model: str, mode: dict) -> None: 47 | """Test NVIDIA async embeddings for multiple documents.""" 48 | documents = ["foo bar", "bar foo", "foo"] 49 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 50 | output = await embedding.aembed_documents(documents) 51 | assert len(output) == 3 52 | assert all(len(doc) > 4 for doc in output) 53 | 54 | 55 | def test_embed_query_long_text(embedding_model: str, mode: dict) -> None: 56 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 57 | text = "nvidia " * 10240 58 | with pytest.raises(Exception): 59 | embedding.embed_query(text) 60 | 61 | 62 | def test_embed_documents_batched_texts(embedding_model: str, mode: dict) -> None: 63 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 64 | count = _DEFAULT_BATCH_SIZE * 2 + 1 65 | texts = ["nvidia " * 32] * count 66 | output = embedding.embed_documents(texts) 67 | assert len(output) == count 68 | assert all(len(embedding) > 3 for embedding in output) 69 | 70 | 71 | def test_embed_documents_mixed_long_texts(embedding_model: str, mode: dict) -> None: 72 | embedding = 
NVIDIAEmbeddings(model=embedding_model, **mode) 73 | count = _DEFAULT_BATCH_SIZE * 2 - 1 74 | texts = ["nvidia " * 32] * count 75 | texts[len(texts) // 2] = "nvidia " * 10240 76 | with pytest.raises(Exception): 77 | embedding.embed_documents(texts) 78 | 79 | 80 | @pytest.mark.parametrize("truncate", ["START", "END"]) 81 | def test_embed_query_truncate(embedding_model: str, mode: dict, truncate: str) -> None: 82 | embedding = NVIDIAEmbeddings(model=embedding_model, truncate=truncate, **mode) 83 | text = "nvidia " * 2048 84 | output = embedding.embed_query(text) 85 | assert len(output) > 3 86 | 87 | 88 | @pytest.mark.parametrize("truncate", ["START", "END"]) 89 | def test_embed_documents_truncate( 90 | embedding_model: str, mode: dict, truncate: str 91 | ) -> None: 92 | embedding = NVIDIAEmbeddings(model=embedding_model, truncate=truncate, **mode) 93 | count = 10 94 | texts = ["nvidia " * 32] * count 95 | texts[len(texts) // 2] = "nvidia " * 10240 96 | output = embedding.embed_documents(texts) 97 | assert len(output) == count 98 | 99 | 100 | @pytest.mark.parametrize("dimensions", [32, 64, 128, 2048]) 101 | def test_embed_query_with_dimensions( 102 | embedding_model: str, mode: dict, dimensions: int 103 | ) -> None: 104 | if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2": 105 | pytest.skip("Model does not support custom dimensions.") 106 | query = "foo bar" 107 | embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode) 108 | assert len(embedding.embed_query(query)) == dimensions 109 | 110 | 111 | @pytest.mark.parametrize("dimensions", [32, 64, 128, 2048]) 112 | def test_embed_documents_with_dimensions( 113 | embedding_model: str, mode: dict, dimensions: int 114 | ) -> None: 115 | if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2": 116 | pytest.skip("Model does not support custom dimensions.") 117 | documents = ["foo bar", "bar foo"] 118 | embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode) 119 | output = embedding.embed_documents(documents) 120 | assert len(output) == len(documents) 121 | assert all(len(doc) == dimensions for doc in output) 122 | 123 | 124 | @pytest.mark.parametrize("dimensions", [102400]) 125 | def test_embed_query_with_large_dimensions( 126 | embedding_model: str, mode: dict, dimensions: int 127 | ) -> None: 128 | if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2": 129 | pytest.skip("Model does not support custom dimensions.") 130 | query = "foo bar" 131 | embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode) 132 | assert 2048 <= len(embedding.embed_query(query)) < dimensions 133 | 134 | 135 | @pytest.mark.parametrize("dimensions", [102400]) 136 | def test_embed_documents_with_large_dimensions( 137 | embedding_model: str, mode: dict, dimensions: int 138 | ) -> None: 139 | if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2": 140 | pytest.skip("Model does not support custom dimensions.") 141 | documents = ["foo bar", "bar foo"] 142 | embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode) 143 | output = embedding.embed_documents(documents) 144 | assert len(output) == len(documents) 145 | assert all(2048 <= len(doc) < dimensions for doc in output) 146 | 147 | 148 | @pytest.mark.parametrize("dimensions", [-1]) 149 | def test_embed_query_invalid_dimensions( 150 | embedding_model: str, mode: dict, dimensions: int 151 | ) -> None: 152 | if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2": 153 | pytest.skip("Model does not support custom 
dimensions.") 154 | query = "foo bar" 155 | with pytest.raises(Exception) as exc: 156 | NVIDIAEmbeddings( 157 | model=embedding_model, dimensions=dimensions, **mode 158 | ).embed_query(query) 159 | assert "400" in str(exc.value) 160 | 161 | 162 | @pytest.mark.parametrize("dimensions", [-1]) 163 | def test_embed_documents_invalid_dimensions( 164 | embedding_model: str, mode: dict, dimensions: int 165 | ) -> None: 166 | if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2": 167 | pytest.skip("Model does not support custom dimensions.") 168 | documents = ["foo bar", "bar foo"] 169 | with pytest.raises(Exception) as exc: 170 | NVIDIAEmbeddings( 171 | model=embedding_model, dimensions=dimensions, **mode 172 | ).embed_documents(documents) 173 | assert "400" in str(exc.value) 174 | 175 | 176 | # todo: test max_length > max length accepted by the model 177 | # todo: test max_batch_size > max batch size accepted by the model 178 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_other_models.py: -------------------------------------------------------------------------------- 1 | """Test ChatNVIDIA chat model.""" 2 | 3 | from langchain_core.messages import BaseMessage, HumanMessage 4 | 5 | from langchain_nvidia_ai_endpoints.chat_models import ChatNVIDIA 6 | 7 | 8 | def test_chat_ai_endpoints_context_message(qa_model: str, mode: dict) -> None: 9 | """Test wrapper with context message.""" 10 | chat = ChatNVIDIA(model=qa_model, max_tokens=36, **mode) 11 | context_message = BaseMessage( 12 | content="Once upon a time there was a little langchainer", type="context" 13 | ) 14 | human_message = HumanMessage(content="What was there once upon a time?") 15 | response = chat.invoke([context_message, human_message]) 16 | assert isinstance(response, BaseMessage) 17 | assert isinstance(response.content, str) 18 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_ranking.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | 4 | import faker 5 | import pytest 6 | from langchain_core.documents import Document 7 | 8 | from langchain_nvidia_ai_endpoints import NVIDIARerank # type: ignore 9 | 10 | 11 | class CharacterTextSplitter: 12 | def __init__(self, chunk_size: int): 13 | self.chunk_size = chunk_size 14 | 15 | def create_documents(self, text: str) -> List[Document]: 16 | words = text.split() 17 | chunks = [] 18 | for i in range(0, len(words), self.chunk_size): 19 | chunk = " ".join(words[i : i + self.chunk_size]) 20 | chunks.append(Document(page_content=chunk)) 21 | return chunks 22 | 23 | 24 | @pytest.fixture 25 | def text() -> str: 26 | fake = faker.Faker() 27 | fake.seed_instance(os.environ.get("FAKER_SEED", 13131)) 28 | return fake.paragraph(2016) 29 | 30 | 31 | @pytest.fixture 32 | def query() -> str: 33 | return "what are human rights?" 
34 | 35 | 36 | @pytest.fixture 37 | def splitter() -> CharacterTextSplitter: 38 | return CharacterTextSplitter(chunk_size=300) 39 | 40 | 41 | @pytest.fixture 42 | def documents(text: str, splitter: CharacterTextSplitter) -> List[Document]: 43 | return splitter.create_documents(text) 44 | 45 | 46 | def test_langchain_reranker_direct( 47 | query: str, documents: List[Document], rerank_model: str, mode: dict 48 | ) -> None: 49 | ranker = NVIDIARerank(model=rerank_model, **mode) 50 | result_docs = ranker.compress_documents(documents=documents, query=query) 51 | assert len(result_docs) > 0 52 | for doc in result_docs: 53 | assert "relevance_score" in doc.metadata 54 | assert doc.metadata["relevance_score"] is not None 55 | assert isinstance(doc.metadata["relevance_score"], float) 56 | 57 | 58 | def test_langchain_reranker_direct_empty_docs( 59 | query: str, rerank_model: str, mode: dict 60 | ) -> None: 61 | ranker = NVIDIARerank(model=rerank_model, **mode) 62 | result_docs = ranker.compress_documents(documents=[], query=query) 63 | assert len(result_docs) == 0 64 | 65 | 66 | def test_langchain_reranker_direct_top_n_negative( 67 | query: str, documents: List[Document], rerank_model: str, mode: dict 68 | ) -> None: 69 | orig = NVIDIARerank.model_config["validate_assignment"] 70 | NVIDIARerank.model_config["validate_assignment"] = False 71 | ranker = NVIDIARerank(model=rerank_model, **mode) 72 | ranker.top_n = -100 73 | NVIDIARerank.model_config["validate_assignment"] = orig 74 | result_docs = ranker.compress_documents(documents=documents, query=query) 75 | assert len(result_docs) == 0 76 | 77 | 78 | def test_langchain_reranker_direct_top_n_zero( 79 | query: str, documents: List[Document], rerank_model: str, mode: dict 80 | ) -> None: 81 | ranker = NVIDIARerank(model=rerank_model, **mode) 82 | ranker.top_n = 0 83 | result_docs = ranker.compress_documents(documents=documents, query=query) 84 | assert len(result_docs) == 0 85 | 86 | 87 | def test_langchain_reranker_direct_top_n_one( 88 | query: str, documents: List[Document], rerank_model: str, mode: dict 89 | ) -> None: 90 | ranker = NVIDIARerank(model=rerank_model, **mode) 91 | ranker.top_n = 1 92 | result_docs = ranker.compress_documents(documents=documents, query=query) 93 | assert len(result_docs) == 1 94 | 95 | 96 | def test_langchain_reranker_direct_top_n_equal_len_docs( 97 | query: str, documents: List[Document], rerank_model: str, mode: dict 98 | ) -> None: 99 | ranker = NVIDIARerank(model=rerank_model, **mode) 100 | ranker.top_n = len(documents) 101 | result_docs = ranker.compress_documents(documents=documents, query=query) 102 | assert len(result_docs) == len(documents) 103 | 104 | 105 | def test_langchain_reranker_direct_top_n_greater_len_docs( 106 | query: str, documents: List[Document], rerank_model: str, mode: dict 107 | ) -> None: 108 | ranker = NVIDIARerank(model=rerank_model, **mode) 109 | ranker.top_n = len(documents) * 2 110 | result_docs = ranker.compress_documents(documents=documents, query=query) 111 | assert len(result_docs) == len(documents) 112 | 113 | 114 | @pytest.mark.parametrize("batch_size", [-10, 0]) 115 | def test_rerank_invalid_max_batch_size( 116 | rerank_model: str, mode: dict, batch_size: int 117 | ) -> None: 118 | ranker = NVIDIARerank(model=rerank_model, **mode) 119 | with pytest.raises(ValueError): 120 | ranker.max_batch_size = batch_size 121 | 122 | 123 | def test_rerank_invalid_top_n(rerank_model: str, mode: dict) -> None: 124 | ranker = NVIDIARerank(model=rerank_model, **mode) 125 | with 
pytest.raises(ValueError): 126 | ranker.top_n = -10 127 | 128 | 129 | @pytest.mark.parametrize( 130 | "batch_size, top_n", 131 | [ 132 | (7, 7), # batch_size == top_n 133 | (17, 7), # batch_size > top_n 134 | (3, 13), # batch_size < top_n 135 | (1, 1), # batch_size == top_n, corner case 1 136 | (1, 10), # batch_size < top_n, corner case 1 137 | (10, 1), # batch_size > top_n, corner case 1 138 | ], 139 | ) 140 | def test_rerank_batching( 141 | query: str, 142 | documents: List[Document], 143 | rerank_model: str, 144 | mode: dict, 145 | batch_size: int, 146 | top_n: int, 147 | ) -> None: 148 | assert len(documents) > batch_size, "test requires more documents" 149 | 150 | ranker = NVIDIARerank(model=rerank_model, **mode) 151 | ranker.top_n = top_n 152 | ranker.max_batch_size = batch_size 153 | result_docs = ranker.compress_documents(documents=documents, query=query) 154 | assert len(result_docs) == min(len(documents), top_n) 155 | for doc in result_docs: 156 | assert "relevance_score" in doc.metadata 157 | assert doc.metadata["relevance_score"] is not None 158 | assert isinstance(doc.metadata["relevance_score"], float) 159 | assert all( 160 | result_docs[i].metadata["relevance_score"] 161 | >= result_docs[i + 1].metadata["relevance_score"] 162 | for i in range(len(result_docs) - 1) 163 | ), "results are not sorted" 164 | 165 | # 166 | # there's a bug in the service that causes the results to be inconsistent 167 | # depending on the batch shapes. running this test with FAKER_SEED=13131 168 | # will demonstrate the issue. 169 | # 170 | # reference_ranker = NVIDIARerank( 171 | # model=rerank_model, max_batch_size=len(documents), top_n=len(documents) 172 | # ).mode(**mode) 173 | # reference_docs = reference_ranker.compress_documents( 174 | # documents=[doc.copy(deep=True) for doc in documents], query=query 175 | # ) 176 | # for i in range(top_n): 177 | # assert result_docs[i].page_content == reference_docs[i].page_content 178 | # assert all( 179 | # result_docs[i].page_content == reference_docs[i].page_content 180 | # for i in range(top_n) 181 | # ), "batched results do not match unbatched results" 182 | 183 | 184 | @pytest.mark.parametrize("truncate", ["END"]) 185 | def test_truncate_positive(rerank_model: str, mode: dict, truncate: str) -> None: 186 | query = "What is acceleration?" 187 | documents = [ 188 | Document(page_content="NVIDIA " * length) 189 | for length in [32, 1024, 64, 128, 2048, 256, 512] 190 | ] 191 | client = NVIDIARerank( 192 | model=rerank_model, top_n=len(documents), truncate=truncate, **mode 193 | ) 194 | response = client.compress_documents(documents=documents, query=query) 195 | assert len(response) == len(documents) 196 | 197 | 198 | @pytest.mark.parametrize("truncate", [None, "NONE"]) 199 | def test_truncate_negative(rerank_model: str, mode: dict, truncate: str) -> None: 200 | if rerank_model == "nv-rerank-qa-mistral-4b:1": 201 | pytest.skip("nv-rerank-qa-mistral-4b:1 truncates by default") 202 | query = "What is acceleration?" 
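    # note: the 10240-word document below is deliberately longer than the
    # reranker's maximum input; with truncation left unset or set to "NONE",
    # the service is expected to reject the request with a 400 error, unlike
    # test_truncate_positive above where truncate="END" accepts oversized input.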
203 | documents = [ 204 | Document(page_content="NVIDIA " * length) 205 | for length in [32, 1024, 64, 128, 10240, 256, 512] 206 | ] 207 | truncate_param = {} 208 | if truncate: 209 | truncate_param = {"truncate": truncate} 210 | client = NVIDIARerank(model=rerank_model, **truncate_param, **mode) 211 | with pytest.raises(Exception) as e: 212 | client.compress_documents(documents=documents, query=query) 213 | assert "400" in str(e.value) 214 | assert "exceeds maximum allowed" in str(e.value) 215 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_register_model.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Any 3 | 4 | import pytest 5 | 6 | from langchain_nvidia_ai_endpoints import ( 7 | NVIDIA, 8 | ChatNVIDIA, 9 | Model, 10 | NVIDIAEmbeddings, 11 | NVIDIARerank, 12 | register_model, 13 | ) 14 | 15 | 16 | # 17 | # if this test is failing it may be because the function uuids have changed. 18 | # you will have to find the new ones from https://api.nvcf.nvidia.com/v2/nvcf/functions 19 | # 20 | @pytest.mark.parametrize( 21 | "client, id, endpoint", 22 | [ 23 | ( 24 | ChatNVIDIA, 25 | "meta/llama3-8b-instruct", 26 | "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/a5a3ad64-ec2c-4bfc-8ef7-5636f26630fe", 27 | ), 28 | ( 29 | NVIDIAEmbeddings, 30 | "NV-Embed-QA", 31 | "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/09c64e32-2b65-4892-a285-2f585408d118", 32 | ), 33 | ( 34 | NVIDIARerank, 35 | "nv-rerank-qa-mistral-4b:1", 36 | "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/0bf77f50-5c35-4488-8e7a-f49bb1974af6", 37 | ), 38 | ( 39 | NVIDIA, 40 | "bigcode/starcoder2-15b", 41 | "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/d9cfe8a2-44df-44a0-ba51-3fc4a202c11c", 42 | ), 43 | ], 44 | ) 45 | def test_registered_model_functional( 46 | client: type, id: str, endpoint: str, contact_service: Any 47 | ) -> None: 48 | model = Model(id=id, endpoint=endpoint) 49 | warnings.filterwarnings( 50 | "ignore", r".*is already registered.*" 51 | ) # intentionally overriding known models 52 | warnings.filterwarnings( 53 | "ignore", r".*Unable to determine validity of.*" 54 | ) # we aren't passing client & type to Model() 55 | register_model(model) 56 | contact_service(client(model=id)) 57 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_standard.py: -------------------------------------------------------------------------------- 1 | """Standard LangChain interface tests""" 2 | 3 | from typing import Any, Coroutine, Type 4 | 5 | import pytest 6 | from langchain_core.language_models import BaseChatModel 7 | from langchain_core.tools import BaseTool 8 | from langchain_tests.integration_tests import ChatModelIntegrationTests 9 | 10 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 11 | 12 | 13 | class TestNVIDIAStandard(ChatModelIntegrationTests): 14 | @property 15 | def chat_model_class(self) -> Type[BaseChatModel]: 16 | return ChatNVIDIA 17 | 18 | @property 19 | def chat_model_params(self) -> dict: 20 | return {"model": "meta/llama-3.3-70b-instruct", "temperature": 0} 21 | 22 | @pytest.mark.xfail(reason="anthropic-style list content not supported") 23 | def test_tool_message_histories_list_content( 24 | self, model: BaseChatModel, my_adder_tool: BaseTool 25 | ) -> None: 26 | return super().test_tool_message_histories_list_content(model, my_adder_tool) 27 | 28 | 
@pytest.mark.xfail(reason="Empty AIMessage content not supported") 29 | def test_tool_message_error_status( 30 | self, model: BaseChatModel, my_adder_tool: BaseTool 31 | ) -> None: 32 | return super().test_tool_message_error_status(model, my_adder_tool) 33 | 34 | @pytest.mark.xfail(reason="Empty AIMessage content not supported") 35 | def test_tool_message_histories_string_content( 36 | self, model: BaseChatModel, my_adder_tool: BaseTool 37 | ) -> None: 38 | return super().test_tool_message_histories_string_content(model, my_adder_tool) 39 | 40 | @pytest.mark.xfail( 41 | reason="Only one chunk should set input_tokens, the rest should be 0 or None" 42 | ) 43 | def test_usage_metadata_streaming(self, model: BaseChatModel) -> None: 44 | return super().test_usage_metadata_streaming(model) 45 | 46 | @pytest.mark.parametrize("schema_type", ["typeddict"]) 47 | @pytest.mark.xfail(reason="TypedDict schema type not supported") 48 | def test_structured_output(self, model: BaseChatModel, schema_type: str) -> None: 49 | return super().test_structured_output(model, schema_type) 50 | 51 | @pytest.mark.parametrize("schema_type", ["typeddict"]) 52 | @pytest.mark.xfail(reason="TypedDict schema type not supported") 53 | async def test_structured_output_async( 54 | self, model: BaseChatModel, schema_type: str 55 | ) -> Coroutine[Any, Any, None]: 56 | # Return the coroutine directly without awaiting it 57 | return super().test_structured_output_async(model, schema_type) 58 | 59 | @pytest.mark.xfail(reason="TypedDict schema type not supported") 60 | def test_structured_output_optional_param(self, model: BaseChatModel) -> None: 61 | # Don't return anything since the return type is None 62 | super().test_structured_output_optional_param(model) 63 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_streaming.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 4 | 5 | 6 | def test_ttft(chat_model: str, mode: dict) -> None: 7 | # we had an issue where streaming took a long time to start. the issue 8 | # was all streamed results were collected before yielding them to the 9 | # user. this test tries to detect the incorrect behavior. 10 | # 11 | # warning: 12 | # - this can false positive if the model itself is slow to start 13 | # - this can false nagative if there is a delay after the first chunk 14 | # 15 | # potential mitigation for false negative is to check mean & stdev and 16 | # filter outliers. 17 | # 18 | # credit to Pouyan Rezakhani for finding this issue 19 | llm = ChatNVIDIA(model=chat_model, **mode) 20 | chunk_times = [time.time()] 21 | for chunk in llm.stream("Count to 1000 by 2s, e.g. 
2 4 6 8 ...", max_tokens=512): 22 | chunk_times.append(time.time()) 23 | ttft = chunk_times[1] - chunk_times[0] 24 | total_time = chunk_times[-1] - chunk_times[0] 25 | assert ttft < ( 26 | total_time / 2 27 | ), "potential streaming issue, TTFT should be less than half of the total time" 28 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_structured_output.py: -------------------------------------------------------------------------------- 1 | import enum 2 | from typing import Any, Callable, Optional, Union 3 | 4 | import pytest 5 | from langchain_core.messages import HumanMessage 6 | from pydantic import BaseModel, Field 7 | 8 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 9 | 10 | 11 | def do_invoke(llm: ChatNVIDIA, message: str) -> Any: 12 | return llm.invoke(message) 13 | 14 | 15 | def do_stream(llm: ChatNVIDIA, message: str) -> Any: 16 | # the way streaming works is to progressively grow the response 17 | # so we just return the last chunk. this is different from other 18 | # streaming results, which are *Chunks that can be concatenated. 19 | result = [chunk for chunk in llm.stream(message)] 20 | return result[-1] if result else None 21 | 22 | 23 | @pytest.mark.xfail(reason="Accuracy is not guaranteed") 24 | def test_accuracy(structured_model: str, mode: dict) -> None: 25 | class Person(BaseModel): 26 | name: str = Field(description="The name of the person") 27 | age: Optional[int] = Field(description="The age of the person") 28 | birthdate: Optional[str] = Field(description="The birthdate of the person") 29 | occupation: Optional[str] = Field(description="The occupation of the person") 30 | birthplace: Optional[str] = Field(description="The birthplace of the person") 31 | 32 | messages = [ 33 | HumanMessage( 34 | """ 35 | Jen-Hsun Huang was born in Tainan, Taiwan, on February 17, 1963. His family 36 | moved to Thailand when he was five; when he was nine, he and his brother were 37 | sent to the United States to live with an uncle in Tacoma, Washington. When he 38 | was ten, he lived in the boys' dormitory with his brother at Oneida Baptist 39 | Institute while attending Oneida Elementary school in Oneida, Kentucky—his 40 | uncle had mistaken what was actually a religious reform academy for a 41 | prestigious boarding school. Several years later, their parents also moved to 42 | the United States and settled in Oregon, where Huang graduated from Aloha 43 | High School in Aloha, Oregon. He skipped two years and graduated at sixteen. 44 | While growing up in Oregon in the 1980s, Huang got his first job at a local 45 | Denny's restaurant, where he worked as a busboy and waiter. 46 | Huang received his undergraduate degree in electrical engineering from Oregon 47 | State University in 1984, and his master's degree in electrical engineering 48 | from Stanford University in 1992. 49 | 50 | The current date is July 2034. 
51 | """ 52 | ), 53 | HumanMessage("Who is Jensen?"), 54 | ] 55 | 56 | llm = ChatNVIDIA(model=structured_model, **mode) 57 | structured_llm = llm.with_structured_output(Person) 58 | person = structured_llm.invoke(messages) 59 | assert isinstance(person, Person) 60 | assert person.name in ["Jen-Hsun Huang", "Jensen"] 61 | # assert person.age == 71 # this is too hard 62 | assert person.birthdate == "February 17, 1963" 63 | assert person.occupation and ( 64 | "founder" in person.occupation.lower() or "CEO" in person.occupation.upper() 65 | ) 66 | assert person.birthplace == "Tainan, Taiwan" 67 | 68 | 69 | class Joke(BaseModel): 70 | """Joke to tell user.""" 71 | 72 | setup: str = Field(description="The setup of the joke") 73 | punchline: str = Field(description="The punchline to the joke") 74 | rating: Optional[int] = Field(description="How funny the joke is, from 1 to 10") 75 | 76 | 77 | @pytest.mark.parametrize("func", [do_invoke, do_stream], ids=["invoke", "stream"]) 78 | def test_pydantic(structured_model: str, mode: dict, func: Callable) -> None: 79 | llm = ChatNVIDIA(model=structured_model, temperature=0, **mode) 80 | structured_llm = llm.with_structured_output(Joke) 81 | result = func(structured_llm, "Tell me a joke about cats") 82 | assert isinstance(result, Joke) 83 | 84 | 85 | @pytest.mark.parametrize("func", [do_invoke, do_stream], ids=["invoke", "stream"]) 86 | def test_dict(structured_model: str, mode: dict, func: Callable) -> None: 87 | json_schema = { 88 | "title": "joke", 89 | "description": "Joke to tell user.", 90 | "type": "object", 91 | "properties": { 92 | "setup": { 93 | "type": "string", 94 | "description": "The setup of the joke", 95 | }, 96 | "punchline": { 97 | "type": "string", 98 | "description": "The punchline to the joke", 99 | }, 100 | "rating": { 101 | "type": "integer", 102 | "description": "How funny the joke is, from 1 to 10", 103 | }, 104 | }, 105 | "required": ["setup", "punchline"], 106 | } 107 | 108 | llm = ChatNVIDIA(model=structured_model, temperature=0, **mode) 109 | structured_llm = llm.with_structured_output(json_schema) 110 | result = func(structured_llm, "Tell me a joke about cats") 111 | assert isinstance(result, dict) 112 | assert "setup" in result 113 | assert "punchline" in result 114 | 115 | 116 | @pytest.mark.parametrize("func", [do_invoke, do_stream], ids=["invoke", "stream"]) 117 | def test_enum(structured_model: str, mode: dict, func: Callable) -> None: 118 | class Choices(enum.Enum): 119 | A = "A is an option" 120 | B = "B is an option" 121 | C = "C is an option" 122 | 123 | llm = ChatNVIDIA(model=structured_model, temperature=0, **mode) 124 | structured_llm = llm.with_structured_output(Choices) 125 | result = func( 126 | structured_llm, 127 | """ 128 | What does 1+1 equal? 129 | A. -100 130 | B. 2 131 | C. doorstop 132 | """, 133 | ) 134 | assert isinstance(result, Choices) 135 | assert result in Choices 136 | 137 | 138 | @pytest.mark.parametrize("func", [do_invoke, do_stream], ids=["invoke", "stream"]) 139 | def test_enum_incomplete(structured_model: str, mode: dict, func: Callable) -> None: 140 | class Choices(enum.Enum): 141 | A = "A is an option you can pick" 142 | B = "B is an option you can pick" 143 | C = "C is an option you can pick" 144 | 145 | llm = ChatNVIDIA(model=structured_model, temperature=0, max_tokens=3, **mode) 146 | structured_llm = llm.with_structured_output(Choices) 147 | result = func( 148 | structured_llm, 149 | """ 150 | What does 1+1 equal? 151 | A. -100 152 | B. 2 153 | C. 
doorstop 154 | """, 155 | ) 156 | assert result is None 157 | 158 | 159 | @pytest.mark.parametrize("func", [do_invoke, do_stream], ids=["invoke", "stream"]) 160 | def test_multiple_schema(structured_model: str, mode: dict, func: Callable) -> None: 161 | class ConversationalResponse(BaseModel): 162 | """Respond in a conversational manner. Be kind and helpful.""" 163 | 164 | response: str = Field( 165 | description="A conversational response to the user's query" 166 | ) 167 | 168 | class Response(BaseModel): 169 | output: Union[Joke, ConversationalResponse] 170 | 171 | llm = ChatNVIDIA(model=structured_model, temperature=0, **mode) 172 | structured_llm = llm.with_structured_output(Response) 173 | response = func(structured_llm, "Tell me a joke about cats") 174 | assert isinstance(response, Response) 175 | assert isinstance(response.output, Joke) or isinstance( 176 | response.output, ConversationalResponse 177 | ) 178 | 179 | 180 | @pytest.mark.parametrize("func", [do_invoke, do_stream], ids=["invoke", "stream"]) 181 | def test_pydantic_incomplete(structured_model: str, mode: dict, func: Callable) -> None: 182 | # 3 tokens is not enough to construct a Joke 183 | llm = ChatNVIDIA(model=structured_model, temperature=0, max_tokens=3, **mode) 184 | structured_llm = llm.with_structured_output(Joke) 185 | result = func(structured_llm, "Tell me a joke about cats") 186 | assert result is None 187 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/unit_tests/__init__.py -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/conftest.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Callable, Generator, List 3 | 4 | import pytest 5 | import requests_mock 6 | 7 | from langchain_nvidia_ai_endpoints import ( 8 | NVIDIA, 9 | ChatNVIDIA, 10 | NVIDIAEmbeddings, 11 | NVIDIARerank, 12 | ) 13 | from langchain_nvidia_ai_endpoints._statics import MODEL_TABLE 14 | 15 | 16 | @pytest.fixture( 17 | params=[ 18 | ChatNVIDIA, 19 | NVIDIAEmbeddings, 20 | NVIDIARerank, 21 | NVIDIA, 22 | ] 23 | ) 24 | def public_class(request: pytest.FixtureRequest) -> type: 25 | return request.param 26 | 27 | 28 | @pytest.fixture 29 | def empty_v1_models(requests_mock: requests_mock.Mocker) -> None: 30 | requests_mock.get("https://integrate.api.nvidia.com/v1/models", json={"data": []}) 31 | 32 | 33 | @pytest.fixture 34 | def mock_model() -> str: 35 | return "mock-model" 36 | 37 | 38 | @pytest.fixture(autouse=True) 39 | def mock_v1_models(requests_mock: requests_mock.Mocker, mock_model: str) -> None: 40 | requests_mock.get( 41 | re.compile(".*/v1/models"), 42 | json={ 43 | "data": [ 44 | {"id": mock_model}, 45 | ] 46 | }, 47 | ) 48 | 49 | 50 | @pytest.fixture(autouse=True) 51 | def reset_model_table() -> Generator[None, None, None]: 52 | """ 53 | Reset MODEL_TABLE between tests. 
54 | """ 55 | original = MODEL_TABLE.copy() 56 | yield 57 | MODEL_TABLE.clear() 58 | MODEL_TABLE.update(original) 59 | 60 | 61 | @pytest.fixture 62 | def mock_streaming_response( 63 | requests_mock: requests_mock.Mocker, mock_model: str 64 | ) -> Callable: 65 | def builder(chunks: List[str]) -> None: 66 | requests_mock.post( 67 | "https://integrate.api.nvidia.com/v1/chat/completions", 68 | text="\n\n".join( 69 | [ 70 | 'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"bogus","choices":[{"index":0,"delta":{"role":"assistant","content":null},"logprobs":null,"finish_reason":null}]}', # noqa: E501 71 | *[ 72 | f'data: {{"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"bogus","choices":[{{"index":0,"delta":{{"role":null,"content":"{content}"}},"logprobs":null,"finish_reason":null}}]}}' # noqa: E501 73 | for content in chunks 74 | ], 75 | 'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"bogus","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}]}', # noqa: E501 76 | "data: [DONE]", 77 | ] 78 | ), 79 | ) 80 | 81 | return builder 82 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_202_polling.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import requests_mock 4 | from langchain_core.messages import AIMessage 5 | 6 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 7 | 8 | 9 | def test_polling_auth_header( 10 | requests_mock: requests_mock.Mocker, 11 | mock_model: str, 12 | ) -> None: 13 | infer_url = "https://integrate.api.nvidia.com/v1/chat/completions" 14 | polling_url = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/test-request-id" 15 | 16 | requests_mock.post( 17 | infer_url, status_code=202, headers={"NVCF-REQID": "test-request-id"}, json={} 18 | ) 19 | 20 | requests_mock.get( 21 | polling_url, 22 | status_code=200, 23 | json={ 24 | "id": "mock-id", 25 | "created": 1234567890, 26 | "object": "chat.completion", 27 | "model": mock_model, 28 | "choices": [ 29 | { 30 | "index": 0, 31 | "message": {"role": "assistant", "content": "WORKED"}, 32 | } 33 | ], 34 | }, 35 | ) 36 | 37 | warnings.filterwarnings("ignore", r".*type is unknown and inference may fail.*") 38 | client = ChatNVIDIA(model=mock_model, api_key="BOGUS") 39 | response = client.invoke("IGNORED") 40 | 41 | # expected behavior - 42 | # - first a GET request to /v1/models to check the model exists 43 | # - second a POST request to /v1/chat/completions 44 | # - third a GET request to /v2/nvcf/pexec/status/test-request-id 45 | # we want to check on the second and third requests 46 | 47 | assert len(requests_mock.request_history) == 3 48 | 49 | infer_request = requests_mock.request_history[-2] 50 | assert infer_request.method == "POST" 51 | assert infer_request.url == infer_url 52 | assert infer_request.headers["Authorization"] == "Bearer BOGUS" 53 | 54 | poll_request = requests_mock.request_history[-1] 55 | assert poll_request.method == "GET" 56 | assert poll_request.url == polling_url 57 | assert poll_request.headers["Authorization"] == "Bearer BOGUS" 58 | 59 | assert isinstance(response, AIMessage) 60 | assert response.content == "WORKED" 61 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_api_key.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from contextlib import contextmanager 3 | from typing import Any, Generator 4 | 5 | import pytest 6 | from pydantic import SecretStr 7 | from requests_mock import Mocker 8 | 9 | 10 | @contextmanager 11 | def no_env_var(var: str) -> Generator[None, None, None]: 12 | try: 13 | if val := os.environ.get(var, None): 14 | del os.environ[var] 15 | yield 16 | finally: 17 | if val: 18 | os.environ[var] = val 19 | else: 20 | if var in os.environ: 21 | del os.environ[var] 22 | 23 | 24 | @pytest.fixture(autouse=True) 25 | def mock_endpoint_models(requests_mock: Mocker) -> None: 26 | requests_mock.get( 27 | "https://integrate.api.nvidia.com/v1/models", 28 | json={ 29 | "data": [ 30 | { 31 | "id": "meta/llama3-8b-instruct", 32 | "object": "model", 33 | "created": 1234567890, 34 | "owned_by": "OWNER", 35 | "root": "model1", 36 | }, 37 | ] 38 | }, 39 | ) 40 | 41 | 42 | @pytest.fixture(autouse=True) 43 | def mock_v1_local_models(requests_mock: Mocker) -> None: 44 | requests_mock.get( 45 | "https://test_url/v1/models", 46 | json={ 47 | "data": [ 48 | { 49 | "id": "model", 50 | "object": "model", 51 | "created": 1234567890, 52 | "owned_by": "OWNER", 53 | "root": "model", 54 | }, 55 | ] 56 | }, 57 | ) 58 | 59 | 60 | def test_create_without_api_key(public_class: type) -> None: 61 | with no_env_var("NVIDIA_API_KEY"): 62 | with pytest.warns(UserWarning) as record: 63 | public_class() 64 | assert len(record) == 1 65 | assert "API key is required for the hosted" in str(record[0].message) 66 | 67 | 68 | def test_create_unknown_url_no_api_key(public_class: type) -> None: 69 | with no_env_var("NVIDIA_API_KEY"): 70 | with pytest.warns(UserWarning) as record: 71 | public_class(base_url="https://test_url/v1") 72 | assert len(record) == 1 73 | assert "Default model is set as" in str(record[0].message) 74 | 75 | 76 | @pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"]) 77 | def test_create_with_api_key(public_class: type, param: str) -> None: 78 | with no_env_var("NVIDIA_API_KEY"): 79 | public_class(**{param: "just testing no failure"}) 80 | 81 | 82 | def test_api_key_priority(public_class: type) -> None: 83 | def get_api_key(instance: Any) -> str: 84 | return instance._client.api_key.get_secret_value() 85 | 86 | with no_env_var("NVIDIA_API_KEY"): 87 | os.environ["NVIDIA_API_KEY"] = "ENV" 88 | assert get_api_key(public_class()) == "ENV" 89 | assert get_api_key(public_class(nvidia_api_key="PARAM")) == "PARAM" 90 | assert get_api_key(public_class(api_key="PARAM")) == "PARAM" 91 | assert get_api_key(public_class(api_key="LOW", nvidia_api_key="HIGH")) == "HIGH" 92 | 93 | 94 | def test_api_key_type(public_class: type) -> None: 95 | # Test case to make sure the api_key is SecretStr and not str 96 | def get_api_key(instance: Any) -> str: 97 | return instance._client.api_key 98 | 99 | with no_env_var("NVIDIA_API_KEY"): 100 | os.environ["NVIDIA_API_KEY"] = "ENV" 101 | assert type(get_api_key(public_class())) == SecretStr 102 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_available_models.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Any 3 | 4 | from langchain_nvidia_ai_endpoints import Model, register_model 5 | 6 | 7 | def test_model_listing(public_class: Any, mock_model: str) -> None: 8 | warnings.filterwarnings("ignore", message=f"Default model is set as: 
{mock_model}") 9 | # we set base_url to avoid having results filtered by the public_class name 10 | models = public_class.get_available_models(base_url="https://mock/v1") 11 | assert any(model.id == mock_model for model in models) 12 | 13 | 14 | def test_model_listing_hosted( 15 | public_class: Any, 16 | mock_model: str, 17 | ) -> None: 18 | model = Model( 19 | id=mock_model, 20 | model_type={ 21 | "ChatNVIDIA": "chat", 22 | "NVIDIAEmbeddings": "embedding", 23 | "NVIDIARerank": "ranking", 24 | "NVIDIA": "completions", 25 | }[public_class.__name__], 26 | client=public_class.__name__, 27 | endpoint="BOGUS", 28 | ) 29 | register_model(model) 30 | models = public_class.get_available_models() 31 | assert any(model.id == mock_model for model in models) 32 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_base_url.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import warnings 4 | from typing import Any 5 | 6 | import pytest 7 | from requests_mock import Mocker 8 | 9 | from .test_api_key import no_env_var 10 | 11 | 12 | @pytest.fixture(autouse=True) 13 | def mock_v1_local_models(requests_mock: Mocker) -> None: 14 | requests_mock.get( 15 | re.compile(r".*/models"), 16 | json={ 17 | "data": [ 18 | { 19 | "id": "model1", 20 | "object": "model", 21 | "created": 1234567890, 22 | "owned_by": "OWNER", 23 | "root": "model1", 24 | }, 25 | ] 26 | }, 27 | ) 28 | 29 | 30 | def test_create_without_base_url(public_class: type) -> None: 31 | with no_env_var("NVIDIA_BASE_URL"): 32 | x = public_class(api_key="BOGUS") 33 | assert x.base_url == "https://integrate.api.nvidia.com/v1" 34 | assert x._client.base_url == "https://integrate.api.nvidia.com/v1" 35 | 36 | 37 | @pytest.mark.parametrize( 38 | "base_url, param", 39 | [("https://test_url/v1", "nvidia_base_url"), ("https://test_url/v1", "base_url")], 40 | ) 41 | def test_create_with_base_url(public_class: type, base_url: str, param: str) -> None: 42 | with no_env_var("NVIDIA_BASE_URL"): 43 | assert public_class(model="model1", **{param: base_url}).base_url == base_url 44 | 45 | 46 | def test_base_url_priority(public_class: type) -> None: 47 | ENV_URL = "https://ENV/v1" 48 | NV_PARAM_URL = "https://NV_PARAM/v1" 49 | PARAM_URL = "https://PARAM/v1" 50 | 51 | def get_base_url(**kwargs: Any) -> str: 52 | return public_class(model="model1", **kwargs).base_url 53 | 54 | with no_env_var("NVIDIA_BASE_URL"): 55 | os.environ["NVIDIA_BASE_URL"] = ENV_URL 56 | assert get_base_url() == ENV_URL 57 | assert get_base_url(nvidia_base_url=NV_PARAM_URL) == NV_PARAM_URL 58 | assert get_base_url(base_url=PARAM_URL) == PARAM_URL 59 | assert ( 60 | get_base_url(base_url=PARAM_URL, nvidia_base_url=NV_PARAM_URL) 61 | == NV_PARAM_URL 62 | ) 63 | 64 | 65 | @pytest.mark.parametrize( 66 | "base_url", 67 | [ 68 | "bogus", 69 | "http:/", 70 | "http://", 71 | "http:/oops", 72 | ], 73 | ) 74 | def test_expect_warn_base_url(public_class: type, base_url: str) -> None: 75 | with pytest.warns(UserWarning) as record: 76 | public_class(model="model1", base_url=base_url) 77 | assert len(record) > 0 78 | assert "url appears incorrect" in str(record[0].message) 79 | 80 | 81 | @pytest.mark.parametrize( 82 | "base_url", 83 | ["https://integrate.api.nvidia.com/v1", "https://ai.api.nvidia.com/v1"], 84 | ) 85 | def test_param_base_url_hosted(public_class: type, base_url: str) -> None: 86 | with no_env_var("NVIDIA_BASE_URL"): 87 | client = public_class(api_key="BOGUS", 
base_url=base_url) 88 | assert client._client.is_hosted 89 | 90 | 91 | @pytest.mark.parametrize( 92 | "base_url", 93 | [ 94 | "https://localhost", 95 | "http://localhost:8888", 96 | "http://0.0.0.0:8888/v1", 97 | "http://0.0.0.0:8888/v1/", 98 | "http://blah/some/other/path/v1", 99 | ], 100 | ) 101 | def test_param_base_url_not_hosted(public_class: type, base_url: str) -> None: 102 | warnings.filterwarnings("ignore", r".*does not end in /v1.*") 103 | with no_env_var("NVIDIA_BASE_URL"): 104 | client = public_class(model="model1", base_url=base_url) 105 | assert not client._client.is_hosted 106 | 107 | 108 | @pytest.mark.parametrize( 109 | "base_url", 110 | [ 111 | "http://localhost:8888/embeddings", 112 | "http://0.0.0.0:8888/rankings", 113 | "http://localhost:8888/embeddings/", 114 | "http://0.0.0.0:8888/rankings/", 115 | "http://localhost:8888/chat/completions", 116 | "http://localhost:8080/v1/embeddings", 117 | "http://0.0.0.0:8888/v1/rankings", 118 | ], 119 | ) 120 | def test_expect_warn(public_class: type, base_url: str) -> None: 121 | with pytest.warns(UserWarning) as record: 122 | public_class(model="model1", base_url=base_url) 123 | assert len(record) == 1 124 | assert "does not end in /v1" in str(record[0].message) 125 | 126 | 127 | @pytest.mark.parametrize( 128 | "base_url", 129 | [ 130 | "http://localhost:8888/embeddings", 131 | "http://0.0.0.0:8888/rankings", 132 | "http://localhost:8888/embeddings/", 133 | "http://0.0.0.0:8888/rankings/", 134 | "http://localhost:8888/chat/completions", 135 | "http://localhost:8080/v1/embeddings", 136 | "http://0.0.0.0:8888/v1/rankings", 137 | ], 138 | ) 139 | @pytest.mark.parametrize("false_value", ["false", "False", "0"]) 140 | def test_expect_skip_check(public_class: type, base_url: str, false_value: str) -> None: 141 | orig = os.environ.get("NVIDIA_APPEND_API_VERSION", None) 142 | warnings.filterwarnings("error") 143 | 144 | try: 145 | os.environ["NVIDIA_APPEND_API_VERSION"] = false_value 146 | public_class(model="model1", base_url=base_url) 147 | finally: 148 | warnings.resetwarnings() 149 | if orig is None: 150 | os.environ.pop("NVIDIA_APPEND_API_VERSION", None) 151 | else: 152 | os.environ["NVIDIA_APPEND_API_VERSION"] = orig 153 | 154 | 155 | @pytest.mark.parametrize( 156 | "base_url", 157 | [ 158 | "http://localhost:8888/embeddings", 159 | "http://0.0.0.0:8888/rankings", 160 | "http://localhost:8888/embeddings/", 161 | "http://0.0.0.0:8888/rankings/", 162 | "http://localhost:8888/chat/completions", 163 | "http://localhost:8080/v1/embeddings", 164 | "http://0.0.0.0:8888/v1/rankings", 165 | ], 166 | ) 167 | @pytest.mark.parametrize( 168 | "true_value", 169 | ["true", "True", "yes", "1", "anything", "enabled", "on", ""], 170 | ) 171 | def test_expect_not_skip_check( 172 | public_class: type, base_url: str, true_value: str 173 | ) -> None: 174 | warnings.filterwarnings("ignore", r".*does not end in /v1.*") 175 | orig = os.environ.get("NVIDIA_APPEND_API_VERSION", None) 176 | 177 | try: 178 | os.environ["NVIDIA_APPEND_API_VERSION"] = true_value 179 | obj = public_class(model="model1", base_url=base_url) 180 | assert obj.base_url.rstrip("/").endswith( 181 | "/v1" 182 | ), f"Expected {obj.base_url} to end with '/v1'" 183 | finally: 184 | warnings.resetwarnings() 185 | if orig is None: 186 | os.environ.pop("NVIDIA_APPEND_API_VERSION", None) 187 | else: 188 | os.environ["NVIDIA_APPEND_API_VERSION"] = orig 189 | 190 | 191 | def test_default_hosted(public_class: type) -> None: 192 | x = public_class(api_key="BOGUS") 193 | assert x._client.is_hosted 
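# The two parametrized tests below cover proxy-style URLs where extra path
# components sit in front of /v1 (for example a reverse-proxy prefix): the
# client is expected to keep that prefix in base_url, and listing models is
# expected to hit <base_url>/models under the same prefix.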
194 | 195 | 196 | @pytest.mark.parametrize( 197 | "base_url", 198 | [ 199 | "http://host/path0/path1/path2/v1", 200 | "http://host:123/path0/path1/path2/v1/", 201 | ], 202 | ) 203 | def test_proxy_base_url( 204 | public_class: type, base_url: str, requests_mock: Mocker 205 | ) -> None: 206 | with no_env_var("NVIDIA_BASE_URL"): 207 | client = public_class(model="model1", base_url=base_url) 208 | assert base_url.startswith(client.base_url) 209 | 210 | 211 | @pytest.mark.parametrize( 212 | "base_url", 213 | [ 214 | "http://host/path0/path1/path2/v1", 215 | "http://host:123/path0/path1/path2/v1/", 216 | ], 217 | ) 218 | def test_proxy_base_url_models( 219 | public_class: type, base_url: str, requests_mock: Mocker 220 | ) -> None: 221 | with no_env_var("NVIDIA_BASE_URL"): 222 | client = public_class(model="model1", base_url=base_url) 223 | client.available_models 224 | models_url = base_url.rstrip("/") + "/models" 225 | assert requests_mock.last_request 226 | assert requests_mock.last_request.url == models_url 227 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_chat_models.py: -------------------------------------------------------------------------------- 1 | """Test chat model integration.""" 2 | 3 | import warnings 4 | 5 | import pytest 6 | from requests_mock import Mocker 7 | 8 | from langchain_nvidia_ai_endpoints.chat_models import ChatNVIDIA 9 | 10 | 11 | @pytest.fixture 12 | def mock_local_models(requests_mock: Mocker) -> None: 13 | requests_mock.get( 14 | "http://localhost:8888/v1/models", 15 | json={ 16 | "data": [ 17 | { 18 | "id": "unknown_model", 19 | "object": "model", 20 | "created": 1234567890, 21 | "owned_by": "OWNER", 22 | "root": "unknown_model", 23 | }, 24 | ] 25 | }, 26 | ) 27 | 28 | 29 | def test_base_url_unknown_model(mock_local_models: None) -> None: 30 | llm = ChatNVIDIA(model="unknown_model", base_url="http://localhost:8888/v1") 31 | assert llm.model == "unknown_model" 32 | 33 | 34 | def test_integration_initialization() -> None: 35 | """Test chat model initialization.""" 36 | ChatNVIDIA( 37 | model="meta/llama2-70b", 38 | nvidia_api_key="nvapi-...", 39 | temperature=0.5, 40 | top_p=0.9, 41 | max_tokens=50, 42 | ) 43 | ChatNVIDIA(model="meta/llama2-70b", nvidia_api_key="nvapi-...") 44 | 45 | 46 | def test_unavailable(empty_v1_models: None) -> None: 47 | with pytest.warns(UserWarning, match="Model not-a-real-model is unknown"): 48 | ChatNVIDIA(api_key="BOGUS", model="not-a-real-model") 49 | 50 | 51 | def test_max_tokens_deprecation_warning() -> None: 52 | """Test that using max_tokens raises a deprecation warning.""" 53 | with pytest.warns( 54 | DeprecationWarning, 55 | match=( 56 | "The 'max_tokens' parameter is deprecated and will be removed " 57 | "in a future version" 58 | ), 59 | ): 60 | ChatNVIDIA(model="meta/llama2-70b", max_tokens=50) 61 | 62 | 63 | def test_max_completion_tokens() -> None: 64 | """Test that max_completion_tokens works without warning.""" 65 | with warnings.catch_warnings(record=True) as w: 66 | warnings.simplefilter("always") 67 | llm = ChatNVIDIA( 68 | model="meta/llama2-70b", 69 | max_completion_tokens=50, 70 | nvidia_api_key="nvapi-...", 71 | ) 72 | assert len(w) == 0 73 | assert llm.max_tokens == 50 74 | payload = llm._get_payload( 75 | inputs=[{"role": "user", "content": "test"}], 76 | stop=None, 77 | ) 78 | assert payload["max_tokens"] == 50 79 | 80 | 81 | def test_max_tokens_value() -> None: 82 | """Test that max_tokens value is correctly set and reflected in 
payload.""" 83 | llm = ChatNVIDIA( 84 | model="meta/llama2-70b", 85 | max_tokens=50, 86 | nvidia_api_key="nvapi-...", 87 | ) 88 | assert llm.max_tokens == 50 89 | payload = llm._get_payload( 90 | inputs=[{"role": "user", "content": "test"}], 91 | stop=None, 92 | ) 93 | assert payload["max_tokens"] == 50 94 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_completions_models.py: -------------------------------------------------------------------------------- 1 | import json 2 | from functools import reduce 3 | from operator import add 4 | from typing import Any, Callable, List 5 | 6 | import pytest 7 | import requests_mock 8 | 9 | from langchain_nvidia_ai_endpoints import NVIDIA 10 | 11 | 12 | def invoke(llm: NVIDIA, prompt: str, **kwargs: Any) -> str: 13 | return llm.invoke(prompt, **kwargs) 14 | 15 | 16 | def stream(llm: NVIDIA, prompt: str, **kwargs: Any) -> str: 17 | return reduce(add, llm.stream(prompt, **kwargs)) 18 | 19 | 20 | mock_response = { 21 | "id": "ID", 22 | "object": "text_completion", 23 | "created": 1234567890, 24 | "model": "BOGUS", 25 | "choices": [ 26 | { 27 | "index": 0, 28 | "text": "COMPLETION", 29 | } 30 | ], 31 | "usage": {"prompt_tokens": 7, "total_tokens": 207, "completion_tokens": 200}, 32 | } 33 | 34 | 35 | @pytest.fixture(scope="function") 36 | def mock_v1_completions_invoke( 37 | requests_mock: requests_mock.Mocker, 38 | ) -> requests_mock.Mocker: 39 | requests_mock.post( 40 | "https://integrate.api.nvidia.com/v1/completions", 41 | json=mock_response, 42 | ) 43 | return requests_mock 44 | 45 | 46 | @pytest.fixture(scope="function") 47 | def mock_v1_completions_stream( 48 | requests_mock: requests_mock.Mocker, 49 | ) -> requests_mock.Mocker: 50 | requests_mock.post( 51 | "https://integrate.api.nvidia.com/v1/completions", 52 | text="\n\n".join( 53 | [ 54 | f"data: {json.dumps(mock_response)}", 55 | "data: [DONE]", 56 | ] 57 | ), 58 | ) 59 | return requests_mock 60 | 61 | 62 | @pytest.mark.parametrize( 63 | "param, value", 64 | [ 65 | ("frequency_penalty", [0.25, 0.5, 0.75]), 66 | ("max_tokens", [2, 32, 512]), 67 | ("presence_penalty", [0.25, 0.5, 0.75]), 68 | ("seed", [1, 1234, 4321]), 69 | ("stop", ["Hello", "There", "World"]), 70 | ("temperature", [0, 0.5, 1]), 71 | ("top_p", [0, 0.5, 1]), 72 | ("best_of", [1, 5, 10]), 73 | ("echo", [True, False, True]), 74 | ("logit_bias", [{"hello": 1.0}, {"there": 1.0}, {"world": 1.0}]), 75 | ("logprobs", [1, 2, 3]), 76 | ("n", [1, 2, 3]), 77 | ("suffix", ["Hello", "There", "World"]), 78 | ("user", ["Bob", "Alice", "Eve"]), 79 | ], 80 | ) 81 | @pytest.mark.parametrize( 82 | "func, mock_name", 83 | [(invoke, "mock_v1_completions_invoke"), (stream, "mock_v1_completions_stream")], 84 | ids=["invoke", "stream"], 85 | ) 86 | def test_params( 87 | param: str, 88 | value: List[Any], 89 | func: Callable, 90 | mock_name: str, 91 | request: pytest.FixtureRequest, 92 | ) -> None: 93 | """ 94 | This tests the following... 95 | - priority order (init -> bind -> infer) 96 | - param passed to init, bind, invoke / stream 97 | ...for each known Completion API param. 
98 | """ 99 | 100 | mock = request.getfixturevalue(mock_name) 101 | 102 | init, bind, infer = value 103 | 104 | llm = NVIDIA(api_key="BOGUS", **{param: init}) 105 | func(llm, "IGNORED") 106 | request_payload = mock.last_request.json() 107 | assert param in request_payload 108 | assert request_payload[param] == init 109 | 110 | bound_llm = llm.bind(**{param: bind}) 111 | func(bound_llm, "IGNORED") 112 | request_payload = mock.last_request.json() 113 | assert param in request_payload 114 | assert request_payload[param] == bind 115 | 116 | func(bound_llm, "IGNORED", **{param: infer}) 117 | request_payload = mock.last_request.json() 118 | assert param in request_payload 119 | assert request_payload[param] == infer 120 | 121 | 122 | @pytest.mark.parametrize( 123 | "func, mock_name", 124 | [(invoke, "mock_v1_completions_invoke"), (stream, "mock_v1_completions_stream")], 125 | ids=["invoke", "stream"], 126 | ) 127 | def test_params_unknown( 128 | func: Callable, 129 | mock_name: str, 130 | request: pytest.FixtureRequest, 131 | ) -> None: 132 | request.getfixturevalue(mock_name) 133 | 134 | with pytest.warns(UserWarning) as record: 135 | llm = NVIDIA(api_key="BOGUS", init_unknown="INIT") 136 | assert len(record) == 1 137 | assert "Unrecognized, ignored arguments: {'init_unknown'}" in str(record[0].message) 138 | 139 | with pytest.warns(UserWarning) as record: 140 | func(llm, "IGNORED", arg_unknown="ARG") 141 | assert len(record) == 1 142 | assert "Unrecognized, ignored arguments: {'arg_unknown'}" in str(record[0].message) 143 | 144 | bound_llm = llm.bind(bind_unknown="BIND") 145 | 146 | with pytest.warns(UserWarning) as record: 147 | func(bound_llm, "IGNORED") 148 | assert len(record) == 1 149 | assert "Unrecognized, ignored arguments: {'bind_unknown'}" in str(record[0].message) 150 | 151 | 152 | def test_identifying_params() -> None: 153 | llm = NVIDIA(api_key="BOGUS") 154 | assert set(llm._identifying_params.keys()) == {"model", "base_url"} 155 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_embeddings.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Generator 2 | 3 | import pytest 4 | from requests_mock import Mocker 5 | 6 | from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings 7 | 8 | 9 | @pytest.fixture 10 | def embedding(requests_mock: Mocker) -> Generator[NVIDIAEmbeddings, None, None]: 11 | model = "mock-model" 12 | requests_mock.get( 13 | "https://integrate.api.nvidia.com/v1/models", 14 | json={ 15 | "data": [ 16 | { 17 | "id": model, 18 | "object": "model", 19 | "created": 1234567890, 20 | "owned_by": "OWNER", 21 | }, 22 | ] 23 | }, 24 | ) 25 | requests_mock.post( 26 | "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/ID", 27 | json={ 28 | "data": [ 29 | { 30 | "embedding": [ 31 | 0.1, 32 | 0.2, 33 | 0.3, 34 | ], 35 | "index": 0, 36 | } 37 | ], 38 | "usage": {"prompt_tokens": 8, "total_tokens": 8}, 39 | }, 40 | ) 41 | with pytest.warns(UserWarning) as record: 42 | yield NVIDIAEmbeddings(model=model, nvidia_api_key="a-bogus-key") 43 | assert len(record) == 1 44 | assert "type is unknown and inference may fail" in str(record[0].message) 45 | 46 | 47 | def test_embed_documents_negative_input_int(embedding: NVIDIAEmbeddings) -> None: 48 | documents = 1 49 | with pytest.raises(ValueError): 50 | embedding.embed_documents(documents) # type: ignore 51 | 52 | 53 | def test_embed_documents_negative_input_float(embedding: NVIDIAEmbeddings) -> None: 54 | 
documents = 1.0 55 | with pytest.raises(ValueError): 56 | embedding.embed_documents(documents) # type: ignore 57 | 58 | 59 | def test_embed_documents_negative_input_str(embedding: NVIDIAEmbeddings) -> None: 60 | documents = "subscriptable string, not a list" 61 | with pytest.raises(ValueError): 62 | embedding.embed_documents(documents) # type: ignore 63 | 64 | 65 | def test_embed_documents_negative_input_list_int(embedding: NVIDIAEmbeddings) -> None: 66 | documents = [1, 2, 3] 67 | with pytest.raises(ValueError): 68 | embedding.embed_documents(documents) # type: ignore 69 | 70 | 71 | def test_embed_documents_negative_input_list_float(embedding: NVIDIAEmbeddings) -> None: 72 | documents = [1.0, 2.0, 3.0] 73 | with pytest.raises(ValueError): 74 | embedding.embed_documents(documents) # type: ignore 75 | 76 | 77 | def test_embed_documents_negative_input_list_mixed(embedding: NVIDIAEmbeddings) -> None: 78 | documents = ["1", 2.0, 3] 79 | with pytest.raises(ValueError): 80 | embedding.embed_documents(documents) # type: ignore 81 | 82 | 83 | @pytest.mark.parametrize("truncate", [True, False, 1, 0, 1.0, "BOGUS"]) 84 | def test_embed_query_truncate_invalid(truncate: Any) -> None: 85 | with pytest.raises(ValueError): 86 | NVIDIAEmbeddings(truncate=truncate) 87 | 88 | 89 | # todo: test max_batch_size (-50, 0, 1, 50) 90 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_imports.py: -------------------------------------------------------------------------------- 1 | from langchain_nvidia import __all__ as short_all 2 | from langchain_nvidia_ai_endpoints import __all__ as long_all 3 | 4 | EXPECTED_ALL = [ 5 | "ChatNVIDIA", 6 | "NVIDIAEmbeddings", 7 | "NVIDIARerank", 8 | "NVIDIA", 9 | "register_model", 10 | "Model", 11 | ] 12 | 13 | 14 | def test_all_imports() -> None: 15 | assert sorted(EXPECTED_ALL) == sorted(short_all) 16 | assert sorted(EXPECTED_ALL) == sorted(long_all) 17 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_messages.py: -------------------------------------------------------------------------------- 1 | import requests_mock 2 | from langchain_core.messages import AIMessage 3 | 4 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 5 | 6 | 7 | def test_invoke_aimessage_content_none(requests_mock: requests_mock.Mocker) -> None: 8 | requests_mock.post( 9 | "https://integrate.api.nvidia.com/v1/chat/completions", 10 | json={ 11 | "id": "mock-id", 12 | "created": 1234567890, 13 | "object": "chat.completion", 14 | "model": "mock-model", 15 | "choices": [ 16 | { 17 | "index": 0, 18 | "message": {"role": "assistant", "content": "WORKED"}, 19 | } 20 | ], 21 | }, 22 | ) 23 | 24 | empty_aimessage = AIMessage(content="EMPTY") 25 | empty_aimessage.content = None # type: ignore 26 | 27 | llm = ChatNVIDIA(api_key="BOGUS") 28 | response = llm.invoke([empty_aimessage]) 29 | request = requests_mock.request_history[0] 30 | assert request.method == "POST" 31 | assert request.url == "https://integrate.api.nvidia.com/v1/chat/completions" 32 | message = request.json()["messages"][0] 33 | assert "content" in message and message["content"] != "EMPTY" 34 | assert "content" in message and message["content"] is None 35 | assert isinstance(response, AIMessage) 36 | assert response.content == "WORKED" 37 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_metadata.py: 
-------------------------------------------------------------------------------- 1 | from typing import Any, Optional, cast 2 | 3 | import pytest 4 | import requests_mock 5 | from langchain_core.messages import AIMessage, BaseMessageChunk, HumanMessage 6 | 7 | # from langchain_core.messages.ai import UsageMetadata 8 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 9 | 10 | mock_response = { 11 | "id": "chat-c891882b0c4448a5b258c63d2b031c82", 12 | "object": "chat.completion", 13 | "created": 1729173278, 14 | "model": "meta/llama-3.2-3b-instruct", 15 | "choices": [ 16 | { 17 | "index": 0, 18 | "message": {"role": "assistant", "content": "A simple yet"}, 19 | "logprobs": "", 20 | "finish_reason": "tool_calls", 21 | "stop_reason": "", 22 | } 23 | ], 24 | "usage": {"prompt_tokens": 12, "total_tokens": 15, "completion_tokens": 3}, 25 | "prompt_logprobs": "", 26 | } 27 | 28 | 29 | @pytest.fixture 30 | def mock_local_models_metadata(requests_mock: requests_mock.Mocker) -> None: 31 | mock_response["tool_calls"] = ( 32 | [ 33 | { 34 | "id": "tool-ID", 35 | "type": "function", 36 | "function": { 37 | "name": "magic", 38 | "arguments": [], 39 | }, 40 | } 41 | ], 42 | ) 43 | requests_mock.post("http://localhost:8888/v1/chat/completions", json=mock_response) 44 | 45 | 46 | @pytest.fixture 47 | def mock_local_models_stream_metadata(requests_mock: requests_mock.Mocker) -> None: 48 | response_contents = "\n\n".join( 49 | [ 50 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 51 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"id":"call_ID0","type":"function","function":{"name":"xxyyzz","arguments":""}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 52 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\\"a\\""}}]},"logprobs":null, "model_name":"dummy","finish_reason":null}]}', # noqa: E501 53 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":": 11,"}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 54 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" \\"b\\": "}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 55 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"3}"}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 56 | 'data: 
{"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"id":"call_ID1","type":"function","function":{"name":"zzyyxx","arguments":""}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 57 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{\\"a\\""}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 58 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":": 5, "}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 59 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"\\"b\\": 3"}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 60 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"}"}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 61 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"model_name":"dummy","finish_reason":"tool_calls"}]}', # noqa: E501 62 | ] 63 | ) 64 | requests_mock.post( 65 | "http://localhost:8888/v1/chat/completions", 66 | text=response_contents, 67 | ) 68 | 69 | 70 | def response_metadata_checks(result: Any) -> None: 71 | assert isinstance(result, AIMessage) 72 | assert result.response_metadata 73 | assert all( 74 | k in result.response_metadata for k in ("model_name", "role", "token_usage") 75 | ) 76 | 77 | assert isinstance(result.content, str) 78 | assert result.response_metadata.get("model_name") is not None 79 | 80 | if result.usage_metadata is not None: 81 | assert isinstance(result.usage_metadata, dict) 82 | usage_metadata = result.usage_metadata 83 | 84 | assert usage_metadata["input_tokens"] > 0 85 | assert usage_metadata["output_tokens"] > 0 86 | assert usage_metadata["total_tokens"] > 0 87 | 88 | 89 | def test_response_metadata(mock_local_models_metadata: None) -> None: 90 | llm = ChatNVIDIA(base_url="http://localhost:8888/v1") 91 | result = llm.invoke([HumanMessage(content="I'm PickleRick")]) 92 | response_metadata_checks(result) 93 | 94 | 95 | async def test_async_response_metadata(mock_local_models_metadata: None) -> None: 96 | llm = ChatNVIDIA(base_url="http://localhost:8888/v1") 97 | result = await llm.ainvoke([HumanMessage(content="I'm PickleRick")], logprobs=True) 98 | response_metadata_checks(result) 99 | 100 | 101 | def test_response_metadata_streaming(mock_local_models_stream_metadata: None) -> None: 102 | llm = ChatNVIDIA(base_url="http://localhost:8888/v1") 103 | full: Optional[BaseMessageChunk] = None 104 | for chunk in llm.stream("I'm Pickle Rick"): 105 | assert isinstance(chunk.content, str) 106 | full = chunk if full is None else full + chunk 107 | assert all( 108 | k in cast(BaseMessageChunk, full).response_metadata 
109 | for k in ("model_name", "finish_reason") 110 | ) 111 | 112 | 113 | async def test_async_response_metadata_streaming( 114 | mock_local_models_stream_metadata: None, 115 | ) -> None: 116 | llm = ChatNVIDIA(base_url="http://localhost:8888/v1") 117 | full: Optional[BaseMessageChunk] = None 118 | async for chunk in llm.astream("I'm Pickle Rick"): 119 | assert isinstance(chunk.content, str) 120 | full = chunk if full is None else full + chunk 121 | assert all( 122 | k in cast(BaseMessageChunk, full).response_metadata 123 | for k in ("model_name", "finish_reason") 124 | ) 125 | 126 | 127 | def test_stream_tool_calls( 128 | mock_local_models_stream_metadata: None, 129 | ) -> None: 130 | llm = ChatNVIDIA(base_url="http://localhost:8888/v1") 131 | generator = llm.stream( 132 | "What is 11 xxyyzz 3 zzyyxx 5?", 133 | ) 134 | response = next(generator) 135 | for chunk in generator: 136 | response += chunk 137 | assert isinstance(response, AIMessage) 138 | assert len(response.tool_calls) == 2 139 | tool_call0 = response.tool_calls[0] 140 | assert tool_call0["name"] == "xxyyzz" 141 | assert tool_call0["args"] == {"b": 3, "a": 11} 142 | tool_call1 = response.tool_calls[1] 143 | assert tool_call1["name"] == "zzyyxx" 144 | assert tool_call1["args"] == {"b": 3, "a": 5} 145 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_model.py: -------------------------------------------------------------------------------- 1 | from itertools import chain 2 | from typing import Any 3 | 4 | import pytest 5 | from requests_mock import Mocker 6 | 7 | from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank 8 | from langchain_nvidia_ai_endpoints._statics import ( 9 | CHAT_MODEL_TABLE, 10 | EMBEDDING_MODEL_TABLE, 11 | MODEL_TABLE, 12 | QA_MODEL_TABLE, 13 | RANKING_MODEL_TABLE, 14 | VLM_MODEL_TABLE, 15 | ) 16 | 17 | 18 | @pytest.fixture 19 | def known_unknown() -> str: 20 | return "mock-model" 21 | 22 | 23 | @pytest.fixture(autouse=True) 24 | def mock_v1_models(requests_mock: Mocker, known_unknown: str) -> None: 25 | requests_mock.get( 26 | "https://integrate.api.nvidia.com/v1/models", 27 | json={ 28 | "data": [ 29 | { 30 | "id": known_unknown, 31 | "object": "model", 32 | "created": 1234567890, 33 | "owned_by": "OWNER", 34 | }, 35 | ] 36 | }, 37 | ) 38 | 39 | 40 | @pytest.fixture(autouse=True) 41 | def mock_v1_local_models(requests_mock: Mocker, known_unknown: str) -> None: 42 | requests_mock.get( 43 | "http://localhost:8000/v1/models", 44 | json={ 45 | "data": [ 46 | { 47 | "id": known_unknown, 48 | "object": "model", 49 | "created": 1234567890, 50 | "owned_by": "OWNER", 51 | "root": known_unknown, 52 | }, 53 | { 54 | "id": "lora1", 55 | "object": "model", 56 | "created": 1234567890, 57 | "owned_by": "OWNER", 58 | "root": known_unknown, 59 | }, 60 | ] 61 | }, 62 | ) 63 | 64 | 65 | @pytest.mark.parametrize( 66 | "alias, client", 67 | [ 68 | (alias, ChatNVIDIA) 69 | for model in list( 70 | chain( 71 | CHAT_MODEL_TABLE.values(), 72 | VLM_MODEL_TABLE.values(), 73 | QA_MODEL_TABLE.values(), 74 | ) 75 | ) 76 | if model.aliases is not None 77 | for alias in model.aliases 78 | ] 79 | + [ 80 | (alias, NVIDIAEmbeddings) 81 | for model in EMBEDDING_MODEL_TABLE.values() 82 | if model.aliases is not None 83 | for alias in model.aliases 84 | ] 85 | + [ 86 | (alias, NVIDIARerank) 87 | for model in RANKING_MODEL_TABLE.values() 88 | if model.aliases is not None 89 | for alias in model.aliases 90 | ], 91 | ) 92 | def test_aliases(alias: 
str, client: Any) -> None: 93 | """ 94 | Test that the aliases for each model in the model table are accepted 95 | with a warning about deprecation of the alias. 96 | """ 97 | with pytest.warns(UserWarning) as record: 98 | x = client(model=alias, nvidia_api_key="a-bogus-key") 99 | assert x.model == x._client.mdl_name 100 | assert isinstance(record[0].message, Warning) 101 | assert "deprecated" in record[0].message.args[0] 102 | 103 | 104 | def test_known(public_class: type) -> None: 105 | """ 106 | Test that a model in the model table will be accepted. 107 | """ 108 | # find a model that matches the public_class under test 109 | known = None 110 | for model in MODEL_TABLE.values(): 111 | if model.client == public_class.__name__: 112 | known = model.id 113 | break 114 | assert known is not None, f"Model not found for client {public_class.__name__}" 115 | x = public_class(model=known, nvidia_api_key="a-bogus-key") 116 | assert x.model == known 117 | 118 | 119 | def test_known_unknown(public_class: type, known_unknown: str) -> None: 120 | """ 121 | Test that a model in /v1/models but not in the model table will be accepted 122 | with a warning. 123 | """ 124 | with pytest.warns(UserWarning) as record: 125 | x = public_class(model=known_unknown, nvidia_api_key="a-bogus-key") 126 | assert x.model == known_unknown 127 | assert isinstance(record[0].message, Warning) 128 | assert "Found" in record[0].message.args[0] 129 | assert "unknown" in record[0].message.args[0] 130 | 131 | 132 | def test_unknown_unknown(public_class: type, empty_v1_models: None) -> None: 133 | """ 134 | Test that a model not in /v1/models, not in known model table, and not internal 135 | will be rejected. 136 | """ 137 | # todo: make this work for local NIM 138 | with pytest.warns(UserWarning, match="Model test/unknown-unknown is unknown"): 139 | public_class(model="test/unknown-unknown", nvidia_api_key="a-bogus-key") 140 | 141 | 142 | def test_default_known(public_class: type, known_unknown: str) -> None: 143 | """ 144 | Test that the default model is taken from the locally hosted /v1/models endpoint. 145 | """ 146 | # check if default model is getting set 147 | with pytest.warns(UserWarning) as record: 148 | x = public_class(base_url="http://localhost:8000/v1") 149 | assert x.model == known_unknown 150 | assert len(record) == 1 151 | assert "Default model is set as: mock-model" in str(record[0].message) 152 | 153 | 154 | def test_default_lora(public_class: type) -> None: 155 | """ 156 | Test that a LoRA model listed by a locally hosted endpoint will be accepted. 
157 | """ 158 | # find a model that matches the public_class under test 159 | x = public_class(base_url="http://localhost:8000/v1", model="lora1") 160 | assert x.model == "lora1" 161 | 162 | 163 | def test_default(public_class: type) -> None: 164 | x = public_class(api_key="BOGUS") 165 | assert x.model is not None 166 | 167 | 168 | @pytest.mark.parametrize( 169 | "model, client", 170 | [(model.id, model.client) for model in MODEL_TABLE.values()], 171 | ) 172 | def test_all_incompatible(public_class: type, model: str, client: str) -> None: 173 | if client == public_class.__name__: 174 | pytest.skip("Compatibility expected.") 175 | 176 | with pytest.warns(UserWarning) as record: 177 | public_class(model=model, nvidia_api_key="a-bogus-key") 178 | 179 | assert len(record) == 1 180 | assert "incompatible with client" in str(record[0].message) 181 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_ranking.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Any, Literal, Optional 3 | 4 | import pytest 5 | from langchain_core.documents import Document 6 | from requests_mock import Mocker 7 | 8 | from langchain_nvidia_ai_endpoints import NVIDIARerank 9 | 10 | 11 | @pytest.fixture(autouse=True) 12 | def mock_v1_models(requests_mock: Mocker) -> None: 13 | requests_mock.get( 14 | "https://integrate.api.nvidia.com/v1/models", 15 | json={ 16 | "data": [ 17 | { 18 | "id": "mock-model", 19 | "object": "model", 20 | "created": 1234567890, 21 | "owned_by": "OWNER", 22 | } 23 | ] 24 | }, 25 | ) 26 | 27 | 28 | @pytest.fixture(autouse=True) 29 | def mock_v1_ranking(requests_mock: Mocker) -> None: 30 | requests_mock.post( 31 | "https://integrate.api.nvidia.com/v1/ranking", 32 | json={ 33 | "rankings": [ 34 | {"index": 0, "logit": 4.2}, 35 | ] 36 | }, 37 | ) 38 | 39 | 40 | @pytest.mark.parametrize( 41 | "truncate", 42 | [ 43 | None, 44 | "END", 45 | "NONE", 46 | ], 47 | ) 48 | def test_truncate( 49 | requests_mock: Mocker, 50 | truncate: Optional[Literal["END", "NONE"]], 51 | ) -> None: 52 | truncate_param = {} 53 | if truncate: 54 | truncate_param = {"truncate": truncate} 55 | warnings.filterwarnings( 56 | "ignore", ".*Found mock-model in available_models.*" 57 | ) # expect to see this warning 58 | client = NVIDIARerank(api_key="BOGUS", model="mock-model", **truncate_param) 59 | response = client.compress_documents( 60 | documents=[Document(page_content="Nothing really.")], query="What is it?" 61 | ) 62 | 63 | assert len(response) == 1 64 | 65 | assert requests_mock.last_request is not None 66 | request_payload = requests_mock.last_request.json() 67 | if truncate is None: 68 | assert "truncate" not in request_payload 69 | else: 70 | assert "truncate" in request_payload 71 | assert request_payload["truncate"] == truncate 72 | 73 | 74 | @pytest.mark.parametrize("truncate", [True, False, 1, 0, 1.0, "START", "BOGUS"]) 75 | def test_truncate_invalid(truncate: Any) -> None: 76 | with pytest.raises(ValueError): 77 | NVIDIARerank(truncate=truncate) 78 | 79 | 80 | def test_extra_headers(requests_mock: Mocker) -> None: 81 | client = NVIDIARerank( 82 | api_key="BOGUS", model="mock-model", extra_headers={"X-Test": "test"} 83 | ) 84 | assert client.extra_headers == {"X-Test": "test"} 85 | 86 | _ = client.compress_documents( 87 | documents=[Document(page_content="Nothing really.")], query="What is it?" 
88 | ) 89 | assert requests_mock.last_request is not None 90 | assert requests_mock.last_request.headers["X-Test"] == "test" 91 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_register_model.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import pytest 4 | 5 | from langchain_nvidia_ai_endpoints import ( 6 | NVIDIA, 7 | ChatNVIDIA, 8 | Model, 9 | NVIDIAEmbeddings, 10 | NVIDIARerank, 11 | register_model, 12 | ) 13 | 14 | 15 | @pytest.mark.parametrize( 16 | "model_type, client", 17 | [ 18 | ("chat", "NVIDIAEmbeddings"), 19 | ("chat", "NVIDIARerank"), 20 | ("chat", "NVIDIA"), 21 | ("vlm", "NVIDIAEmbeddings"), 22 | ("vlm", "NVIDIARerank"), 23 | ("vlm", "NVIDIA"), 24 | ("embedding", "ChatNVIDIA"), 25 | ("embedding", "NVIDIARerank"), 26 | ("embedding", "NVIDIA"), 27 | ("ranking", "ChatNVIDIA"), 28 | ("ranking", "NVIDIAEmbeddings"), 29 | ("ranking", "NVIDIA"), 30 | ("completions", "ChatNVIDIA"), 31 | ("completions", "NVIDIAEmbeddings"), 32 | ("completions", "NVIDIARerank"), 33 | ], 34 | ) 35 | def test_mismatched_type_client(model_type: str, client: str) -> None: 36 | with pytest.raises(ValueError) as e: 37 | register_model( 38 | Model( 39 | id="model", 40 | model_type=model_type, 41 | client=client, 42 | endpoint="BOGUS", 43 | ) 44 | ) 45 | assert "not supported" in str(e.value) 46 | 47 | 48 | def test_duplicate_model_warns() -> None: 49 | model = Model(id="registered-model", endpoint="BOGUS") 50 | register_model(model) 51 | with pytest.warns(UserWarning) as record: 52 | register_model(model) 53 | assert len(record) == 1 54 | assert isinstance(record[0].message, UserWarning) 55 | assert "already registered" in str(record[0].message) 56 | assert "Overriding" in str(record[0].message) 57 | 58 | 59 | def test_registered_model_usable(public_class: type, mock_model: str) -> None: 60 | model_type = { 61 | "ChatNVIDIA": "chat", 62 | "NVIDIAEmbeddings": "embedding", 63 | "NVIDIARerank": "ranking", 64 | "NVIDIA": "completions", 65 | }[public_class.__name__] 66 | with warnings.catch_warnings(): 67 | warnings.simplefilter("error") 68 | model = Model( 69 | id=mock_model, 70 | model_type=model_type, 71 | client=public_class.__name__, 72 | endpoint="BOGUS", 73 | ) 74 | register_model(model) 75 | x = public_class(model=mock_model, nvidia_api_key="a-bogus-key") 76 | assert x.model == mock_model 77 | 78 | 79 | def test_registered_model_without_client_usable(public_class: type) -> None: 80 | id = "test/no-client" 81 | model = Model(id=id, endpoint="BOGUS") 82 | register_model(model) 83 | with pytest.warns(UserWarning) as record: 84 | public_class(model=id, nvidia_api_key="a-bogus-key") 85 | assert len(record) == 1 86 | assert isinstance(record[0].message, UserWarning) 87 | assert "Unable to determine validity" in str(record[0].message) 88 | 89 | 90 | def test_missing_endpoint() -> None: 91 | with pytest.raises(ValueError) as e: 92 | register_model( 93 | Model(id="missing-endpoint", model_type="chat", client="ChatNVIDIA") 94 | ) 95 | assert "does not have an endpoint" in str(e.value) 96 | 97 | 98 | def test_registered_model_is_available() -> None: 99 | register_model( 100 | Model( 101 | id="test/chat", 102 | model_type="chat", 103 | client="ChatNVIDIA", 104 | endpoint="BOGUS", 105 | ) 106 | ) 107 | register_model( 108 | Model( 109 | id="test/embedding", 110 | model_type="embedding", 111 | client="NVIDIAEmbeddings", 112 | endpoint="BOGUS", 113 | ) 114 | ) 115 | register_model( 
116 | Model( 117 | id="test/rerank", 118 | model_type="ranking", 119 | client="NVIDIARerank", 120 | endpoint="BOGUS", 121 | ) 122 | ) 123 | register_model( 124 | Model( 125 | id="test/completions", 126 | model_type="completions", 127 | client="NVIDIA", 128 | endpoint="BOGUS", 129 | ) 130 | ) 131 | chat_models = ChatNVIDIA.get_available_models(api_key="BOGUS") 132 | embedding_models = NVIDIAEmbeddings.get_available_models(api_key="BOGUS") 133 | ranking_models = NVIDIARerank.get_available_models(api_key="BOGUS") 134 | completions_models = NVIDIA.get_available_models(api_key="BOGUS") 135 | 136 | assert "test/chat" in [model.id for model in chat_models] 137 | assert "test/chat" not in [model.id for model in embedding_models] 138 | assert "test/chat" not in [model.id for model in ranking_models] 139 | assert "test/chat" not in [model.id for model in completions_models] 140 | 141 | assert "test/embedding" not in [model.id for model in chat_models] 142 | assert "test/embedding" in [model.id for model in embedding_models] 143 | assert "test/embedding" not in [model.id for model in ranking_models] 144 | assert "test/embedding" not in [model.id for model in completions_models] 145 | 146 | assert "test/rerank" not in [model.id for model in chat_models] 147 | assert "test/rerank" not in [model.id for model in embedding_models] 148 | assert "test/rerank" in [model.id for model in ranking_models] 149 | assert "test/rerank" not in [model.id for model in completions_models] 150 | 151 | assert "test/completions" not in [model.id for model in chat_models] 152 | assert "test/completions" not in [model.id for model in embedding_models] 153 | assert "test/completions" not in [model.id for model in ranking_models] 154 | assert "test/completions" in [model.id for model in completions_models] 155 | 156 | 157 | def test_registered_model_without_client_is_not_listed(public_class: type) -> None: 158 | model_name = "test/model" 159 | register_model(Model(id=model_name, endpoint="BOGUS")) 160 | models = public_class.get_available_models(api_key="BOGUS") # type: ignore 161 | assert model_name not in [model.id for model in models] 162 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_serialization.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pytest 4 | from langchain_core.load.dump import dumps 5 | from langchain_core.load.load import loads 6 | 7 | from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings 8 | 9 | 10 | @pytest.mark.skip("serialization support is broken, needs attention") 11 | def test_serialize_chatnvidia() -> None: 12 | secret = "a-bogus-key" 13 | x = ChatNVIDIA(nvidia_api_key=secret) 14 | y = loads( 15 | dumps(x), 16 | secrets_map={"NVIDIA_API_KEY": secret}, 17 | valid_namespaces=["langchain_nvidia_ai_endpoints"], 18 | ) 19 | assert x == y 20 | assert isinstance(y, ChatNVIDIA) 21 | 22 | 23 | def test_pickle_embeddings() -> None: 24 | x = NVIDIAEmbeddings(api_key="BOGUS") 25 | y = pickle.loads(pickle.dumps(x)) 26 | assert x.model == y.model 27 | assert x.max_batch_size == y.max_batch_size 28 | assert isinstance(y, NVIDIAEmbeddings) 29 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_standard.py: -------------------------------------------------------------------------------- 1 | """Standard LangChain interface tests""" 2 | 3 | from typing import Type 4 | 5 | from 
langchain_core.language_models import BaseChatModel 6 | from langchain_tests.unit_tests import ChatModelUnitTests 7 | 8 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 9 | 10 | 11 | class TestNVIDIAStandard(ChatModelUnitTests): 12 | @property 13 | def chat_model_class(self) -> Type[BaseChatModel]: 14 | return ChatNVIDIA 15 | 16 | @property 17 | def chat_model_params(self) -> dict: 18 | return { 19 | "model": "meta/llama-3.1-8b-instruct", 20 | "api_key": "BOGUS", 21 | } 22 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_statics.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import pytest 4 | 5 | from langchain_nvidia_ai_endpoints._statics import MODEL_TABLE, determine_model 6 | 7 | 8 | @pytest.fixture(params=MODEL_TABLE.keys()) 9 | def entry(request: Any) -> str: 10 | return request.param 11 | 12 | 13 | @pytest.fixture( 14 | params=[ 15 | alias 16 | for ls in [model.aliases for model in MODEL_TABLE.values() if model.aliases] 17 | for alias in ls 18 | ] 19 | ) 20 | def alias(request: Any) -> str: 21 | return request.param 22 | 23 | 24 | def test_model_table_integrity_name_id(entry: str) -> None: 25 | model = MODEL_TABLE[entry] 26 | assert model.id == entry 27 | 28 | 29 | def test_determine_model_deprecated_alternative_warns(alias: str) -> None: 30 | with pytest.warns(UserWarning) as record: 31 | determine_model(alias) 32 | assert len(record) == 1 33 | assert f"Model {alias} is deprecated" in str(record[0].message) 34 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_stop.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Optional, Sequence, Union 3 | 4 | import pytest 5 | from requests_mock import Mocker 6 | 7 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 8 | 9 | 10 | @pytest.fixture(autouse=True) 11 | def mock_v1_models(requests_mock: Mocker) -> None: 12 | requests_mock.get( 13 | "https://integrate.api.nvidia.com/v1/models", 14 | json={ 15 | "data": [ 16 | { 17 | "id": "mock-model", 18 | "object": "model", 19 | "created": 1234567890, 20 | "owned_by": "OWNER", 21 | } 22 | ] 23 | }, 24 | ) 25 | 26 | 27 | @pytest.fixture(autouse=True) 28 | def mock_v1_chat_completions(requests_mock: Mocker) -> None: 29 | requests_mock.post( 30 | "https://integrate.api.nvidia.com/v1/chat/completions", 31 | json={ 32 | "id": "mock-id", 33 | "created": 1234567890, 34 | "object": "chat.completion", 35 | "model": "mock-model", 36 | "choices": [ 37 | { 38 | "index": 0, 39 | "message": {"role": "assistant", "content": "Ok"}, 40 | } 41 | ], 42 | }, 43 | ) 44 | 45 | 46 | @pytest.mark.parametrize( 47 | "prop_stop, param_stop, expected_stop", 48 | [ 49 | (None, ["PARAM"], ["PARAM"]), 50 | (None, "PARAM", "PARAM"), 51 | (["PROP"], None, ["PROP"]), 52 | (["PROP"], ["PARAM"], ["PARAM"]), 53 | (["PROP"], "PARAM", "PARAM"), 54 | (None, None, None), 55 | ], 56 | ids=[ 57 | "parameter_seq", 58 | "parameter_str", 59 | "property", 60 | "override_seq", 61 | "override_str", 62 | "absent", 63 | ], 64 | ) 65 | @pytest.mark.parametrize("func_name", ["invoke", "stream"]) 66 | def test_stop( 67 | requests_mock: Mocker, 68 | prop_stop: Optional[Sequence[str]], 69 | param_stop: Optional[Union[str, Sequence[str]]], 70 | expected_stop: Union[str, Sequence[str]], 71 | func_name: str, 72 | ) -> None: 73 | """ 74 | Users can pass 
`stop` as a property of the client or as a parameter to the 75 | `invoke` or `stream` methods. The value passed as a parameter should 76 | override the value passed as a property. 77 | 78 | Also, the `stop` parameter can be a str or Sequence[str], while the `stop` 79 | property is always a Sequence[str]. 80 | """ 81 | # `**(dict(stop=...) if ... else {})` is a clever way to avoid passing stop 82 | # if the value is None 83 | warnings.filterwarnings( 84 | "ignore", ".*Found mock-model in available_models.*" 85 | ) # expect to see this warning 86 | client = ChatNVIDIA( 87 | model="mock-model", 88 | api_key="mocked", 89 | **(dict(stop=prop_stop) if prop_stop else {}), 90 | ) 91 | # getattr(client, func_name) is a clever way to call a method by name 92 | response = getattr(client, func_name)( 93 | "Ok?", **(dict(stop=param_stop) if param_stop else {}) 94 | ) 95 | # the `stream` method returns a generator, so we need to call `next` to get 96 | # the actual response 97 | if func_name == "stream": # one step too clever parameterizing the function name 98 | response = next(response) 99 | 100 | assert response.content == "Ok" 101 | 102 | assert requests_mock.last_request is not None 103 | request_payload = requests_mock.last_request.json() 104 | if expected_stop: 105 | assert "stop" in request_payload 106 | assert request_payload["stop"] == expected_stop 107 | else: 108 | assert "stop" not in request_payload 109 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_structured_output.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import warnings 3 | from typing import Callable, List, Optional, Type 4 | 5 | import pytest 6 | import requests_mock 7 | from pydantic import BaseModel as pydanticV2BaseModel # ignore: check_pydantic 8 | from pydantic import Field 9 | from pydantic.v1 import BaseModel as pydanticV1BaseModel # ignore: check_pydantic 10 | 11 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 12 | 13 | 14 | class Joke(pydanticV2BaseModel): 15 | """Joke to tell user.""" 16 | 17 | setup: str = Field(description="The setup of the joke") 18 | punchline: str = Field(description="The punchline to the joke") 19 | rating: Optional[int] = Field(description="How funny the joke is, from 1 to 10") 20 | 21 | 22 | def test_method() -> None: 23 | with pytest.warns(UserWarning) as record: 24 | with warnings.catch_warnings(): 25 | warnings.filterwarnings( 26 | "ignore", 27 | category=UserWarning, 28 | message=".*not known to support structured output.*", 29 | ) 30 | ChatNVIDIA(api_key="BOGUS").with_structured_output(Joke, method="json_mode") 31 | assert len(record) == 1 32 | assert "unnecessary" in str(record[0].message) 33 | 34 | 35 | def test_include_raw() -> None: 36 | with pytest.raises(NotImplementedError): 37 | ChatNVIDIA(api_key="BOGUS").with_structured_output(Joke, include_raw=True) 38 | 39 | with pytest.raises(NotImplementedError): 40 | ChatNVIDIA(api_key="BOGUS").with_structured_output( 41 | Joke.model_json_schema(), include_raw=True 42 | ) 43 | 44 | 45 | def test_known_does_not_warn(empty_v1_models: None) -> None: 46 | structured_model = [ 47 | model 48 | for model in ChatNVIDIA.get_available_models(api_key="BOGUS") 49 | if model.supports_structured_output 50 | ] 51 | assert structured_model, "No models support structured output" 52 | 53 | with warnings.catch_warnings(): 54 | warnings.simplefilter("error") 55 | ChatNVIDIA( 56 | api_key="BOGUS", 
model=structured_model[0].id 57 | ).with_structured_output(Joke) 58 | 59 | 60 | def test_unknown_warns(empty_v1_models: None) -> None: 61 | unstructured_model = [ 62 | model 63 | for model in ChatNVIDIA.get_available_models(api_key="BOGUS") 64 | if not model.supports_structured_output 65 | ] 66 | assert unstructured_model, "All models support structured output" 67 | 68 | with pytest.warns(UserWarning) as record: 69 | ChatNVIDIA( 70 | api_key="BOGUS", model=unstructured_model[0].id 71 | ).with_structured_output(Joke) 72 | assert len(record) == 1 73 | assert "not known to support structured output" in str(record[0].message) 74 | 75 | 76 | def test_enum_negative() -> None: 77 | class Choices(enum.Enum): 78 | A = "A" 79 | B = "2" 80 | C = 3 81 | 82 | llm = ChatNVIDIA(api_key="BOGUS") 83 | with warnings.catch_warnings(): 84 | warnings.filterwarnings( 85 | "ignore", 86 | category=UserWarning, 87 | message=".*not known to support structured output.*", 88 | ) 89 | with pytest.raises(ValueError) as e: 90 | llm.with_structured_output(Choices) 91 | assert "only contain string choices" in str(e.value) 92 | 93 | 94 | class Choices(enum.Enum): 95 | YES = "Yes it is" 96 | NO = "No it is not" 97 | 98 | 99 | @pytest.mark.parametrize( 100 | "chunks", 101 | [ 102 | ["Y", "es", " it", " is"], 103 | ["N", "o", " it", " is", " not"], 104 | ], 105 | ids=["YES", "NO"], 106 | ) 107 | def test_stream_enum( 108 | mock_streaming_response: Callable, 109 | chunks: List[str], 110 | ) -> None: 111 | mock_streaming_response(chunks) 112 | 113 | warnings.filterwarnings("ignore", r".*not known to support structured output.*") 114 | structured_llm = ChatNVIDIA(api_key="BOGUS").with_structured_output(Choices) 115 | # chunks are progressively more complete, so we only consider the last 116 | for chunk in structured_llm.stream("This is ignored."): 117 | response = chunk 118 | assert isinstance(response, Choices) 119 | assert response in Choices 120 | 121 | 122 | @pytest.mark.parametrize( 123 | "chunks", 124 | [ 125 | ["Y", "es", " it"], 126 | ["N", "o", " it", " is"], 127 | ], 128 | ids=["YES", "NO"], 129 | ) 130 | def test_stream_enum_incomplete( 131 | mock_streaming_response: Callable, 132 | chunks: List[str], 133 | ) -> None: 134 | mock_streaming_response(chunks) 135 | 136 | warnings.filterwarnings("ignore", r".*not known to support structured output.*") 137 | structured_llm = ChatNVIDIA(api_key="BOGUS").with_structured_output(Choices) 138 | # chunks are progressively more complete, so we only consider the last 139 | for chunk in structured_llm.stream("This is ignored."): 140 | response = chunk 141 | assert response is None 142 | 143 | 144 | @pytest.mark.parametrize( 145 | "pydanticBaseModel", 146 | [ 147 | pydanticV1BaseModel, 148 | pydanticV2BaseModel, 149 | ], 150 | ids=["pydantic-v1", "pydantic-v2"], 151 | ) 152 | def test_pydantic_version( 153 | requests_mock: requests_mock.Mocker, 154 | pydanticBaseModel: Type, 155 | ) -> None: 156 | requests_mock.post( 157 | "https://integrate.api.nvidia.com/v1/chat/completions", 158 | json={ 159 | "id": "chatcmpl-ID", 160 | "object": "chat.completion", 161 | "created": 1234567890, 162 | "model": "BOGUS", 163 | "choices": [ 164 | { 165 | "index": 0, 166 | "message": { 167 | "role": "assistant", 168 | "content": '{"name": "Sam Doe"}', 169 | }, 170 | "logprobs": None, 171 | "finish_reason": "stop", 172 | } 173 | ], 174 | "usage": { 175 | "prompt_tokens": 22, 176 | "completion_tokens": 20, 177 | "total_tokens": 42, 178 | }, 179 | "system_fingerprint": None, 180 | }, 181 | ) 182 | 183 | 
class Person(pydanticBaseModel): # type: ignore 184 | name: str 185 | 186 | warnings.filterwarnings("ignore", r".*not known to support structured output.*") 187 | llm = ChatNVIDIA(api_key="BOGUS").with_structured_output(Person) 188 | response = llm.invoke("This is ignored.") 189 | assert isinstance(response, Person) 190 | assert response.name == "Sam Doe" 191 | 192 | 193 | @pytest.mark.parametrize( 194 | "strict", 195 | [False, None, "BOGUS"], 196 | ) 197 | def test_strict_warns(strict: Optional[bool]) -> None: 198 | warnings.filterwarnings("error") # no warnings should be raised 199 | 200 | # acceptable warnings 201 | warnings.filterwarnings( 202 | "ignore", category=UserWarning, message=".*not known to support.*" 203 | ) 204 | 205 | # warnings under test 206 | strict_warning = ".*`strict` is ignored.*" 207 | warnings.filterwarnings("default", category=UserWarning, message=strict_warning) 208 | 209 | with pytest.warns(UserWarning, match=strict_warning): 210 | ChatNVIDIA(api_key="BOGUS").with_structured_output( 211 | Joke, 212 | strict=strict, 213 | ) 214 | 215 | 216 | @pytest.mark.parametrize( 217 | "strict", 218 | [True, None], 219 | ids=["strict-True", "no-strict"], 220 | ) 221 | def test_strict_no_warns(strict: Optional[bool]) -> None: 222 | warnings.filterwarnings("error") # no warnings should be raised 223 | 224 | # acceptable warnings 225 | warnings.filterwarnings( 226 | "ignore", category=UserWarning, message=".*not known to support.*" 227 | ) 228 | 229 | ChatNVIDIA(api_key="BOGUS").with_structured_output( 230 | Joke, 231 | **({"strict": strict} if strict is not None else {}), 232 | ) 233 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_vlm_models.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Union 2 | 3 | import pytest 4 | 5 | from langchain_nvidia_ai_endpoints.chat_models import _nv_vlm_get_asset_ids 6 | 7 | 8 | @pytest.mark.parametrize( 9 | "content, expected", 10 | [ 11 | # Single asset ID in a string (double quotes) 12 | ('', ["12345"]), 13 | # Multiple asset IDs in a string (double quotes) 14 | ( 15 | ( 16 | '' 17 | '' 18 | ), 19 | ["12345", "67890"], 20 | ), 21 | # Single asset ID in list of strings (single quotes) 22 | ([""], ["12345"]), 23 | # Multiple asset IDs in list of strings (single quotes) 24 | ( 25 | [ 26 | "", 27 | "", 28 | ], 29 | ["12345", "67890"], 30 | ), 31 | # Single asset ID in a list of dictionaries 32 | ([{"image_url": {"url": "data:image/png;asset_id,12345"}}], ["12345"]), 33 | # Multiple asset IDs in a list of dictionaries 34 | ( 35 | [ 36 | {"image_url": {"url": "data:image/png;asset_id,12345"}}, 37 | {"image_url": {"url": "data:image/jpeg;asset_id,67890"}}, 38 | ], 39 | ["12345", "67890"], 40 | ), 41 | # No asset IDs present (double quotes) 42 | ('', []), 43 | # No asset IDs present (single quotes) 44 | ("", []), 45 | ], 46 | ids=[ 47 | "single_asset_id_string_double_quotes", 48 | "multiple_asset_ids_string_double_quotes", 49 | "single_asset_id_list_of_strings_single_quotes", 50 | "multiple_asset_ids_list_of_strings_single_quotes", 51 | "single_asset_id_list_of_dicts", 52 | "multiple_asset_ids_list_of_dicts", 53 | "no_asset_ids_double_quotes", 54 | "no_asset_ids_single_quotes", 55 | ], 56 | ) 57 | def test_nv_vlm_get_asset_ids( 58 | content: Union[str, List[Union[str, Dict[str, Any]]]], expected: List[str] 59 | ) -> None: 60 | result = _nv_vlm_get_asset_ids(content) 61 | assert result == expected 62 
| -------------------------------------------------------------------------------- /libs/trt/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | -------------------------------------------------------------------------------- /libs/trt/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 LangChain, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /libs/trt/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all format lint test tests integration_tests docker_tests help extended_tests 2 | 3 | # Default target executed when no arguments are given to make. 4 | all: help 5 | 6 | # Define a variable for the test file path. 7 | TEST_FILE ?= tests/unit_tests/ 8 | 9 | test: 10 | poetry run pytest $(TEST_FILE) 11 | 12 | tests: 13 | poetry run pytest $(TEST_FILE) 14 | 15 | 16 | ###################### 17 | # LINTING AND FORMATTING 18 | ###################### 19 | 20 | # Define a variable for Python and notebook files. 21 | PYTHON_FILES=. 22 | MYPY_CACHE=.mypy_cache 23 | lint format: PYTHON_FILES=. 24 | lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/nvidia-trt --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$') 25 | lint_package: PYTHON_FILES=langchain_nvidia_trt 26 | lint_tests: PYTHON_FILES=tests 27 | lint_tests: MYPY_CACHE=.mypy_cache_test 28 | 29 | lint lint_diff lint_package lint_tests: 30 | poetry run ruff . 
31 | poetry run ruff format $(PYTHON_FILES) --diff 32 | poetry run ruff --select I $(PYTHON_FILES) 33 | mkdir $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE) 34 | 35 | format format_diff: 36 | poetry run ruff format $(PYTHON_FILES) 37 | poetry run ruff --select I --fix $(PYTHON_FILES) 38 | 39 | spell_check: 40 | poetry run codespell --toml pyproject.toml 41 | 42 | spell_fix: 43 | poetry run codespell --toml pyproject.toml -w 44 | 45 | check_imports: $(shell find langchain_nvidia_trt -name '*.py') 46 | poetry run python ./scripts/check_imports.py $^ 47 | 48 | ###################### 49 | # HELP 50 | ###################### 51 | 52 | help: 53 | @echo '----' 54 | @echo 'check_imports - check imports' 55 | @echo 'format - run code formatters' 56 | @echo 'lint - run linters' 57 | @echo 'test - run unit tests' 58 | @echo 'tests - run unit tests' 59 | @echo 'test TEST_FILE=<test_file> - run all tests in file' 60 | -------------------------------------------------------------------------------- /libs/trt/README.md: -------------------------------------------------------------------------------- 1 | # langchain-nvidia-trt 2 | -------------------------------------------------------------------------------- /libs/trt/docs/llms.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "raw", 5 | "id": "67db2992", 6 | "metadata": {}, 7 | "source": [ 8 | "---\n", 9 | "sidebar_label: TritonTensorRT\n", 10 | "---" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "b56b221d", 16 | "metadata": {}, 17 | "source": [ 18 | "# Nvidia Triton+TRT-LLM\n", 19 | "\n", 20 | "Nvidia's Triton is an inference server that provides API-style access to hosted LLM models. Likewise, Nvidia TensorRT-LLM, often abbreviated as TRT-LLM, is a GPU-accelerated SDK for running optimizations and inference on LLM models. This connector allows LangChain to remotely interact with a Triton inference server over gRPC or HTTP to perform accelerated inference operations.\n", 21 | "\n", 22 | "[Triton Inference Server Github](https://github.com/triton-inference-server/server)\n", 23 | "\n", 24 | "\n", 25 | "## TritonTensorRTLLM\n", 26 | "\n", 27 | "This example goes over how to use LangChain to interact with `TritonTensorRT` LLMs. 
To install, run the following command:" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "id": "59c710c4", 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# install package\n", 38 | "%pip install -U langchain-nvidia-trt" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "id": "0ee90032", 44 | "metadata": {}, 45 | "source": [ 46 | "## Create the Triton+TRT-LLM instance\n", 47 | "\n", 48 | "Remember that a Triton instance represents a running server instance therefore you should ensure you have a valid server configuration running and change the `localhost:8001` to the correct IP/hostname:port combination for your server.\n", 49 | "\n", 50 | "An example of setting up this environment can be found at Nvidia's (GenerativeAIExamples Github Repo)[https://github.com/NVIDIA/GenerativeAIExamples/tree/main/RetrievalAugmentedGeneration]" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "035dea0f", 57 | "metadata": { 58 | "tags": [] 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "from langchain_core.prompts import PromptTemplate\n", 63 | "from langchain_nvidia_trt.llms import TritonTensorRTLLM\n", 64 | "\n", 65 | "template = \"\"\"Question: {question}\n", 66 | "\n", 67 | "Answer: Let's think step by step.\"\"\"\n", 68 | "\n", 69 | "prompt = PromptTemplate.from_template(template)\n", 70 | "\n", 71 | "# Connect to the TRT-LLM Llama-2 model running on the Triton server at the url below\n", 72 | "triton_llm = TritonTensorRTLLM(server_url =\"localhost:8001\", model_name=\"ensemble\", tokens=500)\n", 73 | "\n", 74 | "chain = prompt | triton_llm \n", 75 | "\n", 76 | "chain.invoke({\"question\": \"What is LangChain?\"})" 77 | ] 78 | } 79 | ], 80 | "metadata": { 81 | "kernelspec": { 82 | "display_name": "Python 3 (ipykernel)", 83 | "language": "python", 84 | "name": "python3" 85 | }, 86 | "language_info": { 87 | "codemirror_mode": { 88 | "name": "ipython", 89 | "version": 3 90 | }, 91 | "file_extension": ".py", 92 | "mimetype": "text/x-python", 93 | "name": "python", 94 | "nbconvert_exporter": "python", 95 | "pygments_lexer": "ipython3", 96 | "version": "3.10.9" 97 | }, 98 | "vscode": { 99 | "interpreter": { 100 | "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1" 101 | } 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 5 106 | } 107 | -------------------------------------------------------------------------------- /libs/trt/langchain_nvidia_trt/__init__.py: -------------------------------------------------------------------------------- 1 | from langchain_nvidia_trt.llms import TritonTensorRTLLM 2 | 3 | __all__ = ["TritonTensorRTLLM"] 4 | -------------------------------------------------------------------------------- /libs/trt/langchain_nvidia_trt/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/trt/langchain_nvidia_trt/py.typed -------------------------------------------------------------------------------- /libs/trt/mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | # Empty global config 3 | [mypy-tritonclient.*] 4 | ignore_missing_imports = True 5 | -------------------------------------------------------------------------------- /libs/trt/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = 
"langchain-nvidia-trt" 3 | version = "0.0.1" 4 | description = "An integration package connecting TritonTensorRT and LangChain" 5 | authors = [] 6 | readme = "README.md" 7 | repository = "https://github.com/langchain-ai/langchain-nvidia" 8 | license = "MIT" 9 | 10 | [tool.poetry.urls] 11 | "Source Code" = "https://github.com/langchain-ai/langchain-nvidia/tree/main/libs/trt" 12 | 13 | [tool.poetry.dependencies] 14 | python = ">=3.8.1,<4.0" 15 | langchain-core = "^0.1" 16 | tritonclient = { extras = ["grpc"], version = "^2.42.0" } 17 | lint = "^1.2.1" 18 | types-protobuf = "^4.24.0.4" 19 | protobuf = "^3.5.0" 20 | 21 | [tool.poetry.group.test] 22 | optional = true 23 | 24 | [tool.poetry.group.test.dependencies] 25 | pytest = "^7.3.0" 26 | freezegun = "^1.2.2" 27 | pytest-mock = "^3.10.0" 28 | syrupy = "^4.0.2" 29 | pytest-watcher = "^0.3.4" 30 | pytest-asyncio = "^0.21.1" 31 | langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core" } 32 | 33 | [tool.poetry.group.codespell] 34 | optional = true 35 | 36 | [tool.poetry.group.codespell.dependencies] 37 | codespell = "^2.2.0" 38 | 39 | [tool.poetry.group.test_integration] 40 | optional = true 41 | 42 | [tool.poetry.group.test_integration.dependencies] 43 | 44 | [tool.poetry.group.lint] 45 | optional = true 46 | 47 | [tool.poetry.group.lint.dependencies] 48 | ruff = "^0.1.5" 49 | 50 | [tool.poetry.group.typing.dependencies] 51 | mypy = "^0.991" 52 | langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core" } 53 | 54 | [tool.poetry.group.dev] 55 | optional = true 56 | 57 | [tool.poetry.group.dev.dependencies] 58 | langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core" } 59 | 60 | [tool.ruff.lint] 61 | select = [ 62 | "E", # pycodestyle 63 | "F", # pyflakes 64 | "I", # isort 65 | "T201", # print 66 | ] 67 | 68 | [tool.mypy] 69 | disallow_untyped_defs = "True" 70 | 71 | [tool.coverage.run] 72 | omit = ["tests/*"] 73 | 74 | [build-system] 75 | requires = ["poetry-core>=1.0.0"] 76 | build-backend = "poetry.core.masonry.api" 77 | 78 | [tool.pytest.ini_options] 79 | # --strict-markers will raise errors on unknown marks. 80 | # https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks 81 | # 82 | # https://docs.pytest.org/en/7.1.x/reference/reference.html 83 | # --strict-config any warnings encountered while parsing the `pytest` 84 | # section of the configuration file raise errors. 85 | # 86 | # https://github.com/tophat/syrupy 87 | # --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite. 88 | addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5" 89 | # Registering custom markers. 
90 | # https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers 91 | markers = [ 92 | "requires: mark tests as requiring a specific library", 93 | "asyncio: mark tests as requiring asyncio", 94 | "compile: mark placeholder test used to compile integration tests without running them", 95 | ] 96 | asyncio_mode = "auto" 97 | -------------------------------------------------------------------------------- /libs/trt/scripts/check_imports.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import traceback 3 | from importlib.machinery import SourceFileLoader 4 | 5 | if __name__ == "__main__": 6 | files = sys.argv[1:] 7 | has_failure = False 8 | for file in files: 9 | try: 10 | SourceFileLoader("x", file).load_module() 11 | except Exception: 12 | has_failure = True 13 | print(file) # noqa: T201 14 | traceback.print_exc() 15 | print() # noqa: T201 16 | 17 | sys.exit(1 if has_failure else 0) 18 | -------------------------------------------------------------------------------- /libs/trt/scripts/check_pydantic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script searches for lines starting with "import pydantic" or "from pydantic" 4 | # in tracked files within a Git repository. 5 | # 6 | # Usage: ./scripts/check_pydantic.sh /path/to/repository 7 | 8 | # Check if a path argument is provided 9 | if [ $# -ne 1 ]; then 10 | echo "Usage: $0 /path/to/repository" 11 | exit 1 12 | fi 13 | 14 | repository_path="$1" 15 | 16 | # Search for lines matching the pattern within the specified repository 17 | result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic') 18 | 19 | # Check if any matching lines were found 20 | if [ -n "$result" ]; then 21 | echo "ERROR: The following lines need to be updated:" 22 | echo "$result" 23 | echo "Please replace the code with an import from langchain_core.pydantic_v1." 24 | echo "For example, replace 'from pydantic import BaseModel'" 25 | echo "with 'from langchain_core.pydantic_v1 import BaseModel'" 26 | exit 1 27 | fi 28 | -------------------------------------------------------------------------------- /libs/trt/scripts/lint_imports.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | # Initialize a variable to keep track of errors 6 | errors=0 7 | 8 | # make sure not importing from langchain or langchain_experimental 9 | git --no-pager grep '^from langchain\.' . && errors=$((errors+1)) 10 | git --no-pager grep '^from langchain_experimental\.' . 
&& errors=$((errors+1)) 11 | 12 | # Decide on an exit status based on the errors 13 | if [ "$errors" -gt 0 ]; then 14 | exit 1 15 | else 16 | exit 0 17 | fi 18 | -------------------------------------------------------------------------------- /libs/trt/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/trt/tests/__init__.py -------------------------------------------------------------------------------- /libs/trt/tests/integration_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/trt/tests/integration_tests/__init__.py -------------------------------------------------------------------------------- /libs/trt/tests/integration_tests/test_compile.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.mark.compile 5 | def test_placeholder() -> None: 6 | """Used for compiling integration tests without running any real tests.""" 7 | pass 8 | -------------------------------------------------------------------------------- /libs/trt/tests/integration_tests/test_llms.py: -------------------------------------------------------------------------------- 1 | """Test TritonTensorRTLLM llm.""" 2 | import pytest 3 | 4 | from langchain_nvidia_trt.llms import TritonTensorRTLLM 5 | 6 | _MODEL_NAME = "ensemble" 7 | 8 | 9 | @pytest.mark.skip(reason="Need a working Triton server") 10 | def test_stream() -> None: 11 | """Test streaming tokens from NVIDIA TRT.""" 12 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 13 | 14 | for token in llm.stream("I'm Pickle Rick"): 15 | assert isinstance(token, str) 16 | 17 | 18 | @pytest.mark.skip(reason="Need a working Triton server") 19 | async def test_astream() -> None: 20 | """Test streaming tokens from NVIDIA TRT.""" 21 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 22 | 23 | async for token in llm.astream("I'm Pickle Rick"): 24 | assert isinstance(token, str) 25 | 26 | 27 | @pytest.mark.skip(reason="Need a working Triton server") 28 | async def test_abatch() -> None: 29 | """Test streaming tokens from TritonTensorRTLLM.""" 30 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 31 | 32 | result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"]) 33 | for token in result: 34 | assert isinstance(token, str) 35 | 36 | 37 | @pytest.mark.skip(reason="Need a working Triton server") 38 | async def test_abatch_tags() -> None: 39 | """Test batch tokens from TritonTensorRTLLM.""" 40 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 41 | 42 | result = await llm.abatch( 43 | ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]} 44 | ) 45 | for token in result: 46 | assert isinstance(token, str) 47 | 48 | 49 | @pytest.mark.skip(reason="Need a working Triton server") 50 | def test_batch() -> None: 51 | """Test batch tokens from TritonTensorRTLLM.""" 52 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 53 | 54 | result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"]) 55 | for token in result: 56 | assert isinstance(token, str) 57 | 58 | 59 | @pytest.mark.skip(reason="Need a working Triton server") 60 | async def test_ainvoke() -> None: 61 | """Test invoke tokens from TritonTensorRTLLM.""" 62 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 63 | 64 | result = await 
llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]}) 65 | assert isinstance(result, str) 66 | 67 | 68 | @pytest.mark.skip(reason="Need a working Triton server") 69 | def test_invoke() -> None: 70 | """Test invoke tokens from TritonTensorRTLLM.""" 71 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 72 | 73 | result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"])) 74 | assert isinstance(result, str) 75 | -------------------------------------------------------------------------------- /libs/trt/tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/trt/tests/unit_tests/__init__.py -------------------------------------------------------------------------------- /libs/trt/tests/unit_tests/test_imports.py: -------------------------------------------------------------------------------- 1 | from langchain_nvidia_trt import __all__ 2 | 3 | EXPECTED_ALL = ["TritonTensorRTLLM"] 4 | 5 | 6 | def test_all_imports() -> None: 7 | assert sorted(EXPECTED_ALL) == sorted(__all__) 8 | -------------------------------------------------------------------------------- /libs/trt/tests/unit_tests/test_llms.py: -------------------------------------------------------------------------------- 1 | """Test TritonTensorRT Chat API wrapper.""" 2 | from langchain_nvidia_trt import TritonTensorRTLLM 3 | 4 | 5 | def test_initialization() -> None: 6 | """Test integration initialization.""" 7 | TritonTensorRTLLM(model_name="ensemble", server_url="http://localhost:8001") 8 | -------------------------------------------------------------------------------- /studio/.env.example: -------------------------------------------------------------------------------- 1 | NVIDIA_API_KEY=nvapi- 2 | TAVILY_API_KEY=tvly- -------------------------------------------------------------------------------- /studio/langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "dockerfile_lines": [], 3 | "graphs": { 4 | "agentic_rag_nvidia": "./agentic_rag_nvidia.py:graph" 5 | }, 6 | "python_version": "3.11", 7 | "env": "./.env", 8 | "dependencies": [ 9 | "." 10 | ] 11 | } -------------------------------------------------------------------------------- /studio/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain-nvidia-ai-endpoints 2 | langchain-chroma 3 | langchain-community 4 | langchain 5 | langgraph 6 | tavily-python 7 | beautifulsoup4 8 | lxml --------------------------------------------------------------------------------