├── .github ├── actions │ └── poetry_setup │ │ └── action.yml ├── scripts │ ├── check_diff.py │ └── get_min_versions.py └── workflows │ ├── _all_ci.yml │ ├── _codespell.yml │ ├── _compile_integration_test.yml │ ├── _lint.yml │ ├── _release.yml │ ├── _scheduled_test.yml │ ├── _test.yml │ ├── _test_release.yml │ ├── check_diffs.yml │ └── extract_ignored_words_list.py ├── .gitignore ├── LICENSE ├── README.md ├── cookbook ├── img │ └── structured_report_generation_arch.png ├── langgraph_rag_agent_llama3_nvidia_nim.ipynb ├── nvidia_nim_agents_llama3.1.ipynb ├── structured_report_generation.ipynb └── structured_report_generation_elastic │ ├── setup.sh │ └── structured_report_generation_elastic.ipynb ├── libs ├── ai-endpoints │ ├── .gitignore │ ├── LICENSE │ ├── Makefile │ ├── README.md │ ├── docs │ │ ├── chat │ │ │ └── nvidia_ai_endpoints.ipynb │ │ ├── llms │ │ │ └── nvidia_ai_endpoints.ipynb │ │ ├── providers │ │ │ └── nvidia.mdx │ │ ├── retrievers │ │ │ └── nvidia_rerank.ipynb │ │ └── text_embedding │ │ │ └── nvidia_ai_endpoints.ipynb │ ├── langchain_nvidia.py │ ├── langchain_nvidia_ai_endpoints │ │ ├── __init__.py │ │ ├── _common.py │ │ ├── _statics.py │ │ ├── _utils.py │ │ ├── callbacks.py │ │ ├── chat_models.py │ │ ├── embeddings.py │ │ ├── llm.py │ │ ├── py.typed │ │ └── reranking.py │ ├── poetry.lock │ ├── pyproject.toml │ ├── scripts │ │ ├── check_imports.py │ │ └── lint_imports.sh │ └── tests │ │ ├── __init__.py │ │ ├── data │ │ ├── nvidia-picasso-large.png │ │ ├── nvidia-picasso.gif │ │ ├── nvidia-picasso.jpg │ │ ├── nvidia-picasso.png │ │ └── nvidia-picasso.webp │ │ ├── integration_tests │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_api_key.py │ │ ├── test_available_models.py │ │ ├── test_base_url.py │ │ ├── test_bind_tools.py │ │ ├── test_chat_models.py │ │ ├── test_compile.py │ │ ├── test_completions_models.py │ │ ├── test_embeddings.py │ │ ├── test_other_models.py │ │ ├── test_ranking.py │ │ ├── test_register_model.py │ │ ├── test_standard.py │ │ ├── test_streaming.py │ │ ├── test_structured_output.py │ │ └── test_vlm_models.py │ │ └── unit_tests │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_202_polling.py │ │ ├── test_api_key.py │ │ ├── test_available_models.py │ │ ├── test_base_url.py │ │ ├── test_bind_tools.py │ │ ├── test_chat_models.py │ │ ├── test_completions_models.py │ │ ├── test_embeddings.py │ │ ├── test_imports.py │ │ ├── test_messages.py │ │ ├── test_metadata.py │ │ ├── test_model.py │ │ ├── test_parallel_tool_calls.py │ │ ├── test_ranking.py │ │ ├── test_register_model.py │ │ ├── test_serialization.py │ │ ├── test_standard.py │ │ ├── test_statics.py │ │ ├── test_stop.py │ │ ├── test_structured_output.py │ │ └── test_vlm_models.py └── trt │ ├── .gitignore │ ├── LICENSE │ ├── Makefile │ ├── README.md │ ├── docs │ └── llms.ipynb │ ├── langchain_nvidia_trt │ ├── __init__.py │ ├── llms.py │ └── py.typed │ ├── mypy.ini │ ├── poetry.lock │ ├── pyproject.toml │ ├── scripts │ ├── check_imports.py │ ├── check_pydantic.sh │ └── lint_imports.sh │ └── tests │ ├── __init__.py │ ├── integration_tests │ ├── __init__.py │ ├── test_compile.py │ └── test_llms.py │ └── unit_tests │ ├── __init__.py │ ├── test_imports.py │ └── test_llms.py └── studio ├── .env.example ├── agentic_rag_nvidia.py ├── langgraph.json └── requirements.txt /.github/actions/poetry_setup/action.yml: -------------------------------------------------------------------------------- 1 | # An action for setting up poetry install with caching. 
2 | # Using a custom action since the default action does not 3 | # take poetry install groups into account. 4 | # Action code from: 5 | # https://github.com/actions/setup-python/issues/505#issuecomment-1273013236 6 | name: poetry-install-with-caching 7 | description: Poetry install with support for caching of dependency groups. 8 | 9 | inputs: 10 | python-version: 11 | description: Python version, supporting MAJOR.MINOR only 12 | required: true 13 | 14 | poetry-version: 15 | description: Poetry version 16 | required: true 17 | 18 | cache-key: 19 | description: Cache key to use for manual handling of caching 20 | required: true 21 | 22 | working-directory: 23 | description: Directory whose poetry.lock file should be cached 24 | required: true 25 | 26 | runs: 27 | using: composite 28 | steps: 29 | - uses: actions/setup-python@v5 30 | name: Setup python ${{ inputs.python-version }} 31 | id: setup-python 32 | with: 33 | python-version: ${{ inputs.python-version }} 34 | 35 | - uses: actions/cache@v4 36 | id: cache-bin-poetry 37 | name: Cache Poetry binary - Python ${{ inputs.python-version }} 38 | env: 39 | SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1" 40 | with: 41 | path: | 42 | /opt/pipx/venvs/poetry 43 | # This step caches the poetry installation, so make sure it's keyed on the poetry version as well. 44 | key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }} 45 | 46 | - name: Refresh shell hashtable and fixup softlinks 47 | if: steps.cache-bin-poetry.outputs.cache-hit == 'true' 48 | shell: bash 49 | env: 50 | POETRY_VERSION: ${{ inputs.poetry-version }} 51 | PYTHON_VERSION: ${{ inputs.python-version }} 52 | run: | 53 | set -eux 54 | 55 | # Refresh the shell hashtable, to ensure correct `which` output. 56 | hash -r 57 | 58 | # `actions/cache@v3` doesn't always seem able to correctly unpack softlinks. 59 | # Delete and recreate the softlinks pipx expects to have. 60 | rm /opt/pipx/venvs/poetry/bin/python 61 | cd /opt/pipx/venvs/poetry/bin 62 | ln -s "$(which "python$PYTHON_VERSION")" python 63 | chmod +x python 64 | cd /opt/pipx_bin/ 65 | ln -s /opt/pipx/venvs/poetry/bin/poetry poetry 66 | chmod +x poetry 67 | 68 | # Ensure everything got set up correctly. 69 | /opt/pipx/venvs/poetry/bin/python --version 70 | /opt/pipx_bin/poetry --version 71 | 72 | - name: Install poetry 73 | if: steps.cache-bin-poetry.outputs.cache-hit != 'true' 74 | shell: bash 75 | env: 76 | POETRY_VERSION: ${{ inputs.poetry-version }} 77 | PYTHON_VERSION: ${{ inputs.python-version }} 78 | # Install poetry using the python version installed by setup-python step. 79 | run: pipx install "poetry==$POETRY_VERSION" --python '${{ steps.setup-python.outputs.python-path }}' --verbose 80 | 81 | - name: Restore pip and poetry cached dependencies 82 | uses: actions/cache@v4 83 | env: 84 | SEGMENT_DOWNLOAD_TIMEOUT_MIN: "4" 85 | WORKDIR: ${{ inputs.working-directory == '' && '.' 
|| inputs.working-directory }} 86 | with: 87 | path: | 88 | ~/.cache/pip 89 | ~/.cache/pypoetry/virtualenvs 90 | ~/.cache/pypoetry/cache 91 | ~/.cache/pypoetry/artifacts 92 | ${{ env.WORKDIR }}/.venv 93 | key: py-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/**/poetry.lock', env.WORKDIR)) }} 94 | -------------------------------------------------------------------------------- /.github/scripts/check_diff.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import os 4 | from typing import Dict 5 | 6 | NVIDIA_DIRS = [ 7 | "libs/ai-endpoints", 8 | "libs/trt", 9 | ] 10 | 11 | if __name__ == "__main__": 12 | files = sys.argv[1:] 13 | 14 | dirs_to_run: Dict[str, set] = { 15 | "lint": set(), 16 | "test": set(), 17 | } 18 | 19 | if len(files) == 300: 20 | # max diff length is 300 files - there are likely files missing 21 | raise ValueError("Max diff reached. Please manually run CI on changed libs.") 22 | 23 | for file in files: 24 | if any( 25 | file.startswith(dir_) 26 | for dir_ in ( 27 | ".github/workflows", 28 | ".github/tools", 29 | ".github/actions", 30 | ".github/scripts/check_diff.py", 31 | ) 32 | ): 33 | # add all LANGCHAIN_DIRS for infra changes 34 | # dirs_to_run["lint"].add(".") 35 | pass 36 | 37 | if any(file.startswith(dir_) for dir_ in NVIDIA_DIRS): 38 | for dir_ in NVIDIA_DIRS: 39 | if file.startswith(dir_): 40 | # add that dir and all dirs after in LANGCHAIN_DIRS 41 | # for extended testing 42 | dirs_to_run["test"].add(dir_) 43 | elif file.startswith("libs/"): 44 | raise ValueError( 45 | f"Unknown lib: {file}. check_diff.py likely needs " 46 | "an update for this new library!" 
47 | ) 48 | # elif any(file.startswith(p) for p in ["docs/", "templates/", "cookbook/"]): 49 | # dirs_to_run["lint"].add(".") 50 | 51 | outputs = { 52 | "dirs-to-lint": list( 53 | dirs_to_run["lint"] | dirs_to_run["test"] 54 | ), 55 | "dirs-to-test": list(dirs_to_run["test"]), 56 | } 57 | for key, value in outputs.items(): 58 | json_output = json.dumps(value) 59 | print(f"{key}={json_output}") # noqa: T201 60 | -------------------------------------------------------------------------------- /.github/scripts/get_min_versions.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import tomllib 4 | from packaging.version import parse as parse_version 5 | import re 6 | 7 | MIN_VERSION_LIBS = ["langchain-core"] 8 | 9 | 10 | def get_min_version(version: str) -> str: 11 | # case ^x.x.x 12 | _match = re.match(r"^\^(\d+(?:\.\d+){0,2})$", version) 13 | if _match: 14 | return _match.group(1) 15 | 16 | # case >=x.x.x,<y.y.y 17 | _match = re.match(r"^>=(\d+(?:\.\d+){0,2}),<(\d+(?:\.\d+){0,2})$", version) 18 | if _match: 19 | _min = _match.group(1) 20 | _max = _match.group(2) 21 | assert parse_version(_min) < parse_version(_max) 22 | return _min 23 | 24 | # case x.x.x 25 | _match = re.match(r"^(\d+(?:\.\d+){0,2})$", version) 26 | if _match: 27 | return _match.group(1) 28 | 29 | raise ValueError(f"Unrecognized version format: {version}") 30 | 31 | 32 | def get_min_version_from_toml(toml_path: str): 33 | # Parse the TOML file 34 | with open(toml_path, "rb") as file: 35 | toml_data = tomllib.load(file) 36 | 37 | # Get the dependencies from tool.poetry.dependencies 38 | dependencies = toml_data["tool"]["poetry"]["dependencies"] 39 | 40 | # Initialize a dictionary to store the minimum versions 41 | min_versions = {} 42 | 43 | # Iterate over the libs in MIN_VERSION_LIBS 44 | for lib in MIN_VERSION_LIBS: 45 | # Check if the lib is present in the dependencies 46 | if lib in dependencies: 47 | # Get the version string 48 | version_string = dependencies[lib] 49 | 50 | # Use parse_version to get the minimum supported version from version_string 51 | min_version = get_min_version(version_string) 52 | 53 | # Store the minimum version in the min_versions dictionary 54 | min_versions[lib] = min_version 55 | 56 | return min_versions 57 | 58 | 59 | # Get the TOML file path from the command line argument 60 | toml_file = sys.argv[1] 61 | 62 | # Call the function to get the minimum versions 63 | min_versions = get_min_version_from_toml(toml_file) 64 | 65 | print(" ".join([f"{lib}=={version}" for lib, version in min_versions.items()])) 66 | -------------------------------------------------------------------------------- /.github/workflows/_all_ci.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: langchain CI 3 | 4 | on: 5 | workflow_call: 6 | inputs: 7 | working-directory: 8 | required: true 9 | type: string 10 | description: "From which folder this pipeline executes" 11 | workflow_dispatch: 12 | inputs: 13 | working-directory: 14 | required: true 15 | type: choice 16 | default: 'libs/ai-endpoints' 17 | options: 18 | - libs/ai-endpoints 19 | - libs/trt 20 | 21 | 22 | # If another push to the same PR or branch happens while this workflow is still running, 23 | # cancel the earlier run in favor of the next run. 24 | # 25 | # There's no point in testing an outdated version of the code.
GitHub only allows 26 | # a limited number of job runners to be active at the same time, so it's better to cancel 27 | # pointless jobs early so that more useful jobs can run sooner. 28 | concurrency: 29 | group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.working-directory }} 30 | cancel-in-progress: true 31 | 32 | env: 33 | POETRY_VERSION: "1.7.1" 34 | 35 | jobs: 36 | lint: 37 | name: "-" 38 | uses: ./.github/workflows/_lint.yml 39 | with: 40 | working-directory: ${{ inputs.working-directory }} 41 | secrets: inherit 42 | 43 | test: 44 | name: "-" 45 | uses: ./.github/workflows/_test.yml 46 | with: 47 | working-directory: ${{ inputs.working-directory }} 48 | secrets: inherit 49 | 50 | compile-integration-tests: 51 | name: "-" 52 | uses: ./.github/workflows/_compile_integration_test.yml 53 | with: 54 | working-directory: ${{ inputs.working-directory }} 55 | secrets: inherit -------------------------------------------------------------------------------- /.github/workflows/_codespell.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: make spell_check 3 | 4 | on: 5 | workflow_call: 6 | inputs: 7 | working-directory: 8 | required: true 9 | type: string 10 | description: "From which folder this pipeline executes" 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | codespell: 17 | name: (Check for spelling errors) 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - name: Checkout 22 | uses: actions/checkout@v4 23 | 24 | - name: Install Dependencies 25 | run: | 26 | pip install toml 27 | 28 | - name: Extract Ignore Words List 29 | working-directory: ${{ inputs.working-directory }} 30 | run: | 31 | # Use a Python script to extract the ignore words list from pyproject.toml 32 | python ../../.github/workflows/extract_ignored_words_list.py 33 | id: extract_ignore_words 34 | 35 | - name: Codespell 36 | uses: codespell-project/actions-codespell@v2 37 | with: 38 | skip: guide_imports.json 39 | ignore_words_list: ${{ steps.extract_ignore_words.outputs.ignore_words_list }} 40 | -------------------------------------------------------------------------------- /.github/workflows/_compile_integration_test.yml: -------------------------------------------------------------------------------- 1 | name: compile-integration-test 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | working-directory: 7 | required: true 8 | type: string 9 | description: "From which folder this pipeline executes" 10 | 11 | env: 12 | POETRY_VERSION: "1.7.1" 13 | 14 | jobs: 15 | build: 16 | defaults: 17 | run: 18 | working-directory: ${{ inputs.working-directory }} 19 | runs-on: ubuntu-latest 20 | strategy: 21 | matrix: 22 | python-version: 23 | - "3.8" 24 | - "3.9" 25 | - "3.10" 26 | - "3.11" 27 | name: "poetry run pytest -m compile tests/integration_tests #${{ matrix.python-version }}" 28 | steps: 29 | - uses: actions/checkout@v4 30 | 31 | - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }} 32 | uses: "./.github/actions/poetry_setup" 33 | with: 34 | python-version: ${{ matrix.python-version }} 35 | poetry-version: ${{ env.POETRY_VERSION }} 36 | working-directory: ${{ inputs.working-directory }} 37 | cache-key: compile-integration 38 | 39 | - name: Install integration dependencies 40 | shell: bash 41 | run: poetry install --with=test_integration,test 42 | 43 | - name: Check integration tests compile 44 | shell: bash 45 | env: 46 | NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} 47 | run: poetry run pytest -m compile 
tests/integration_tests 48 | 49 | - name: Ensure the tests did not create any additional files 50 | shell: bash 51 | run: | 52 | set -eu 53 | 54 | STATUS="$(git status)" 55 | echo "$STATUS" 56 | 57 | # grep will exit non-zero if the target message isn't found, 58 | # and `set -e` above will cause the step to fail. 59 | echo "$STATUS" | grep 'nothing to commit, working tree clean' 60 | -------------------------------------------------------------------------------- /.github/workflows/_lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | working-directory: 7 | required: true 8 | type: string 9 | description: "From which folder this pipeline executes" 10 | langchain-location: 11 | required: false 12 | type: string 13 | description: "Relative path to the langchain library folder" 14 | 15 | env: 16 | POETRY_VERSION: "1.7.1" 17 | WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }} 18 | 19 | # This env var allows us to get inline annotations when ruff has complaints. 20 | RUFF_OUTPUT_FORMAT: github 21 | 22 | jobs: 23 | build: 24 | name: "make lint #${{ matrix.python-version }}" 25 | runs-on: ubuntu-latest 26 | strategy: 27 | matrix: 28 | # Only lint on the min and max supported Python versions. 29 | # It's extremely unlikely that there's a lint issue on any version in between 30 | # that doesn't show up on the min or max versions. 31 | # 32 | # GitHub rate-limits how many jobs can be running at any one time. 33 | # Starting new jobs is also relatively slow, 34 | # so linting on fewer versions makes CI faster. 35 | python-version: 36 | - "3.8" 37 | - "3.11" 38 | steps: 39 | - uses: actions/checkout@v4 40 | 41 | - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }} 42 | uses: "./.github/actions/poetry_setup" 43 | with: 44 | python-version: ${{ matrix.python-version }} 45 | poetry-version: ${{ env.POETRY_VERSION }} 46 | working-directory: ${{ inputs.working-directory }} 47 | cache-key: lint-with-extras 48 | 49 | - name: Check Poetry File 50 | shell: bash 51 | working-directory: ${{ inputs.working-directory }} 52 | run: | 53 | poetry check 54 | 55 | - name: Check lock file 56 | shell: bash 57 | working-directory: ${{ inputs.working-directory }} 58 | run: | 59 | poetry lock --check 60 | 61 | - name: Install dependencies 62 | # Also installs dev/lint/test/typing dependencies, to ensure we have 63 | # type hints for as many of our libraries as possible. 64 | # This helps catch errors that require dependencies to be spotted, for example: 65 | # https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341 66 | # 67 | # If you change this configuration, make sure to change the `cache-key` 68 | # in the `poetry_setup` action above to stop using the old cache. 69 | # It doesn't matter how you change it, any change will cause a cache-bust. 
70 | working-directory: ${{ inputs.working-directory }} 71 | run: | 72 | poetry install --with lint,typing 73 | 74 | - name: Install langchain editable 75 | working-directory: ${{ inputs.working-directory }} 76 | if: ${{ inputs.langchain-location }} 77 | env: 78 | LANGCHAIN_LOCATION: ${{ inputs.langchain-location }} 79 | run: | 80 | poetry run pip install -e "$LANGCHAIN_LOCATION" 81 | 82 | - name: Get .mypy_cache to speed up mypy 83 | uses: actions/cache@v4 84 | env: 85 | SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2" 86 | with: 87 | path: | 88 | ${{ env.WORKDIR }}/.mypy_cache 89 | key: mypy-lint-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', inputs.working-directory)) }} 90 | 91 | - name: Install unit test dependencies 92 | # Also installs dev/lint/test/typing dependencies, to ensure we have 93 | # type hints for as many of our libraries as possible. 94 | # This helps catch errors that require dependencies to be spotted, for example: 95 | # https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341 96 | # 97 | # If you change this configuration, make sure to change the `cache-key` 98 | # in the `poetry_setup` action above to stop using the old cache. 99 | # It doesn't matter how you change it, any change will cause a cache-bust. 100 | if: ${{ ! startsWith(inputs.working-directory, 'libs/partners/') }} 101 | working-directory: ${{ inputs.working-directory }} 102 | run: | 103 | poetry install --with test 104 | 105 | - name: Analysing the code with our lint 106 | working-directory: ${{ inputs.working-directory }} 107 | run: | 108 | make lint_package 109 | 110 | - name: Install unit+integration test dependencies 111 | if: ${{ startsWith(inputs.working-directory, 'libs/partners/') }} 112 | working-directory: ${{ inputs.working-directory }} 113 | run: | 114 | poetry install --with test,test_integration 115 | 116 | - name: Get .mypy_cache_test to speed up mypy 117 | uses: actions/cache@v4 118 | env: 119 | SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2" 120 | with: 121 | path: | 122 | ${{ env.WORKDIR }}/.mypy_cache_test 123 | key: mypy-test-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', inputs.working-directory)) }} 124 | 125 | - name: Analysing the code with our lint 126 | working-directory: ${{ inputs.working-directory }} 127 | run: | 128 | make lint_tests 129 | -------------------------------------------------------------------------------- /.github/workflows/_scheduled_test.yml: -------------------------------------------------------------------------------- 1 | name: Scheduled tests 2 | run-name: langchain-nvidia Scheduled tests 3 | 4 | on: 5 | workflow_dispatch: 6 | schedule: 7 | - cron: '0 8 * * *' 8 | 9 | env: 10 | POETRY_VERSION: "1.7.1" 11 | 12 | jobs: 13 | build: 14 | name: Python ${{ matrix.python-version }} - ${{ matrix.working-directory }} 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: 20 | - "3.8" 21 | - "3.11" 22 | working-directory: 23 | - "libs/ai-endpoints" 24 | - "libs/trt" 25 | 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - name: Set up Python + Poetry ${{ env.POETRY_VERSION }} 30 | uses: "./.github/actions/poetry_setup" 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | poetry-version: ${{ env.POETRY_VERSION }} 34 | working-directory: ${{ matrix.working-directory }} 35 | 
cache-key: scheduled 36 | 37 | - name: Install dependencies 38 | run: poetry install --with test,test_integration 39 | working-directory: ${{ matrix.working-directory }} 40 | 41 | - name: Run unit tests 42 | run: make tests 43 | working-directory: ${{ matrix.working-directory }} 44 | 45 | - name: Run integration tests 46 | env: 47 | NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} 48 | run: make integration_tests 49 | working-directory: ${{ matrix.working-directory }} 50 | 51 | 52 | - name: Ensure the tests did not create any additional files 53 | working-directory: ${{ matrix.working-directory }} 54 | run: | 55 | set -eu 56 | 57 | STATUS="$(git status)" 58 | echo "$STATUS" 59 | 60 | # grep will exit non-zero if the target message isn't found, 61 | # and `set -e` above will cause the step to fail. 62 | echo "$STATUS" | grep 'nothing to commit, working tree clean' 63 | -------------------------------------------------------------------------------- /.github/workflows/_test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | working-directory: 7 | required: true 8 | type: string 9 | description: "From which folder this pipeline executes" 10 | langchain-location: 11 | required: false 12 | type: string 13 | description: "Relative path to the langchain library folder" 14 | 15 | env: 16 | POETRY_VERSION: "1.7.1" 17 | 18 | jobs: 19 | build: 20 | defaults: 21 | run: 22 | working-directory: ${{ inputs.working-directory }} 23 | runs-on: ubuntu-latest 24 | strategy: 25 | matrix: 26 | python-version: 27 | - "3.8" 28 | - "3.9" 29 | - "3.10" 30 | - "3.11" 31 | name: "make test #${{ matrix.python-version }}" 32 | steps: 33 | - uses: actions/checkout@v4 34 | 35 | - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }} 36 | uses: "./.github/actions/poetry_setup" 37 | with: 38 | python-version: ${{ matrix.python-version }} 39 | poetry-version: ${{ env.POETRY_VERSION }} 40 | working-directory: ${{ inputs.working-directory }} 41 | cache-key: core 42 | 43 | - name: Install dependencies 44 | shell: bash 45 | run: poetry install --with test 46 | 47 | - name: Install langchain editable 48 | working-directory: ${{ inputs.working-directory }} 49 | if: ${{ inputs.langchain-location }} 50 | env: 51 | LANGCHAIN_LOCATION: ${{ inputs.langchain-location }} 52 | run: | 53 | poetry run pip install -e "$LANGCHAIN_LOCATION" 54 | 55 | - name: Run core tests 56 | shell: bash 57 | run: | 58 | make test 59 | 60 | - name: Ensure the tests did not create any additional files 61 | shell: bash 62 | run: | 63 | set -eu 64 | 65 | STATUS="$(git status)" 66 | echo "$STATUS" 67 | 68 | # grep will exit non-zero if the target message isn't found, 69 | # and `set -e` above will cause the step to fail. 
70 | echo "$STATUS" | grep 'nothing to commit, working tree clean' 71 | -------------------------------------------------------------------------------- /.github/workflows/_test_release.yml: -------------------------------------------------------------------------------- 1 | name: test-release 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | working-directory: 7 | required: true 8 | type: string 9 | description: "From which folder this pipeline executes" 10 | 11 | env: 12 | POETRY_VERSION: "1.7.1" 13 | PYTHON_VERSION: "3.10" 14 | 15 | jobs: 16 | build: 17 | if: github.ref == 'refs/heads/main' 18 | runs-on: ubuntu-latest 19 | 20 | outputs: 21 | pkg-name: ${{ steps.check-version.outputs.pkg-name }} 22 | version: ${{ steps.check-version.outputs.version }} 23 | 24 | steps: 25 | - uses: actions/checkout@v4 26 | 27 | - name: Set up Python + Poetry ${{ env.POETRY_VERSION }} 28 | uses: "./.github/actions/poetry_setup" 29 | with: 30 | python-version: ${{ env.PYTHON_VERSION }} 31 | poetry-version: ${{ env.POETRY_VERSION }} 32 | working-directory: ${{ inputs.working-directory }} 33 | cache-key: release 34 | 35 | # We want to keep this build stage *separate* from the release stage, 36 | # so that there's no sharing of permissions between them. 37 | # The release stage has trusted publishing and GitHub repo contents write access, 38 | # and we want to keep the scope of that access limited just to the release job. 39 | # Otherwise, a malicious `build` step (e.g. via a compromised dependency) 40 | # could get access to our GitHub or PyPI credentials. 41 | # 42 | # Per the trusted publishing GitHub Action: 43 | # > It is strongly advised to separate jobs for building [...] 44 | # > from the publish job. 45 | # https://github.com/pypa/gh-action-pypi-publish#non-goals 46 | - name: Build project for distribution 47 | run: poetry build 48 | working-directory: ${{ inputs.working-directory }} 49 | 50 | - name: Upload build 51 | uses: actions/upload-artifact@v4 52 | with: 53 | name: test-dist 54 | path: ${{ inputs.working-directory }}/dist/ 55 | 56 | - name: Check Version 57 | id: check-version 58 | shell: bash 59 | working-directory: ${{ inputs.working-directory }} 60 | run: | 61 | echo pkg-name="$(poetry version | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT 62 | echo version="$(poetry version --short)" >> $GITHUB_OUTPUT 63 | 64 | publish: 65 | needs: 66 | - build 67 | runs-on: ubuntu-latest 68 | permissions: 69 | # This permission is used for trusted publishing: 70 | # https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/ 71 | # 72 | # Trusted publishing has to also be configured on PyPI for each package: 73 | # https://docs.pypi.org/trusted-publishers/adding-a-publisher/ 74 | id-token: write 75 | 76 | steps: 77 | - uses: actions/checkout@v4 78 | 79 | - uses: actions/download-artifact@v4 80 | with: 81 | name: test-dist 82 | path: ${{ inputs.working-directory }}/dist/ 83 | 84 | - name: Publish to test PyPI 85 | uses: pypa/gh-action-pypi-publish@release/v1 86 | with: 87 | packages-dir: ${{ inputs.working-directory }}/dist/ 88 | verbose: true 89 | print-hash: true 90 | repository-url: https://test.pypi.org/legacy/ 91 | 92 | # We overwrite any existing distributions with the same name and version. 93 | # This is *only for CI use* and is *extremely dangerous* otherwise! 
94 | # https://github.com/pypa/gh-action-pypi-publish#tolerating-release-package-file-duplicates 95 | skip-existing: true 96 | # Temp workaround since attestations are on by default as of gh-action-pypi-publish v1.11.0 97 | attestations: false 98 | -------------------------------------------------------------------------------- /.github/workflows/check_diffs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: CI 3 | 4 | on: 5 | push: 6 | branches: [main] 7 | pull_request: 8 | 9 | # If another push to the same PR or branch happens while this workflow is still running, 10 | # cancel the earlier run in favor of the next run. 11 | # 12 | # There's no point in testing an outdated version of the code. GitHub only allows 13 | # a limited number of job runners to be active at the same time, so it's better to cancel 14 | # pointless jobs early so that more useful jobs can run sooner. 15 | concurrency: 16 | group: ${{ github.workflow }}-${{ github.ref }} 17 | cancel-in-progress: true 18 | 19 | env: 20 | POETRY_VERSION: "1.7.1" 21 | 22 | jobs: 23 | build: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v4 27 | - uses: actions/setup-python@v5 28 | with: 29 | python-version: '3.10' 30 | - id: files 31 | uses: Ana06/get-changed-files@v2.2.0 32 | - id: set-matrix 33 | run: | 34 | python .github/scripts/check_diff.py ${{ steps.files.outputs.all }} >> $GITHUB_OUTPUT 35 | outputs: 36 | dirs-to-lint: ${{ steps.set-matrix.outputs.dirs-to-lint }} 37 | dirs-to-test: ${{ steps.set-matrix.outputs.dirs-to-test }} 38 | lint: 39 | name: cd ${{ matrix.working-directory }} 40 | needs: [ build ] 41 | if: ${{ needs.build.outputs.dirs-to-lint != '[]' }} 42 | strategy: 43 | matrix: 44 | working-directory: ${{ fromJson(needs.build.outputs.dirs-to-lint) }} 45 | uses: ./.github/workflows/_lint.yml 46 | with: 47 | working-directory: ${{ matrix.working-directory }} 48 | secrets: inherit 49 | 50 | test: 51 | name: cd ${{ matrix.working-directory }} 52 | needs: [ build ] 53 | if: ${{ needs.build.outputs.dirs-to-test != '[]' }} 54 | strategy: 55 | matrix: 56 | working-directory: ${{ fromJson(needs.build.outputs.dirs-to-test) }} 57 | uses: ./.github/workflows/_test.yml 58 | with: 59 | working-directory: ${{ matrix.working-directory }} 60 | secrets: inherit 61 | 62 | compile-integration-tests: 63 | name: cd ${{ matrix.working-directory }} 64 | needs: [ build ] 65 | if: ${{ needs.build.outputs.dirs-to-test != '[]' }} 66 | strategy: 67 | matrix: 68 | working-directory: ${{ fromJson(needs.build.outputs.dirs-to-test) }} 69 | uses: ./.github/workflows/_compile_integration_test.yml 70 | with: 71 | working-directory: ${{ matrix.working-directory }} 72 | secrets: inherit 73 | ci_success: 74 | name: "CI Success" 75 | needs: [build, lint, test, compile-integration-tests] 76 | if: | 77 | always() 78 | runs-on: ubuntu-latest 79 | env: 80 | JOBS_JSON: ${{ toJSON(needs) }} 81 | RESULTS_JSON: ${{ toJSON(needs.*.result) }} 82 | EXIT_CODE: ${{!contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && '0' || '1'}} 83 | steps: 84 | - name: "CI Success" 85 | run: | 86 | echo $JOBS_JSON 87 | echo $RESULTS_JSON 88 | echo "Exiting with $EXIT_CODE" 89 | exit $EXIT_CODE 90 | -------------------------------------------------------------------------------- /.github/workflows/extract_ignored_words_list.py: -------------------------------------------------------------------------------- 1 | import toml 2 | 3 | pyproject_toml = toml.load("pyproject.toml") 4 | 5 | # 
Extract the ignore words list (adjust the key as per your TOML structure) 6 | ignore_words_list = ( 7 | pyproject_toml.get("tool", {}).get("codespell", {}).get("ignore-words-list") 8 | ) 9 | 10 | print(f"::set-output name=ignore_words_list::{ignore_words_list}") 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vs/ 2 | .vscode/ 3 | .idea/ 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | docs/docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | notebooks/ 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .envrc 112 | .venv 113 | .venvs 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | # macOS display setting files 139 | .DS_Store 140 | 141 | # Wandb directory 142 | wandb/ 143 | 144 | # asdf tool versions 145 | .tool-versions 146 | /.ruff_cache/ 147 | 148 | *.pkl 149 | *.bin 150 | 151 | # integration test artifacts 152 | data_map* 153 | \[('_type', 'fake'), ('stop', None)] 154 | 155 | # Replit files 156 | *replit* 157 | 158 | node_modules 159 | docs/.yarn/ 160 | docs/node_modules/ 161 | docs/.docusaurus/ 162 | docs/.cache-loader/ 163 | docs/_dist 164 | docs/api_reference/*api_reference.rst 165 | docs/api_reference/_build 166 | docs/api_reference/*/ 167 | !docs/api_reference/_static/ 168 | !docs/api_reference/templates/ 169 | !docs/api_reference/themes/ 170 | docs/docs/build 171 | docs/docs/node_modules 172 | docs/docs/yarn.lock 173 | _dist 174 | docs/docs/templates -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 LangChain 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🦜️🔗 LangChain NVIDIA 2 | 3 | ## Packages 4 | 5 | This repository contains two packages with NVIDIA integrations with LangChain: 6 | - [langchain-nvidia-ai-endpoints](https://pypi.org/project/langchain-nvidia-ai-endpoints/) integrates [NVIDIA AI Foundation Models and Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/). 7 | - [langchain-nvidia-trt](https://pypi.org/project/langchain-nvidia-trt/) implements integrations of NVIDIA [TensorRT](https://developer.nvidia.com/tensorrt) models. 
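For orientation, here is a minimal sketch of how the `langchain-nvidia-ai-endpoints` package is typically used. It mirrors the provider documentation further down in this repository; the model name is just an example taken from those docs, and the snippet assumes an `NVIDIA_API_KEY` for the hosted API catalog.

```python
import os
from getpass import getpass

from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings

# Hosted API catalog endpoints expect an NVIDIA_API_KEY that starts with "nvapi-".
if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    os.environ["NVIDIA_API_KEY"] = getpass("NVAPI Key (starts with nvapi-): ")

# Chat completions against a catalog-hosted model (example model name from the docs below).
llm = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1")
print(llm.invoke("Write a haiku about GPUs.").content)

# Embeddings for retrieval workflows such as the agentic RAG cookbook and studio examples.
embedder = NVIDIAEmbeddings()
print(len(embedder.embed_query("What is retrieval-augmented generation?")))
```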
8 | 9 | ## Testing 10 | 11 | ### Cookbooks 12 | 13 | See the notebooks in the [cookbook](./cookbook) directory for examples of using `ChatNVIDIA` and `NVIDIAEmbeddings` with LangGraph for agentic RAG and tool-calling agents. 14 | 15 | ### Studio 16 | 17 | See the [studio](./studio) directory to test the agentic RAG workflow in LangGraph Studio. 18 | 19 | Simply load the `studio` directory in [LangGraph Studio](https://github.com/langchain-ai/langgraph-studio?tab=readme-ov-file#download) and click the "Run" button with an input question. 20 | 21 | This will run agentic RAG where it first reflects on the question to decide whether to use web search or vectorstore retrieval. It also grades retrieved documents as well as generated answers. 22 | 23 | ![Screenshot 2024-12-04 at 11 19 54 AM](https://github.com/user-attachments/assets/736544ff-6597-4eb4-89d1-e1e5863baad4) 24 | -------------------------------------------------------------------------------- /cookbook/img/structured_report_generation_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/cookbook/img/structured_report_generation_arch.png -------------------------------------------------------------------------------- /cookbook/structured_report_generation_elastic/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Function to print messages 5 | log() { 6 | echo "[`date +"%Y-%m-%d %H:%M:%S"`] $1" 7 | } 8 | 9 | # Check if container exists and remove it if it does 10 | log "Checking for existing es_test container..." 11 | if docker ps -a | grep -q es_test; then 12 | log "Found existing es_test container. Removing it..." 13 | docker rm -f es_test 14 | fi 15 | 16 | # 1. Start Elasticsearch server in Docker (single-node for testing) with security disabled 17 | log "Starting Elasticsearch Docker container..." 18 | docker run -d --name es_test -p 9200:9200 \ 19 | -e "discovery.type=single-node" \ 20 | -e "xpack.security.enabled=false" \ 21 | docker.elastic.co/elasticsearch/elasticsearch:8.9.0 22 | 23 | # Wait until Elasticsearch is responsive 24 | log "Waiting for Elasticsearch to be available on http://localhost:9200 ..." 25 | until curl -s http://localhost:9200 >/dev/null; do 26 | sleep 1 27 | done 28 | log "Elasticsearch is up and running." 29 | 30 | # 2. Create an index named "weather" with mappings and populate with sample data 31 | 32 | log "Creating the 'weather' index with mappings..." 33 | curl -s -X PUT "http://localhost:9200/weather?pretty" -H 'Content-Type: application/json' -d' 34 | { 35 | "mappings": { 36 | "properties": { 37 | "city": { "type": "keyword" }, 38 | "country": { "type": "keyword" }, 39 | "temperature": { "type": "float" }, 40 | "condition": { "type": "text" }, 41 | "timestamp": { "type": "date" } 42 | } 43 | } 44 | } 45 | ' 46 | log "'weather' index created." 47 | 48 | log "Indexing dummy weather data..." 
49 | 50 | # New York, USA 51 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 52 | { 53 | "city": "New York", 54 | "country": "USA", 55 | "temperature": 22.5, 56 | "condition": "Sunny", 57 | "timestamp": "2025-02-02T12:00:00Z" 58 | } 59 | ' 60 | 61 | # London, UK 62 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 63 | { 64 | "city": "London", 65 | "country": "UK", 66 | "temperature": 16.0, 67 | "condition": "Cloudy", 68 | "timestamp": "2025-02-02T12:05:00Z" 69 | } 70 | ' 71 | 72 | # Paris, France 73 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 74 | { 75 | "city": "Paris", 76 | "country": "France", 77 | "temperature": 18.3, 78 | "condition": "Rainy", 79 | "timestamp": "2025-02-02T12:10:00Z" 80 | } 81 | ' 82 | 83 | # Tokyo, Japan 84 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 85 | { 86 | "city": "Tokyo", 87 | "country": "Japan", 88 | "temperature": 24.0, 89 | "condition": "Clear", 90 | "timestamp": "2025-02-02T12:15:00Z" 91 | } 92 | ' 93 | 94 | # Berlin, Germany 95 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 96 | { 97 | "city": "Berlin", 98 | "country": "Germany", 99 | "temperature": 14.7, 100 | "condition": "Overcast", 101 | "timestamp": "2025-02-02T12:20:00Z" 102 | } 103 | ' 104 | 105 | # Sydney, Australia 106 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 107 | { 108 | "city": "Sydney", 109 | "country": "Australia", 110 | "temperature": 26.4, 111 | "condition": "Sunny", 112 | "timestamp": "2025-02-02T12:25:00Z" 113 | } 114 | ' 115 | 116 | # Moscow, Russia 117 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 118 | { 119 | "city": "Moscow", 120 | "country": "Russia", 121 | "temperature": -5.0, 122 | "condition": "Snowy", 123 | "timestamp": "2025-02-02T12:30:00Z" 124 | } 125 | ' 126 | 127 | # Beijing, China 128 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 129 | { 130 | "city": "Beijing", 131 | "country": "China", 132 | "temperature": 10.2, 133 | "condition": "Smoggy", 134 | "timestamp": "2025-02-02T12:35:00Z" 135 | } 136 | ' 137 | 138 | # Rio de Janeiro, Brazil 139 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 140 | { 141 | "city": "Rio de Janeiro", 142 | "country": "Brazil", 143 | "temperature": 28.1, 144 | "condition": "Humid", 145 | "timestamp": "2025-02-02T12:40:00Z" 146 | } 147 | ' 148 | 149 | # Cape Town, South Africa 150 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 151 | { 152 | "city": "Cape Town", 153 | "country": "South Africa", 154 | "temperature": 20.3, 155 | "condition": "Windy", 156 | "timestamp": "2025-02-02T12:45:00Z" 157 | } 158 | ' 159 | 160 | # Mumbai, India 161 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 162 | { 163 | "city": "Mumbai", 164 | "country": "India", 165 | "temperature": 30.5, 166 | "condition": "Humid", 167 | "timestamp": "2025-02-02T12:50:00Z" 168 | } 169 | ' 170 | 171 | # San Francisco, USA 172 | curl -s -X POST "http://localhost:9200/weather/_doc?pretty" -H 'Content-Type: application/json' -d' 173 | { 174 | "city": "San Francisco", 175 | "country": 
"USA", 176 | "temperature": 17.8, 177 | "condition": "Foggy", 178 | "timestamp": "2025-02-02T12:55:00Z" 179 | } 180 | ' 181 | 182 | # Refresh the index to make sure documents are searchable immediately. 183 | log "Refreshing the index..." 184 | curl -s -X POST "http://localhost:9200/weather/_refresh?pretty" 185 | 186 | # 3. Test: List all indices to verify the "weather" index is up 187 | log "Testing: Listing all indices..." 188 | curl -X GET "http://localhost:9200/_cat/indices?v&pretty" 189 | 190 | log "Elasticsearch setup complete. The 'weather' index is populated with expanded dummy weather data." 191 | 192 | # Uncomment the following line to stop and remove the container after testing 193 | # docker rm -f es_test 194 | -------------------------------------------------------------------------------- /libs/ai-endpoints/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | -------------------------------------------------------------------------------- /libs/ai-endpoints/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 LangChain, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /libs/ai-endpoints/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all format lint test tests integration_tests help 2 | 3 | # Default target executed when no arguments are given to make. 4 | all: help 5 | 6 | # Define a variable for the test file path. 7 | TEST_FILE ?= tests/unit_tests/ 8 | 9 | test: 10 | poetry run pytest $(PYTEST_ARGS) $(TEST_FILE) 11 | 12 | tests: 13 | poetry run pytest $(PYTEST_ARGS) $(TEST_FILE) 14 | 15 | check_imports: $(shell find langchain_nvidia_ai_endpoints -name '*.py') 16 | poetry run python ./scripts/check_imports.py $^ 17 | 18 | integration_tests: 19 | poetry run pytest tests/integration_tests $(PYTEST_ARGS) 20 | 21 | 22 | ###################### 23 | # LINTING AND FORMATTING 24 | ###################### 25 | 26 | # Define a variable for Python and notebook files. 27 | PYTHON_FILES=. 28 | MYPY_CACHE=.mypy_cache 29 | lint format: PYTHON_FILES=. 
30 | lint_diff format_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$') 31 | lint_package: PYTHON_FILES=langchain_nvidia_ai_endpoints 32 | lint_tests: PYTHON_FILES=tests 33 | lint_tests: MYPY_CACHE=.mypy_cache_test 34 | 35 | lint lint_diff lint_package lint_tests: 36 | ./scripts/lint_imports.sh 37 | poetry run ruff . 38 | [ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff 39 | [ "$(PYTHON_FILES)" = "" ] || poetry run mypy $(PYTHON_FILES) 40 | 41 | format format_diff: 42 | poetry run ruff format $(PYTHON_FILES) 43 | poetry run ruff --select I --fix $(PYTHON_FILES) 44 | 45 | spell_check: 46 | poetry run codespell --toml pyproject.toml 47 | 48 | spell_fix: 49 | poetry run codespell --toml pyproject.toml -w 50 | 51 | ###################### 52 | # HELP 53 | ###################### 54 | 55 | help: 56 | @echo '----' 57 | @echo 'format - run code formatters' 58 | @echo 'lint - run linters' 59 | @echo 'test - run unit tests' 60 | @echo 'tests - run unit tests' 61 | @echo 'test TEST_FILE= - run all tests in file' 62 | -------------------------------------------------------------------------------- /libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NVIDIA NIMs\n", 8 | "\n", 9 | ":::caution\n", 10 | "You are currently on a page documenting the use of models as [text completion models](/docs/concepts/#llms).\n", 11 | "Many popular models are [chat completion models](/docs/concepts/#chat-models).\n", 12 | "\n", 13 | "To use chat completion models, use [ChatNVIDIA](/docs/integrations/chat/nvidia_ai_endpoints/) instead.\n", 14 | ":::\n", 15 | "\n", 16 | "The `langchain-nvidia-ai-endpoints` package contains LangChain integrations building applications with models on \n", 17 | "NVIDIA NIM inference microservice. NIM supports models across domains like chat, completion, embedding, and re-ranking models \n", 18 | "from the community as well as NVIDIA. These models are optimized by NVIDIA to deliver the best performance on NVIDIA \n", 19 | "accelerated infrastructure and deployed as a NIM, an easy-to-use, prebuilt containers that deploy anywhere using a single \n", 20 | "command on NVIDIA accelerated infrastructure.\n", 21 | "\n", 22 | "NVIDIA hosted deployments of NIMs are available to test on the [NVIDIA API catalog](https://build.nvidia.com/). After testing, \n", 23 | "NIMs can be exported from NVIDIA’s API catalog using the NVIDIA AI Enterprise license and run on-premises or in the cloud, \n", 24 | "giving enterprises ownership and full control of their IP and AI application.\n", 25 | "\n", 26 | "NIMs are packaged as container images on a per model basis and are distributed as NGC container images through the NVIDIA NGC Catalog. 
\n", 27 | "At their core, NIMs provide easy, consistent, and familiar APIs for running inference on an AI model.\n", 28 | "\n", 29 | "This example goes over how to use LangChain to interact with NVIDIA supported via the `NVIDIA` class.\n", 30 | "\n", 31 | "For more information on accessing the completion models through this api, check out the [NVIDIA](https://python.langchain.com/docs/integrations/llms/nvidia_ai_endpoints/) documentation.\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Installation" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "#%pip install -qU langchain-nvidia-ai-endpoints" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "## Setup\n", 55 | "\n", 56 | "**To get started:**\n", 57 | "\n", 58 | "1. Create a free account with [NVIDIA](https://build.nvidia.com/), which hosts NVIDIA AI Foundation models.\n", 59 | "\n", 60 | "2. Click on your model of choice.\n", 61 | "\n", 62 | "3. Under `Input` select the `Python` tab, and click `Get API Key`. Then click `Generate Key`.\n", 63 | "\n", 64 | "4. Copy and save the generated key as `NVIDIA_API_KEY`. From there, you should have access to the endpoints." 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "import os\n", 74 | "from getpass import getpass\n", 75 | "\n", 76 | "# del os.environ['NVIDIA_API_KEY'] ## delete key and reset\n", 77 | "if os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n", 78 | " print(\"Valid NVIDIA_API_KEY already in environment. Delete to reset\")\n", 79 | "else:\n", 80 | " candidate_api_key = getpass(\"NVAPI Key (starts with nvapi-): \")\n", 81 | " assert candidate_api_key.startswith(\"nvapi-\"), f\"{candidate_api_key[:5]}... is not a valid key\"\n", 82 | " os.environ[\"NVIDIA_API_KEY\"] = candidate_api_key" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "## Usage\n", 90 | "\n", 91 | "See [LLM](/docs/how_to#llms) for full functionality." 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "from langchain_nvidia_ai_endpoints import NVIDIA" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "llm = NVIDIA().bind(max_tokens=256)\n", 110 | "llm" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "prompt = \"# Function that does quicksort written in Rust without comments:\"" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "print(llm.invoke(prompt))" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## Stream, Batch, and Async\n", 136 | "\n", 137 | "These models natively support streaming, and as is the case with all LangChain LLMs they expose a batch method to handle concurrent requests, as well as async methods for invoke, stream, and batch. Below are a few examples." 
138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "for chunk in llm.stream(prompt):\n", 147 | " print(chunk, end=\"\", flush=True)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "llm.batch([prompt])" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "await llm.ainvoke(prompt)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "async for chunk in llm.astream(prompt):\n", 175 | " print(chunk, end=\"\", flush=True)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "await llm.abatch([prompt])" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "async for chunk in llm.astream_log(prompt):\n", 194 | " print(chunk)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "response = llm.invoke(\n", 204 | " \"X_train, y_train, X_test, y_test = train_test_split(X, y, test_size=0.1) #Train a logistic regression model, predict the labels on the test set and compute the accuracy score\"\n", 205 | ")\n", 206 | "print(response)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "## Supported models\n", 214 | "\n", 215 | "Querying `available_models` will still give you all of the other models offered by your API credentials." 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "NVIDIA.get_available_models()\n", 225 | "# llm.get_available_models()" 226 | ] 227 | } 228 | ], 229 | "metadata": { 230 | "kernelspec": { 231 | "display_name": "langchain-nvidia-ai-endpoints-m0-Y4aGr-py3.10", 232 | "language": "python", 233 | "name": "python3" 234 | }, 235 | "language_info": { 236 | "codemirror_mode": { 237 | "name": "ipython", 238 | "version": 3 239 | }, 240 | "file_extension": ".py", 241 | "mimetype": "text/x-python", 242 | "name": "python", 243 | "nbconvert_exporter": "python", 244 | "pygments_lexer": "ipython3", 245 | "version": "3.10.14" 246 | } 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 2 250 | } 251 | -------------------------------------------------------------------------------- /libs/ai-endpoints/docs/providers/nvidia.mdx: -------------------------------------------------------------------------------- 1 | # NVIDIA 2 | The `langchain-nvidia-ai-endpoints` package contains LangChain integrations building applications with models on 3 | NVIDIA NIM inference microservice. NIM supports models across domains like chat, embedding, and re-ranking models 4 | from the community as well as NVIDIA. These models are optimized by NVIDIA to deliver the best performance on NVIDIA 5 | accelerated infrastructure and deployed as a NIM, an easy-to-use, prebuilt containers that deploy anywhere using a single 6 | command on NVIDIA accelerated infrastructure. 7 | 8 | NVIDIA hosted deployments of NIMs are available to test on the [NVIDIA API catalog](https://build.nvidia.com/). 
After testing, 9 | NIMs can be exported from NVIDIA’s API catalog using the NVIDIA AI Enterprise license and run on-premises or in the cloud, 10 | giving enterprises ownership and full control of their IP and AI applications. 11 | 12 | NIMs are packaged as container images on a per model basis and are distributed as NGC container images through the NVIDIA NGC Catalog. 13 | At their core, NIMs provide easy, consistent, and familiar APIs for running inference on an AI model. 14 | 15 | Below is an example of how to use some common functionality surrounding text-generative and embedding models. 16 | 17 | ## Installation 18 | 19 | ```python 20 | pip install -U --quiet langchain-nvidia-ai-endpoints 21 | ``` 22 | 23 | ## Setup 24 | 25 | **To get started:** 26 | 27 | 1. Create a free account with [NVIDIA](https://build.nvidia.com/), which hosts NVIDIA AI Foundation models. 28 | 29 | 2. Click on your model of choice. 30 | 31 | 3. Under Input select the Python tab, and click `Get API Key`. Then click `Generate Key`. 32 | 33 | 4. Copy and save the generated key as NVIDIA_API_KEY. From there, you should have access to the endpoints. 34 | 35 | ```python 36 | import getpass 37 | import os 38 | 39 | if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"): 40 | nvidia_api_key = getpass.getpass("Enter your NVIDIA API key: ") 41 | assert nvidia_api_key.startswith("nvapi-"), f"{nvidia_api_key[:5]}... is not a valid key" 42 | os.environ["NVIDIA_API_KEY"] = nvidia_api_key 43 | ``` 44 | ## Working with NVIDIA API Catalog 45 | 46 | ```python 47 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 48 | 49 | llm = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1") 50 | result = llm.invoke("Write a ballad about LangChain.") 51 | print(result.content) 52 | ``` 53 | 54 | Using the API, you can query live endpoints available on the NVIDIA API Catalog to get quick results from a DGX-hosted cloud compute environment. All models are source-accessible and can be deployed on your own compute cluster using NVIDIA NIM, which is part of NVIDIA AI Enterprise, shown in the next section [Working with NVIDIA NIMs](#working-with-nvidia-nims). 55 | 56 | ## Working with NVIDIA NIMs 57 | When ready to deploy, you can self-host models with NVIDIA NIM—which is included with the NVIDIA AI Enterprise software license—and run them anywhere, giving you ownership of your customizations and full control of your intellectual property (IP) and AI applications. 58 | 59 | [Learn more about NIMs](https://developer.nvidia.com/blog/nvidia-nim-offers-optimized-inference-microservices-for-deploying-ai-models-at-scale/) 60 | 61 | ```python 62 | from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank 63 | 64 | # connect to a chat NIM running at localhost:8000, specifying a specific model 65 | llm = ChatNVIDIA(base_url="http://localhost:8000/v1", model="meta/llama3-8b-instruct") 66 | 67 | # connect to an embedding NIM running at localhost:8080 68 | embedder = NVIDIAEmbeddings(base_url="http://localhost:8080/v1") 69 | 70 | # connect to a reranking NIM running at localhost:2016 71 | ranker = NVIDIARerank(base_url="http://localhost:2016/v1") 72 | ``` 73 | 74 | ## Using NVIDIA AI Foundation Endpoints 75 | 76 | A selection of NVIDIA AI Foundation models are supported directly in LangChain with familiar APIs. 77 | 78 | The active models which are supported can be found [in the API Catalog](https://build.nvidia.com/).
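You can also check programmatically which models your credentials can reach. A minimal sketch using the `get_available_models` helper (the exact set of models returned depends on your API key and base URL):

```python
from langchain_nvidia_ai_endpoints import ChatNVIDIA

# list the chat models available to the current NVIDIA_API_KEY / base_url
for model in ChatNVIDIA.get_available_models():
    print(model.id, model.model_type)
```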
79 | 80 | **The following may be useful examples to help you get started:** 81 | - **[`ChatNVIDIA` Model](https://github.com/langchain-ai/langchain/blob/master/docs/docs/integrations/chat/nvidia_ai_endpoints.ipynb).** 82 | - **[`NVIDIAEmbeddings` Model for retrieval in RAG Workflows](https://github.com/langchain-ai/langchain/blob/master/docs/docs/integrations/text_embedding/nvidia_ai_endpoints.ipynb).** -------------------------------------------------------------------------------- /libs/ai-endpoints/langchain_nvidia.py: -------------------------------------------------------------------------------- 1 | """ 2 | **LangChain NVIDIA AI Endpoints** 3 | 4 | This comprehensive module integrates NVIDIA's state-of-the-art NIM endpoints, 5 | featuring advanced models for conversational AI and semantic embeddings, 6 | into the LangChain framework. It provides robust classes for seamless interaction 7 | with AI models, particularly tailored for enriching conversational experiences 8 | and enhancing semantic understanding in various applications. 9 | 10 | **Features:** 11 | 12 | 1. **`ChatNVIDIA`:** This class serves as the primary interface for interacting 13 | with chat models. Users can effortlessly utilize advanced models like 'Nemotron' 14 | to engage in rich, context-aware conversations, applicable across diverse 15 | domains from customer support to interactive storytelling. 16 | 17 | 2. **`NVIDIAEmbeddings`:** The class offers capabilities to generate sophisticated 18 | embeddings using AI models. These embeddings are instrumental for tasks like 19 | semantic analysis, text similarity assessments, and contextual understanding, 20 | significantly enhancing the depth of NLP applications. 21 | 22 | 3. **`NVIDIARerank`:** This class provides an interface for reranking search results 23 | using AI models. Users can leverage this functionality to enhance search 24 | relevance and improve user experience in information retrieval systems. 25 | 26 | 4. **`NVIDIA`:** This class enables users to interact with large language models 27 | through a completions, or prompting, interface. Users can generate text 28 | completions, summaries, and other language model outputs using this class. 29 | This class is particularly useful for code generation tasks. 
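For the reranking interface described in (3), a minimal sketch (the query and documents below are illustrative, and the default hosted reranking model is assumed):

```python
from langchain_core.documents import Document
from langchain_nvidia import NVIDIARerank

ranker = NVIDIARerank()  # defaults to the hosted reranking model
docs = [
    Document(page_content="NIMs are prebuilt inference containers."),
    Document(page_content="GPUs accelerate matrix math."),
]
ranked = ranker.compress_documents(documents=docs, query="What is a NIM?")
```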
30 | 31 | **Installation:** 32 | 33 | Install this module easily using pip: 34 | 35 | ```python 36 | pip install langchain-nvidia-ai-endpoints 37 | ``` 38 | 39 | After setting up the environment, interact with NIM endpoints - 40 | 41 | ## Utilizing chat models: 42 | 43 | ```python 44 | from langchain_nvidia import ChatNVIDIA 45 | 46 | llm = ChatNVIDIA(model="nvidia/llama-3.1-nemotron-51b-instruct") 47 | response = llm.invoke("Tell me about the LangChain integration.") 48 | ``` 49 | 50 | ## Generating semantic embeddings: 51 | 52 | Create embeddings useful in various NLP tasks: 53 | 54 | ```python 55 | from langchain_nvidia import NVIDIAEmbeddings 56 | 57 | embedder = NVIDIAEmbeddings(model="nvidia/nv-embedqa-e5-v5") 58 | embedding = embedder.embed_query("Exploring AI capabilities.") 59 | ``` 60 | 61 | ## Code completion using large language models: 62 | 63 | ```python 64 | from langchain_nvidia import NVIDIA 65 | 66 | llm = NVIDIA(model="meta/codellama-70b") 67 | completion = llm.invoke("def hello_world():") 68 | ``` 69 | """ # noqa: E501 70 | 71 | from langchain_nvidia_ai_endpoints import * # noqa: F403 72 | from langchain_nvidia_ai_endpoints import __all__ # noqa: F401 73 | -------------------------------------------------------------------------------- /libs/ai-endpoints/langchain_nvidia_ai_endpoints/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | **NOTE: You can `import langchain_nvidia` instead.** 3 | 4 | **LangChain NVIDIA AI Foundation Model Playground Integration** 5 | 6 | This comprehensive module integrates NVIDIA's state-of-the-art AI Foundation Models, featuring advanced models for conversational AI and semantic embeddings, into the LangChain framework. It provides robust classes for seamless interaction with NVIDIA's AI models, particularly tailored for enriching conversational experiences and enhancing semantic understanding in various applications. 7 | 8 | **Features:** 9 | 10 | 1. **Chat Models (`ChatNVIDIA`):** This class serves as the primary interface for interacting with NVIDIA's Foundation chat models. Users can effortlessly utilize NVIDIA's advanced models like 'Mistral' to engage in rich, context-aware conversations, applicable across diverse domains from customer support to interactive storytelling. 11 | 12 | 2. **Semantic Embeddings (`NVIDIAEmbeddings`):** The module offers capabilities to generate sophisticated embeddings using NVIDIA's AI models. These embeddings are instrumental for tasks like semantic analysis, text similarity assessments, and contextual understanding, significantly enhancing the depth of NLP applications. 
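In addition to single-query embedding, `NVIDIAEmbeddings.embed_documents` embeds passages in batches; a minimal sketch (the texts are illustrative, and the default hosted embedding model is assumed):

```python
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

embedder = NVIDIAEmbeddings()  # defaults to the hosted embedding model
vectors = embedder.embed_documents(
    ["NIMs are prebuilt inference containers.", "GPUs accelerate inference."]
)
```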
13 | 14 | **Installation:** 15 | 16 | Install this module easily using pip: 17 | 18 | ```python 19 | pip install langchain-nvidia-ai-endpoints 20 | ``` 21 | 22 | ## Utilizing Chat Models: 23 | 24 | After setting up the environment, interact with NVIDIA AI Foundation models: 25 | ```python 26 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 27 | 28 | ai_chat_model = ChatNVIDIA(model="meta/llama2-70b") 29 | response = ai_chat_model.invoke("Tell me about the LangChain integration.") 30 | ``` 31 | 32 | # Generating Semantic Embeddings: 33 | 34 | Use NVIDIA's models for creating embeddings, useful in various NLP tasks: 35 | 36 | ```python 37 | from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings 38 | 39 | embed_model = NVIDIAEmbeddings(model="nvolveqa_40k") 40 | embedding_output = embed_model.embed_query("Exploring AI capabilities.") 41 | ``` 42 | """ # noqa: E501 43 | 44 | from langchain_nvidia_ai_endpoints._statics import Model, register_model 45 | from langchain_nvidia_ai_endpoints.chat_models import ChatNVIDIA 46 | from langchain_nvidia_ai_endpoints.embeddings import NVIDIAEmbeddings 47 | from langchain_nvidia_ai_endpoints.llm import NVIDIA 48 | from langchain_nvidia_ai_endpoints.reranking import NVIDIARerank 49 | 50 | __all__ = [ 51 | "ChatNVIDIA", 52 | "NVIDIA", 53 | "NVIDIAEmbeddings", 54 | "NVIDIARerank", 55 | "register_model", 56 | "Model", 57 | ] 58 | -------------------------------------------------------------------------------- /libs/ai-endpoints/langchain_nvidia_ai_endpoints/_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import ( 4 | Any, 5 | Dict, 6 | ) 7 | 8 | from langchain_core.messages import ( 9 | AIMessage, 10 | BaseMessage, 11 | ChatMessage, 12 | FunctionMessage, 13 | HumanMessage, 14 | SystemMessage, 15 | ToolMessage, 16 | ) 17 | 18 | 19 | def convert_message_to_dict(message: BaseMessage) -> dict: 20 | """Convert a LangChain message to a dictionary. 21 | 22 | Args: 23 | message: The LangChain message. 24 | 25 | Returns: 26 | The dictionary. 
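Example (illustrative): a `HumanMessage` maps to the OpenAI-style "user" role, and the other message types map analogously per the branches below:

        convert_message_to_dict(HumanMessage(content="hello"))
        # -> {"role": "user", "content": "hello"}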
27 | """ 28 | message_dict: Dict[str, Any] 29 | if isinstance(message, ChatMessage): 30 | message_dict = {"role": message.role, "content": message.content} 31 | elif isinstance(message, HumanMessage): 32 | message_dict = {"role": "user", "content": message.content} 33 | elif isinstance(message, AIMessage): 34 | message_dict = {"role": "assistant", "content": message.content} 35 | if "function_call" in message.additional_kwargs: 36 | message_dict["function_call"] = message.additional_kwargs["function_call"] 37 | # If function call only, content is None not empty string 38 | if message_dict["content"] == "": 39 | message_dict["content"] = None 40 | if "tool_calls" in message.additional_kwargs: 41 | message_dict["tool_calls"] = message.additional_kwargs["tool_calls"] 42 | # If tool calls only, content is None not empty string 43 | if message_dict["content"] == "": 44 | message_dict["content"] = None 45 | elif isinstance(message, SystemMessage): 46 | message_dict = {"role": "system", "content": message.content} 47 | elif isinstance(message, FunctionMessage): 48 | message_dict = { 49 | "role": "function", 50 | "content": message.content, 51 | "name": message.name, 52 | } 53 | elif isinstance(message, ToolMessage): 54 | message_dict = { 55 | "role": "tool", 56 | "content": message.content, 57 | "tool_call_id": message.tool_call_id, 58 | } 59 | else: 60 | raise TypeError(f"Got unknown type {message}") 61 | if "name" in message.additional_kwargs: 62 | message_dict["name"] = message.additional_kwargs["name"] 63 | return message_dict 64 | -------------------------------------------------------------------------------- /libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Literal, Optional 2 | 3 | from langchain_core.embeddings import Embeddings 4 | from langchain_core.outputs.llm_result import LLMResult 5 | from pydantic import ( 6 | BaseModel, 7 | ConfigDict, 8 | Field, 9 | PrivateAttr, 10 | ) 11 | 12 | from langchain_nvidia_ai_endpoints._common import _NVIDIAClient 13 | from langchain_nvidia_ai_endpoints._statics import Model 14 | from langchain_nvidia_ai_endpoints.callbacks import usage_callback_var 15 | 16 | _DEFAULT_MODEL_NAME: str = "nvidia/nv-embedqa-e5-v5" 17 | _DEFAULT_BATCH_SIZE: int = 50 18 | 19 | 20 | class NVIDIAEmbeddings(BaseModel, Embeddings): 21 | """ 22 | Client to NVIDIA embeddings models. 23 | 24 | Fields: 25 | - model: str, the name of the model to use 26 | - truncate: "NONE", "START", "END", truncate input text if it exceeds the model's 27 | maximum token length. Default is "NONE", which raises an error if an input is 28 | too long. 29 | - dimensions: int, the number of dimensions for the embeddings. This parameter is 30 | not supported by all models. 31 | """ 32 | 33 | model_config = ConfigDict( 34 | validate_assignment=True, 35 | ) 36 | 37 | _client: _NVIDIAClient = PrivateAttr() 38 | base_url: Optional[str] = Field( 39 | default=None, 40 | description="Base url for model listing an invocation", 41 | ) 42 | model: Optional[str] = Field(None, description="Name of the model to invoke") 43 | truncate: Literal["NONE", "START", "END"] = Field( 44 | default="NONE", 45 | description=( 46 | "Truncate input text if it exceeds the model's maximum token length. " 47 | "Default is 'NONE', which raises an error if an input is too long." 
48 | ), 49 | ) 50 | dimensions: Optional[int] = Field( 51 | default=None, 52 | description=( 53 | "The number of dimensions for the embeddings. This parameter is not " 54 | "supported by all models." 55 | ), 56 | ) 57 | max_batch_size: int = Field(default=_DEFAULT_BATCH_SIZE) 58 | 59 | def __init__(self, **kwargs: Any): 60 | """ 61 | Create a new NVIDIAEmbeddings embedder. 62 | 63 | This class provides access to a NVIDIA NIM for embedding. By default, it 64 | connects to a hosted NIM, but can be configured to connect to a local NIM 65 | using the `base_url` parameter. An API key is required to connect to the 66 | hosted NIM. 67 | 68 | Args: 69 | model (str): The model to use for embedding. 70 | nvidia_api_key (str): The API key to use for connecting to the hosted NIM. 71 | api_key (str): Alternative to nvidia_api_key. 72 | base_url (str): The base URL of the NIM to connect to. 73 | Format for base URL is http://host:port 74 | trucate (str): "NONE", "START", "END", truncate input text if it exceeds 75 | the model's context length. Default is "NONE", which raises 76 | an error if an input is too long. 77 | dimensions (int): The number of dimensions for the embeddings. This 78 | parameter is not supported by all models. 79 | 80 | API Key: 81 | - The recommended way to provide the API key is through the `NVIDIA_API_KEY` 82 | environment variable. 83 | 84 | Base URL: 85 | - Connect to a self-hosted model with NVIDIA NIM using the `base_url` arg to 86 | link to the local host at localhost:8000: 87 | embedder = NVIDIAEmbeddings(base_url="http://localhost:8080/v1") 88 | """ 89 | super().__init__(**kwargs) 90 | # allow nvidia_base_url as an alternative for base_url 91 | base_url = kwargs.pop("nvidia_base_url", self.base_url) 92 | # allow nvidia_api_key as an alternative for api_key 93 | api_key = kwargs.pop("nvidia_api_key", kwargs.pop("api_key", None)) 94 | self._client = _NVIDIAClient( 95 | **({"base_url": base_url} if base_url else {}), # only pass if set 96 | mdl_name=self.model, 97 | default_hosted_model_name=_DEFAULT_MODEL_NAME, 98 | **({"api_key": api_key} if api_key else {}), # only pass if set 99 | infer_path="{base_url}/embeddings", 100 | cls=self.__class__.__name__, 101 | ) 102 | # todo: only store the model in one place 103 | # the model may be updated to a newer name during initialization 104 | self.model = self._client.mdl_name 105 | # same for base_url 106 | self.base_url = self._client.base_url 107 | 108 | @property 109 | def available_models(self) -> List[Model]: 110 | """ 111 | Get a list of available models that work with NVIDIAEmbeddings. 112 | """ 113 | return self._client.get_available_models(self.__class__.__name__) 114 | 115 | @classmethod 116 | def get_available_models( 117 | cls, 118 | **kwargs: Any, 119 | ) -> List[Model]: 120 | """ 121 | Get a list of available models that work with NVIDIAEmbeddings. 122 | """ 123 | return cls(**kwargs).available_models 124 | 125 | def _embed( 126 | self, texts: List[str], model_type: Literal["passage", "query"] 127 | ) -> List[List[float]]: 128 | """Embed a single text entry to either passage or query type""" 129 | # API Catalog API - 130 | # input: str | list[str] -- char limit depends on model 131 | # model: str -- model name, e.g. 
NV-Embed-QA 132 | # encoding_format: "float" | "base64" 133 | # input_type: "query" | "passage" 134 | # user: str -- ignored 135 | # truncate: "NONE" | "START" | "END" -- default "NONE", error raised if 136 | # an input is too long 137 | # dimensions: int -- not supported by all models 138 | payload: Dict[str, Any] = { 139 | "input": texts, 140 | "model": self.model, 141 | "encoding_format": "float", 142 | "input_type": model_type, 143 | } 144 | if self.truncate: 145 | payload["truncate"] = self.truncate 146 | if self.dimensions: 147 | payload["dimensions"] = self.dimensions 148 | 149 | response = self._client.get_req( 150 | payload=payload, 151 | ) 152 | response.raise_for_status() 153 | result = response.json() 154 | data = result.get("data", result) 155 | if not isinstance(data, list): 156 | raise ValueError(f"Expected data with a list of embeddings. Got: {data}") 157 | embedding_list = [(res["embedding"], res["index"]) for res in data] 158 | self._invoke_callback_vars(result) 159 | return [x[0] for x in sorted(embedding_list, key=lambda x: x[1])] 160 | 161 | def embed_query(self, text: str) -> List[float]: 162 | """Input pathway for query embeddings.""" 163 | return self._embed([text], model_type="query")[0] 164 | 165 | def embed_documents(self, texts: List[str]) -> List[List[float]]: 166 | """Input pathway for document embeddings.""" 167 | if not isinstance(texts, list) or not all( 168 | isinstance(text, str) for text in texts 169 | ): 170 | raise ValueError(f"`texts` must be a list of strings, given: {repr(texts)}") 171 | 172 | all_embeddings = [] 173 | for i in range(0, len(texts), self.max_batch_size): 174 | batch = texts[i : i + self.max_batch_size] 175 | all_embeddings.extend(self._embed(batch, model_type="passage")) 176 | return all_embeddings 177 | 178 | def _invoke_callback_vars(self, response: dict) -> None: 179 | """Invoke the callback context variables if there are any.""" 180 | callback_vars = [ 181 | usage_callback_var.get(), 182 | ] 183 | llm_output = {**response, "model_name": self.model} 184 | result = LLMResult(generations=[[]], llm_output=llm_output) 185 | for cb_var in callback_vars: 186 | if cb_var: 187 | cb_var.on_llm_end(result) 188 | -------------------------------------------------------------------------------- /libs/ai-endpoints/langchain_nvidia_ai_endpoints/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/langchain_nvidia_ai_endpoints/py.typed -------------------------------------------------------------------------------- /libs/ai-endpoints/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "langchain-nvidia-ai-endpoints" 3 | version = "0.3.10" 4 | description = "An integration package connecting NVIDIA AI Endpoints and LangChain" 5 | authors = [] 6 | readme = "README.md" 7 | repository = "https://github.com/langchain-ai/langchain-nvidia" 8 | license = "MIT" 9 | packages = [ 10 | { include = "langchain_nvidia.py" }, 11 | { include = "langchain_nvidia_ai_endpoints" }, 12 | ] 13 | 14 | [tool.poetry.urls] 15 | "Source Code" = "https://github.com/langchain-ai/langchain-nvidia/tree/main/libs/ai-endpoints" 16 | 17 | [tool.poetry.dependencies] 18 | python = ">=3.9,<4.0" 19 | langchain-core = ">=0.3.51,<0.4" 20 | aiohttp = "^3.9.1" 21 | filetype = "^1.2.0" 22 | 23 | [tool.poetry.group.test] 24 | optional = true 25 | 26 | 
[tool.poetry.group.test.dependencies] 27 | pytest = "^7.3.0" 28 | freezegun = "^1.2.2" 29 | pytest-mock = "^3.10.0" 30 | syrupy = "^4.0.2" 31 | pytest-watcher = "^0.3.4" 32 | pytest-asyncio = "^0.21.1" 33 | requests-mock = "^1.11.0" 34 | langchain-tests = "^0.3.17" 35 | faker = "^24.4.0" 36 | 37 | [tool.poetry.group.codespell] 38 | optional = true 39 | 40 | [tool.poetry.group.codespell.dependencies] 41 | codespell = "^2.2.0" 42 | 43 | [tool.poetry.group.test_integration] 44 | optional = true 45 | 46 | [tool.poetry.group.test_integration.dependencies] 47 | requests-mock = "^1.11.0" 48 | 49 | [tool.poetry.group.lint] 50 | optional = true 51 | 52 | [tool.poetry.group.lint.dependencies] 53 | ruff = "^0.1.5" 54 | 55 | [tool.poetry.group.typing.dependencies] 56 | mypy = "^0.991" 57 | types-requests = "^2.31.0.10" 58 | types-pillow = "^10.2.0.20240125" 59 | 60 | [tool.ruff.lint] 61 | select = [ 62 | "E", # pycodestyle 63 | "F", # pyflakes 64 | "I", # isort 65 | "T201", # print 66 | ] 67 | 68 | [tool.mypy] 69 | disallow_untyped_defs = "True" 70 | exclude = ["notebooks", "examples", "example_data", "langchain_core/pydantic"] 71 | 72 | [[tool.mypy.overrides]] 73 | # conditional dependencies introduced by langsmith-sdk 74 | module = ["numpy", "pytest"] 75 | ignore_missing_imports = true 76 | 77 | [tool.coverage.run] 78 | omit = ["tests/*"] 79 | 80 | [build-system] 81 | requires = ["poetry-core>=1.0.0"] 82 | build-backend = "poetry.core.masonry.api" 83 | 84 | [tool.pytest.ini_options] 85 | # --strict-markers will raise errors on unknown marks. 86 | # https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks 87 | # 88 | # https://docs.pytest.org/en/7.1.x/reference/reference.html 89 | # --strict-config any warnings encountered while parsing the `pytest` 90 | # section of the configuration file raise errors. 91 | # 92 | # https://github.com/tophat/syrupy 93 | # --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite. 94 | addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5" 95 | # Registering custom markers. 96 | # https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers 97 | markers = [ 98 | "requires: mark tests as requiring a specific library", 99 | "asyncio: mark tests as requiring asyncio", 100 | "compile: mark placeholder test used to compile integration tests without running them", 101 | ] 102 | asyncio_mode = "auto" 103 | -------------------------------------------------------------------------------- /libs/ai-endpoints/scripts/check_imports.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import traceback 3 | from importlib.machinery import SourceFileLoader 4 | 5 | if __name__ == "__main__": 6 | files = sys.argv[1:] 7 | has_failure = False 8 | for file in files: 9 | try: 10 | SourceFileLoader("x", file).load_module() 11 | except Exception: 12 | has_failure = True 13 | print(file) # noqa: T201 14 | traceback.print_exc() 15 | print() # noqa: T201 16 | 17 | sys.exit(1 if has_failure else 0) 18 | -------------------------------------------------------------------------------- /libs/ai-endpoints/scripts/lint_imports.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | # Initialize a variable to keep track of errors 6 | errors=0 7 | 8 | # make sure not importing from langchain or langchain_experimental 9 | git --no-pager grep '^from langchain\.' . 
&& errors=$((errors+1)) 10 | git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1)) 11 | 12 | # Decide on an exit status based on the errors 13 | if [ "$errors" -gt 0 ]; then 14 | exit 1 15 | else 16 | exit 0 17 | fi 18 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/__init__.py -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/data/nvidia-picasso-large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/data/nvidia-picasso-large.png -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/data/nvidia-picasso.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/data/nvidia-picasso.gif -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/data/nvidia-picasso.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/data/nvidia-picasso.jpg -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/data/nvidia-picasso.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/data/nvidia-picasso.png -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/data/nvidia-picasso.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/data/nvidia-picasso.webp -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/integration_tests/__init__.py -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/conftest.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List 2 | 3 | import pytest 4 | from langchain_core.documents import Document 5 | 6 | from langchain_nvidia_ai_endpoints import ( 7 | NVIDIA, 8 | ChatNVIDIA, 9 | NVIDIAEmbeddings, 10 | NVIDIARerank, 11 | ) 12 | from langchain_nvidia_ai_endpoints._statics import MODEL_TABLE, Model 13 | from langchain_nvidia_ai_endpoints.chat_models import ( 14 | _DEFAULT_MODEL_NAME as DEFAULT_CHAT_MODEL, 15 | ) 16 | from langchain_nvidia_ai_endpoints.embeddings import ( 17 | _DEFAULT_MODEL_NAME as 
DEFAULT_EMBEDDINGS_MODEL, 18 | ) 19 | from langchain_nvidia_ai_endpoints.llm import ( 20 | _DEFAULT_MODEL_NAME as DEFAULT_COMPLETIONS_MODEL, 21 | ) 22 | from langchain_nvidia_ai_endpoints.reranking import ( 23 | _DEFAULT_MODEL_NAME as DEFAULT_RERANKING_MODEL, 24 | ) 25 | 26 | 27 | def get_mode(config: pytest.Config) -> dict: 28 | nim_endpoint = config.getoption("--nim-endpoint") 29 | if nim_endpoint: 30 | return dict(base_url=nim_endpoint) 31 | return {} 32 | 33 | 34 | def pytest_addoption(parser: pytest.Parser) -> None: 35 | parser.addoption( 36 | "--chat-model-id", 37 | action="store", 38 | nargs="+", 39 | help="Run tests for a specific chat model or list of models", 40 | ) 41 | parser.addoption( 42 | "--tool-model-id", 43 | action="store", 44 | nargs="+", 45 | help="Run tests for a specific chat models that support tool calling", 46 | ) 47 | parser.addoption( 48 | "--structured-model-id", 49 | action="store", 50 | nargs="+", 51 | help="Run tests for a specific models that support structured output", 52 | ) 53 | parser.addoption( 54 | "--qa-model-id", 55 | action="store", 56 | nargs="+", 57 | help="Run tests for a specific qa model or list of models", 58 | ) 59 | parser.addoption( 60 | "--completions-model-id", 61 | action="store", 62 | nargs="+", 63 | help="Run tests for a specific completions model or list of models", 64 | ) 65 | parser.addoption( 66 | "--embedding-model-id", 67 | action="store", 68 | nargs="+", 69 | help="Run tests for a specific embedding model or list of models", 70 | ) 71 | parser.addoption( 72 | "--rerank-model-id", 73 | action="store", 74 | nargs="+", 75 | help="Run tests for a specific rerank model or list of models", 76 | ) 77 | parser.addoption( 78 | "--vlm-model-id", 79 | action="store", 80 | nargs="+", 81 | help="Run tests for a specific vlm model or list of models", 82 | ) 83 | parser.addoption( 84 | "--all-models", 85 | action="store_true", 86 | help="Run tests across all models", 87 | ) 88 | parser.addoption( 89 | "--nim-endpoint", 90 | type=str, 91 | help="Run tests using NIM mode", 92 | ) 93 | 94 | 95 | def pytest_generate_tests(metafunc: pytest.Metafunc) -> None: 96 | mode = get_mode(metafunc.config) 97 | 98 | def get_all_known_models() -> List[Model]: 99 | return list(MODEL_TABLE.values()) 100 | 101 | if "chat_model" in metafunc.fixturenames: 102 | models = [DEFAULT_CHAT_MODEL] 103 | if model_list := metafunc.config.getoption("chat_model_id"): 104 | models = model_list 105 | if metafunc.config.getoption("all_models"): 106 | models = [ 107 | model.id 108 | for model in ChatNVIDIA(**mode).available_models 109 | if model.model_type == "chat" 110 | ] 111 | metafunc.parametrize("chat_model", models, ids=models) 112 | 113 | if "tool_model" in metafunc.fixturenames: 114 | models = ["meta/llama-3.3-70b-instruct"] 115 | if model_list := metafunc.config.getoption("tool_model_id"): 116 | models = model_list 117 | if metafunc.config.getoption("all_models"): 118 | models = [ 119 | model.id 120 | for model in ChatNVIDIA(**mode).available_models 121 | if model.model_type == "chat" and model.supports_tools 122 | ] 123 | metafunc.parametrize("tool_model", models, ids=models) 124 | 125 | if "completions_model" in metafunc.fixturenames: 126 | models = [DEFAULT_COMPLETIONS_MODEL] 127 | if model_list := metafunc.config.getoption("completions_model_id"): 128 | models = model_list 129 | if metafunc.config.getoption("all_models"): 130 | models = [ 131 | model.id 132 | for model in NVIDIA(**mode).available_models 133 | if model.model_type == "completions" 134 | ] 135 | 
metafunc.parametrize("completions_model", models, ids=models) 136 | 137 | if "structured_model" in metafunc.fixturenames: 138 | models = ["meta/llama-3.3-70b-instruct"] 139 | if model_list := metafunc.config.getoption("structured_model_id"): 140 | models = model_list 141 | if metafunc.config.getoption("all_models"): 142 | models = [ 143 | model.id 144 | for model in ChatNVIDIA(**mode).available_models 145 | if model.supports_structured_output 146 | ] 147 | metafunc.parametrize("structured_model", models, ids=models) 148 | 149 | if "rerank_model" in metafunc.fixturenames: 150 | models = [DEFAULT_RERANKING_MODEL] 151 | if model_list := metafunc.config.getoption("rerank_model_id"): 152 | models = model_list 153 | if metafunc.config.getoption("all_models"): 154 | models = [model.id for model in NVIDIARerank(**mode).available_models] 155 | metafunc.parametrize("rerank_model", models, ids=models) 156 | 157 | if "vlm_model" in metafunc.fixturenames: 158 | models = ["meta/llama-3.2-11b-vision-instruct"] 159 | if model_list := metafunc.config.getoption("vlm_model_id"): 160 | models = model_list 161 | if metafunc.config.getoption("all_models"): 162 | models = [ 163 | model.id 164 | for model in get_all_known_models() 165 | if model.model_type in {"vlm", "nv-vlm"} 166 | ] 167 | metafunc.parametrize("vlm_model", models, ids=models) 168 | 169 | if "qa_model" in metafunc.fixturenames: 170 | models = [] 171 | if model_list := metafunc.config.getoption("qa_model_id"): 172 | models = model_list 173 | if metafunc.config.getoption("all_models"): 174 | models = [ 175 | model.id 176 | for model in ChatNVIDIA(**mode).available_models 177 | if model.model_type == "qa" 178 | ] 179 | metafunc.parametrize("qa_model", models, ids=models) 180 | 181 | if "embedding_model" in metafunc.fixturenames: 182 | models = [DEFAULT_EMBEDDINGS_MODEL] 183 | if metafunc.config.getoption("all_models"): 184 | models = [model.id for model in NVIDIAEmbeddings(**mode).available_models] 185 | if model_list := metafunc.config.getoption("embedding_model_id"): 186 | models = model_list 187 | if metafunc.config.getoption("all_models"): 188 | models = [model.id for model in NVIDIAEmbeddings(**mode).available_models] 189 | metafunc.parametrize("embedding_model", models, ids=models) 190 | 191 | 192 | @pytest.fixture 193 | def mode(request: pytest.FixtureRequest) -> dict: 194 | return get_mode(request.config) 195 | 196 | 197 | @pytest.fixture( 198 | params=[ 199 | ChatNVIDIA, 200 | NVIDIAEmbeddings, 201 | NVIDIARerank, 202 | NVIDIA, 203 | ] 204 | ) 205 | def public_class(request: pytest.FixtureRequest) -> type: 206 | return request.param 207 | 208 | 209 | @pytest.fixture 210 | def contact_service() -> Any: 211 | def _contact_service(instance: Any) -> None: 212 | if isinstance(instance, ChatNVIDIA): 213 | instance.invoke("Hello") 214 | elif isinstance(instance, NVIDIAEmbeddings): 215 | instance.embed_documents(["Hello"]) 216 | elif isinstance(instance, NVIDIARerank): 217 | instance.compress_documents( 218 | documents=[Document(page_content="World")], query="Hello" 219 | ) 220 | elif isinstance(instance, NVIDIA): 221 | instance.invoke("Hello") 222 | 223 | return _contact_service 224 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_api_key.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Any 3 | 4 | import pytest 5 | from langchain_core.messages import HumanMessage 6 | 7 | from 
langchain_nvidia_ai_endpoints import ChatNVIDIA 8 | 9 | from ..unit_tests.test_api_key import no_env_var 10 | 11 | 12 | def test_missing_api_key_error(public_class: type, contact_service: Any) -> None: 13 | with no_env_var("NVIDIA_API_KEY"): 14 | with pytest.warns(UserWarning) as record: 15 | client = public_class() 16 | assert len(record) == 1 17 | assert "API key is required for the hosted" in str(record[0].message) 18 | with pytest.raises(Exception) as exc_info: 19 | contact_service(client) 20 | message = str(exc_info.value) 21 | assert "401" in message 22 | assert "Unauthorized" in message 23 | assert "API key" in message 24 | 25 | 26 | def test_bogus_api_key_error(public_class: type, contact_service: Any) -> None: 27 | with no_env_var("NVIDIA_API_KEY"): 28 | client = public_class(nvidia_api_key="BOGUS") 29 | with pytest.raises(Exception) as exc_info: 30 | contact_service(client) 31 | message = str(exc_info.value) 32 | assert "401" in message 33 | assert "Unauthorized" in message 34 | assert "API key" in message 35 | 36 | 37 | @pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"]) 38 | def test_api_key(public_class: type, param: str, contact_service: Any) -> None: 39 | api_key = os.environ.get("NVIDIA_API_KEY") 40 | with no_env_var("NVIDIA_API_KEY"): 41 | client = public_class(**{param: api_key}) 42 | contact_service(client) 43 | 44 | 45 | def test_api_key_leakage(chat_model: str, mode: dict) -> None: 46 | """Test ChatNVIDIA wrapper.""" 47 | chat = ChatNVIDIA(model=chat_model, temperature=0.7, **mode) 48 | message = HumanMessage(content="Hello") 49 | chat.invoke([message]) 50 | 51 | # check last_input post request 52 | last_inputs = chat._client.last_inputs 53 | assert last_inputs 54 | 55 | authorization_header = last_inputs.get("headers", {}).get("Authorization") 56 | 57 | if authorization_header: 58 | key = authorization_header.split("Bearer ")[1] 59 | 60 | assert not key.startswith("nvapi-") 61 | assert key == "**********" 62 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_available_models.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import pytest 4 | import requests_mock 5 | 6 | from langchain_nvidia_ai_endpoints._statics import MODEL_TABLE 7 | 8 | 9 | def test_available_models(public_class: type, mode: dict) -> None: 10 | models = public_class(**mode).available_models 11 | assert models 12 | assert isinstance(models, list) 13 | assert len(models) >= 1 14 | assert all(isinstance(model.id, str) for model in models) 15 | assert all(model.model_type is not None for model in models) 16 | assert all(model.client == public_class.__name__ for model in models) 17 | 18 | 19 | def test_get_available_models(public_class: Any, mode: dict) -> None: 20 | models = public_class.get_available_models(**mode) 21 | assert isinstance(models, list) 22 | assert len(models) >= 1 23 | assert all(isinstance(model.id, str) for model in models) 24 | assert all(model.model_type is not None for model in models) 25 | assert all(model.client == public_class.__name__ for model in models) 26 | 27 | 28 | # todo: turn this into a unit test 29 | def test_available_models_cached(public_class: type, mode: dict) -> None: 30 | if public_class.__name__ == "NVIDIARerank" and "base_url" not in mode: 31 | pytest.skip("There is no listing service for hosted ranking NIMs") 32 | with requests_mock.Mocker(real_http=True) as mock: 33 | client = public_class() 34 | 
assert not mock.called 35 | client.available_models 36 | assert mock.called 37 | client.available_models 38 | assert mock.call_count == 1 39 | 40 | 41 | def test_known_models_are_available(public_class: type, mode: dict) -> None: 42 | known_models = set( 43 | model.id 44 | for model in MODEL_TABLE.values() 45 | if model.client == public_class.__name__ 46 | ) 47 | available_models = set( 48 | model.id 49 | for model in public_class.get_available_models(**mode) # type: ignore 50 | ) 51 | 52 | assert known_models - available_models == set() 53 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_base_url.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Any 3 | 4 | import pytest 5 | from requests.exceptions import ConnectionError 6 | from requests_mock import Mocker 7 | 8 | 9 | # Fixture setup /v1/chat/completions endpoints 10 | @pytest.fixture() 11 | def mock_endpoints(requests_mock: Mocker) -> None: 12 | for endpoint in [ 13 | "/v1/embeddings", 14 | "/v1/chat/completions", 15 | "/v1/ranking", 16 | "/v1/completions", 17 | ]: 18 | requests_mock.post( 19 | re.compile(f".*{endpoint}"), 20 | exc=ConnectionError(f"Mocked ConnectionError for {endpoint}"), 21 | ) 22 | requests_mock.get( 23 | re.compile(".*/v1/models"), 24 | json={ 25 | "data": [ 26 | { 27 | "id": "not-a-model", 28 | "object": "model", 29 | "created": 1234567890, 30 | "owned_by": "OWNER", 31 | }, 32 | ] 33 | }, 34 | ) 35 | 36 | 37 | # Test function using the mock_endpoints fixture 38 | @pytest.mark.parametrize( 39 | "base_url", 40 | [ 41 | "http://localhost:12321", 42 | "http://localhost:12321/v1", 43 | ], 44 | ) 45 | def test_endpoint_unavailable( 46 | public_class: type, 47 | base_url: str, 48 | contact_service: Any, 49 | mock_endpoints: None, # Inject the mock_endpoints fixture 50 | ) -> None: 51 | # we test this with a bogus model because users should supply 52 | # a model when using their own base_url 53 | client = public_class(model="not-a-model", base_url=base_url) 54 | with pytest.raises(ConnectionError) as e: 55 | contact_service(client) 56 | assert "Mocked ConnectionError for" in str(e.value) 57 | 58 | 59 | # todo: move this to be a unit test 60 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_compile.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.mark.compile 5 | def test_placeholder() -> None: 6 | """Used for compiling integration tests without running any real tests.""" 7 | pass 8 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_completions_models.py: -------------------------------------------------------------------------------- 1 | # https://platform.openai.com/docs/api-reference/completions/create 2 | # POST https://.../v1/completions 3 | # model: str -- The ID of the model to use for completion. 4 | # prompt: str | Array[str] -- The prompt(s) to generate completions for. 5 | # best_of: Optional[int] (default: 1) -- An integer representing the number 6 | # of completions to generate and score. 7 | # The API will return the best completion 8 | # of the group. 9 | # echo: Optional[bool] (default: False) -- Whether to echo the prompt in addition 10 | # to the completion. 
11 | # frequency_penalty: Optional[float] (default: 0.0) -- Float that penalizes new 12 | # tokens. Range -2.0 to 2.0. 13 | # logit_bias: Optional[Dict[str, float]] -- Dict containing token to logit bias. 14 | # logprobs: Optional[int] (default: None) -- Integer representing the number of 15 | # logprobs to return. 0 means no logprobs. 16 | # Max value is 5. 17 | # max_tokens: Optional[int] (default: 16) -- Integer representing the maximum number 18 | # of tokens to generate. 19 | # n: Optional[int] (default: 1) -- Integer representing the number of completions 20 | # to generate. 21 | # presence_penalty: Optional[float] (default: 0.0) -- Float that penalizes new tokens 22 | # based on whether they appear in 23 | # the text so far. Range -2.0 to 24 | # 2.0. 25 | # seed: Optional[int] (default: None) -- Integer seed that attempts to make the 26 | # completions deterministic. 27 | # stop: Optional[str|Array[str]] -- Token at which to stop generating completions. 28 | # Up to 4 sequences. 29 | # stream: Optional[bool] (default: False) -- Whether to stream back partial progress. 30 | # stream_options: Optional[Dict["include_usage": bool]] -- Dict containing stream 31 | # options. 32 | # suffix: Optional[str] -- Suffix to add to the completion. 33 | # temperature: Optional[float] (default: 1.0) -- Sampling temperature, between 0 and 2. 34 | # top_p: Optional[float] (default: 1.0) -- Alternative to temperature sampling. 35 | # user: Optional[str] -- User ID to associate with the request. 36 | # 37 | # Returns: 38 | # id: str -- The ID of the completion. 39 | # object: str -- Always "text_completion". 40 | # created: int -- Unix timestamp of when the completion was created. 41 | # model: str -- The ID of the model used to generate the completion. 42 | # choices: List[{"finish_reason": "stop"|"length"|"content_filter", 43 | # "index": int, 44 | # "text": str, 45 | # "logprobs": Optional[{"text_offset": array, 46 | # "token_logprobs": array, 47 | # "tokens": array, 48 | # "top_logprobs": array}]}] -- 49 | # List of completions generated by the model. 50 | # usage: {"completion_tokens": int, 51 | # "prompt_tokens": int, 52 | # "total_tokens": int} -- Usage statistics for the model. 53 | # system_fingerprint: str -- System fingerprint of the model used to generate 54 | # the completion. 
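#
# For example (illustrative values only; field names follow the spec above),
# a minimal request/response pair might look like:
#   request:  {"model": "<completions-model-id>", "prompt": "Hello, my name is",
#              "max_tokens": 16}
#   response: {"id": "cmpl-123", "object": "text_completion", "created": 1700000000,
#              "model": "<completions-model-id>",
#              "choices": [{"index": 0, "text": " Alice.", "logprobs": null,
#                           "finish_reason": "stop"}],
#              "usage": {"prompt_tokens": 5, "completion_tokens": 3, "total_tokens": 8}}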
55 | 56 | 57 | from typing import Any, Callable, Tuple 58 | 59 | import pytest 60 | 61 | from langchain_nvidia_ai_endpoints import NVIDIA 62 | 63 | 64 | def invoke(llm: NVIDIA, prompt: str, **kwargs: Any) -> Tuple[str, int]: 65 | return llm.invoke(prompt, **kwargs), 1 66 | 67 | 68 | def stream(llm: NVIDIA, prompt: str, **kwargs: Any) -> Tuple[str, int]: 69 | response = "" 70 | count = 0 71 | for chunk in llm.stream(prompt, **kwargs): 72 | response += chunk 73 | count += 1 74 | return response, count 75 | 76 | 77 | async def ainvoke(llm: NVIDIA, prompt: str, **kwargs: Any) -> Tuple[str, int]: 78 | return await llm.ainvoke(prompt, **kwargs), 1 79 | 80 | 81 | async def astream(llm: NVIDIA, prompt: str, **kwargs: Any) -> Tuple[str, int]: 82 | response = "" 83 | count = 0 84 | async for chunk in llm.astream(prompt, **kwargs): 85 | response += chunk 86 | count += 1 87 | return response, count 88 | 89 | 90 | @pytest.mark.parametrize( 91 | "func, count", [(invoke, 0), (stream, 1)], ids=["invoke", "stream"] 92 | ) 93 | def test_basic(completions_model: str, mode: dict, func: Callable, count: int) -> None: 94 | llm = NVIDIA(model=completions_model, **mode) 95 | response, cnt = func(llm, "Hello, my name is") 96 | assert isinstance(response, str) 97 | assert cnt > count, "Should have received more chunks" 98 | 99 | 100 | @pytest.mark.parametrize( 101 | "func, count", [(ainvoke, 0), (astream, 1)], ids=["ainvoke", "astream"] 102 | ) 103 | async def test_abasic( 104 | completions_model: str, mode: dict, func: Callable, count: int 105 | ) -> None: 106 | llm = NVIDIA(model=completions_model, **mode) 107 | response, cnt = await func(llm, "Hello, my name is") 108 | assert isinstance(response, str) 109 | assert cnt > count, "Should have received more chunks" 110 | 111 | 112 | @pytest.mark.parametrize( 113 | "param, value", 114 | [ 115 | ("frequency_penalty", 0.5), 116 | ("max_tokens", 32), 117 | ("presence_penalty", 0.5), 118 | ("seed", 1234), 119 | ("stop", "Hello"), 120 | ("temperature", 0.5), 121 | ("top_p", 0.5), 122 | ], 123 | ) 124 | @pytest.mark.parametrize("func", [invoke, stream], ids=["invoke", "stream"]) 125 | def test_params( 126 | completions_model: str, mode: dict, param: str, value: Any, func: Callable 127 | ) -> None: 128 | llm = NVIDIA(model=completions_model, **mode) 129 | response, _ = func(llm, "Hello, my name is", **{param: value}) 130 | assert isinstance(response, str) 131 | 132 | 133 | @pytest.mark.parametrize( 134 | "param, value", 135 | [ 136 | ("best_of", 5), 137 | ("echo", True), 138 | ("logit_bias", {"hello": 1.0}), 139 | ("logprobs", 2), 140 | ("n", 2), 141 | ("suffix", "Hello"), 142 | ("user", "1234"), 143 | ], 144 | ) 145 | @pytest.mark.parametrize("func", [invoke, stream], ids=["invoke", "stream"]) 146 | @pytest.mark.xfail(reason="Not consistently implemented") 147 | def test_params_incomplete( 148 | completions_model: str, mode: dict, param: str, value: Any, func: Callable 149 | ) -> None: 150 | llm = NVIDIA(model=completions_model, **mode) 151 | response, _ = func(llm, "Hello, my name is", **{param: value}) 152 | assert isinstance(response, str) 153 | 154 | 155 | def test_invoke_with_stream_true(completions_model: str, mode: dict) -> None: 156 | llm = NVIDIA(model=completions_model, **mode) 157 | with pytest.warns(UserWarning) as record: 158 | response = llm.invoke("Hello, my name is", stream=True) 159 | assert isinstance(response, str) 160 | assert len(record) == 1 161 | assert "stream set to true" in str(record[0].message) 162 | assert "ignoring" in str(record[0].message) 
163 | 164 | 165 | def test_stream_with_stream_false(completions_model: str, mode: dict) -> None: 166 | llm = NVIDIA(model=completions_model, **mode) 167 | with pytest.warns(UserWarning) as record: 168 | response = next(llm.stream("Hello, my name is", stream=False)) 169 | assert isinstance(response, str) 170 | assert len(record) == 1 171 | assert "stream set to false" in str(record[0].message) 172 | assert "ignoring" in str(record[0].message) 173 | 174 | 175 | # todo: check stream_options 176 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_embeddings.py: -------------------------------------------------------------------------------- 1 | """Test NVIDIA AI Foundation Model Embeddings. 2 | 3 | Note: These tests are designed to validate the functionality of NVIDIAEmbeddings. 4 | """ 5 | 6 | import pytest 7 | 8 | from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings 9 | from langchain_nvidia_ai_endpoints.embeddings import _DEFAULT_BATCH_SIZE 10 | 11 | 12 | def test_embed_query(embedding_model: str, mode: dict) -> None: 13 | """Test NVIDIA embeddings for a single query.""" 14 | query = "foo bar" 15 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 16 | output = embedding.embed_query(query) 17 | assert len(output) > 3 18 | 19 | 20 | async def test_embed_query_async(embedding_model: str, mode: dict) -> None: 21 | """Test NVIDIA async embeddings for a single query.""" 22 | query = "foo bar" 23 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 24 | output = await embedding.aembed_query(query) 25 | assert len(output) > 3 26 | 27 | 28 | def test_embed_documents_single(embedding_model: str, mode: dict) -> None: 29 | """Test NVIDIA embeddings for documents.""" 30 | documents = ["foo bar"] 31 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 32 | output = embedding.embed_documents(documents) 33 | assert len(output) == 1 34 | assert len(output[0]) > 3 35 | 36 | 37 | def test_embed_documents_multiple(embedding_model: str, mode: dict) -> None: 38 | """Test NVIDIA embeddings for multiple documents.""" 39 | documents = ["foo bar", "bar foo", "foo"] 40 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 41 | output = embedding.embed_documents(documents) 42 | assert len(output) == 3 43 | assert all(len(doc) > 4 for doc in output) 44 | 45 | 46 | async def test_embed_documents_multiple_async(embedding_model: str, mode: dict) -> None: 47 | """Test NVIDIA async embeddings for multiple documents.""" 48 | documents = ["foo bar", "bar foo", "foo"] 49 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 50 | output = await embedding.aembed_documents(documents) 51 | assert len(output) == 3 52 | assert all(len(doc) > 4 for doc in output) 53 | 54 | 55 | def test_embed_query_long_text(embedding_model: str, mode: dict) -> None: 56 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 57 | text = "nvidia " * 10240 58 | with pytest.raises(Exception): 59 | embedding.embed_query(text) 60 | 61 | 62 | def test_embed_documents_batched_texts(embedding_model: str, mode: dict) -> None: 63 | embedding = NVIDIAEmbeddings(model=embedding_model, **mode) 64 | count = _DEFAULT_BATCH_SIZE * 2 + 1 65 | texts = ["nvidia " * 32] * count 66 | output = embedding.embed_documents(texts) 67 | assert len(output) == count 68 | assert all(len(embedding) > 3 for embedding in output) 69 | 70 | 71 | def test_embed_documents_mixed_long_texts(embedding_model: str, mode: dict) -> None: 72 | embedding = 
NVIDIAEmbeddings(model=embedding_model, **mode) 73 | count = _DEFAULT_BATCH_SIZE * 2 - 1 74 | texts = ["nvidia " * 32] * count 75 | texts[len(texts) // 2] = "nvidia " * 10240 76 | with pytest.raises(Exception): 77 | embedding.embed_documents(texts) 78 | 79 | 80 | @pytest.mark.parametrize("truncate", ["START", "END"]) 81 | def test_embed_query_truncate(embedding_model: str, mode: dict, truncate: str) -> None: 82 | embedding = NVIDIAEmbeddings(model=embedding_model, truncate=truncate, **mode) 83 | text = "nvidia " * 2048 84 | output = embedding.embed_query(text) 85 | assert len(output) > 3 86 | 87 | 88 | @pytest.mark.parametrize("truncate", ["START", "END"]) 89 | def test_embed_documents_truncate( 90 | embedding_model: str, mode: dict, truncate: str 91 | ) -> None: 92 | embedding = NVIDIAEmbeddings(model=embedding_model, truncate=truncate, **mode) 93 | count = 10 94 | texts = ["nvidia " * 32] * count 95 | texts[len(texts) // 2] = "nvidia " * 10240 96 | output = embedding.embed_documents(texts) 97 | assert len(output) == count 98 | 99 | 100 | @pytest.mark.parametrize("dimensions", [32, 64, 128, 2048]) 101 | def test_embed_query_with_dimensions( 102 | embedding_model: str, mode: dict, dimensions: int 103 | ) -> None: 104 | if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2": 105 | pytest.skip("Model does not support custom dimensions.") 106 | query = "foo bar" 107 | embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode) 108 | assert len(embedding.embed_query(query)) == dimensions 109 | 110 | 111 | @pytest.mark.parametrize("dimensions", [32, 64, 128, 2048]) 112 | def test_embed_documents_with_dimensions( 113 | embedding_model: str, mode: dict, dimensions: int 114 | ) -> None: 115 | if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2": 116 | pytest.skip("Model does not support custom dimensions.") 117 | documents = ["foo bar", "bar foo"] 118 | embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode) 119 | output = embedding.embed_documents(documents) 120 | assert len(output) == len(documents) 121 | assert all(len(doc) == dimensions for doc in output) 122 | 123 | 124 | @pytest.mark.parametrize("dimensions", [102400]) 125 | def test_embed_query_with_large_dimensions( 126 | embedding_model: str, mode: dict, dimensions: int 127 | ) -> None: 128 | if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2": 129 | pytest.skip("Model does not support custom dimensions.") 130 | query = "foo bar" 131 | embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode) 132 | assert 2048 <= len(embedding.embed_query(query)) < dimensions 133 | 134 | 135 | @pytest.mark.parametrize("dimensions", [102400]) 136 | def test_embed_documents_with_large_dimensions( 137 | embedding_model: str, mode: dict, dimensions: int 138 | ) -> None: 139 | if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2": 140 | pytest.skip("Model does not support custom dimensions.") 141 | documents = ["foo bar", "bar foo"] 142 | embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode) 143 | output = embedding.embed_documents(documents) 144 | assert len(output) == len(documents) 145 | assert all(2048 <= len(doc) < dimensions for doc in output) 146 | 147 | 148 | @pytest.mark.parametrize("dimensions", [-1]) 149 | def test_embed_query_invalid_dimensions( 150 | embedding_model: str, mode: dict, dimensions: int 151 | ) -> None: 152 | if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2": 153 | pytest.skip("Model does not support custom 
dimensions.") 154 | query = "foo bar" 155 | with pytest.raises(Exception) as exc: 156 | NVIDIAEmbeddings( 157 | model=embedding_model, dimensions=dimensions, **mode 158 | ).embed_query(query) 159 | assert "400" in str(exc.value) 160 | 161 | 162 | @pytest.mark.parametrize("dimensions", [-1]) 163 | def test_embed_documents_invalid_dimensions( 164 | embedding_model: str, mode: dict, dimensions: int 165 | ) -> None: 166 | if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2": 167 | pytest.skip("Model does not support custom dimensions.") 168 | documents = ["foo bar", "bar foo"] 169 | with pytest.raises(Exception) as exc: 170 | NVIDIAEmbeddings( 171 | model=embedding_model, dimensions=dimensions, **mode 172 | ).embed_documents(documents) 173 | assert "400" in str(exc.value) 174 | 175 | 176 | # todo: test max_length > max length accepted by the model 177 | # todo: test max_batch_size > max batch size accepted by the model 178 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_other_models.py: -------------------------------------------------------------------------------- 1 | """Test ChatNVIDIA chat model.""" 2 | 3 | from langchain_core.messages import BaseMessage, HumanMessage 4 | 5 | from langchain_nvidia_ai_endpoints.chat_models import ChatNVIDIA 6 | 7 | 8 | def test_chat_ai_endpoints_context_message(qa_model: str, mode: dict) -> None: 9 | """Test wrapper with context message.""" 10 | chat = ChatNVIDIA(model=qa_model, max_tokens=36, **mode) 11 | context_message = BaseMessage( 12 | content="Once upon a time there was a little langchainer", type="context" 13 | ) 14 | human_message = HumanMessage(content="What was there once upon a time?") 15 | response = chat.invoke([context_message, human_message]) 16 | assert isinstance(response, BaseMessage) 17 | assert isinstance(response.content, str) 18 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_ranking.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | 4 | import faker 5 | import pytest 6 | from langchain_core.documents import Document 7 | 8 | from langchain_nvidia_ai_endpoints import NVIDIARerank # type: ignore 9 | 10 | 11 | class CharacterTextSplitter: 12 | def __init__(self, chunk_size: int): 13 | self.chunk_size = chunk_size 14 | 15 | def create_documents(self, text: str) -> List[Document]: 16 | words = text.split() 17 | chunks = [] 18 | for i in range(0, len(words), self.chunk_size): 19 | chunk = " ".join(words[i : i + self.chunk_size]) 20 | chunks.append(Document(page_content=chunk)) 21 | return chunks 22 | 23 | 24 | @pytest.fixture 25 | def text() -> str: 26 | fake = faker.Faker() 27 | fake.seed_instance(os.environ.get("FAKER_SEED", 13131)) 28 | return fake.paragraph(2016) 29 | 30 | 31 | @pytest.fixture 32 | def query() -> str: 33 | return "what are human rights?" 
34 | 35 | 36 | @pytest.fixture 37 | def splitter() -> CharacterTextSplitter: 38 | return CharacterTextSplitter(chunk_size=300) 39 | 40 | 41 | @pytest.fixture 42 | def documents(text: str, splitter: CharacterTextSplitter) -> List[Document]: 43 | return splitter.create_documents(text) 44 | 45 | 46 | def test_langchain_reranker_direct( 47 | query: str, documents: List[Document], rerank_model: str, mode: dict 48 | ) -> None: 49 | ranker = NVIDIARerank(model=rerank_model, **mode) 50 | result_docs = ranker.compress_documents(documents=documents, query=query) 51 | assert len(result_docs) > 0 52 | for doc in result_docs: 53 | assert "relevance_score" in doc.metadata 54 | assert doc.metadata["relevance_score"] is not None 55 | assert isinstance(doc.metadata["relevance_score"], float) 56 | 57 | 58 | def test_langchain_reranker_direct_empty_docs( 59 | query: str, rerank_model: str, mode: dict 60 | ) -> None: 61 | ranker = NVIDIARerank(model=rerank_model, **mode) 62 | result_docs = ranker.compress_documents(documents=[], query=query) 63 | assert len(result_docs) == 0 64 | 65 | 66 | def test_langchain_reranker_direct_top_n_negative( 67 | query: str, documents: List[Document], rerank_model: str, mode: dict 68 | ) -> None: 69 | orig = NVIDIARerank.model_config["validate_assignment"] 70 | NVIDIARerank.model_config["validate_assignment"] = False 71 | ranker = NVIDIARerank(model=rerank_model, **mode) 72 | ranker.top_n = -100 73 | NVIDIARerank.model_config["validate_assignment"] = orig 74 | result_docs = ranker.compress_documents(documents=documents, query=query) 75 | assert len(result_docs) == 0 76 | 77 | 78 | def test_langchain_reranker_direct_top_n_zero( 79 | query: str, documents: List[Document], rerank_model: str, mode: dict 80 | ) -> None: 81 | ranker = NVIDIARerank(model=rerank_model, **mode) 82 | ranker.top_n = 0 83 | result_docs = ranker.compress_documents(documents=documents, query=query) 84 | assert len(result_docs) == 0 85 | 86 | 87 | def test_langchain_reranker_direct_top_n_one( 88 | query: str, documents: List[Document], rerank_model: str, mode: dict 89 | ) -> None: 90 | ranker = NVIDIARerank(model=rerank_model, **mode) 91 | ranker.top_n = 1 92 | result_docs = ranker.compress_documents(documents=documents, query=query) 93 | assert len(result_docs) == 1 94 | 95 | 96 | def test_langchain_reranker_direct_top_n_equal_len_docs( 97 | query: str, documents: List[Document], rerank_model: str, mode: dict 98 | ) -> None: 99 | ranker = NVIDIARerank(model=rerank_model, **mode) 100 | ranker.top_n = len(documents) 101 | result_docs = ranker.compress_documents(documents=documents, query=query) 102 | assert len(result_docs) == len(documents) 103 | 104 | 105 | def test_langchain_reranker_direct_top_n_greater_len_docs( 106 | query: str, documents: List[Document], rerank_model: str, mode: dict 107 | ) -> None: 108 | ranker = NVIDIARerank(model=rerank_model, **mode) 109 | ranker.top_n = len(documents) * 2 110 | result_docs = ranker.compress_documents(documents=documents, query=query) 111 | assert len(result_docs) == len(documents) 112 | 113 | 114 | @pytest.mark.parametrize("batch_size", [-10, 0]) 115 | def test_rerank_invalid_max_batch_size( 116 | rerank_model: str, mode: dict, batch_size: int 117 | ) -> None: 118 | ranker = NVIDIARerank(model=rerank_model, **mode) 119 | with pytest.raises(ValueError): 120 | ranker.max_batch_size = batch_size 121 | 122 | 123 | def test_rerank_invalid_top_n(rerank_model: str, mode: dict) -> None: 124 | ranker = NVIDIARerank(model=rerank_model, **mode) 125 | with 
pytest.raises(ValueError): 126 | ranker.top_n = -10 127 | 128 | 129 | @pytest.mark.parametrize( 130 | "batch_size, top_n", 131 | [ 132 | (7, 7), # batch_size == top_n 133 | (17, 7), # batch_size > top_n 134 | (3, 13), # batch_size < top_n 135 | (1, 1), # batch_size == top_n, corner case 1 136 | (1, 10), # batch_size < top_n, corner case 1 137 | (10, 1), # batch_size > top_n, corner case 1 138 | ], 139 | ) 140 | def test_rerank_batching( 141 | query: str, 142 | documents: List[Document], 143 | rerank_model: str, 144 | mode: dict, 145 | batch_size: int, 146 | top_n: int, 147 | ) -> None: 148 | assert len(documents) > batch_size, "test requires more documents" 149 | 150 | ranker = NVIDIARerank(model=rerank_model, **mode) 151 | ranker.top_n = top_n 152 | ranker.max_batch_size = batch_size 153 | result_docs = ranker.compress_documents(documents=documents, query=query) 154 | assert len(result_docs) == min(len(documents), top_n) 155 | for doc in result_docs: 156 | assert "relevance_score" in doc.metadata 157 | assert doc.metadata["relevance_score"] is not None 158 | assert isinstance(doc.metadata["relevance_score"], float) 159 | assert all( 160 | result_docs[i].metadata["relevance_score"] 161 | >= result_docs[i + 1].metadata["relevance_score"] 162 | for i in range(len(result_docs) - 1) 163 | ), "results are not sorted" 164 | 165 | # 166 | # there's a bug in the service that causes the results to be inconsistent 167 | # depending on the batch shapes. running this test with FAKER_SEED=13131 168 | # will demonstrate the issue. 169 | # 170 | # reference_ranker = NVIDIARerank( 171 | # model=rerank_model, max_batch_size=len(documents), top_n=len(documents) 172 | # ).mode(**mode) 173 | # reference_docs = reference_ranker.compress_documents( 174 | # documents=[doc.copy(deep=True) for doc in documents], query=query 175 | # ) 176 | # for i in range(top_n): 177 | # assert result_docs[i].page_content == reference_docs[i].page_content 178 | # assert all( 179 | # result_docs[i].page_content == reference_docs[i].page_content 180 | # for i in range(top_n) 181 | # ), "batched results do not match unbatched results" 182 | 183 | 184 | @pytest.mark.parametrize("truncate", ["END"]) 185 | def test_truncate_positive(rerank_model: str, mode: dict, truncate: str) -> None: 186 | query = "What is acceleration?" 187 | documents = [ 188 | Document(page_content="NVIDIA " * length) 189 | for length in [32, 1024, 64, 128, 2048, 256, 512] 190 | ] 191 | client = NVIDIARerank( 192 | model=rerank_model, top_n=len(documents), truncate=truncate, **mode 193 | ) 194 | response = client.compress_documents(documents=documents, query=query) 195 | assert len(response) == len(documents) 196 | 197 | 198 | @pytest.mark.parametrize("truncate", [None, "NONE"]) 199 | def test_truncate_negative(rerank_model: str, mode: dict, truncate: str) -> None: 200 | if rerank_model == "nv-rerank-qa-mistral-4b:1": 201 | pytest.skip("nv-rerank-qa-mistral-4b:1 truncates by default") 202 | query = "What is acceleration?" 
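    # note: the 10240-word document below is deliberately longer than the
    # reranker's maximum input; with truncation left unset or set to "NONE",
    # the service is expected to reject the request with a 400 error, unlike
    # test_truncate_positive above where truncate="END" accepts oversized input.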
203 | documents = [ 204 | Document(page_content="NVIDIA " * length) 205 | for length in [32, 1024, 64, 128, 10240, 256, 512] 206 | ] 207 | truncate_param = {} 208 | if truncate: 209 | truncate_param = {"truncate": truncate} 210 | client = NVIDIARerank(model=rerank_model, **truncate_param, **mode) 211 | with pytest.raises(Exception) as e: 212 | client.compress_documents(documents=documents, query=query) 213 | assert "400" in str(e.value) 214 | assert "exceeds maximum allowed" in str(e.value) 215 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_register_model.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Any 3 | 4 | import pytest 5 | 6 | from langchain_nvidia_ai_endpoints import ( 7 | NVIDIA, 8 | ChatNVIDIA, 9 | Model, 10 | NVIDIAEmbeddings, 11 | NVIDIARerank, 12 | register_model, 13 | ) 14 | 15 | 16 | # 17 | # if this test is failing it may be because the function uuids have changed. 18 | # you will have to find the new ones from https://api.nvcf.nvidia.com/v2/nvcf/functions 19 | # 20 | @pytest.mark.parametrize( 21 | "client, id, endpoint", 22 | [ 23 | ( 24 | ChatNVIDIA, 25 | "meta/llama3-8b-instruct", 26 | "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/a5a3ad64-ec2c-4bfc-8ef7-5636f26630fe", 27 | ), 28 | ( 29 | NVIDIAEmbeddings, 30 | "NV-Embed-QA", 31 | "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/09c64e32-2b65-4892-a285-2f585408d118", 32 | ), 33 | ( 34 | NVIDIARerank, 35 | "nv-rerank-qa-mistral-4b:1", 36 | "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/0bf77f50-5c35-4488-8e7a-f49bb1974af6", 37 | ), 38 | ( 39 | NVIDIA, 40 | "bigcode/starcoder2-15b", 41 | "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/d9cfe8a2-44df-44a0-ba51-3fc4a202c11c", 42 | ), 43 | ], 44 | ) 45 | def test_registered_model_functional( 46 | client: type, id: str, endpoint: str, contact_service: Any 47 | ) -> None: 48 | model = Model(id=id, endpoint=endpoint) 49 | warnings.filterwarnings( 50 | "ignore", r".*is already registered.*" 51 | ) # intentionally overriding known models 52 | warnings.filterwarnings( 53 | "ignore", r".*Unable to determine validity of.*" 54 | ) # we aren't passing client & type to Model() 55 | register_model(model) 56 | contact_service(client(model=id)) 57 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_standard.py: -------------------------------------------------------------------------------- 1 | """Standard LangChain interface tests""" 2 | 3 | from typing import Any, Coroutine, Type 4 | 5 | import pytest 6 | from langchain_core.language_models import BaseChatModel 7 | from langchain_core.tools import BaseTool 8 | from langchain_tests.integration_tests import ChatModelIntegrationTests 9 | 10 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 11 | 12 | 13 | class TestNVIDIAStandard(ChatModelIntegrationTests): 14 | @property 15 | def chat_model_class(self) -> Type[BaseChatModel]: 16 | return ChatNVIDIA 17 | 18 | @property 19 | def chat_model_params(self) -> dict: 20 | return {"model": "meta/llama-3.3-70b-instruct", "temperature": 0} 21 | 22 | @pytest.mark.xfail(reason="anthropic-style list content not supported") 23 | def test_tool_message_histories_list_content( 24 | self, model: BaseChatModel, my_adder_tool: BaseTool 25 | ) -> None: 26 | return super().test_tool_message_histories_list_content(model, my_adder_tool) 27 | 28 | 
@pytest.mark.xfail(reason="Empty AIMessage content not supported") 29 | def test_tool_message_error_status( 30 | self, model: BaseChatModel, my_adder_tool: BaseTool 31 | ) -> None: 32 | return super().test_tool_message_error_status(model, my_adder_tool) 33 | 34 | @pytest.mark.xfail(reason="Empty AIMessage content not supported") 35 | def test_tool_message_histories_string_content( 36 | self, model: BaseChatModel, my_adder_tool: BaseTool 37 | ) -> None: 38 | return super().test_tool_message_histories_string_content(model, my_adder_tool) 39 | 40 | @pytest.mark.xfail( 41 | reason="Only one chunk should set input_tokens, the rest should be 0 or None" 42 | ) 43 | def test_usage_metadata_streaming(self, model: BaseChatModel) -> None: 44 | return super().test_usage_metadata_streaming(model) 45 | 46 | @pytest.mark.parametrize("schema_type", ["typeddict"]) 47 | @pytest.mark.xfail(reason="TypedDict schema type not supported") 48 | def test_structured_output(self, model: BaseChatModel, schema_type: str) -> None: 49 | return super().test_structured_output(model, schema_type) 50 | 51 | @pytest.mark.parametrize("schema_type", ["typeddict"]) 52 | @pytest.mark.xfail(reason="TypedDict schema type not supported") 53 | async def test_structured_output_async( 54 | self, model: BaseChatModel, schema_type: str 55 | ) -> Coroutine[Any, Any, None]: 56 | # Return the coroutine directly without awaiting it 57 | return super().test_structured_output_async(model, schema_type) 58 | 59 | @pytest.mark.xfail(reason="TypedDict schema type not supported") 60 | def test_structured_output_optional_param(self, model: BaseChatModel) -> None: 61 | # Don't return anything since the return type is None 62 | super().test_structured_output_optional_param(model) 63 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_streaming.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 4 | 5 | 6 | def test_ttft(chat_model: str, mode: dict) -> None: 7 | # we had an issue where streaming took a long time to start. the issue 8 | # was all streamed results were collected before yielding them to the 9 | # user. this test tries to detect the incorrect behavior. 10 | # 11 | # warning: 12 | # - this can false positive if the model itself is slow to start 13 | # - this can false nagative if there is a delay after the first chunk 14 | # 15 | # potential mitigation for false negative is to check mean & stdev and 16 | # filter outliers. 17 | # 18 | # credit to Pouyan Rezakhani for finding this issue 19 | llm = ChatNVIDIA(model=chat_model, **mode) 20 | chunk_times = [time.time()] 21 | for chunk in llm.stream("Count to 1000 by 2s, e.g. 
2 4 6 8 ...", max_tokens=512): 22 | chunk_times.append(time.time()) 23 | ttft = chunk_times[1] - chunk_times[0] 24 | total_time = chunk_times[-1] - chunk_times[0] 25 | assert ttft < ( 26 | total_time / 2 27 | ), "potential streaming issue, TTFT should be less than half of the total time" 28 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/integration_tests/test_structured_output.py: -------------------------------------------------------------------------------- 1 | import enum 2 | from typing import Any, Callable, Optional, Union 3 | 4 | import pytest 5 | from langchain_core.messages import HumanMessage 6 | from pydantic import BaseModel, Field 7 | 8 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 9 | 10 | 11 | def do_invoke(llm: ChatNVIDIA, message: str) -> Any: 12 | return llm.invoke(message) 13 | 14 | 15 | def do_stream(llm: ChatNVIDIA, message: str) -> Any: 16 | # the way streaming works is to progressively grow the response 17 | # so we just return the last chunk. this is different from other 18 | # streaming results, which are *Chunks that can be concatenated. 19 | result = [chunk for chunk in llm.stream(message)] 20 | return result[-1] if result else None 21 | 22 | 23 | @pytest.mark.xfail(reason="Accuracy is not guaranteed") 24 | def test_accuracy(structured_model: str, mode: dict) -> None: 25 | class Person(BaseModel): 26 | name: str = Field(description="The name of the person") 27 | age: Optional[int] = Field(description="The age of the person") 28 | birthdate: Optional[str] = Field(description="The birthdate of the person") 29 | occupation: Optional[str] = Field(description="The occupation of the person") 30 | birthplace: Optional[str] = Field(description="The birthplace of the person") 31 | 32 | messages = [ 33 | HumanMessage( 34 | """ 35 | Jen-Hsun Huang was born in Tainan, Taiwan, on February 17, 1963. His family 36 | moved to Thailand when he was five; when he was nine, he and his brother were 37 | sent to the United States to live with an uncle in Tacoma, Washington. When he 38 | was ten, he lived in the boys' dormitory with his brother at Oneida Baptist 39 | Institute while attending Oneida Elementary school in Oneida, Kentucky—his 40 | uncle had mistaken what was actually a religious reform academy for a 41 | prestigious boarding school. Several years later, their parents also moved to 42 | the United States and settled in Oregon, where Huang graduated from Aloha 43 | High School in Aloha, Oregon. He skipped two years and graduated at sixteen. 44 | While growing up in Oregon in the 1980s, Huang got his first job at a local 45 | Denny's restaurant, where he worked as a busboy and waiter. 46 | Huang received his undergraduate degree in electrical engineering from Oregon 47 | State University in 1984, and his master's degree in electrical engineering 48 | from Stanford University in 1992. 49 | 50 | The current date is July 2034. 
51 | """ 52 | ), 53 | HumanMessage("Who is Jensen?"), 54 | ] 55 | 56 | llm = ChatNVIDIA(model=structured_model, **mode) 57 | structured_llm = llm.with_structured_output(Person) 58 | person = structured_llm.invoke(messages) 59 | assert isinstance(person, Person) 60 | assert person.name in ["Jen-Hsun Huang", "Jensen"] 61 | # assert person.age == 71 # this is too hard 62 | assert person.birthdate == "February 17, 1963" 63 | assert person.occupation and ( 64 | "founder" in person.occupation.lower() or "CEO" in person.occupation.upper() 65 | ) 66 | assert person.birthplace == "Tainan, Taiwan" 67 | 68 | 69 | class Joke(BaseModel): 70 | """Joke to tell user.""" 71 | 72 | setup: str = Field(description="The setup of the joke") 73 | punchline: str = Field(description="The punchline to the joke") 74 | rating: Optional[int] = Field(description="How funny the joke is, from 1 to 10") 75 | 76 | 77 | @pytest.mark.parametrize("func", [do_invoke, do_stream], ids=["invoke", "stream"]) 78 | def test_pydantic(structured_model: str, mode: dict, func: Callable) -> None: 79 | llm = ChatNVIDIA(model=structured_model, temperature=0, **mode) 80 | structured_llm = llm.with_structured_output(Joke) 81 | result = func(structured_llm, "Tell me a joke about cats") 82 | assert isinstance(result, Joke) 83 | 84 | 85 | @pytest.mark.parametrize("func", [do_invoke, do_stream], ids=["invoke", "stream"]) 86 | def test_dict(structured_model: str, mode: dict, func: Callable) -> None: 87 | json_schema = { 88 | "title": "joke", 89 | "description": "Joke to tell user.", 90 | "type": "object", 91 | "properties": { 92 | "setup": { 93 | "type": "string", 94 | "description": "The setup of the joke", 95 | }, 96 | "punchline": { 97 | "type": "string", 98 | "description": "The punchline to the joke", 99 | }, 100 | "rating": { 101 | "type": "integer", 102 | "description": "How funny the joke is, from 1 to 10", 103 | }, 104 | }, 105 | "required": ["setup", "punchline"], 106 | } 107 | 108 | llm = ChatNVIDIA(model=structured_model, temperature=0, **mode) 109 | structured_llm = llm.with_structured_output(json_schema) 110 | result = func(structured_llm, "Tell me a joke about cats") 111 | assert isinstance(result, dict) 112 | assert "setup" in result 113 | assert "punchline" in result 114 | 115 | 116 | @pytest.mark.parametrize("func", [do_invoke, do_stream], ids=["invoke", "stream"]) 117 | def test_enum(structured_model: str, mode: dict, func: Callable) -> None: 118 | class Choices(enum.Enum): 119 | A = "A is an option" 120 | B = "B is an option" 121 | C = "C is an option" 122 | 123 | llm = ChatNVIDIA(model=structured_model, temperature=0, **mode) 124 | structured_llm = llm.with_structured_output(Choices) 125 | result = func( 126 | structured_llm, 127 | """ 128 | What does 1+1 equal? 129 | A. -100 130 | B. 2 131 | C. doorstop 132 | """, 133 | ) 134 | assert isinstance(result, Choices) 135 | assert result in Choices 136 | 137 | 138 | @pytest.mark.parametrize("func", [do_invoke, do_stream], ids=["invoke", "stream"]) 139 | def test_enum_incomplete(structured_model: str, mode: dict, func: Callable) -> None: 140 | class Choices(enum.Enum): 141 | A = "A is an option you can pick" 142 | B = "B is an option you can pick" 143 | C = "C is an option you can pick" 144 | 145 | llm = ChatNVIDIA(model=structured_model, temperature=0, max_tokens=3, **mode) 146 | structured_llm = llm.with_structured_output(Choices) 147 | result = func( 148 | structured_llm, 149 | """ 150 | What does 1+1 equal? 151 | A. -100 152 | B. 2 153 | C. 
doorstop 154 | """, 155 | ) 156 | assert result is None 157 | 158 | 159 | @pytest.mark.parametrize("func", [do_invoke, do_stream], ids=["invoke", "stream"]) 160 | def test_multiple_schema(structured_model: str, mode: dict, func: Callable) -> None: 161 | class ConversationalResponse(BaseModel): 162 | """Respond in a conversational manner. Be kind and helpful.""" 163 | 164 | response: str = Field( 165 | description="A conversational response to the user's query" 166 | ) 167 | 168 | class Response(BaseModel): 169 | output: Union[Joke, ConversationalResponse] 170 | 171 | llm = ChatNVIDIA(model=structured_model, temperature=0, **mode) 172 | structured_llm = llm.with_structured_output(Response) 173 | response = func(structured_llm, "Tell me a joke about cats") 174 | assert isinstance(response, Response) 175 | assert isinstance(response.output, Joke) or isinstance( 176 | response.output, ConversationalResponse 177 | ) 178 | 179 | 180 | @pytest.mark.parametrize("func", [do_invoke, do_stream], ids=["invoke", "stream"]) 181 | def test_pydantic_incomplete(structured_model: str, mode: dict, func: Callable) -> None: 182 | # 3 tokens is not enough to construct a Joke 183 | llm = ChatNVIDIA(model=structured_model, temperature=0, max_tokens=3, **mode) 184 | structured_llm = llm.with_structured_output(Joke) 185 | result = func(structured_llm, "Tell me a joke about cats") 186 | assert result is None 187 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/ai-endpoints/tests/unit_tests/__init__.py -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/conftest.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Callable, Generator, List 3 | 4 | import pytest 5 | import requests_mock 6 | 7 | from langchain_nvidia_ai_endpoints import ( 8 | NVIDIA, 9 | ChatNVIDIA, 10 | NVIDIAEmbeddings, 11 | NVIDIARerank, 12 | ) 13 | from langchain_nvidia_ai_endpoints._statics import MODEL_TABLE 14 | 15 | 16 | @pytest.fixture( 17 | params=[ 18 | ChatNVIDIA, 19 | NVIDIAEmbeddings, 20 | NVIDIARerank, 21 | NVIDIA, 22 | ] 23 | ) 24 | def public_class(request: pytest.FixtureRequest) -> type: 25 | return request.param 26 | 27 | 28 | @pytest.fixture 29 | def empty_v1_models(requests_mock: requests_mock.Mocker) -> None: 30 | requests_mock.get("https://integrate.api.nvidia.com/v1/models", json={"data": []}) 31 | 32 | 33 | @pytest.fixture 34 | def mock_model() -> str: 35 | return "mock-model" 36 | 37 | 38 | @pytest.fixture(autouse=True) 39 | def mock_v1_models(requests_mock: requests_mock.Mocker, mock_model: str) -> None: 40 | requests_mock.get( 41 | re.compile(".*/v1/models"), 42 | json={ 43 | "data": [ 44 | {"id": mock_model}, 45 | ] 46 | }, 47 | ) 48 | 49 | 50 | @pytest.fixture(autouse=True) 51 | def reset_model_table() -> Generator[None, None, None]: 52 | """ 53 | Reset MODEL_TABLE between tests. 
54 | """ 55 | original = MODEL_TABLE.copy() 56 | yield 57 | MODEL_TABLE.clear() 58 | MODEL_TABLE.update(original) 59 | 60 | 61 | @pytest.fixture 62 | def mock_streaming_response( 63 | requests_mock: requests_mock.Mocker, mock_model: str 64 | ) -> Callable: 65 | def builder(chunks: List[str]) -> None: 66 | requests_mock.post( 67 | "https://integrate.api.nvidia.com/v1/chat/completions", 68 | text="\n\n".join( 69 | [ 70 | 'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"bogus","choices":[{"index":0,"delta":{"role":"assistant","content":null},"logprobs":null,"finish_reason":null}]}', # noqa: E501 71 | *[ 72 | f'data: {{"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"bogus","choices":[{{"index":0,"delta":{{"role":null,"content":"{content}"}},"logprobs":null,"finish_reason":null}}]}}' # noqa: E501 73 | for content in chunks 74 | ], 75 | 'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"bogus","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}]}', # noqa: E501 76 | "data: [DONE]", 77 | ] 78 | ), 79 | ) 80 | 81 | return builder 82 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_202_polling.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import requests_mock 4 | from langchain_core.messages import AIMessage 5 | 6 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 7 | 8 | 9 | def test_polling_auth_header( 10 | requests_mock: requests_mock.Mocker, 11 | mock_model: str, 12 | ) -> None: 13 | infer_url = "https://integrate.api.nvidia.com/v1/chat/completions" 14 | polling_url = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/test-request-id" 15 | 16 | requests_mock.post( 17 | infer_url, status_code=202, headers={"NVCF-REQID": "test-request-id"}, json={} 18 | ) 19 | 20 | requests_mock.get( 21 | polling_url, 22 | status_code=200, 23 | json={ 24 | "id": "mock-id", 25 | "created": 1234567890, 26 | "object": "chat.completion", 27 | "model": mock_model, 28 | "choices": [ 29 | { 30 | "index": 0, 31 | "message": {"role": "assistant", "content": "WORKED"}, 32 | } 33 | ], 34 | }, 35 | ) 36 | 37 | warnings.filterwarnings("ignore", r".*type is unknown and inference may fail.*") 38 | client = ChatNVIDIA(model=mock_model, api_key="BOGUS") 39 | response = client.invoke("IGNORED") 40 | 41 | # expected behavior - 42 | # - first a GET request to /v1/models to check the model exists 43 | # - second a POST request to /v1/chat/completions 44 | # - third a GET request to /v2/nvcf/pexec/status/test-request-id 45 | # we want to check on the second and third requests 46 | 47 | assert len(requests_mock.request_history) == 3 48 | 49 | infer_request = requests_mock.request_history[-2] 50 | assert infer_request.method == "POST" 51 | assert infer_request.url == infer_url 52 | assert infer_request.headers["Authorization"] == "Bearer BOGUS" 53 | 54 | poll_request = requests_mock.request_history[-1] 55 | assert poll_request.method == "GET" 56 | assert poll_request.url == polling_url 57 | assert poll_request.headers["Authorization"] == "Bearer BOGUS" 58 | 59 | assert isinstance(response, AIMessage) 60 | assert response.content == "WORKED" 61 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_api_key.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from contextlib import contextmanager 3 | from typing import Any, Generator 4 | 5 | import pytest 6 | from pydantic import SecretStr 7 | from requests_mock import Mocker 8 | 9 | 10 | @contextmanager 11 | def no_env_var(var: str) -> Generator[None, None, None]: 12 | try: 13 | if val := os.environ.get(var, None): 14 | del os.environ[var] 15 | yield 16 | finally: 17 | if val: 18 | os.environ[var] = val 19 | else: 20 | if var in os.environ: 21 | del os.environ[var] 22 | 23 | 24 | @pytest.fixture(autouse=True) 25 | def mock_endpoint_models(requests_mock: Mocker) -> None: 26 | requests_mock.get( 27 | "https://integrate.api.nvidia.com/v1/models", 28 | json={ 29 | "data": [ 30 | { 31 | "id": "meta/llama3-8b-instruct", 32 | "object": "model", 33 | "created": 1234567890, 34 | "owned_by": "OWNER", 35 | "root": "model1", 36 | }, 37 | ] 38 | }, 39 | ) 40 | 41 | 42 | @pytest.fixture(autouse=True) 43 | def mock_v1_local_models(requests_mock: Mocker) -> None: 44 | requests_mock.get( 45 | "https://test_url/v1/models", 46 | json={ 47 | "data": [ 48 | { 49 | "id": "model", 50 | "object": "model", 51 | "created": 1234567890, 52 | "owned_by": "OWNER", 53 | "root": "model", 54 | }, 55 | ] 56 | }, 57 | ) 58 | 59 | 60 | def test_create_without_api_key(public_class: type) -> None: 61 | with no_env_var("NVIDIA_API_KEY"): 62 | with pytest.warns(UserWarning) as record: 63 | public_class() 64 | assert len(record) == 1 65 | assert "API key is required for the hosted" in str(record[0].message) 66 | 67 | 68 | def test_create_unknown_url_no_api_key(public_class: type) -> None: 69 | with no_env_var("NVIDIA_API_KEY"): 70 | with pytest.warns(UserWarning) as record: 71 | public_class(base_url="https://test_url/v1") 72 | assert len(record) == 1 73 | assert "Default model is set as" in str(record[0].message) 74 | 75 | 76 | @pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"]) 77 | def test_create_with_api_key(public_class: type, param: str) -> None: 78 | with no_env_var("NVIDIA_API_KEY"): 79 | public_class(**{param: "just testing no failure"}) 80 | 81 | 82 | def test_api_key_priority(public_class: type) -> None: 83 | def get_api_key(instance: Any) -> str: 84 | return instance._client.api_key.get_secret_value() 85 | 86 | with no_env_var("NVIDIA_API_KEY"): 87 | os.environ["NVIDIA_API_KEY"] = "ENV" 88 | assert get_api_key(public_class()) == "ENV" 89 | assert get_api_key(public_class(nvidia_api_key="PARAM")) == "PARAM" 90 | assert get_api_key(public_class(api_key="PARAM")) == "PARAM" 91 | assert get_api_key(public_class(api_key="LOW", nvidia_api_key="HIGH")) == "HIGH" 92 | 93 | 94 | def test_api_key_type(public_class: type) -> None: 95 | # Test case to make sure the api_key is SecretStr and not str 96 | def get_api_key(instance: Any) -> str: 97 | return instance._client.api_key 98 | 99 | with no_env_var("NVIDIA_API_KEY"): 100 | os.environ["NVIDIA_API_KEY"] = "ENV" 101 | assert type(get_api_key(public_class())) == SecretStr 102 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_available_models.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Any 3 | 4 | from langchain_nvidia_ai_endpoints import Model, register_model 5 | 6 | 7 | def test_model_listing(public_class: Any, mock_model: str) -> None: 8 | warnings.filterwarnings("ignore", message=f"Default model is set as: 
{mock_model}") 9 | # we set base_url to avoid having results filtered by the public_class name 10 | models = public_class.get_available_models(base_url="https://mock/v1") 11 | assert any(model.id == mock_model for model in models) 12 | 13 | 14 | def test_model_listing_hosted( 15 | public_class: Any, 16 | mock_model: str, 17 | ) -> None: 18 | model = Model( 19 | id=mock_model, 20 | model_type={ 21 | "ChatNVIDIA": "chat", 22 | "NVIDIAEmbeddings": "embedding", 23 | "NVIDIARerank": "ranking", 24 | "NVIDIA": "completions", 25 | }[public_class.__name__], 26 | client=public_class.__name__, 27 | endpoint="BOGUS", 28 | ) 29 | register_model(model) 30 | models = public_class.get_available_models() 31 | assert any(model.id == mock_model for model in models) 32 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_base_url.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import warnings 4 | from typing import Any 5 | 6 | import pytest 7 | from requests_mock import Mocker 8 | 9 | from .test_api_key import no_env_var 10 | 11 | 12 | @pytest.fixture(autouse=True) 13 | def mock_v1_local_models(requests_mock: Mocker) -> None: 14 | requests_mock.get( 15 | re.compile(r".*/models"), 16 | json={ 17 | "data": [ 18 | { 19 | "id": "model1", 20 | "object": "model", 21 | "created": 1234567890, 22 | "owned_by": "OWNER", 23 | "root": "model1", 24 | }, 25 | ] 26 | }, 27 | ) 28 | 29 | 30 | def test_create_without_base_url(public_class: type) -> None: 31 | with no_env_var("NVIDIA_BASE_URL"): 32 | x = public_class(api_key="BOGUS") 33 | assert x.base_url == "https://integrate.api.nvidia.com/v1" 34 | assert x._client.base_url == "https://integrate.api.nvidia.com/v1" 35 | 36 | 37 | @pytest.mark.parametrize( 38 | "base_url, param", 39 | [("https://test_url/v1", "nvidia_base_url"), ("https://test_url/v1", "base_url")], 40 | ) 41 | def test_create_with_base_url(public_class: type, base_url: str, param: str) -> None: 42 | with no_env_var("NVIDIA_BASE_URL"): 43 | assert public_class(model="model1", **{param: base_url}).base_url == base_url 44 | 45 | 46 | def test_base_url_priority(public_class: type) -> None: 47 | ENV_URL = "https://ENV/v1" 48 | NV_PARAM_URL = "https://NV_PARAM/v1" 49 | PARAM_URL = "https://PARAM/v1" 50 | 51 | def get_base_url(**kwargs: Any) -> str: 52 | return public_class(model="model1", **kwargs).base_url 53 | 54 | with no_env_var("NVIDIA_BASE_URL"): 55 | os.environ["NVIDIA_BASE_URL"] = ENV_URL 56 | assert get_base_url() == ENV_URL 57 | assert get_base_url(nvidia_base_url=NV_PARAM_URL) == NV_PARAM_URL 58 | assert get_base_url(base_url=PARAM_URL) == PARAM_URL 59 | assert ( 60 | get_base_url(base_url=PARAM_URL, nvidia_base_url=NV_PARAM_URL) 61 | == NV_PARAM_URL 62 | ) 63 | 64 | 65 | @pytest.mark.parametrize( 66 | "base_url", 67 | [ 68 | "bogus", 69 | "http:/", 70 | "http://", 71 | "http:/oops", 72 | ], 73 | ) 74 | def test_expect_warn_base_url(public_class: type, base_url: str) -> None: 75 | with pytest.warns(UserWarning) as record: 76 | public_class(model="model1", base_url=base_url) 77 | assert len(record) > 0 78 | assert "url appears incorrect" in str(record[0].message) 79 | 80 | 81 | @pytest.mark.parametrize( 82 | "base_url", 83 | ["https://integrate.api.nvidia.com/v1", "https://ai.api.nvidia.com/v1"], 84 | ) 85 | def test_param_base_url_hosted(public_class: type, base_url: str) -> None: 86 | with no_env_var("NVIDIA_BASE_URL"): 87 | client = public_class(api_key="BOGUS", 
base_url=base_url) 88 | assert client._client.is_hosted 89 | 90 | 91 | @pytest.mark.parametrize( 92 | "base_url", 93 | [ 94 | "https://localhost", 95 | "http://localhost:8888", 96 | "http://0.0.0.0:8888/v1", 97 | "http://0.0.0.0:8888/v1/", 98 | "http://blah/some/other/path/v1", 99 | ], 100 | ) 101 | def test_param_base_url_not_hosted(public_class: type, base_url: str) -> None: 102 | warnings.filterwarnings("ignore", r".*does not end in /v1.*") 103 | with no_env_var("NVIDIA_BASE_URL"): 104 | client = public_class(model="model1", base_url=base_url) 105 | assert not client._client.is_hosted 106 | 107 | 108 | @pytest.mark.parametrize( 109 | "base_url", 110 | [ 111 | "http://localhost:8888/embeddings", 112 | "http://0.0.0.0:8888/rankings", 113 | "http://localhost:8888/embeddings/", 114 | "http://0.0.0.0:8888/rankings/", 115 | "http://localhost:8888/chat/completions", 116 | "http://localhost:8080/v1/embeddings", 117 | "http://0.0.0.0:8888/v1/rankings", 118 | ], 119 | ) 120 | def test_expect_warn(public_class: type, base_url: str) -> None: 121 | with pytest.warns(UserWarning) as record: 122 | public_class(model="model1", base_url=base_url) 123 | assert len(record) == 1 124 | assert "does not end in /v1" in str(record[0].message) 125 | 126 | 127 | @pytest.mark.parametrize( 128 | "base_url", 129 | [ 130 | "http://localhost:8888/embeddings", 131 | "http://0.0.0.0:8888/rankings", 132 | "http://localhost:8888/embeddings/", 133 | "http://0.0.0.0:8888/rankings/", 134 | "http://localhost:8888/chat/completions", 135 | "http://localhost:8080/v1/embeddings", 136 | "http://0.0.0.0:8888/v1/rankings", 137 | ], 138 | ) 139 | @pytest.mark.parametrize("false_value", ["false", "False", "0"]) 140 | def test_expect_skip_check(public_class: type, base_url: str, false_value: str) -> None: 141 | orig = os.environ.get("NVIDIA_APPEND_API_VERSION", None) 142 | warnings.filterwarnings("error") 143 | 144 | try: 145 | os.environ["NVIDIA_APPEND_API_VERSION"] = false_value 146 | public_class(model="model1", base_url=base_url) 147 | finally: 148 | warnings.resetwarnings() 149 | if orig is None: 150 | os.environ.pop("NVIDIA_APPEND_API_VERSION", None) 151 | else: 152 | os.environ["NVIDIA_APPEND_API_VERSION"] = orig 153 | 154 | 155 | @pytest.mark.parametrize( 156 | "base_url", 157 | [ 158 | "http://localhost:8888/embeddings", 159 | "http://0.0.0.0:8888/rankings", 160 | "http://localhost:8888/embeddings/", 161 | "http://0.0.0.0:8888/rankings/", 162 | "http://localhost:8888/chat/completions", 163 | "http://localhost:8080/v1/embeddings", 164 | "http://0.0.0.0:8888/v1/rankings", 165 | ], 166 | ) 167 | @pytest.mark.parametrize( 168 | "true_value", 169 | ["true", "True", "yes", "1", "anything", "enabled", "on", ""], 170 | ) 171 | def test_expect_not_skip_check( 172 | public_class: type, base_url: str, true_value: str 173 | ) -> None: 174 | warnings.filterwarnings("ignore", r".*does not end in /v1.*") 175 | orig = os.environ.get("NVIDIA_APPEND_API_VERSION", None) 176 | 177 | try: 178 | os.environ["NVIDIA_APPEND_API_VERSION"] = true_value 179 | obj = public_class(model="model1", base_url=base_url) 180 | assert obj.base_url.rstrip("/").endswith( 181 | "/v1" 182 | ), f"Expected {obj.base_url} to end with '/v1'" 183 | finally: 184 | warnings.resetwarnings() 185 | if orig is None: 186 | os.environ.pop("NVIDIA_APPEND_API_VERSION", None) 187 | else: 188 | os.environ["NVIDIA_APPEND_API_VERSION"] = orig 189 | 190 | 191 | def test_default_hosted(public_class: type) -> None: 192 | x = public_class(api_key="BOGUS") 193 | assert x._client.is_hosted 
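# The two parametrized tests below cover proxy-style URLs where extra path
# components sit in front of /v1 (for example a reverse-proxy prefix): the
# client is expected to keep that prefix in base_url, and listing models is
# expected to hit <base_url>/models under the same prefix.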
194 | 195 | 196 | @pytest.mark.parametrize( 197 | "base_url", 198 | [ 199 | "http://host/path0/path1/path2/v1", 200 | "http://host:123/path0/path1/path2/v1/", 201 | ], 202 | ) 203 | def test_proxy_base_url( 204 | public_class: type, base_url: str, requests_mock: Mocker 205 | ) -> None: 206 | with no_env_var("NVIDIA_BASE_URL"): 207 | client = public_class(model="model1", base_url=base_url) 208 | assert base_url.startswith(client.base_url) 209 | 210 | 211 | @pytest.mark.parametrize( 212 | "base_url", 213 | [ 214 | "http://host/path0/path1/path2/v1", 215 | "http://host:123/path0/path1/path2/v1/", 216 | ], 217 | ) 218 | def test_proxy_base_url_models( 219 | public_class: type, base_url: str, requests_mock: Mocker 220 | ) -> None: 221 | with no_env_var("NVIDIA_BASE_URL"): 222 | client = public_class(model="model1", base_url=base_url) 223 | client.available_models 224 | models_url = base_url.rstrip("/") + "/models" 225 | assert requests_mock.last_request 226 | assert requests_mock.last_request.url == models_url 227 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_chat_models.py: -------------------------------------------------------------------------------- 1 | """Test chat model integration.""" 2 | 3 | import warnings 4 | 5 | import pytest 6 | from requests_mock import Mocker 7 | 8 | from langchain_nvidia_ai_endpoints.chat_models import ChatNVIDIA 9 | 10 | 11 | @pytest.fixture 12 | def mock_local_models(requests_mock: Mocker) -> None: 13 | requests_mock.get( 14 | "http://localhost:8888/v1/models", 15 | json={ 16 | "data": [ 17 | { 18 | "id": "unknown_model", 19 | "object": "model", 20 | "created": 1234567890, 21 | "owned_by": "OWNER", 22 | "root": "unknown_model", 23 | }, 24 | ] 25 | }, 26 | ) 27 | 28 | 29 | def test_base_url_unknown_model(mock_local_models: None) -> None: 30 | llm = ChatNVIDIA(model="unknown_model", base_url="http://localhost:8888/v1") 31 | assert llm.model == "unknown_model" 32 | 33 | 34 | def test_integration_initialization() -> None: 35 | """Test chat model initialization.""" 36 | ChatNVIDIA( 37 | model="meta/llama2-70b", 38 | nvidia_api_key="nvapi-...", 39 | temperature=0.5, 40 | top_p=0.9, 41 | max_tokens=50, 42 | ) 43 | ChatNVIDIA(model="meta/llama2-70b", nvidia_api_key="nvapi-...") 44 | 45 | 46 | def test_unavailable(empty_v1_models: None) -> None: 47 | with pytest.warns(UserWarning, match="Model not-a-real-model is unknown"): 48 | ChatNVIDIA(api_key="BOGUS", model="not-a-real-model") 49 | 50 | 51 | def test_max_tokens_deprecation_warning() -> None: 52 | """Test that using max_tokens raises a deprecation warning.""" 53 | with pytest.warns( 54 | DeprecationWarning, 55 | match=( 56 | "The 'max_tokens' parameter is deprecated and will be removed " 57 | "in a future version" 58 | ), 59 | ): 60 | ChatNVIDIA(model="meta/llama2-70b", max_tokens=50) 61 | 62 | 63 | def test_max_completion_tokens() -> None: 64 | """Test that max_completion_tokens works without warning.""" 65 | with warnings.catch_warnings(record=True) as w: 66 | warnings.simplefilter("always") 67 | llm = ChatNVIDIA( 68 | model="meta/llama2-70b", 69 | max_completion_tokens=50, 70 | nvidia_api_key="nvapi-...", 71 | ) 72 | assert len(w) == 0 73 | assert llm.max_tokens == 50 74 | payload = llm._get_payload( 75 | inputs=[{"role": "user", "content": "test"}], 76 | stop=None, 77 | ) 78 | assert payload["max_tokens"] == 50 79 | 80 | 81 | def test_max_tokens_value() -> None: 82 | """Test that max_tokens value is correctly set and reflected in 
payload.""" 83 | llm = ChatNVIDIA( 84 | model="meta/llama2-70b", 85 | max_tokens=50, 86 | nvidia_api_key="nvapi-...", 87 | ) 88 | assert llm.max_tokens == 50 89 | payload = llm._get_payload( 90 | inputs=[{"role": "user", "content": "test"}], 91 | stop=None, 92 | ) 93 | assert payload["max_tokens"] == 50 94 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_completions_models.py: -------------------------------------------------------------------------------- 1 | import json 2 | from functools import reduce 3 | from operator import add 4 | from typing import Any, Callable, List 5 | 6 | import pytest 7 | import requests_mock 8 | 9 | from langchain_nvidia_ai_endpoints import NVIDIA 10 | 11 | 12 | def invoke(llm: NVIDIA, prompt: str, **kwargs: Any) -> str: 13 | return llm.invoke(prompt, **kwargs) 14 | 15 | 16 | def stream(llm: NVIDIA, prompt: str, **kwargs: Any) -> str: 17 | return reduce(add, llm.stream(prompt, **kwargs)) 18 | 19 | 20 | mock_response = { 21 | "id": "ID", 22 | "object": "text_completion", 23 | "created": 1234567890, 24 | "model": "BOGUS", 25 | "choices": [ 26 | { 27 | "index": 0, 28 | "text": "COMPLETION", 29 | } 30 | ], 31 | "usage": {"prompt_tokens": 7, "total_tokens": 207, "completion_tokens": 200}, 32 | } 33 | 34 | 35 | @pytest.fixture(scope="function") 36 | def mock_v1_completions_invoke( 37 | requests_mock: requests_mock.Mocker, 38 | ) -> requests_mock.Mocker: 39 | requests_mock.post( 40 | "https://integrate.api.nvidia.com/v1/completions", 41 | json=mock_response, 42 | ) 43 | return requests_mock 44 | 45 | 46 | @pytest.fixture(scope="function") 47 | def mock_v1_completions_stream( 48 | requests_mock: requests_mock.Mocker, 49 | ) -> requests_mock.Mocker: 50 | requests_mock.post( 51 | "https://integrate.api.nvidia.com/v1/completions", 52 | text="\n\n".join( 53 | [ 54 | f"data: {json.dumps(mock_response)}", 55 | "data: [DONE]", 56 | ] 57 | ), 58 | ) 59 | return requests_mock 60 | 61 | 62 | @pytest.mark.parametrize( 63 | "param, value", 64 | [ 65 | ("frequency_penalty", [0.25, 0.5, 0.75]), 66 | ("max_tokens", [2, 32, 512]), 67 | ("presence_penalty", [0.25, 0.5, 0.75]), 68 | ("seed", [1, 1234, 4321]), 69 | ("stop", ["Hello", "There", "World"]), 70 | ("temperature", [0, 0.5, 1]), 71 | ("top_p", [0, 0.5, 1]), 72 | ("best_of", [1, 5, 10]), 73 | ("echo", [True, False, True]), 74 | ("logit_bias", [{"hello": 1.0}, {"there": 1.0}, {"world": 1.0}]), 75 | ("logprobs", [1, 2, 3]), 76 | ("n", [1, 2, 3]), 77 | ("suffix", ["Hello", "There", "World"]), 78 | ("user", ["Bob", "Alice", "Eve"]), 79 | ], 80 | ) 81 | @pytest.mark.parametrize( 82 | "func, mock_name", 83 | [(invoke, "mock_v1_completions_invoke"), (stream, "mock_v1_completions_stream")], 84 | ids=["invoke", "stream"], 85 | ) 86 | def test_params( 87 | param: str, 88 | value: List[Any], 89 | func: Callable, 90 | mock_name: str, 91 | request: pytest.FixtureRequest, 92 | ) -> None: 93 | """ 94 | This tests the following... 95 | - priority order (init -> bind -> infer) 96 | - param passed to init, bind, invoke / stream 97 | ...for each known Completion API param. 
98 | """ 99 | 100 | mock = request.getfixturevalue(mock_name) 101 | 102 | init, bind, infer = value 103 | 104 | llm = NVIDIA(api_key="BOGUS", **{param: init}) 105 | func(llm, "IGNORED") 106 | request_payload = mock.last_request.json() 107 | assert param in request_payload 108 | assert request_payload[param] == init 109 | 110 | bound_llm = llm.bind(**{param: bind}) 111 | func(bound_llm, "IGNORED") 112 | request_payload = mock.last_request.json() 113 | assert param in request_payload 114 | assert request_payload[param] == bind 115 | 116 | func(bound_llm, "IGNORED", **{param: infer}) 117 | request_payload = mock.last_request.json() 118 | assert param in request_payload 119 | assert request_payload[param] == infer 120 | 121 | 122 | @pytest.mark.parametrize( 123 | "func, mock_name", 124 | [(invoke, "mock_v1_completions_invoke"), (stream, "mock_v1_completions_stream")], 125 | ids=["invoke", "stream"], 126 | ) 127 | def test_params_unknown( 128 | func: Callable, 129 | mock_name: str, 130 | request: pytest.FixtureRequest, 131 | ) -> None: 132 | request.getfixturevalue(mock_name) 133 | 134 | with pytest.warns(UserWarning) as record: 135 | llm = NVIDIA(api_key="BOGUS", init_unknown="INIT") 136 | assert len(record) == 1 137 | assert "Unrecognized, ignored arguments: {'init_unknown'}" in str(record[0].message) 138 | 139 | with pytest.warns(UserWarning) as record: 140 | func(llm, "IGNORED", arg_unknown="ARG") 141 | assert len(record) == 1 142 | assert "Unrecognized, ignored arguments: {'arg_unknown'}" in str(record[0].message) 143 | 144 | bound_llm = llm.bind(bind_unknown="BIND") 145 | 146 | with pytest.warns(UserWarning) as record: 147 | func(bound_llm, "IGNORED") 148 | assert len(record) == 1 149 | assert "Unrecognized, ignored arguments: {'bind_unknown'}" in str(record[0].message) 150 | 151 | 152 | def test_identifying_params() -> None: 153 | llm = NVIDIA(api_key="BOGUS") 154 | assert set(llm._identifying_params.keys()) == {"model", "base_url"} 155 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_embeddings.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Generator 2 | 3 | import pytest 4 | from requests_mock import Mocker 5 | 6 | from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings 7 | 8 | 9 | @pytest.fixture 10 | def embedding(requests_mock: Mocker) -> Generator[NVIDIAEmbeddings, None, None]: 11 | model = "mock-model" 12 | requests_mock.get( 13 | "https://integrate.api.nvidia.com/v1/models", 14 | json={ 15 | "data": [ 16 | { 17 | "id": model, 18 | "object": "model", 19 | "created": 1234567890, 20 | "owned_by": "OWNER", 21 | }, 22 | ] 23 | }, 24 | ) 25 | requests_mock.post( 26 | "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/ID", 27 | json={ 28 | "data": [ 29 | { 30 | "embedding": [ 31 | 0.1, 32 | 0.2, 33 | 0.3, 34 | ], 35 | "index": 0, 36 | } 37 | ], 38 | "usage": {"prompt_tokens": 8, "total_tokens": 8}, 39 | }, 40 | ) 41 | with pytest.warns(UserWarning) as record: 42 | yield NVIDIAEmbeddings(model=model, nvidia_api_key="a-bogus-key") 43 | assert len(record) == 1 44 | assert "type is unknown and inference may fail" in str(record[0].message) 45 | 46 | 47 | def test_embed_documents_negative_input_int(embedding: NVIDIAEmbeddings) -> None: 48 | documents = 1 49 | with pytest.raises(ValueError): 50 | embedding.embed_documents(documents) # type: ignore 51 | 52 | 53 | def test_embed_documents_negative_input_float(embedding: NVIDIAEmbeddings) -> None: 54 | 
documents = 1.0 55 | with pytest.raises(ValueError): 56 | embedding.embed_documents(documents) # type: ignore 57 | 58 | 59 | def test_embed_documents_negative_input_str(embedding: NVIDIAEmbeddings) -> None: 60 | documents = "subscriptable string, not a list" 61 | with pytest.raises(ValueError): 62 | embedding.embed_documents(documents) # type: ignore 63 | 64 | 65 | def test_embed_documents_negative_input_list_int(embedding: NVIDIAEmbeddings) -> None: 66 | documents = [1, 2, 3] 67 | with pytest.raises(ValueError): 68 | embedding.embed_documents(documents) # type: ignore 69 | 70 | 71 | def test_embed_documents_negative_input_list_float(embedding: NVIDIAEmbeddings) -> None: 72 | documents = [1.0, 2.0, 3.0] 73 | with pytest.raises(ValueError): 74 | embedding.embed_documents(documents) # type: ignore 75 | 76 | 77 | def test_embed_documents_negative_input_list_mixed(embedding: NVIDIAEmbeddings) -> None: 78 | documents = ["1", 2.0, 3] 79 | with pytest.raises(ValueError): 80 | embedding.embed_documents(documents) # type: ignore 81 | 82 | 83 | @pytest.mark.parametrize("truncate", [True, False, 1, 0, 1.0, "BOGUS"]) 84 | def test_embed_query_truncate_invalid(truncate: Any) -> None: 85 | with pytest.raises(ValueError): 86 | NVIDIAEmbeddings(truncate=truncate) 87 | 88 | 89 | # todo: test max_batch_size (-50, 0, 1, 50) 90 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_imports.py: -------------------------------------------------------------------------------- 1 | from langchain_nvidia import __all__ as short_all 2 | from langchain_nvidia_ai_endpoints import __all__ as long_all 3 | 4 | EXPECTED_ALL = [ 5 | "ChatNVIDIA", 6 | "NVIDIAEmbeddings", 7 | "NVIDIARerank", 8 | "NVIDIA", 9 | "register_model", 10 | "Model", 11 | ] 12 | 13 | 14 | def test_all_imports() -> None: 15 | assert sorted(EXPECTED_ALL) == sorted(short_all) 16 | assert sorted(EXPECTED_ALL) == sorted(long_all) 17 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_messages.py: -------------------------------------------------------------------------------- 1 | import requests_mock 2 | from langchain_core.messages import AIMessage 3 | 4 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 5 | 6 | 7 | def test_invoke_aimessage_content_none(requests_mock: requests_mock.Mocker) -> None: 8 | requests_mock.post( 9 | "https://integrate.api.nvidia.com/v1/chat/completions", 10 | json={ 11 | "id": "mock-id", 12 | "created": 1234567890, 13 | "object": "chat.completion", 14 | "model": "mock-model", 15 | "choices": [ 16 | { 17 | "index": 0, 18 | "message": {"role": "assistant", "content": "WORKED"}, 19 | } 20 | ], 21 | }, 22 | ) 23 | 24 | empty_aimessage = AIMessage(content="EMPTY") 25 | empty_aimessage.content = None # type: ignore 26 | 27 | llm = ChatNVIDIA(api_key="BOGUS") 28 | response = llm.invoke([empty_aimessage]) 29 | request = requests_mock.request_history[0] 30 | assert request.method == "POST" 31 | assert request.url == "https://integrate.api.nvidia.com/v1/chat/completions" 32 | message = request.json()["messages"][0] 33 | assert "content" in message and message["content"] != "EMPTY" 34 | assert "content" in message and message["content"] is None 35 | assert isinstance(response, AIMessage) 36 | assert response.content == "WORKED" 37 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_metadata.py: 
-------------------------------------------------------------------------------- 1 | from typing import Any, Optional, cast 2 | 3 | import pytest 4 | import requests_mock 5 | from langchain_core.messages import AIMessage, BaseMessageChunk, HumanMessage 6 | 7 | # from langchain_core.messages.ai import UsageMetadata 8 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 9 | 10 | mock_response = { 11 | "id": "chat-c891882b0c4448a5b258c63d2b031c82", 12 | "object": "chat.completion", 13 | "created": 1729173278, 14 | "model": "meta/llama-3.2-3b-instruct", 15 | "choices": [ 16 | { 17 | "index": 0, 18 | "message": {"role": "assistant", "content": "A simple yet"}, 19 | "logprobs": "", 20 | "finish_reason": "tool_calls", 21 | "stop_reason": "", 22 | } 23 | ], 24 | "usage": {"prompt_tokens": 12, "total_tokens": 15, "completion_tokens": 3}, 25 | "prompt_logprobs": "", 26 | } 27 | 28 | 29 | @pytest.fixture 30 | def mock_local_models_metadata(requests_mock: requests_mock.Mocker) -> None: 31 | mock_response["tool_calls"] = ( 32 | [ 33 | { 34 | "id": "tool-ID", 35 | "type": "function", 36 | "function": { 37 | "name": "magic", 38 | "arguments": [], 39 | }, 40 | } 41 | ], 42 | ) 43 | requests_mock.post("http://localhost:8888/v1/chat/completions", json=mock_response) 44 | 45 | 46 | @pytest.fixture 47 | def mock_local_models_stream_metadata(requests_mock: requests_mock.Mocker) -> None: 48 | response_contents = "\n\n".join( 49 | [ 50 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 51 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"id":"call_ID0","type":"function","function":{"name":"xxyyzz","arguments":""}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 52 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\\"a\\""}}]},"logprobs":null, "model_name":"dummy","finish_reason":null}]}', # noqa: E501 53 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":": 11,"}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 54 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" \\"b\\": "}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 55 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"3}"}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 56 | 'data: 
{"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"id":"call_ID1","type":"function","function":{"name":"zzyyxx","arguments":""}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 57 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{\\"a\\""}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 58 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":": 5, "}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 59 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"\\"b\\": 3"}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 60 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"}"}}]},"logprobs":null,"model_name":"dummy","finish_reason":null}]}', # noqa: E501 61 | 'data: {"id":"chatcmpl-ID0","object":"chat.completion.chunk","created":1721155403,"model":"magic-model","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"model_name":"dummy","finish_reason":"tool_calls"}]}', # noqa: E501 62 | ] 63 | ) 64 | requests_mock.post( 65 | "http://localhost:8888/v1/chat/completions", 66 | text=response_contents, 67 | ) 68 | 69 | 70 | def response_metadata_checks(result: Any) -> None: 71 | assert isinstance(result, AIMessage) 72 | assert result.response_metadata 73 | assert all( 74 | k in result.response_metadata for k in ("model_name", "role", "token_usage") 75 | ) 76 | 77 | assert isinstance(result.content, str) 78 | assert result.response_metadata.get("model_name") is not None 79 | 80 | if result.usage_metadata is not None: 81 | assert isinstance(result.usage_metadata, dict) 82 | usage_metadata = result.usage_metadata 83 | 84 | assert usage_metadata["input_tokens"] > 0 85 | assert usage_metadata["output_tokens"] > 0 86 | assert usage_metadata["total_tokens"] > 0 87 | 88 | 89 | def test_response_metadata(mock_local_models_metadata: None) -> None: 90 | llm = ChatNVIDIA(base_url="http://localhost:8888/v1") 91 | result = llm.invoke([HumanMessage(content="I'm PickleRick")]) 92 | response_metadata_checks(result) 93 | 94 | 95 | async def test_async_response_metadata(mock_local_models_metadata: None) -> None: 96 | llm = ChatNVIDIA(base_url="http://localhost:8888/v1") 97 | result = await llm.ainvoke([HumanMessage(content="I'm PickleRick")], logprobs=True) 98 | response_metadata_checks(result) 99 | 100 | 101 | def test_response_metadata_streaming(mock_local_models_stream_metadata: None) -> None: 102 | llm = ChatNVIDIA(base_url="http://localhost:8888/v1") 103 | full: Optional[BaseMessageChunk] = None 104 | for chunk in llm.stream("I'm Pickle Rick"): 105 | assert isinstance(chunk.content, str) 106 | full = chunk if full is None else full + chunk 107 | assert all( 108 | k in cast(BaseMessageChunk, full).response_metadata 
109 | for k in ("model_name", "finish_reason") 110 | ) 111 | 112 | 113 | async def test_async_response_metadata_streaming( 114 | mock_local_models_stream_metadata: None, 115 | ) -> None: 116 | llm = ChatNVIDIA(base_url="http://localhost:8888/v1") 117 | full: Optional[BaseMessageChunk] = None 118 | async for chunk in llm.astream("I'm Pickle Rick"): 119 | assert isinstance(chunk.content, str) 120 | full = chunk if full is None else full + chunk 121 | assert all( 122 | k in cast(BaseMessageChunk, full).response_metadata 123 | for k in ("model_name", "finish_reason") 124 | ) 125 | 126 | 127 | def test_stream_tool_calls( 128 | mock_local_models_stream_metadata: None, 129 | ) -> None: 130 | llm = ChatNVIDIA(base_url="http://localhost:8888/v1") 131 | generator = llm.stream( 132 | "What is 11 xxyyzz 3 zzyyxx 5?", 133 | ) 134 | response = next(generator) 135 | for chunk in generator: 136 | response += chunk 137 | assert isinstance(response, AIMessage) 138 | assert len(response.tool_calls) == 2 139 | tool_call0 = response.tool_calls[0] 140 | assert tool_call0["name"] == "xxyyzz" 141 | assert tool_call0["args"] == {"b": 3, "a": 11} 142 | tool_call1 = response.tool_calls[1] 143 | assert tool_call1["name"] == "zzyyxx" 144 | assert tool_call1["args"] == {"b": 3, "a": 5} 145 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_model.py: -------------------------------------------------------------------------------- 1 | from itertools import chain 2 | from typing import Any 3 | 4 | import pytest 5 | from requests_mock import Mocker 6 | 7 | from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank 8 | from langchain_nvidia_ai_endpoints._statics import ( 9 | CHAT_MODEL_TABLE, 10 | EMBEDDING_MODEL_TABLE, 11 | MODEL_TABLE, 12 | QA_MODEL_TABLE, 13 | RANKING_MODEL_TABLE, 14 | VLM_MODEL_TABLE, 15 | ) 16 | 17 | 18 | @pytest.fixture 19 | def known_unknown() -> str: 20 | return "mock-model" 21 | 22 | 23 | @pytest.fixture(autouse=True) 24 | def mock_v1_models(requests_mock: Mocker, known_unknown: str) -> None: 25 | requests_mock.get( 26 | "https://integrate.api.nvidia.com/v1/models", 27 | json={ 28 | "data": [ 29 | { 30 | "id": known_unknown, 31 | "object": "model", 32 | "created": 1234567890, 33 | "owned_by": "OWNER", 34 | }, 35 | ] 36 | }, 37 | ) 38 | 39 | 40 | @pytest.fixture(autouse=True) 41 | def mock_v1_local_models(requests_mock: Mocker, known_unknown: str) -> None: 42 | requests_mock.get( 43 | "http://localhost:8000/v1/models", 44 | json={ 45 | "data": [ 46 | { 47 | "id": known_unknown, 48 | "object": "model", 49 | "created": 1234567890, 50 | "owned_by": "OWNER", 51 | "root": known_unknown, 52 | }, 53 | { 54 | "id": "lora1", 55 | "object": "model", 56 | "created": 1234567890, 57 | "owned_by": "OWNER", 58 | "root": known_unknown, 59 | }, 60 | ] 61 | }, 62 | ) 63 | 64 | 65 | @pytest.mark.parametrize( 66 | "alias, client", 67 | [ 68 | (alias, ChatNVIDIA) 69 | for model in list( 70 | chain( 71 | CHAT_MODEL_TABLE.values(), 72 | VLM_MODEL_TABLE.values(), 73 | QA_MODEL_TABLE.values(), 74 | ) 75 | ) 76 | if model.aliases is not None 77 | for alias in model.aliases 78 | ] 79 | + [ 80 | (alias, NVIDIAEmbeddings) 81 | for model in EMBEDDING_MODEL_TABLE.values() 82 | if model.aliases is not None 83 | for alias in model.aliases 84 | ] 85 | + [ 86 | (alias, NVIDIARerank) 87 | for model in RANKING_MODEL_TABLE.values() 88 | if model.aliases is not None 89 | for alias in model.aliases 90 | ], 91 | ) 92 | def test_aliases(alias: 
str, client: Any) -> None: 93 | """ 94 | Test that the aliases for each model in the model table are accepted 95 | with a warning about deprecation of the alias. 96 | """ 97 | with pytest.warns(UserWarning) as record: 98 | x = client(model=alias, nvidia_api_key="a-bogus-key") 99 | assert x.model == x._client.mdl_name 100 | assert isinstance(record[0].message, Warning) 101 | assert "deprecated" in record[0].message.args[0] 102 | 103 | 104 | def test_known(public_class: type) -> None: 105 | """ 106 | Test that a model in the model table will be accepted. 107 | """ 108 | # find a model that matches the public_class under test 109 | known = None 110 | for model in MODEL_TABLE.values(): 111 | if model.client == public_class.__name__: 112 | known = model.id 113 | break 114 | assert known is not None, f"Model not found for client {public_class.__name__}" 115 | x = public_class(model=known, nvidia_api_key="a-bogus-key") 116 | assert x.model == known 117 | 118 | 119 | def test_known_unknown(public_class: type, known_unknown: str) -> None: 120 | """ 121 | Test that a model in /v1/models but not in the model table will be accepted 122 | with a warning. 123 | """ 124 | with pytest.warns(UserWarning) as record: 125 | x = public_class(model=known_unknown, nvidia_api_key="a-bogus-key") 126 | assert x.model == known_unknown 127 | assert isinstance(record[0].message, Warning) 128 | assert "Found" in record[0].message.args[0] 129 | assert "unknown" in record[0].message.args[0] 130 | 131 | 132 | def test_unknown_unknown(public_class: type, empty_v1_models: None) -> None: 133 | """ 134 | Test that a model not in /v1/models, not in known model table, and not internal 135 | will be rejected. 136 | """ 137 | # todo: make this work for local NIM 138 | with pytest.warns(UserWarning, match="Model test/unknown-unknown is unknown"): 139 | public_class(model="test/unknown-unknown", nvidia_api_key="a-bogus-key") 140 | 141 | 142 | def test_default_known(public_class: type, known_unknown: str) -> None: 143 | """ 144 | Test that the default model is taken from the locally hosted /v1/models endpoint. 145 | """ 146 | # check if default model is getting set 147 | with pytest.warns(UserWarning) as record: 148 | x = public_class(base_url="http://localhost:8000/v1") 149 | assert x.model == known_unknown 150 | assert len(record) == 1 151 | assert "Default model is set as: mock-model" in str(record[0].message) 152 | 153 | 154 | def test_default_lora(public_class: type) -> None: 155 | """ 156 | Test that a LoRA model listed by a locally hosted endpoint will be accepted. 
157 | """ 158 | # find a model that matches the public_class under test 159 | x = public_class(base_url="http://localhost:8000/v1", model="lora1") 160 | assert x.model == "lora1" 161 | 162 | 163 | def test_default(public_class: type) -> None: 164 | x = public_class(api_key="BOGUS") 165 | assert x.model is not None 166 | 167 | 168 | @pytest.mark.parametrize( 169 | "model, client", 170 | [(model.id, model.client) for model in MODEL_TABLE.values()], 171 | ) 172 | def test_all_incompatible(public_class: type, model: str, client: str) -> None: 173 | if client == public_class.__name__: 174 | pytest.skip("Compatibility expected.") 175 | 176 | with pytest.warns(UserWarning) as record: 177 | public_class(model=model, nvidia_api_key="a-bogus-key") 178 | 179 | assert len(record) == 1 180 | assert "incompatible with client" in str(record[0].message) 181 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_ranking.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Any, Literal, Optional 3 | 4 | import pytest 5 | from langchain_core.documents import Document 6 | from requests_mock import Mocker 7 | 8 | from langchain_nvidia_ai_endpoints import NVIDIARerank 9 | 10 | 11 | @pytest.fixture(autouse=True) 12 | def mock_v1_models(requests_mock: Mocker) -> None: 13 | requests_mock.get( 14 | "https://integrate.api.nvidia.com/v1/models", 15 | json={ 16 | "data": [ 17 | { 18 | "id": "mock-model", 19 | "object": "model", 20 | "created": 1234567890, 21 | "owned_by": "OWNER", 22 | } 23 | ] 24 | }, 25 | ) 26 | 27 | 28 | @pytest.fixture(autouse=True) 29 | def mock_v1_ranking(requests_mock: Mocker) -> None: 30 | requests_mock.post( 31 | "https://integrate.api.nvidia.com/v1/ranking", 32 | json={ 33 | "rankings": [ 34 | {"index": 0, "logit": 4.2}, 35 | ] 36 | }, 37 | ) 38 | 39 | 40 | @pytest.mark.parametrize( 41 | "truncate", 42 | [ 43 | None, 44 | "END", 45 | "NONE", 46 | ], 47 | ) 48 | def test_truncate( 49 | requests_mock: Mocker, 50 | truncate: Optional[Literal["END", "NONE"]], 51 | ) -> None: 52 | truncate_param = {} 53 | if truncate: 54 | truncate_param = {"truncate": truncate} 55 | warnings.filterwarnings( 56 | "ignore", ".*Found mock-model in available_models.*" 57 | ) # expect to see this warning 58 | client = NVIDIARerank(api_key="BOGUS", model="mock-model", **truncate_param) 59 | response = client.compress_documents( 60 | documents=[Document(page_content="Nothing really.")], query="What is it?" 61 | ) 62 | 63 | assert len(response) == 1 64 | 65 | assert requests_mock.last_request is not None 66 | request_payload = requests_mock.last_request.json() 67 | if truncate is None: 68 | assert "truncate" not in request_payload 69 | else: 70 | assert "truncate" in request_payload 71 | assert request_payload["truncate"] == truncate 72 | 73 | 74 | @pytest.mark.parametrize("truncate", [True, False, 1, 0, 1.0, "START", "BOGUS"]) 75 | def test_truncate_invalid(truncate: Any) -> None: 76 | with pytest.raises(ValueError): 77 | NVIDIARerank(truncate=truncate) 78 | 79 | 80 | def test_extra_headers(requests_mock: Mocker) -> None: 81 | client = NVIDIARerank( 82 | api_key="BOGUS", model="mock-model", extra_headers={"X-Test": "test"} 83 | ) 84 | assert client.extra_headers == {"X-Test": "test"} 85 | 86 | _ = client.compress_documents( 87 | documents=[Document(page_content="Nothing really.")], query="What is it?" 
88 | ) 89 | assert requests_mock.last_request is not None 90 | assert requests_mock.last_request.headers["X-Test"] == "test" 91 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_register_model.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import pytest 4 | 5 | from langchain_nvidia_ai_endpoints import ( 6 | NVIDIA, 7 | ChatNVIDIA, 8 | Model, 9 | NVIDIAEmbeddings, 10 | NVIDIARerank, 11 | register_model, 12 | ) 13 | 14 | 15 | @pytest.mark.parametrize( 16 | "model_type, client", 17 | [ 18 | ("chat", "NVIDIAEmbeddings"), 19 | ("chat", "NVIDIARerank"), 20 | ("chat", "NVIDIA"), 21 | ("vlm", "NVIDIAEmbeddings"), 22 | ("vlm", "NVIDIARerank"), 23 | ("vlm", "NVIDIA"), 24 | ("embedding", "ChatNVIDIA"), 25 | ("embedding", "NVIDIARerank"), 26 | ("embedding", "NVIDIA"), 27 | ("ranking", "ChatNVIDIA"), 28 | ("ranking", "NVIDIAEmbeddings"), 29 | ("ranking", "NVIDIA"), 30 | ("completions", "ChatNVIDIA"), 31 | ("completions", "NVIDIAEmbeddings"), 32 | ("completions", "NVIDIARerank"), 33 | ], 34 | ) 35 | def test_mismatched_type_client(model_type: str, client: str) -> None: 36 | with pytest.raises(ValueError) as e: 37 | register_model( 38 | Model( 39 | id="model", 40 | model_type=model_type, 41 | client=client, 42 | endpoint="BOGUS", 43 | ) 44 | ) 45 | assert "not supported" in str(e.value) 46 | 47 | 48 | def test_duplicate_model_warns() -> None: 49 | model = Model(id="registered-model", endpoint="BOGUS") 50 | register_model(model) 51 | with pytest.warns(UserWarning) as record: 52 | register_model(model) 53 | assert len(record) == 1 54 | assert isinstance(record[0].message, UserWarning) 55 | assert "already registered" in str(record[0].message) 56 | assert "Overriding" in str(record[0].message) 57 | 58 | 59 | def test_registered_model_usable(public_class: type, mock_model: str) -> None: 60 | model_type = { 61 | "ChatNVIDIA": "chat", 62 | "NVIDIAEmbeddings": "embedding", 63 | "NVIDIARerank": "ranking", 64 | "NVIDIA": "completions", 65 | }[public_class.__name__] 66 | with warnings.catch_warnings(): 67 | warnings.simplefilter("error") 68 | model = Model( 69 | id=mock_model, 70 | model_type=model_type, 71 | client=public_class.__name__, 72 | endpoint="BOGUS", 73 | ) 74 | register_model(model) 75 | x = public_class(model=mock_model, nvidia_api_key="a-bogus-key") 76 | assert x.model == mock_model 77 | 78 | 79 | def test_registered_model_without_client_usable(public_class: type) -> None: 80 | id = "test/no-client" 81 | model = Model(id=id, endpoint="BOGUS") 82 | register_model(model) 83 | with pytest.warns(UserWarning) as record: 84 | public_class(model=id, nvidia_api_key="a-bogus-key") 85 | assert len(record) == 1 86 | assert isinstance(record[0].message, UserWarning) 87 | assert "Unable to determine validity" in str(record[0].message) 88 | 89 | 90 | def test_missing_endpoint() -> None: 91 | with pytest.raises(ValueError) as e: 92 | register_model( 93 | Model(id="missing-endpoint", model_type="chat", client="ChatNVIDIA") 94 | ) 95 | assert "does not have an endpoint" in str(e.value) 96 | 97 | 98 | def test_registered_model_is_available() -> None: 99 | register_model( 100 | Model( 101 | id="test/chat", 102 | model_type="chat", 103 | client="ChatNVIDIA", 104 | endpoint="BOGUS", 105 | ) 106 | ) 107 | register_model( 108 | Model( 109 | id="test/embedding", 110 | model_type="embedding", 111 | client="NVIDIAEmbeddings", 112 | endpoint="BOGUS", 113 | ) 114 | ) 115 | register_model( 
116 | Model( 117 | id="test/rerank", 118 | model_type="ranking", 119 | client="NVIDIARerank", 120 | endpoint="BOGUS", 121 | ) 122 | ) 123 | register_model( 124 | Model( 125 | id="test/completions", 126 | model_type="completions", 127 | client="NVIDIA", 128 | endpoint="BOGUS", 129 | ) 130 | ) 131 | chat_models = ChatNVIDIA.get_available_models(api_key="BOGUS") 132 | embedding_models = NVIDIAEmbeddings.get_available_models(api_key="BOGUS") 133 | ranking_models = NVIDIARerank.get_available_models(api_key="BOGUS") 134 | completions_models = NVIDIA.get_available_models(api_key="BOGUS") 135 | 136 | assert "test/chat" in [model.id for model in chat_models] 137 | assert "test/chat" not in [model.id for model in embedding_models] 138 | assert "test/chat" not in [model.id for model in ranking_models] 139 | assert "test/chat" not in [model.id for model in completions_models] 140 | 141 | assert "test/embedding" not in [model.id for model in chat_models] 142 | assert "test/embedding" in [model.id for model in embedding_models] 143 | assert "test/embedding" not in [model.id for model in ranking_models] 144 | assert "test/embedding" not in [model.id for model in completions_models] 145 | 146 | assert "test/rerank" not in [model.id for model in chat_models] 147 | assert "test/rerank" not in [model.id for model in embedding_models] 148 | assert "test/rerank" in [model.id for model in ranking_models] 149 | assert "test/rerank" not in [model.id for model in completions_models] 150 | 151 | assert "test/completions" not in [model.id for model in chat_models] 152 | assert "test/completions" not in [model.id for model in embedding_models] 153 | assert "test/completions" not in [model.id for model in ranking_models] 154 | assert "test/completions" in [model.id for model in completions_models] 155 | 156 | 157 | def test_registered_model_without_client_is_not_listed(public_class: type) -> None: 158 | model_name = "test/model" 159 | register_model(Model(id=model_name, endpoint="BOGUS")) 160 | models = public_class.get_available_models(api_key="BOGUS") # type: ignore 161 | assert model_name not in [model.id for model in models] 162 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_serialization.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pytest 4 | from langchain_core.load.dump import dumps 5 | from langchain_core.load.load import loads 6 | 7 | from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings 8 | 9 | 10 | @pytest.mark.skip("serialization support is broken, needs attention") 11 | def test_serialize_chatnvidia() -> None: 12 | secret = "a-bogus-key" 13 | x = ChatNVIDIA(nvidia_api_key=secret) 14 | y = loads( 15 | dumps(x), 16 | secrets_map={"NVIDIA_API_KEY": secret}, 17 | valid_namespaces=["langchain_nvidia_ai_endpoints"], 18 | ) 19 | assert x == y 20 | assert isinstance(y, ChatNVIDIA) 21 | 22 | 23 | def test_pickle_embeddings() -> None: 24 | x = NVIDIAEmbeddings(api_key="BOGUS") 25 | y = pickle.loads(pickle.dumps(x)) 26 | assert x.model == y.model 27 | assert x.max_batch_size == y.max_batch_size 28 | assert isinstance(y, NVIDIAEmbeddings) 29 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_standard.py: -------------------------------------------------------------------------------- 1 | """Standard LangChain interface tests""" 2 | 3 | from typing import Type 4 | 5 | from 
langchain_core.language_models import BaseChatModel 6 | from langchain_tests.unit_tests import ChatModelUnitTests 7 | 8 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 9 | 10 | 11 | class TestNVIDIAStandard(ChatModelUnitTests): 12 | @property 13 | def chat_model_class(self) -> Type[BaseChatModel]: 14 | return ChatNVIDIA 15 | 16 | @property 17 | def chat_model_params(self) -> dict: 18 | return { 19 | "model": "meta/llama-3.1-8b-instruct", 20 | "api_key": "BOGUS", 21 | } 22 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_statics.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import pytest 4 | 5 | from langchain_nvidia_ai_endpoints._statics import MODEL_TABLE, determine_model 6 | 7 | 8 | @pytest.fixture(params=MODEL_TABLE.keys()) 9 | def entry(request: Any) -> str: 10 | return request.param 11 | 12 | 13 | @pytest.fixture( 14 | params=[ 15 | alias 16 | for ls in [model.aliases for model in MODEL_TABLE.values() if model.aliases] 17 | for alias in ls 18 | ] 19 | ) 20 | def alias(request: Any) -> str: 21 | return request.param 22 | 23 | 24 | def test_model_table_integrity_name_id(entry: str) -> None: 25 | model = MODEL_TABLE[entry] 26 | assert model.id == entry 27 | 28 | 29 | def test_determine_model_deprecated_alternative_warns(alias: str) -> None: 30 | with pytest.warns(UserWarning) as record: 31 | determine_model(alias) 32 | assert len(record) == 1 33 | assert f"Model {alias} is deprecated" in str(record[0].message) 34 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_stop.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Optional, Sequence, Union 3 | 4 | import pytest 5 | from requests_mock import Mocker 6 | 7 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 8 | 9 | 10 | @pytest.fixture(autouse=True) 11 | def mock_v1_models(requests_mock: Mocker) -> None: 12 | requests_mock.get( 13 | "https://integrate.api.nvidia.com/v1/models", 14 | json={ 15 | "data": [ 16 | { 17 | "id": "mock-model", 18 | "object": "model", 19 | "created": 1234567890, 20 | "owned_by": "OWNER", 21 | } 22 | ] 23 | }, 24 | ) 25 | 26 | 27 | @pytest.fixture(autouse=True) 28 | def mock_v1_chat_completions(requests_mock: Mocker) -> None: 29 | requests_mock.post( 30 | "https://integrate.api.nvidia.com/v1/chat/completions", 31 | json={ 32 | "id": "mock-id", 33 | "created": 1234567890, 34 | "object": "chat.completion", 35 | "model": "mock-model", 36 | "choices": [ 37 | { 38 | "index": 0, 39 | "message": {"role": "assistant", "content": "Ok"}, 40 | } 41 | ], 42 | }, 43 | ) 44 | 45 | 46 | @pytest.mark.parametrize( 47 | "prop_stop, param_stop, expected_stop", 48 | [ 49 | (None, ["PARAM"], ["PARAM"]), 50 | (None, "PARAM", "PARAM"), 51 | (["PROP"], None, ["PROP"]), 52 | (["PROP"], ["PARAM"], ["PARAM"]), 53 | (["PROP"], "PARAM", "PARAM"), 54 | (None, None, None), 55 | ], 56 | ids=[ 57 | "parameter_seq", 58 | "parameter_str", 59 | "property", 60 | "override_seq", 61 | "override_str", 62 | "absent", 63 | ], 64 | ) 65 | @pytest.mark.parametrize("func_name", ["invoke", "stream"]) 66 | def test_stop( 67 | requests_mock: Mocker, 68 | prop_stop: Optional[Sequence[str]], 69 | param_stop: Optional[Union[str, Sequence[str]]], 70 | expected_stop: Union[str, Sequence[str]], 71 | func_name: str, 72 | ) -> None: 73 | """ 74 | Users can pass 
`stop` as a property of the client or as a parameter to the 75 | `invoke` or `stream` methods. The value passed as a parameter should 76 | override the value passed as a property. 77 | 78 | Also, the `stop` parameter can be a str or Sequence[str], while the `stop` 79 | property is always a Sequence[str]. 80 | """ 81 | # `**(dict(stop=...) if ... else {})` is a clever way to avoid passing stop 82 | # if the value is None 83 | warnings.filterwarnings( 84 | "ignore", ".*Found mock-model in available_models.*" 85 | ) # expect to see this warning 86 | client = ChatNVIDIA( 87 | model="mock-model", 88 | api_key="mocked", 89 | **(dict(stop=prop_stop) if prop_stop else {}), 90 | ) 91 | # getattr(client, func_name) is a clever way to call a method by name 92 | response = getattr(client, func_name)( 93 | "Ok?", **(dict(stop=param_stop) if param_stop else {}) 94 | ) 95 | # the `stream` method returns a generator, so we need to call `next` to get 96 | # the actual response 97 | if func_name == "stream": # one step too clever parameterizing the function name 98 | response = next(response) 99 | 100 | assert response.content == "Ok" 101 | 102 | assert requests_mock.last_request is not None 103 | request_payload = requests_mock.last_request.json() 104 | if expected_stop: 105 | assert "stop" in request_payload 106 | assert request_payload["stop"] == expected_stop 107 | else: 108 | assert "stop" not in request_payload 109 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_structured_output.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import warnings 3 | from typing import Callable, List, Optional, Type 4 | 5 | import pytest 6 | import requests_mock 7 | from pydantic import BaseModel as pydanticV2BaseModel # ignore: check_pydantic 8 | from pydantic import Field 9 | from pydantic.v1 import BaseModel as pydanticV1BaseModel # ignore: check_pydantic 10 | 11 | from langchain_nvidia_ai_endpoints import ChatNVIDIA 12 | 13 | 14 | class Joke(pydanticV2BaseModel): 15 | """Joke to tell user.""" 16 | 17 | setup: str = Field(description="The setup of the joke") 18 | punchline: str = Field(description="The punchline to the joke") 19 | rating: Optional[int] = Field(description="How funny the joke is, from 1 to 10") 20 | 21 | 22 | def test_method() -> None: 23 | with pytest.warns(UserWarning) as record: 24 | with warnings.catch_warnings(): 25 | warnings.filterwarnings( 26 | "ignore", 27 | category=UserWarning, 28 | message=".*not known to support structured output.*", 29 | ) 30 | ChatNVIDIA(api_key="BOGUS").with_structured_output(Joke, method="json_mode") 31 | assert len(record) == 1 32 | assert "unnecessary" in str(record[0].message) 33 | 34 | 35 | def test_include_raw() -> None: 36 | with pytest.raises(NotImplementedError): 37 | ChatNVIDIA(api_key="BOGUS").with_structured_output(Joke, include_raw=True) 38 | 39 | with pytest.raises(NotImplementedError): 40 | ChatNVIDIA(api_key="BOGUS").with_structured_output( 41 | Joke.model_json_schema(), include_raw=True 42 | ) 43 | 44 | 45 | def test_known_does_not_warn(empty_v1_models: None) -> None: 46 | structured_model = [ 47 | model 48 | for model in ChatNVIDIA.get_available_models(api_key="BOGUS") 49 | if model.supports_structured_output 50 | ] 51 | assert structured_model, "No models support structured output" 52 | 53 | with warnings.catch_warnings(): 54 | warnings.simplefilter("error") 55 | ChatNVIDIA( 56 | api_key="BOGUS", 
model=structured_model[0].id 57 | ).with_structured_output(Joke) 58 | 59 | 60 | def test_unknown_warns(empty_v1_models: None) -> None: 61 | unstructured_model = [ 62 | model 63 | for model in ChatNVIDIA.get_available_models(api_key="BOGUS") 64 | if not model.supports_structured_output 65 | ] 66 | assert unstructured_model, "All models support structured output" 67 | 68 | with pytest.warns(UserWarning) as record: 69 | ChatNVIDIA( 70 | api_key="BOGUS", model=unstructured_model[0].id 71 | ).with_structured_output(Joke) 72 | assert len(record) == 1 73 | assert "not known to support structured output" in str(record[0].message) 74 | 75 | 76 | def test_enum_negative() -> None: 77 | class Choices(enum.Enum): 78 | A = "A" 79 | B = "2" 80 | C = 3 81 | 82 | llm = ChatNVIDIA(api_key="BOGUS") 83 | with warnings.catch_warnings(): 84 | warnings.filterwarnings( 85 | "ignore", 86 | category=UserWarning, 87 | message=".*not known to support structured output.*", 88 | ) 89 | with pytest.raises(ValueError) as e: 90 | llm.with_structured_output(Choices) 91 | assert "only contain string choices" in str(e.value) 92 | 93 | 94 | class Choices(enum.Enum): 95 | YES = "Yes it is" 96 | NO = "No it is not" 97 | 98 | 99 | @pytest.mark.parametrize( 100 | "chunks", 101 | [ 102 | ["Y", "es", " it", " is"], 103 | ["N", "o", " it", " is", " not"], 104 | ], 105 | ids=["YES", "NO"], 106 | ) 107 | def test_stream_enum( 108 | mock_streaming_response: Callable, 109 | chunks: List[str], 110 | ) -> None: 111 | mock_streaming_response(chunks) 112 | 113 | warnings.filterwarnings("ignore", r".*not known to support structured output.*") 114 | structured_llm = ChatNVIDIA(api_key="BOGUS").with_structured_output(Choices) 115 | # chunks are progressively more complete, so we only consider the last 116 | for chunk in structured_llm.stream("This is ignored."): 117 | response = chunk 118 | assert isinstance(response, Choices) 119 | assert response in Choices 120 | 121 | 122 | @pytest.mark.parametrize( 123 | "chunks", 124 | [ 125 | ["Y", "es", " it"], 126 | ["N", "o", " it", " is"], 127 | ], 128 | ids=["YES", "NO"], 129 | ) 130 | def test_stream_enum_incomplete( 131 | mock_streaming_response: Callable, 132 | chunks: List[str], 133 | ) -> None: 134 | mock_streaming_response(chunks) 135 | 136 | warnings.filterwarnings("ignore", r".*not known to support structured output.*") 137 | structured_llm = ChatNVIDIA(api_key="BOGUS").with_structured_output(Choices) 138 | # chunks are progressively more complete, so we only consider the last 139 | for chunk in structured_llm.stream("This is ignored."): 140 | response = chunk 141 | assert response is None 142 | 143 | 144 | @pytest.mark.parametrize( 145 | "pydanticBaseModel", 146 | [ 147 | pydanticV1BaseModel, 148 | pydanticV2BaseModel, 149 | ], 150 | ids=["pydantic-v1", "pydantic-v2"], 151 | ) 152 | def test_pydantic_version( 153 | requests_mock: requests_mock.Mocker, 154 | pydanticBaseModel: Type, 155 | ) -> None: 156 | requests_mock.post( 157 | "https://integrate.api.nvidia.com/v1/chat/completions", 158 | json={ 159 | "id": "chatcmpl-ID", 160 | "object": "chat.completion", 161 | "created": 1234567890, 162 | "model": "BOGUS", 163 | "choices": [ 164 | { 165 | "index": 0, 166 | "message": { 167 | "role": "assistant", 168 | "content": '{"name": "Sam Doe"}', 169 | }, 170 | "logprobs": None, 171 | "finish_reason": "stop", 172 | } 173 | ], 174 | "usage": { 175 | "prompt_tokens": 22, 176 | "completion_tokens": 20, 177 | "total_tokens": 42, 178 | }, 179 | "system_fingerprint": None, 180 | }, 181 | ) 182 | 183 | 
class Person(pydanticBaseModel): # type: ignore 184 | name: str 185 | 186 | warnings.filterwarnings("ignore", r".*not known to support structured output.*") 187 | llm = ChatNVIDIA(api_key="BOGUS").with_structured_output(Person) 188 | response = llm.invoke("This is ignored.") 189 | assert isinstance(response, Person) 190 | assert response.name == "Sam Doe" 191 | 192 | 193 | @pytest.mark.parametrize( 194 | "strict", 195 | [False, None, "BOGUS"], 196 | ) 197 | def test_strict_warns(strict: Optional[bool]) -> None: 198 | warnings.filterwarnings("error") # no warnings should be raised 199 | 200 | # acceptable warnings 201 | warnings.filterwarnings( 202 | "ignore", category=UserWarning, message=".*not known to support.*" 203 | ) 204 | 205 | # warnings under test 206 | strict_warning = ".*`strict` is ignored.*" 207 | warnings.filterwarnings("default", category=UserWarning, message=strict_warning) 208 | 209 | with pytest.warns(UserWarning, match=strict_warning): 210 | ChatNVIDIA(api_key="BOGUS").with_structured_output( 211 | Joke, 212 | strict=strict, 213 | ) 214 | 215 | 216 | @pytest.mark.parametrize( 217 | "strict", 218 | [True, None], 219 | ids=["strict-True", "no-strict"], 220 | ) 221 | def test_strict_no_warns(strict: Optional[bool]) -> None: 222 | warnings.filterwarnings("error") # no warnings should be raised 223 | 224 | # acceptable warnings 225 | warnings.filterwarnings( 226 | "ignore", category=UserWarning, message=".*not known to support.*" 227 | ) 228 | 229 | ChatNVIDIA(api_key="BOGUS").with_structured_output( 230 | Joke, 231 | **({"strict": strict} if strict is not None else {}), 232 | ) 233 | -------------------------------------------------------------------------------- /libs/ai-endpoints/tests/unit_tests/test_vlm_models.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Union 2 | 3 | import pytest 4 | 5 | from langchain_nvidia_ai_endpoints.chat_models import _nv_vlm_get_asset_ids 6 | 7 | 8 | @pytest.mark.parametrize( 9 | "content, expected", 10 | [ 11 | # Single asset ID in a string (double quotes) 12 | ('', ["12345"]), 13 | # Multiple asset IDs in a string (double quotes) 14 | ( 15 | ( 16 | '' 17 | '' 18 | ), 19 | ["12345", "67890"], 20 | ), 21 | # Single asset ID in list of strings (single quotes) 22 | ([""], ["12345"]), 23 | # Multiple asset IDs in list of strings (single quotes) 24 | ( 25 | [ 26 | "", 27 | "", 28 | ], 29 | ["12345", "67890"], 30 | ), 31 | # Single asset ID in a list of dictionaries 32 | ([{"image_url": {"url": "data:image/png;asset_id,12345"}}], ["12345"]), 33 | # Multiple asset IDs in a list of dictionaries 34 | ( 35 | [ 36 | {"image_url": {"url": "data:image/png;asset_id,12345"}}, 37 | {"image_url": {"url": "data:image/jpeg;asset_id,67890"}}, 38 | ], 39 | ["12345", "67890"], 40 | ), 41 | # No asset IDs present (double quotes) 42 | ('', []), 43 | # No asset IDs present (single quotes) 44 | ("", []), 45 | ], 46 | ids=[ 47 | "single_asset_id_string_double_quotes", 48 | "multiple_asset_ids_string_double_quotes", 49 | "single_asset_id_list_of_strings_single_quotes", 50 | "multiple_asset_ids_list_of_strings_single_quotes", 51 | "single_asset_id_list_of_dicts", 52 | "multiple_asset_ids_list_of_dicts", 53 | "no_asset_ids_double_quotes", 54 | "no_asset_ids_single_quotes", 55 | ], 56 | ) 57 | def test_nv_vlm_get_asset_ids( 58 | content: Union[str, List[Union[str, Dict[str, Any]]]], expected: List[str] 59 | ) -> None: 60 | result = _nv_vlm_get_asset_ids(content) 61 | assert result == expected 62 
| -------------------------------------------------------------------------------- /libs/trt/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | -------------------------------------------------------------------------------- /libs/trt/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 LangChain, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /libs/trt/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all format lint test tests integration_tests docker_tests help extended_tests 2 | 3 | # Default target executed when no arguments are given to make. 4 | all: help 5 | 6 | # Define a variable for the test file path. 7 | TEST_FILE ?= tests/unit_tests/ 8 | 9 | test: 10 | poetry run pytest $(TEST_FILE) 11 | 12 | tests: 13 | poetry run pytest $(TEST_FILE) 14 | 15 | 16 | ###################### 17 | # LINTING AND FORMATTING 18 | ###################### 19 | 20 | # Define a variable for Python and notebook files. 21 | PYTHON_FILES=. 22 | MYPY_CACHE=.mypy_cache 23 | lint format: PYTHON_FILES=. 24 | lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/nvidia-trt --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$') 25 | lint_package: PYTHON_FILES=langchain_nvidia_trt 26 | lint_tests: PYTHON_FILES=tests 27 | lint_tests: MYPY_CACHE=.mypy_cache_test 28 | 29 | lint lint_diff lint_package lint_tests: 30 | poetry run ruff . 
31 | poetry run ruff format $(PYTHON_FILES) --diff 32 | poetry run ruff --select I $(PYTHON_FILES) 33 | mkdir $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE) 34 | 35 | format format_diff: 36 | poetry run ruff format $(PYTHON_FILES) 37 | poetry run ruff --select I --fix $(PYTHON_FILES) 38 | 39 | spell_check: 40 | poetry run codespell --toml pyproject.toml 41 | 42 | spell_fix: 43 | poetry run codespell --toml pyproject.toml -w 44 | 45 | check_imports: $(shell find langchain_nvidia_trt -name '*.py') 46 | poetry run python ./scripts/check_imports.py $^ 47 | 48 | ###################### 49 | # HELP 50 | ###################### 51 | 52 | help: 53 | @echo '----' 54 | @echo 'check_imports - check imports' 55 | @echo 'format - run code formatters' 56 | @echo 'lint - run linters' 57 | @echo 'test - run unit tests' 58 | @echo 'tests - run unit tests' 59 | @echo 'test TEST_FILE=<test_file> - run all tests in file' 60 | -------------------------------------------------------------------------------- /libs/trt/README.md: -------------------------------------------------------------------------------- 1 | # langchain-nvidia-trt 2 | -------------------------------------------------------------------------------- /libs/trt/docs/llms.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "raw", 5 | "id": "67db2992", 6 | "metadata": {}, 7 | "source": [ 8 | "---\n", 9 | "sidebar_label: TritonTensorRT\n", 10 | "---" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "b56b221d", 16 | "metadata": {}, 17 | "source": [ 18 | "# Nvidia Triton+TRT-LLM\n", 19 | "\n", 20 | "Nvidia's Triton is an inference server that provides API-style access to hosted LLM models. Likewise, Nvidia TensorRT-LLM, often abbreviated as TRT-LLM, is a GPU-accelerated SDK for running optimizations and inference on LLM models. This connector allows LangChain to remotely interact with a Triton inference server over gRPC or HTTP to perform accelerated inference operations.\n", 21 | "\n", 22 | "[Triton Inference Server Github](https://github.com/triton-inference-server/server)\n", 23 | "\n", 24 | "\n", 25 | "## TritonTensorRTLLM\n", 26 | "\n", 27 | "This example goes over how to use LangChain to interact with `TritonTensorRT` LLMs. 
To install, run the following command:" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "id": "59c710c4", 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# install package\n", 38 | "%pip install -U langchain-nvidia-trt" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "id": "0ee90032", 44 | "metadata": {}, 45 | "source": [ 46 | "## Create the Triton+TRT-LLM instance\n", 47 | "\n", 48 | "Remember that a Triton instance represents a running server instance therefore you should ensure you have a valid server configuration running and change the `localhost:8001` to the correct IP/hostname:port combination for your server.\n", 49 | "\n", 50 | "An example of setting up this environment can be found at Nvidia's (GenerativeAIExamples Github Repo)[https://github.com/NVIDIA/GenerativeAIExamples/tree/main/RetrievalAugmentedGeneration]" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "035dea0f", 57 | "metadata": { 58 | "tags": [] 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "from langchain_core.prompts import PromptTemplate\n", 63 | "from langchain_nvidia_trt.llms import TritonTensorRTLLM\n", 64 | "\n", 65 | "template = \"\"\"Question: {question}\n", 66 | "\n", 67 | "Answer: Let's think step by step.\"\"\"\n", 68 | "\n", 69 | "prompt = PromptTemplate.from_template(template)\n", 70 | "\n", 71 | "# Connect to the TRT-LLM Llama-2 model running on the Triton server at the url below\n", 72 | "triton_llm = TritonTensorRTLLM(server_url =\"localhost:8001\", model_name=\"ensemble\", tokens=500)\n", 73 | "\n", 74 | "chain = prompt | triton_llm \n", 75 | "\n", 76 | "chain.invoke({\"question\": \"What is LangChain?\"})" 77 | ] 78 | } 79 | ], 80 | "metadata": { 81 | "kernelspec": { 82 | "display_name": "Python 3 (ipykernel)", 83 | "language": "python", 84 | "name": "python3" 85 | }, 86 | "language_info": { 87 | "codemirror_mode": { 88 | "name": "ipython", 89 | "version": 3 90 | }, 91 | "file_extension": ".py", 92 | "mimetype": "text/x-python", 93 | "name": "python", 94 | "nbconvert_exporter": "python", 95 | "pygments_lexer": "ipython3", 96 | "version": "3.10.9" 97 | }, 98 | "vscode": { 99 | "interpreter": { 100 | "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1" 101 | } 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 5 106 | } 107 | -------------------------------------------------------------------------------- /libs/trt/langchain_nvidia_trt/__init__.py: -------------------------------------------------------------------------------- 1 | from langchain_nvidia_trt.llms import TritonTensorRTLLM 2 | 3 | __all__ = ["TritonTensorRTLLM"] 4 | -------------------------------------------------------------------------------- /libs/trt/langchain_nvidia_trt/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/trt/langchain_nvidia_trt/py.typed -------------------------------------------------------------------------------- /libs/trt/mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | # Empty global config 3 | [mypy-tritonclient.*] 4 | ignore_missing_imports = True 5 | -------------------------------------------------------------------------------- /libs/trt/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = 
"langchain-nvidia-trt" 3 | version = "0.0.1" 4 | description = "An integration package connecting TritonTensorRT and LangChain" 5 | authors = [] 6 | readme = "README.md" 7 | repository = "https://github.com/langchain-ai/langchain-nvidia" 8 | license = "MIT" 9 | 10 | [tool.poetry.urls] 11 | "Source Code" = "https://github.com/langchain-ai/langchain-nvidia/tree/main/libs/trt" 12 | 13 | [tool.poetry.dependencies] 14 | python = ">=3.8.1,<4.0" 15 | langchain-core = "^0.1" 16 | tritonclient = { extras = ["grpc"], version = "^2.42.0" } 17 | lint = "^1.2.1" 18 | types-protobuf = "^4.24.0.4" 19 | protobuf = "^3.5.0" 20 | 21 | [tool.poetry.group.test] 22 | optional = true 23 | 24 | [tool.poetry.group.test.dependencies] 25 | pytest = "^7.3.0" 26 | freezegun = "^1.2.2" 27 | pytest-mock = "^3.10.0" 28 | syrupy = "^4.0.2" 29 | pytest-watcher = "^0.3.4" 30 | pytest-asyncio = "^0.21.1" 31 | langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core" } 32 | 33 | [tool.poetry.group.codespell] 34 | optional = true 35 | 36 | [tool.poetry.group.codespell.dependencies] 37 | codespell = "^2.2.0" 38 | 39 | [tool.poetry.group.test_integration] 40 | optional = true 41 | 42 | [tool.poetry.group.test_integration.dependencies] 43 | 44 | [tool.poetry.group.lint] 45 | optional = true 46 | 47 | [tool.poetry.group.lint.dependencies] 48 | ruff = "^0.1.5" 49 | 50 | [tool.poetry.group.typing.dependencies] 51 | mypy = "^0.991" 52 | langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core" } 53 | 54 | [tool.poetry.group.dev] 55 | optional = true 56 | 57 | [tool.poetry.group.dev.dependencies] 58 | langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core" } 59 | 60 | [tool.ruff.lint] 61 | select = [ 62 | "E", # pycodestyle 63 | "F", # pyflakes 64 | "I", # isort 65 | "T201", # print 66 | ] 67 | 68 | [tool.mypy] 69 | disallow_untyped_defs = "True" 70 | 71 | [tool.coverage.run] 72 | omit = ["tests/*"] 73 | 74 | [build-system] 75 | requires = ["poetry-core>=1.0.0"] 76 | build-backend = "poetry.core.masonry.api" 77 | 78 | [tool.pytest.ini_options] 79 | # --strict-markers will raise errors on unknown marks. 80 | # https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks 81 | # 82 | # https://docs.pytest.org/en/7.1.x/reference/reference.html 83 | # --strict-config any warnings encountered while parsing the `pytest` 84 | # section of the configuration file raise errors. 85 | # 86 | # https://github.com/tophat/syrupy 87 | # --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite. 88 | addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5" 89 | # Registering custom markers. 
90 | # https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers 91 | markers = [ 92 | "requires: mark tests as requiring a specific library", 93 | "asyncio: mark tests as requiring asyncio", 94 | "compile: mark placeholder test used to compile integration tests without running them", 95 | ] 96 | asyncio_mode = "auto" 97 | -------------------------------------------------------------------------------- /libs/trt/scripts/check_imports.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import traceback 3 | from importlib.machinery import SourceFileLoader 4 | 5 | if __name__ == "__main__": 6 | files = sys.argv[1:] 7 | has_failure = False 8 | for file in files: 9 | try: 10 | SourceFileLoader("x", file).load_module() 11 | except Exception: 12 | has_failure = True 13 | print(file) # noqa: T201 14 | traceback.print_exc() 15 | print() # noqa: T201 16 | 17 | sys.exit(1 if has_failure else 0) 18 | -------------------------------------------------------------------------------- /libs/trt/scripts/check_pydantic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script searches for lines starting with "import pydantic" or "from pydantic" 4 | # in tracked files within a Git repository. 5 | # 6 | # Usage: ./scripts/check_pydantic.sh /path/to/repository 7 | 8 | # Check if a path argument is provided 9 | if [ $# -ne 1 ]; then 10 | echo "Usage: $0 /path/to/repository" 11 | exit 1 12 | fi 13 | 14 | repository_path="$1" 15 | 16 | # Search for lines matching the pattern within the specified repository 17 | result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic') 18 | 19 | # Check if any matching lines were found 20 | if [ -n "$result" ]; then 21 | echo "ERROR: The following lines need to be updated:" 22 | echo "$result" 23 | echo "Please replace the code with an import from langchain_core.pydantic_v1." 24 | echo "For example, replace 'from pydantic import BaseModel'" 25 | echo "with 'from langchain_core.pydantic_v1 import BaseModel'" 26 | exit 1 27 | fi 28 | -------------------------------------------------------------------------------- /libs/trt/scripts/lint_imports.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | # Initialize a variable to keep track of errors 6 | errors=0 7 | 8 | # make sure not importing from langchain or langchain_experimental 9 | git --no-pager grep '^from langchain\.' . && errors=$((errors+1)) 10 | git --no-pager grep '^from langchain_experimental\.' . 
&& errors=$((errors+1)) 11 | 12 | # Decide on an exit status based on the errors 13 | if [ "$errors" -gt 0 ]; then 14 | exit 1 15 | else 16 | exit 0 17 | fi 18 | -------------------------------------------------------------------------------- /libs/trt/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/trt/tests/__init__.py -------------------------------------------------------------------------------- /libs/trt/tests/integration_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/trt/tests/integration_tests/__init__.py -------------------------------------------------------------------------------- /libs/trt/tests/integration_tests/test_compile.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.mark.compile 5 | def test_placeholder() -> None: 6 | """Used for compiling integration tests without running any real tests.""" 7 | pass 8 | -------------------------------------------------------------------------------- /libs/trt/tests/integration_tests/test_llms.py: -------------------------------------------------------------------------------- 1 | """Test TritonTensorRTLLM llm.""" 2 | import pytest 3 | 4 | from langchain_nvidia_trt.llms import TritonTensorRTLLM 5 | 6 | _MODEL_NAME = "ensemble" 7 | 8 | 9 | @pytest.mark.skip(reason="Need a working Triton server") 10 | def test_stream() -> None: 11 | """Test streaming tokens from NVIDIA TRT.""" 12 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 13 | 14 | for token in llm.stream("I'm Pickle Rick"): 15 | assert isinstance(token, str) 16 | 17 | 18 | @pytest.mark.skip(reason="Need a working Triton server") 19 | async def test_astream() -> None: 20 | """Test streaming tokens from NVIDIA TRT.""" 21 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 22 | 23 | async for token in llm.astream("I'm Pickle Rick"): 24 | assert isinstance(token, str) 25 | 26 | 27 | @pytest.mark.skip(reason="Need a working Triton server") 28 | async def test_abatch() -> None: 29 | """Test streaming tokens from TritonTensorRTLLM.""" 30 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 31 | 32 | result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"]) 33 | for token in result: 34 | assert isinstance(token, str) 35 | 36 | 37 | @pytest.mark.skip(reason="Need a working Triton server") 38 | async def test_abatch_tags() -> None: 39 | """Test batch tokens from TritonTensorRTLLM.""" 40 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 41 | 42 | result = await llm.abatch( 43 | ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]} 44 | ) 45 | for token in result: 46 | assert isinstance(token, str) 47 | 48 | 49 | @pytest.mark.skip(reason="Need a working Triton server") 50 | def test_batch() -> None: 51 | """Test batch tokens from TritonTensorRTLLM.""" 52 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 53 | 54 | result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"]) 55 | for token in result: 56 | assert isinstance(token, str) 57 | 58 | 59 | @pytest.mark.skip(reason="Need a working Triton server") 60 | async def test_ainvoke() -> None: 61 | """Test invoke tokens from TritonTensorRTLLM.""" 62 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 63 | 64 | result = await 
llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]}) 65 | assert isinstance(result, str) 66 | 67 | 68 | @pytest.mark.skip(reason="Need a working Triton server") 69 | def test_invoke() -> None: 70 | """Test invoke tokens from TritonTensorRTLLM.""" 71 | llm = TritonTensorRTLLM(model_name=_MODEL_NAME) 72 | 73 | result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"])) 74 | assert isinstance(result, str) 75 | -------------------------------------------------------------------------------- /libs/trt/tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langchain-nvidia/7f28752b4470fb7e9cb2d359c05f6af178bff2e6/libs/trt/tests/unit_tests/__init__.py -------------------------------------------------------------------------------- /libs/trt/tests/unit_tests/test_imports.py: -------------------------------------------------------------------------------- 1 | from langchain_nvidia_trt import __all__ 2 | 3 | EXPECTED_ALL = ["TritonTensorRTLLM"] 4 | 5 | 6 | def test_all_imports() -> None: 7 | assert sorted(EXPECTED_ALL) == sorted(__all__) 8 | -------------------------------------------------------------------------------- /libs/trt/tests/unit_tests/test_llms.py: -------------------------------------------------------------------------------- 1 | """Test TritonTensorRT Chat API wrapper.""" 2 | from langchain_nvidia_trt import TritonTensorRTLLM 3 | 4 | 5 | def test_initialization() -> None: 6 | """Test integration initialization.""" 7 | TritonTensorRTLLM(model_name="ensemble", server_url="http://localhost:8001") 8 | -------------------------------------------------------------------------------- /studio/.env.example: -------------------------------------------------------------------------------- 1 | NVIDIA_API_KEY=nvapi- 2 | TAVILY_API_KEY=tvly- -------------------------------------------------------------------------------- /studio/langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "dockerfile_lines": [], 3 | "graphs": { 4 | "agentic_rag_nvidia": "./agentic_rag_nvidia.py:graph" 5 | }, 6 | "python_version": "3.11", 7 | "env": "./.env", 8 | "dependencies": [ 9 | "." 10 | ] 11 | } -------------------------------------------------------------------------------- /studio/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain-nvidia-ai-endpoints 2 | langchain-chroma 3 | langchain-community 4 | langchain 5 | langgraph 6 | tavily-python 7 | beautifulsoup4 8 | lxml --------------------------------------------------------------------------------