├── .gitattributes ├── .github └── workflows │ └── rustfmt.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── docs ├── README.md ├── concepts │ └── foundry-local-architecture.md ├── how-to │ ├── compile-models-for-foundry-local.md │ ├── integrate-with-inference-sdks.md │ └── manage.md ├── includes │ └── integrate-examples │ │ ├── csharp.md │ │ ├── javascript.md │ │ ├── python.md │ │ └── rest.md ├── media │ └── architecture │ │ └── foundry-local-arch.png ├── reference │ ├── reference-cli.md │ ├── reference-rest.md │ ├── reference-sdk.md │ ├── reference-security-privacy.md │ └── reference-troubleshooting.md ├── tutorials │ ├── chat-application-with-open-web-ui.md │ └── use-langchain-with-foundry-local.md └── what-is-foundry-local.md ├── licenses ├── deepseek.md ├── mistral.md └── phi.md ├── media └── icons │ ├── ai_studio_icon_black.svg │ ├── ai_studio_icon_color.svg │ └── ai_studio_icon_white.svg ├── samples ├── electron │ └── foundry-chat │ │ ├── .gitignore │ │ ├── .vscode │ │ └── launch.json │ │ ├── Readme.md │ │ ├── chat.html │ │ ├── main.js │ │ ├── package.json │ │ └── preload.cjs ├── js │ └── hello-foundry-local │ │ ├── README.md │ │ └── src │ │ └── app.js ├── python │ ├── hello-foundry-local │ │ ├── README.md │ │ └── src │ │ │ └── app.py │ └── summarize │ │ ├── .vscode │ │ └── launch.json │ │ ├── README.md │ │ ├── requirements.txt │ │ └── summarize.py └── rust │ ├── Cargo.toml │ ├── README.md │ └── hello-foundry-local │ ├── Cargo.toml │ ├── README.md │ └── src │ └── main.rs └── sdk ├── cs ├── .editorconfig ├── .gitignore ├── FoundryLocal.sln ├── README.md ├── samples │ └── TestApp │ │ ├── Program.cs │ │ └── TestApp.csproj ├── src │ ├── FoundryLocalManager.cs │ ├── FoundryModelInfo.cs │ ├── Microsoft.AI.Foundry.Local.csproj │ └── Microsoft.AI.Foundry.Local.csproj.user └── test │ └── FoundryLocal.Tests │ ├── FoundryLocal.Tests.csproj │ └── FoundryLocalManagerTest.cs ├── js ├── .eslintrc.cjs ├── .npmignore ├── .prettierignore ├── .prettierrc.json ├── LICENSE.txt ├── README.md ├── package-lock.json ├── package.json ├── src │ ├── base.ts │ ├── client.ts │ ├── index.ts │ ├── service.ts │ └── types.ts ├── test │ ├── base.test.ts │ ├── client.test.ts │ ├── index.test.ts │ └── service.test.ts └── tsconfig.json ├── python ├── .lintrunner.toml ├── LICENSE.txt ├── README.md ├── foundry_local │ ├── __init__.py │ ├── api.py │ ├── client.py │ ├── logging.py │ ├── models.py │ └── service.py ├── pyproject.toml ├── requirements-dev.txt ├── requirements-lintrunner.txt ├── requirements.txt ├── setup.py └── test │ ├── __init__.py │ ├── test_api.py │ ├── test_client.py │ ├── test_models.py │ └── test_service.py └── rust ├── Cargo.toml ├── README.md ├── src ├── api.rs ├── client.rs ├── lib.rs ├── models.rs └── service.rs └── tests ├── README.md ├── integration_tests.rs ├── mock_service.rs └── test_api.rs /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 
2 | * text=auto 3 | 4 | # Windows specific files should retain windows line-endings 5 | *.sln text eol=crlf 6 | -------------------------------------------------------------------------------- /.github/workflows/rustfmt.yml: -------------------------------------------------------------------------------- 1 | name: Rust-fmt 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - 'sdk/rust/**' 7 | - 'samples/rust/**' 8 | push: 9 | paths: 10 | - 'sdk/rust/**' 11 | - 'samples/rust/**' 12 | branches: 13 | - main 14 | workflow_dispatch: 15 | 16 | jobs: 17 | check: 18 | runs-on: ubuntu-22.04 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v4 22 | - name: Update toolchain 23 | run: rustup update --no-self-update stable && rustup default stable 24 | - name: Check SDK 25 | working-directory: sdk/rust 26 | run: cargo fmt --all -- --check 27 | - name: Check Samples 28 | working-directory: samples/rust 29 | run: cargo fmt --all -- --check 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Visual Studio and Visual Studio Code 2 | bin/ 3 | obj/ 4 | .vs/ 5 | 6 | # build, distribute, and bins 7 | build/ 8 | build_*/ 9 | .build_debug/* 10 | .build_release/* 11 | dist/ 12 | *.egg-info 13 | 14 | # python 15 | __pycache__/ 16 | *.py[cod] 17 | *$py.class 18 | 19 | # node 20 | node_modules/ 21 | 22 | # Rust build targets 23 | target/ 24 | Cargo.lock 25 | 26 | # Build outputs 27 | bin/ 28 | obj/ 29 | /src/cs/samples/ConsoleClient/test.http 30 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing 2 | 3 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 4 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 5 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 6 | 7 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 8 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 9 | provided by the bot. You will only need to do this once across all repos using our CLA. 10 | 11 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 12 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 13 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 14 | 15 | ## Trademarks 16 | 17 | This project may contain trademarks or logos for projects, products, or services. 
Authorized use of Microsoft 18 | trademarks or logos is subject to and must follow 19 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 20 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 21 | Any use of third-party trademarks or logos are subject to those third-party's policies. 22 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 
40 | 41 | 42 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Get Started with Foundry Local 2 | 3 | This guide provides detailed instructions on installing, configuring, and using Foundry Local to run AI models on your device. 4 | 5 | ## Prerequisites 6 | 7 | - A PC with sufficient specifications to run AI models locally 8 | - Windows 10 or later 9 | - Greater than 8GB RAM 10 | - Greater than 10GB of free disk space for model caching (quantized Phi 3.2 models are ~3GB) 11 | - Suggested hardware for optimal performance: 12 | - Windows 11 13 | - NVIDIA GPU (2000 series or newer) OR AMD GPU (6000 series or newer) OR Qualcomm Snapdragon X Elite, with 8GB or more of VRAM 14 | - Greater than 16GB RAM 15 | - Greater than 20GB of free disk space for model caching (the largest models are ~15GB) 16 | - Administrator access to install software 17 | 18 | ## Installation 19 | 20 | 1. Download Foundry Local for your platform from the [releases page](https://github.com/microsoft/Foundry-Local/releases). 21 | 2. Install the package by following the on-screen prompts. 22 | 3. After installation, access the tool via command line with `foundry`. 23 | 24 | ## Running Your First Model 25 | 26 | 1. Open a command prompt or terminal window. 27 | 2. Run a model using the following command: 28 | 29 | ```bash 30 | foundry model run phi-3.5-mini 31 | ``` 32 | 33 | This command will: 34 | 35 | - Download the model to your local disk 36 | - Load the model into your device 37 | - Start a chat interface 38 | 39 | **💡 TIP:** Replace `phi-3.5-mini` with any model from the catalog. Use `foundry model list` to see available models. 
40 | 41 | ## Explore Foundry Local CLI commands 42 | 43 | The foundry CLI is structured into several categories: 44 | 45 | - **Model**: Commands related to managing and running models 46 | - **Service**: Commands for managing the Foundry Local service 47 | - **Cache**: Commands for managing the local cache where models are stored 48 | 49 | To see all available commands, use the help option: 50 | 51 | ```bash 52 | foundry --help 53 | ``` 54 | 55 | **💡 TIP:** For a complete reference of all available CLI commands and their usage, see the [Foundry Local CLI Reference](./reference/reference-cli.md) 56 | 57 | ## Integrating with Applications 58 | 59 | Foundry Local provides an OpenAI-compatible REST API at `http://localhost:PORT/v1`. 60 | 61 | - Note that the port will be dynamically assigned, so check the logs for the correct port. 62 | 63 | ### REST API Example 64 | 65 | ```bash 66 | curl http://localhost:5273/v1/chat/completions \ 67 | -H "Content-Type: application/json" \ 68 | -d '{ 69 | "model": "Phi-3.5-mini-instruct-generic-cpu", 70 | "messages": [{"role": "user", "content": "What is the capital of France?"}], 71 | "temperature": 0.7, 72 | "max_tokens": 50 73 | }' 74 | ``` 75 | 76 | Read about all the samples we have for various languages and platforms in the [Integrate with Inference SDKs](./how-to/integrate-with-inference-sdks.md) section. 77 | 78 | ## Troubleshooting 79 | 80 | ### Common Issues and Solutions 81 | 82 | | Issue | Possible Cause | Solution | 83 | | ----------------------- | --------------------------------------- | ----------------------------------------------------------------------------------------- | 84 | | Slow inference | CPU-only model on large parameter count | Use GPU-optimized model variants when available | 85 | | Model download failures | Network connectivity issues | Check your internet connection, try `foundry cache list` to verify cache state | 86 | | Service won't start | Port conflicts or permission issues | Try `foundry service restart` or post an issue providing logs with `foundry zip-logsrock` | 87 | 88 | For more information, see the [troubleshooting guide](./reference/reference-troubleshooting.md). 89 | 90 | ## Next Steps 91 | 92 | - [Learn more about Foundry Local](./what-is-foundry-local.md) 93 | - [Integrate with inferencing SDKs](./how-to/integrate-with-inference-sdks.md) 94 | - [Compile models for Foundry Local](./how-to/compile-models-for-foundry-local.md) 95 | - [Build a chat application](./tutorials/chat-application-with-open-web-ui.md) 96 | - [Use Langchain](./tutorials/use-langchain-with-foundry-local.md) 97 | -------------------------------------------------------------------------------- /docs/concepts/foundry-local-architecture.md: -------------------------------------------------------------------------------- 1 | # Foundry Local Architecture 2 | 3 | Foundry Local is designed to enable efficient, secure, and scalable AI model inference directly on local devices. This article explains the key components of the Foundry Local architecture and how they interact to deliver AI capabilities. 4 | 5 | The benefits of Foundry Local include: 6 | 7 | - **Low Latency**: By running models locally, Foundry Local minimizes the time it takes to process requests and return results. 8 | - **Data Privacy**: Sensitive data can be processed locally without sending it to the cloud, ensuring compliance with data protection regulations. 
9 | - **Flexibility**: Foundry Local supports a wide range of hardware configurations, allowing users to choose the best setup for their needs. 10 | - **Scalability**: Foundry Local can be deployed on various devices, from personal computers to powerful servers, making it suitable for different use cases. 11 | - **Cost-Effectiveness**: Running models locally can reduce costs associated with cloud computing, especially for high-volume applications. 12 | - **Offline Capabilities**: Foundry Local can operate without an internet connection, making it ideal for remote or disconnected environments. 13 | - **Integration with Existing Workflows**: Foundry Local can be easily integrated into existing development and deployment workflows, allowing for a smooth transition to local inference. 14 | 15 | ## Key Components 16 | 17 | The key components of the Foundry Local architecture are articulated in the following diagram: 18 | 19 | ![Foundry Local Architecture Diagram](../media/architecture/foundry-local-arch.png) 20 | 21 | ### Foundry Local Service 22 | 23 | The Foundry Local Service is an OpenAI compatible REST server that provides a standardized interface for interacting with the inference engine and model management. Developers can use this API to send requests, run models, and retrieve results programmatically. 24 | 25 | - **Endpoint**: `http://localhost:PORT/v1` 26 | - Note: The port is dynamically assigned, so check the logs for the correct port. 27 | - **Use Cases**: 28 | - Integrating Foundry Local with custom applications. 29 | - Running models via HTTP requests. 30 | 31 | ### ONNX Runtime 32 | 33 | The ONNX runtime is a core component responsible for running AI models. It uses optimized ONNX models to perform inference efficiently on local hardware, such as CPUs, GPUs, or NPUs. 34 | 35 | **Features**: 36 | 37 | - Supports multiple hardware providers (for example: NVIDIA, AMD, Intel) and devices (for example: NPUs, CPUs, GPUs). 38 | - Provides a unified interface for running models on different hardware platforms. 39 | - Best-in-class performance. 40 | - Supports quantized models for faster inference. 41 | 42 | ### Model Management 43 | 44 | Foundry Local provides robust tools for managing AI models, ensuring that they're readily available for inference and easy to maintain. Model management is handled through the **Model Cache** and the **Command-Line Interface (CLI)**. 45 | 46 | #### Model Cache 47 | 48 | The model cache is a local storage system where AI models are downloaded and stored. It ensures that models are available for inference without requiring repeated downloads. The cache can be managed using the Foundry CLI or REST API. 49 | 50 | - **Purpose**: Reduces latency by storing models locally. 51 | - **Management Commands**: 52 | - `foundry cache list`: Lists all models stored in the local cache. 53 | - `foundry cache remove `: Deletes a specific model from the cache. 54 | - `foundry cache cd `: Changes the directory where models are stored. 55 | 56 | #### Model Lifecycle 57 | 58 | 1. **Download**: Models are downloaded from the Azure AI Foundry model catalog to local disk. 59 | 2. **Load**: Models are loaded into the Foundry Local service (and therefore memory) for inference. You can set a TTL (time-to-live) for how long the model should remain in memory (the default is 10 minutes). 60 | 3. **Run**: Models are inferenced. 61 | 4. **Unload**: Models can be unloaded from the inference engine to free up resources. 62 | 5. 
**Delete**: Models can be deleted from the local cache to free up disk space. 63 | 64 | #### Model Compilation using Olive 65 | 66 | Before models can be used with Foundry Local, they must be compiled and optimized in the [ONNX](https://onnx.ai) format. Microsoft provides a selection of published models in the Azure AI Foundry Model Catalog that are already optimized for Foundry Local. However, you aren't limited to those models - by using [Olive](https://microsoft.github.io/Olive/). Olive is a powerful framework for preparing AI models for efficient inference. It converts models into the ONNX format, optimizes their graph structure, and applies techniques like quantization to improve performance on local hardware. 67 | 68 | **💡 TIP**: To learn more about compiling models for Foundry Local, read [Compile Hugging Face models for Foundry Local](../how-to/compile-models-for-foundry-local.md). 69 | 70 | ### Hardware Abstraction Layer 71 | 72 | The hardware abstraction layer ensures that Foundry Local can run on various devices by abstracting the underlying hardware. To optimize performance based on the available hardware, Foundry Local supports: 73 | 74 | - **multiple _execution providers_**, such as NVIDIA CUDA, AMD, Qualcomm, Intel. 75 | - **multiple _device types_**, such as CPU, GPU, NPU. 76 | 77 | ### Developer Experiences 78 | 79 | The Foundry Local architecture is designed to provide a seamless developer experience, enabling easy integration and interaction with AI models. 80 | 81 | Developers can choose from various interfaces to interact with the system, including: 82 | 83 | #### Command-Line Interface (CLI) 84 | 85 | The Foundry CLI is a powerful tool for managing models, the inference engine, and the local cache. 86 | 87 | **Examples**: 88 | 89 | - `foundry model list`: Lists all available models in the local cache. 90 | - `foundry model run `: Runs a model. 91 | - `foundry service status`: Checks the status of the service. 92 | 93 | **💡 TIP**: To learn more about the CLI commands, read [Foundry Local CLI Reference](../reference/reference-cli.md). 94 | 95 | #### Inferencing SDK Integration 96 | 97 | Foundry Local supports integration with various SDKs, such as the OpenAI SDK, enabling developers to use familiar programming interfaces to interact with the local inference engine. 98 | 99 | - **Supported SDKs**: Python, JavaScript, C#, and more. 100 | 101 | **💡 TIP**: To learn more about integrating with inferencing SDKs, read [Integrate Foundry Local with Inferencing SDKs](../how-to/integrate-with-inference-sdks.md). 102 | 103 | #### AI Toolkit for Visual Studio Code 104 | 105 | The AI Toolkit for Visual Studio Code provides a user-friendly interface for developers to interact with Foundry Local. It allows users to run models, manage the local cache, and visualize results directly within the IDE. 106 | 107 | - **Features**: 108 | - Model management: Download, load, and run models from within the IDE. 109 | - Interactive console: Send requests and view responses in real-time. 110 | - Visualization tools: Graphical representation of model performance and results. 
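
To make the model lifecycle and inferencing SDK integration described above concrete, here is a minimal sketch that drives Foundry Local from Python. It follows the same calls used in this repository's tutorials (`foundry-local-sdk` for model management, the OpenAI client for inference); the port `5273` and the model ID are example values only, since the service assigns its port dynamically and your local catalog may differ.

```python
# Minimal sketch, assuming `pip install foundry-local-sdk openai` and that the
# example port/model ID are replaced with the values from your own service logs.
from foundry_local import FoundryLocalManager
from openai import OpenAI

model_id = "Phi-4-mini-instruct-generic-cpu"

# Download the model into the local cache and load it into the service.
# bootstrap=True starts the Foundry Local service if it isn't already running.
manager = FoundryLocalManager(model_id_or_alias=None, bootstrap=True)
manager.download_model(model_id)
manager.load_model(model_id)

# Inference goes through the OpenAI-compatible REST endpoint; the API key is
# ignored by Foundry Local but the client requires a non-empty value.
client = OpenAI(base_url="http://localhost:5273/v1", api_key="not-needed")
response = client.chat.completions.create(
    model=model_id,
    messages=[{"role": "user", "content": "Summarize what Foundry Local does."}],
    max_tokens=200,
)
print(response.choices[0].message.content)

# When finished, free memory by unloading the model, for example via the CLI:
#   foundry model unload Phi-4-mini-instruct-generic-cpu
```

The same flow maps directly onto the CLI commands above: `foundry model download`, `foundry model load`, and `foundry service ps` to confirm which models are loaded.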
111 | 112 | ## Next Steps 113 | 114 | - [Get started with Foundry Local](../get-started.md) 115 | - [Integrate with Inference SDKs](../how-to/integrate-with-inference-sdks.md) 116 | - [Foundry Local CLI Reference](../reference/reference-cli.md) 117 | -------------------------------------------------------------------------------- /docs/how-to/integrate-with-inference-sdks.md: -------------------------------------------------------------------------------- 1 | # Integrate Foundry Local with Inferencing SDKs 2 | 3 | Foundry Local provides a REST API endpoint that makes it easy to integrate with various inferencing SDKs and programming languages. This guide shows you how to connect your applications to locally running AI models using popular SDKs. 4 | 5 | ## Prerequisites 6 | 7 | - Foundry Local installed and running on your system 8 | - A model loaded into the service (use `foundry model load `) 9 | - Basic knowledge of the programming language you want to use for integration 10 | - Development environment for your chosen language 11 | 12 | ## Understanding the REST API 13 | 14 | When Foundry Local is running, it exposes an OpenAI-compatible REST API endpoint at `http://localhost:PORT/v1`. This endpoint supports standard API operations like: 15 | 16 | - `/completions` - For text completion 17 | - `/chat/completions` - For chat-based interactions 18 | - `/models` - To list available models 19 | 20 | This port will be dynamically assigned, so check the logs for the correct port. 21 | 22 | ## Language Examples 23 | 24 | ### Python 25 | 26 | ```python 27 | from openai import OpenAI 28 | 29 | # Configure the client to use your local endpoint 30 | client = OpenAI( 31 | base_url="http://localhost:5273/v1", 32 | api_key="not-needed" # API key isn't used but the client requires one 33 | ) 34 | 35 | # Chat completion example 36 | response = client.chat.completions.create( 37 | model="Phi-3.5-mini-instruct-generic-cpu", # Use the id of your loaded model, found in 'foundry service ps' 38 | messages=[ 39 | {"role": "system", "content": "You are a helpful assistant."}, 40 | {"role": "user", "content": "What is the capital of France?"} 41 | ], 42 | max_tokens=1000 43 | ) 44 | 45 | print(response.choices[0].message.content) 46 | ``` 47 | 48 | Check out the streaming example [here](../includes/integrate-examples/python.md). 49 | 50 | ### REST API 51 | 52 | ```bash 53 | curl http://localhost:5273/v1/chat/completions \ 54 | -H "Content-Type: application/json" \ 55 | -d '{ 56 | model="Phi-3.5-mini-instruct-generic-cpu", 57 | "messages": [ 58 | { 59 | "role": "system", 60 | "content": "You are a helpful assistant." 61 | }, 62 | { 63 | "role": "user", 64 | "content": "What is the capital of France?" 65 | } 66 | ], 67 | "max_tokens": 1000 68 | }' 69 | ``` 70 | 71 | Check out the streaming example [here](../includes/integrate-examples/rest.md). 72 | 73 | ### JavaScript 74 | 75 | ```javascript 76 | import OpenAI from "openai"; 77 | 78 | // Configure the client to use your local endpoint 79 | const openai = new OpenAI({ 80 | baseURL: "http://localhost:5273/v1", 81 | apiKey: "not-needed", // API key isn't used but the client requires one 82 | }); 83 | 84 | async function generateText() { 85 | const response = await openai.chat.completions.create({ 86 | model: "Phi-3.5-mini-instruct-generic-cpu", // Use the id of your loaded model, found in 'foundry service ps' 87 | messages: [ 88 | { role: "system", content: "You are a helpful assistant." }, 89 | { role: "user", content: "What is the capital of France?" 
}, 90 | ], 91 | max_tokens: 1000, 92 | }); 93 | 94 | console.log(response.choices[0].message.content); 95 | } 96 | 97 | generateText(); 98 | ``` 99 | 100 | Check out the streaming example [here](../includes/integrate-examples/javascript.md). 101 | 102 | ### C# 103 | 104 | ```csharp 105 | using Azure.AI.OpenAI; 106 | using Azure; 107 | 108 | // Configure the client to use your local endpoint 109 | OpenAIClient client = new OpenAIClient( 110 | new Uri("http://localhost:5273/v1"), 111 | new AzureKeyCredential("not-needed") // API key isn't used but the client requires one 112 | ); 113 | 114 | // Chat completion example 115 | var chatCompletionsOptions = new ChatCompletionsOptions() 116 | { 117 | Messages = 118 | { 119 | new ChatMessage(ChatRole.System, "You are a helpful assistant."), 120 | new ChatMessage(ChatRole.User, "What is the capital of France?") 121 | }, 122 | MaxTokens = 1000 123 | }; 124 | 125 | Response response = await client.GetChatCompletionsAsync( 126 | "Phi-3.5-mini-instruct-generic-cpu", // Use the id of your loaded model, found in 'foundry service ps' 127 | chatCompletionsOptions 128 | ); 129 | 130 | Console.WriteLine(response.Value.Choices[0].Message.Content); 131 | ``` 132 | 133 | Check out the streaming example [here](../includes/integrate-examples/csharp.md). 134 | 135 | ## Best Practices 136 | 137 | 1. **Error Handling**: Implement robust error handling to manage cases when the local service is unavailable or a model isn't loaded. 138 | 2. **Resource Management**: Be mindful of your local resources. Monitor CPU/RAM usage when making multiple concurrent requests. 139 | 3. **Fallback Strategy**: Consider implementing a fallback to cloud services for when local inference is insufficient. 140 | 4. **Model Preloading**: For production applications, ensure your model is preloaded before starting your application. 141 | 142 | ## Next steps 143 | 144 | - [Compile Hugging Face models for Foundry Local](./compile-models-for-foundry-local.md) 145 | - [Explore the Foundry Local CLI reference](../reference/reference-cli.md) 146 | -------------------------------------------------------------------------------- /docs/how-to/manage.md: -------------------------------------------------------------------------------- 1 | # Manage Foundry Local 2 | 3 | TODO 4 | 5 | ## Prerequisites 6 | 7 | - TODO 8 | 9 | ## Section 10 | 11 | TODO 12 | 13 | ## Next step 14 | 15 | TODO 16 | -------------------------------------------------------------------------------- /docs/includes/integrate-examples/csharp.md: -------------------------------------------------------------------------------- 1 | ## Basic Integration 2 | 3 | ```csharp 4 | // Install with: dotnet add package Azure.AI.OpenAI 5 | using Azure.AI.OpenAI; 6 | using Azure; 7 | 8 | // Create a client. Note the port is dynamically assigned, so check the logs for the correct port. 
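// The key below is a placeholder: Foundry Local ignores it, but the Azure.AI.OpenAI client requires a non-empty credential.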
9 | OpenAIClient client = new OpenAIClient( 10 | new Uri("http://localhost:5273/v1"), 11 | new AzureKeyCredential("not-needed-for-local") 12 | ); 13 | 14 | // Chat completions 15 | ChatCompletionsOptions options = new ChatCompletionsOptions() 16 | { 17 | Messages = 18 | { 19 | new ChatMessage(ChatRole.User, "What is Foundry Local?") 20 | }, 21 | DeploymentName = "Phi-4-mini-instruct-cuda-gpu" // Use model name here 22 | }; 23 | 24 | Response response = await client.GetChatCompletionsAsync(options); 25 | string completion = response.Value.Choices[0].Message.Content; 26 | Console.WriteLine(completion); 27 | ``` 28 | 29 | ## Streaming Response 30 | 31 | ```csharp 32 | // Install with: dotnet add package Azure.AI.OpenAI 33 | using Azure.AI.OpenAI; 34 | using Azure; 35 | using System; 36 | using System.Threading.Tasks; 37 | 38 | async Task StreamCompletionsAsync() 39 | { 40 | // Note the port is dynamically assigned, so check the logs for the correct port. 41 | OpenAIClient client = new OpenAIClient( 42 | new Uri("http://localhost:5273/v1"), 43 | new AzureKeyCredential("not-needed-for-local") 44 | ); 45 | 46 | ChatCompletionsOptions options = new ChatCompletionsOptions() 47 | { 48 | Messages = 49 | { 50 | new ChatMessage(ChatRole.User, "Write a short story about AI") 51 | }, 52 | DeploymentName = "Phi-4-mini-instruct-cuda-gpu" 53 | }; 54 | 55 | await foreach (StreamingChatCompletionsUpdate update in client.GetChatCompletionsStreaming(options)) 56 | { 57 | if (update.ContentUpdate != null) 58 | { 59 | Console.Write(update.ContentUpdate); 60 | } 61 | } 62 | } 63 | 64 | // Call the async method 65 | await StreamCompletionsAsync(); 66 | ``` 67 | -------------------------------------------------------------------------------- /docs/includes/integrate-examples/javascript.md: -------------------------------------------------------------------------------- 1 | ## Using the OpenAI Node.js SDK 2 | 3 | ```javascript 4 | // Install with: npm install openai 5 | import OpenAI from "openai"; 6 | // Note the port is dynamically assigned, so check the logs for the correct port. 7 | const openai = new OpenAI({ 8 | baseURL: "http://localhost:5273/v1", 9 | apiKey: "not-needed-for-local", 10 | }); 11 | 12 | async function generateText() { 13 | const response = await openai.chat.completions.create({ 14 | model: "Phi-4-mini-instruct-cuda-gpu", 15 | messages: [ 16 | { 17 | role: "user", 18 | content: "How can I integrate Foundry Local with my app?", 19 | }, 20 | ], 21 | }); 22 | 23 | console.log(response.choices[0].message.content); 24 | } 25 | 26 | generateText(); 27 | ``` 28 | 29 | ## Using Fetch API 30 | 31 | // Note the port is dynamically assigned, so check the logs for the correct port. 32 | 33 | ```javascript 34 | async function queryModel() { 35 | const response = await fetch("http://localhost:5273/v1/chat/completions", { 36 | method: "POST", 37 | headers: { 38 | "Content-Type": "application/json", 39 | }, 40 | body: JSON.stringify({ 41 | model: "Phi-4-mini-instruct-cuda-gpu", 42 | messages: [ 43 | { role: "user", content: "What are the advantages of Foundry Local?" 
}, 44 | ], 45 | }), 46 | }); 47 | 48 | const data = await response.json(); 49 | console.log(data.choices[0].message.content); 50 | } 51 | 52 | queryModel(); 53 | ``` 54 | 55 | ## Streaming Responses 56 | 57 | ### Using OpenAI SDK 58 | 59 | ```javascript 60 | // Install with: npm install openai 61 | import OpenAI from "openai"; 62 | 63 | const openai = new OpenAI({ 64 | baseURL: "http://localhost:5273/v1", 65 | apiKey: "not-needed-for-local", 66 | }); 67 | 68 | async function streamCompletion() { 69 | const stream = await openai.chat.completions.create({ 70 | model: "Phi-4-mini-instruct-cuda-gpu", 71 | messages: [{ role: "user", content: "Write a short story about AI" }], 72 | stream: true, 73 | }); 74 | 75 | for await (const chunk of stream) { 76 | if (chunk.choices[0]?.delta?.content) { 77 | process.stdout.write(chunk.choices[0].delta.content); 78 | } 79 | } 80 | } 81 | 82 | streamCompletion(); 83 | ``` 84 | 85 | ### Using Fetch API and ReadableStream 86 | 87 | ```javascript 88 | async function streamWithFetch() { 89 | const response = await fetch("http://localhost:5273/v1/chat/completions", { 90 | method: "POST", 91 | headers: { 92 | "Content-Type": "application/json", 93 | Accept: "text/event-stream", 94 | }, 95 | body: JSON.stringify({ 96 | model: "Phi-4-mini-instruct-cuda-gpu", 97 | messages: [{ role: "user", content: "Write a short story about AI" }], 98 | stream: true, 99 | }), 100 | }); 101 | 102 | const reader = response.body.getReader(); 103 | const decoder = new TextDecoder(); 104 | 105 | while (true) { 106 | const { done, value } = await reader.read(); 107 | if (done) break; 108 | 109 | const chunk = decoder.decode(value); 110 | const lines = chunk.split("\n").filter((line) => line.trim() !== ""); 111 | 112 | for (const line of lines) { 113 | if (line.startsWith("data: ")) { 114 | const data = line.substring(6); 115 | if (data === "[DONE]") continue; 116 | 117 | try { 118 | const json = JSON.parse(data); 119 | const content = json.choices[0]?.delta?.content || ""; 120 | if (content) { 121 | // Print to console without line breaks, similar to process.stdout.write 122 | process.stdout.write(content); 123 | } 124 | } catch (e) { 125 | console.error("Error parsing JSON:", e); 126 | } 127 | } 128 | } 129 | } 130 | } 131 | 132 | // Call the function to start streaming 133 | streamWithFetch(); 134 | ``` 135 | -------------------------------------------------------------------------------- /docs/includes/integrate-examples/python.md: -------------------------------------------------------------------------------- 1 | ## Using the OpenAI SDK 2 | 3 | ```python 4 | # Install with: pip install openai 5 | import openai 6 | 7 | # Configure the client to use your local endpoint, noting the port is dynamically assigned 8 | client = openai.OpenAI( 9 | base_url="http://localhost:5273/v1", 10 | api_key="not-needed-for-local" # API key is not required for local usage 11 | ) 12 | 13 | # Chat completions 14 | response = client.chat.completions.create( 15 | model="Phi-4-mini-instruct-cuda-gpu", # Use a model loaded in your service 16 | messages=[ 17 | {"role": "user", "content": "Explain how Foundry Local works."} 18 | ] 19 | ) 20 | 21 | print(response.choices[0].message.content) 22 | ``` 23 | 24 | ## Using Direct HTTP Requests 25 | 26 | ```python 27 | # Install with: pip install requests 28 | import requests 29 | import json 30 | # note the port is dynamically assigned, so check the logs for the correct port 31 | url = "http://localhost:5273/v1/chat/completions" 32 | 33 | payload = { 34 | "model": 
"Phi-4-mini-instruct-cuda-gpu", 35 | "messages": [ 36 | {"role": "user", "content": "What are the benefits of running AI models locally?"} 37 | ] 38 | } 39 | 40 | headers = { 41 | "Content-Type": "application/json" 42 | } 43 | 44 | response = requests.post(url, headers=headers, data=json.dumps(payload)) 45 | print(response.json()["choices"][0]["message"]["content"]) 46 | ``` 47 | 48 | ## Streaming Response 49 | 50 | ```python 51 | import openai 52 | # note the port is dynamically assigned, so check the logs for the correct port 53 | client = openai.OpenAI( 54 | base_url="http://localhost:5273/v1", 55 | api_key="not-needed-for-local" 56 | ) 57 | 58 | stream = client.chat.completions.create( 59 | model="Phi-4-mini-instruct-cuda-gpu", 60 | messages=[{"role": "user", "content": "Write a short story about AI"}], 61 | stream=True 62 | ) 63 | 64 | for chunk in stream: 65 | if chunk.choices[0].delta.content is not None: 66 | print(chunk.choices[0].delta.content, end="") 67 | ``` 68 | -------------------------------------------------------------------------------- /docs/includes/integrate-examples/rest.md: -------------------------------------------------------------------------------- 1 | ## Basic Request 2 | 3 | For quick tests or integrations with command line scripts: 4 | 5 | ```bash 6 | curl http://localhost:5273/v1/chat/completions ^ 7 | -H "Content-Type: application/json" ^ 8 | -d "{\"model\": \"Phi-4-mini-instruct-cuda-gpu\", \"messages\": [{\"role\": \"user\", \"content\": \"Tell me a short story\"}]}" 9 | ``` 10 | 11 | ## Streaming Response 12 | 13 | **Note**: Please change the port to your dynamically assigned one. The example here works, but because there's no cleansing of the output, it may not be as clean as the other examples. 14 | 15 | ```bash 16 | curl http://localhost:5273/v1/chat/completions ^ 17 | -H "Content-Type: application/json" ^ 18 | -d "{\"model\": \"Phi-4-mini-instruct-cuda-gpu\", \"messages\": [{\"role\": \"user\", \"content\": \"Tell me a short story\"}], \"stream\": true}" 19 | ``` 20 | -------------------------------------------------------------------------------- /docs/media/architecture/foundry-local-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Foundry-Local/4c0ce3002f959cb3ed62650ea30fa039b347b8ef/docs/media/architecture/foundry-local-arch.png -------------------------------------------------------------------------------- /docs/reference/reference-cli.md: -------------------------------------------------------------------------------- 1 | # Foundry Local CLI Reference 2 | 3 | This article provides a comprehensive reference for the Foundry Local command-line interface (CLI). The foundry CLI is structured into several categories to help you manage models, control the service, and maintain your local cache. 
4 | 5 | ## Overview 6 | 7 | To see all available commands, use the help option: 8 | 9 | ```bash 10 | foundry --help 11 | ``` 12 | 13 | The foundry CLI is structured into these main categories: 14 | 15 | - **Model**: Commands related to managing and running models 16 | - **Service**: Commands for managing the Foundry Local service 17 | - **Cache**: Commands for managing the local cache where models are stored 18 | 19 | ## Model commands 20 | 21 | The following table summarizes the commands related to managing and running models: 22 | 23 | | **Command** | **Description** | 24 | | -------------------------------- | -------------------------------------------------------------------------------- | 25 | | `foundry model --help` | Displays all available model-related commands and their usage. | 26 | | `foundry model run ` | Runs a specified model, downloading it if not cached, and starts an interaction. | 27 | | `foundry model list` | Lists all available models for local use. | 28 | | `foundry model info ` | Displays detailed information about a specific model. | 29 | | `foundry model download ` | Downloads a model to the local cache without running it. | 30 | | `foundry model load ` | Loads a model into the service. | 31 | | `foundry model unload ` | Unloads a model from the service. | 32 | 33 | ## Service commands 34 | 35 | The following table summarizes the commands related to managing the Foundry Local service: 36 | 37 | | **Command** | **Description** | 38 | | ------------------------------- | ---------------------------------------------------------------- | 39 | | `foundry service --help` | Displays all available service-related commands and their usage. | 40 | | `foundry service start` | Starts the Foundry Local service. | 41 | | `foundry service stop` | Stops the Foundry Local service. | 42 | | `foundry service restart` | Restarts the Foundry Local service. | 43 | | `foundry service status` | Displays the current status of the Foundry Local service. | 44 | | `foundry service ps` | Lists all models currently loaded in the Foundry Local service. | 45 | | `foundry service logs` | Displays the logs of the Foundry Local service. | 46 | | `foundry service set ` | Set configuration of the Foundry Local service. | 47 | 48 | ## Cache commands 49 | 50 | The following table summarizes the commands related to managing the local cache where models are stored: 51 | 52 | | **Command** | **Description** | 53 | | ------------------------------ | -------------------------------------------------------------- | 54 | | `foundry cache --help` | Displays all available cache-related commands and their usage. | 55 | | `foundry cache location` | Displays the current cache directory. | 56 | | `foundry cache list` | Lists all models stored in the local cache. | 57 | | `foundry cache remove ` | Deletes a model from the local cache. | 58 | | `foundry cache cd ` | Changes the cache directory. 
| 59 | 60 | ## Common CLI usage examples 61 | 62 | ### Quick start with a model 63 | 64 | ```bash 65 | # Download and run a model interactively 66 | foundry model run phi-4-mini 67 | 68 | # Check model information before running 69 | foundry model info phi-4-mini 70 | 71 | # Download a model without running it 72 | foundry model download phi-4-mini 73 | ``` 74 | 75 | ### Managing the service 76 | 77 | ```bash 78 | # Check service status 79 | foundry service status 80 | 81 | # View active models 82 | foundry service ps 83 | 84 | # Restart the service when troubleshooting 85 | foundry service restart 86 | ``` 87 | 88 | ### Working with the cache 89 | 90 | ```bash 91 | # List cached models 92 | foundry cache list 93 | 94 | # Remove a model that's no longer needed 95 | foundry cache remove old-model 96 | 97 | # Change cache location to a larger drive 98 | foundry cache cd /path/to/larger/drive 99 | ``` 100 | 101 | ### Advanced usage 102 | 103 | ```bash 104 | # View detailed model license information 105 | foundry model info phi-4-mini --license 106 | 107 | # Generate diagnostic logs for support 108 | foundry zip-logs 109 | 110 | # Configure GPU settings for better performance 111 | foundry service set --gpu 0 112 | ``` 113 | -------------------------------------------------------------------------------- /docs/reference/reference-security-privacy.md: -------------------------------------------------------------------------------- 1 | # Best practices and troubleshooting guide for Foundry Local 2 | 3 | This document provides best practices and troubleshooting tips for Foundry Local. 4 | 5 | ## Security and privacy considerations 6 | 7 | Foundry Local is designed with privacy and security as core principles: 8 | 9 | - **Local processing**: All data processed by Foundry Local remains on your device and is never sent to Microsoft or any external services. 10 | - **No telemetry**: Foundry Local does not collect usage data or model inputs. 11 | - **Air-gapped environments**: Foundry Local can be used in disconnected environments after initial model download. 12 | 13 | ## Security best practices 14 | 15 | - Use Foundry Local in environments that comply with your organization's security policies. 16 | - When handling sensitive data, ensure your device meets your organization's security requirements. 17 | - Use disk encryption on devices where cached models might contain sensitive fine-tuning data. 18 | 19 | ## Licensing considerations 20 | 21 | When using Foundry Local, be aware of the licensing implications for the models you run. You can view full terms of model license for each model in the model catalog using: 22 | 23 | ```bash 24 | foundry model info --license 25 | ``` 26 | 27 | Models available through Foundry Local are subject to their original licenses: 28 | 29 | - Open-source models maintain their original licenses (e.g., Apache 2.0, MIT). 30 | - Commercial models may have specific usage restrictions or require separate licensing. 31 | - Always review the licensing information for each model before deploying in production. 32 | 33 | ## Production deployment scope 34 | 35 | Foundry Local is designed for on-device inference and _not_ distributed, containerized, or multi-machine production deployments. 
36 | 37 | ## Troubleshooting 38 | 39 | ### Common issues and solutions 40 | 41 | | Issue | Possible Cause | Solution | 42 | | -------------------------- | ----------------------------------------- | ----------------------------------------------------------------------------------- | 43 | | Slow inference | CPU-only model with large parameter count | Use GPU-optimized model variants when available | 44 | | Model download failures | Network connectivity issues | Check your internet connection and run `foundry cache list` to verify cache status | 45 | | The service fails to start | Port conflicts or permission issues | Try `foundry service restart` or report an issue with logs using `foundry zip-logs` | 46 | 47 | ### Improving performance 48 | 49 | If you experience slow inference, consider the following strategies: 50 | 51 | - Use GPU acceleration when available 52 | - Identify bottlenecks by monitoring memory usage during inference 53 | - Try more quantized model variants (like INT8 instead of FP16) 54 | - Adjust batch sizes for non-interactive workloads 55 | -------------------------------------------------------------------------------- /docs/reference/reference-troubleshooting.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting 2 | 3 | ## Common issues and solutions 4 | 5 | | Issue | Possible Cause | Solution | 6 | | ----------------------- | --------------------------------------- | ----------------------------------------------------------------------------------------- | 7 | | Slow inference | CPU-only model on large parameter count | Use GPU-optimized model variants when available | 8 | | Model download failures | Network connectivity issues | Check your internet connection, try `foundry cache list` to verify cache state | 9 | | Service won't start | Port conflicts or permission issues | Try `foundry service restart` or post an issue providing logs with `foundry zip-logsrock` | 10 | 11 | ## Diagnosing performance issues 12 | 13 | If you're experiencing slow inference: 14 | 15 | 1. Check that you're using GPU acceleration if available 16 | 2. Monitor memory usage during inference to detect bottlenecks 17 | 3. Consider a more quantized model variant (e.g., INT8 instead of FP16) 18 | 4. Experiment with batch sizes for non-interactive workloads 19 | -------------------------------------------------------------------------------- /docs/tutorials/chat-application-with-open-web-ui.md: -------------------------------------------------------------------------------- 1 | # Build a chat application with Open Web UI 2 | 3 | This tutorial shows you how to create a chat application using Foundry Local and Open Web UI. When you finish, you'll have a working chat interface running entirely on your local device. 4 | 5 | ## Prerequisites 6 | 7 | Before you start this tutorial, you need: 8 | 9 | - **Foundry Local** [installed](../get-started.md) on your computer. 10 | - **At least one model loaded** with the `foundry model load` command, like this: 11 | ```bash 12 | foundry model load Phi-4-mini-instruct-cuda-gpu 13 | ``` 14 | 15 | ## Set up Open Web UI for chat 16 | 17 | 1. **Install Open Web UI** by following the instructions from the [Open Web UI GitHub repository](https://github.com/open-webui/open-webui). 18 | 19 | 2. **Launch Open Web UI** with this command in your terminal: 20 | 21 | ```bash 22 | open-webui serve 23 | ``` 24 | 25 | 3. Open your web browser and go to [http://localhost:8080](http://localhost:8080). 26 | 27 | 4. 
**Connect Open Web UI to Foundry Local**: 28 | 29 | 1. Select **Settings** in the navigation menu 30 | 2. Select **Connections** 31 | 3. Select **Manage Direct Connections** 32 | 4. Select the **+** icon to add a connection 33 | 5. Enter `http://localhost:PORT/v1` for the URL, where `PORT` is the port number assigned to your Foundry Local instance. 34 | 6. Type any value (like `test`) for the API Key, since it cannot be empty 35 | 7. Save your connection 36 | 37 | ![image](https://github.com/user-attachments/assets/82437726-2b80-442a-b9bc-df46eb7f3d77) 38 | 39 | 5. **Start chatting with your model**: 40 | 1. Your loaded models will appear in the dropdown at the top 41 | 2. Select any model from the list 42 | 3. Type your message in the input box at the bottom 43 | 44 | That's it! You're now chatting with an AI model running entirely on your local device. 45 | 46 | ## Next steps 47 | 48 | - [Build an application with LangChain](use-langchain-with-foundry-local.md) 49 | - [How to compile Hugging Face models to run on Foundry Local](../how-to/how-to-compile-hugging-face-models.md) 50 | -------------------------------------------------------------------------------- /docs/tutorials/use-langchain-with-foundry-local.md: -------------------------------------------------------------------------------- 1 | # Build an application with LangChain 2 | 3 | This tutorial shows you how to create an application using Foundry Local and LangChain. You learn how to integrate locally hosted AI models with the popular LangChain framework. 4 | 5 | ## Prerequisites 6 | 7 | Before starting this tutorial, you need: 8 | 9 | - **Foundry Local** [installed](../get-started.md) on your computer 10 | - **At least one model loaded** using the `Foundry Local SDK`: 11 | ```bash 12 | pip install foundry-local-sdk 13 | ``` 14 | ```python 15 | from foundry_local import FoundryLocalManager 16 | manager = FoundryLocalManager(model_id_or_alias=None, bootstrap=True) 17 | manager.download_model("Phi-4-mini-instruct-generic-cpu") 18 | manager.load_model("Phi-4-mini-instruct-generic-cpu") 19 | ``` 20 | - **LangChain with OpenAI support** installed: 21 | 22 | ```bash 23 | pip install langchain[openai] 24 | ``` 25 | 26 | ## Create a LangChain application 27 | 28 | Foundry Local supports the OpenAI Chat Completion API, making it easy to integrate with LangChain. Here's how to build a translation application: 29 | 30 | ```python 31 | import os 32 | 33 | from langchain_openai import ChatOpenAI 34 | from langchain_core.prompts import ChatPromptTemplate 35 | 36 | # Set a placeholder API key (not actually used by Foundry Local) 37 | if not os.environ.get("OPENAI_API_KEY"): 38 | os.environ["OPENAI_API_KEY"] = "no_key" 39 | 40 | # Configure ChatOpenAI to use your locally-running model, noting the port is dynamically assigned 41 | llm = ChatOpenAI( 42 | model="Phi-4-mini-instruct-generic-cpu", 43 | base_url="http://localhost:5273/v1/", 44 | temperature=0.0, 45 | streaming=False 46 | ) 47 | 48 | # Create a translation prompt template 49 | prompt = ChatPromptTemplate.from_messages([ 50 | ( 51 | "system", 52 | "You are a helpful assistant that translates {input_language} to {output_language}." 53 | ), 54 | ("human", "{input}") 55 | ]) 56 | 57 | # Build a simple chain by connecting the prompt to the language model 58 | chain = prompt | llm 59 | 60 | # Run the chain with your inputs 61 | ai_msg = chain.invoke({ 62 | "input_language": "English", 63 | "output_language": "French", 64 | "input": "I love programming." 
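    # These keys must match the placeholders ({input_language}, {output_language}, {input}) in the prompt template above.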
65 | }) 66 | 67 | # Display the result 68 | print(ai_msg) 69 | ``` 70 | 71 | ## Next steps 72 | 73 | - Explore the [LangChain documentation](https://python.langchain.com/docs/introduction) for more advanced features and capabilities. 74 | - [How to compile Hugging Face models to run on Foundry Local](../how-to/how-to-compile-hugging-face-models.md) 75 | -------------------------------------------------------------------------------- /docs/what-is-foundry-local.md: -------------------------------------------------------------------------------- 1 | # What is Foundry Local? 2 | 3 | Foundry Local is a local version of Azure AI Foundry that enables local execution of large language models (LLMs) directly on your device. This on-device AI inference solution provides privacy, customization, and cost benefits compared to cloud-based alternatives. Best of all, it fits into your existing workflows and applications with an easy-to-use CLI and REST API! 4 | 5 | Foundry Local applies the optimization work of ONNX Runtime, Olive, and the ONNX ecosystem, Foundry Local delivers a highly optimized and performant user experience for running AI models locally. 6 | 7 | ## Key features 8 | 9 | - **On-Device Inference**: Run LLMs locally on your own hardware, reducing dependency on cloud services while keeping your data on-device. 10 | - **Model Customization**: Choose from preset models or bring your own to match your specific requirements and use cases. 11 | - **Cost Efficiency**: Avoid recurring cloud service costs by using your existing hardware, making AI tasks more accessible. 12 | - **Seamless Integration**: Easily interface with your applications via an endpoint or test with the CLI, with the option to scale to Azure AI Foundry as your workload demands increase. 13 | 14 | ## Use cases 15 | 16 | Foundry Local is ideal for scenarios where: 17 | 18 | - Data privacy and security are paramount 19 | - You need to operate in environments with limited or no internet connectivity 20 | - You want to reduce cloud inference costs 21 | - You need low-latency AI responses for real-time applications 22 | - You want to experiment with AI models before deploying to a cloud environment 23 | 24 | ## Pricing and billing 25 | 26 | Entirely Free! You're using your own hardware, and there are no extra costs associated with running AI models locally. 27 | 28 | ## How to get access 29 | 30 | Download from the Microsoft Store. (WIP) 31 | 32 | ## Next steps 33 | 34 | - [Get started with Foundry Local](./get-started.md) 35 | - [Compile Hugging Face models for Foundry Local](./how-to/compile-models-for-foundry-local.md) 36 | - [Learn more about ONNX Runtime](https://onnxruntime.ai/docs/) 37 | -------------------------------------------------------------------------------- /licenses/deepseek.md: -------------------------------------------------------------------------------- 1 | The DeepSeek R1 model is provided as a First Party Consumption Service and is not an Azure product. Your use of the DeepSeek R1 model is subject to the following license terms and must comply with the Acceptable Use Policy for Microsoft Online Services and the Microsoft Enterprise AI Services Code of Conduct. 
2 | 3 | Copyright (c) 2023 DeepSeek 4 | 5 | MIT License 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 8 | 9 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /licenses/mistral.md: -------------------------------------------------------------------------------- 1 | This model is provided under the License Terms available at https://mistral.ai/terms-of-use/. -------------------------------------------------------------------------------- /licenses/phi.md: -------------------------------------------------------------------------------- 1 | Microsoft. Copyright (c) Microsoft Corporation. 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /media/icons/ai_studio_icon_black.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /media/icons/ai_studio_icon_color.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /media/icons/ai_studio_icon_white.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /samples/electron/foundry-chat/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | package-lock.json 4 | 5 | 6 | # Build output 7 | dist/ 8 | build/ 9 | 10 | -------------------------------------------------------------------------------- /samples/electron/foundry-chat/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Debug Main Process", 9 | "type": "node", 10 | "request": "launch", 11 | "cwd": "${workspaceFolder}", 12 | "runtimeExecutable": "${workspaceFolder}/node_modules/.bin/electron", 13 | "windows": { 14 | "runtimeExecutable": "${workspaceFolder}/node_modules/.bin/electron.cmd" 15 | }, 16 | "args": ["."], 17 | "outputCapture": "std", 18 | "console": "integratedTerminal" 19 | }, 20 | { 21 | "name": "Debug Renderer Process", 22 | "type": "chrome", 23 | "request": "launch", 24 | "runtimeExecutable": "${workspaceFolder}/node_modules/.bin/electron", 25 | "windows": { 26 | "runtimeExecutable": "${workspaceFolder}/node_modules/.bin/electron.cmd" 27 | }, 28 | "runtimeArgs": [ 29 | "${workspaceFolder}", 30 | "--remote-debugging-port=9222" 31 | ], 32 | "webRoot": "${workspaceFolder}", 33 | "timeout": 30000 34 | } 35 | ] 36 | } -------------------------------------------------------------------------------- /samples/electron/foundry-chat/Readme.md: -------------------------------------------------------------------------------- 1 | # Foundry Local Chat Demo 2 | 3 | A simple Electron chat application that can chat with cloud and Foundry Local models. 4 | 5 | ## Prerequisites 6 | 7 | - Node.js (v16 or higher) 8 | - To install Node.js on Windows, run: 9 | ```powershell 10 | winget install OpenJS.NodeJS 11 | ``` 12 | - npm comes bundled with Node.js 13 | 14 | ## Setup Instructions 15 | 1. Download the latest Foundry Local `.msix` package for your processor: 16 | [Foundry Releases](https://github.com/microsoft/Foundry-Local/releases) 17 | Then install it using the following PowerShell command, replacing the argument with the path to the downloaded file: 18 | ```powershell 19 | add-appxpackage <path-to-downloaded-file>.msix 20 | ``` 21 | 22 | 2. Install dependencies: 23 | ```powershell 24 | npm install 25 | ``` 26 | 27 | 3.
Set the following environment variables for your cloud AI service: 28 | ```powershell 29 | $env:YOUR_API_KEY = "<your-api-key>" 30 | $env:YOUR_ENDPOINT = "<your-endpoint>" 31 | $env:YOUR_MODEL_NAME = "<your-model-name>" 32 | ``` 33 | 34 | 4. Start the application: 35 | ```powershell 36 | npm start 37 | ``` 38 | 39 | ## Building the Application (not necessary for testing) 40 | 41 | To build the application for your platform: 42 | ```powershell 43 | # For all platforms 44 | npm run build 45 | 46 | # For Windows specifically 47 | npm run build:win 48 | ``` 49 | 50 | The built application will be available in the `dist` directory. 51 | 52 | ## Project Structure 53 | 54 | - `main.js` - Main Electron process file 55 | - `chat.html` - Main application window 56 | - `preload.cjs` - Preload script for secure IPC communication 57 | 58 | ## Dependencies 59 | 60 | - Electron - Cross-platform desktop application framework 61 | - foundry-local-sdk - Local model integration 62 | - OpenAI - Cloud model integration 63 | 64 | -------------------------------------------------------------------------------- /samples/electron/foundry-chat/main.js: -------------------------------------------------------------------------------- 1 | import { app, BrowserWindow, Menu, ipcMain } from 'electron' 2 | import { fileURLToPath } from 'url' 3 | import path from 'path' 4 | import OpenAI from 'openai' 5 | import { FoundryLocalManager } from 'foundry-local-sdk' 6 | 7 | 8 | // Global variables 9 | let mainWindow 10 | let aiClient = null 11 | let currentModelType = 'cloud' // Tracks the current model type; defaults to cloud 12 | let modelName = null 13 | let endpoint = null 14 | let apiKey = "" 15 | 16 | const cloudApiKey = process.env.YOUR_API_KEY // load cloud api key from environment variable 17 | const cloudEndpoint = process.env.YOUR_ENDPOINT // load cloud endpoint from environment variable 18 | const cloudModelName = process.env.YOUR_MODEL_NAME // load cloud model name from environment variable 19 | // Check if all required environment variables are set 20 | if (!cloudApiKey || !cloudEndpoint || !cloudModelName) { 21 | console.error('Cloud API key, endpoint, or model name not set in environment variables, cloud mode will not work') 22 | console.error('Please set YOUR_API_KEY, YOUR_ENDPOINT, and YOUR_MODEL_NAME') 23 | } 24 | 25 | // Create the FoundryLocalManager and verify that the Foundry Local service is running 26 | const foundryManager = new FoundryLocalManager() 27 | if (!(await foundryManager.isServiceRunning())) { 28 | console.error('Foundry Local service is not running') 29 | app.quit() 30 | } 31 | 32 | // Simplified IPC handlers 33 | ipcMain.handle('send-message', (_, messages) => { 34 | return sendMessage(messages) 35 | }) 36 | 37 | // Add new IPC handler for getting local models 38 | ipcMain.handle('get-local-models', async () => { 39 | if (!foundryManager) { 40 | return { success: false, error: 'Local manager not initialized' } 41 | } 42 | try { 43 | const models = await foundryManager.listCachedModels() 44 | return { success: true, models } 45 | } catch (error) { 46 | return { success: false, error: error.message } 47 | } 48 | }) 49 | 50 | // Add new IPC handler for switching models 51 | ipcMain.handle('switch-model', async (_, modelId) => { 52 | try { 53 | if (modelId === 'cloud') { 54 | console.log("Switching to cloud model") 55 | currentModelType = 'cloud' 56 | endpoint = cloudEndpoint 57 | apiKey = cloudApiKey 58 | modelName = cloudModelName 59 | } else { 60 | console.log("Switching to local model") 61 | currentModelType = 'local' 62 | modelName = (await foundryManager.init(modelId)).id 63 |
endpoint = foundryManager.endpoint 64 | apiKey = foundryManager.apiKey 65 | } 66 | 67 | aiClient = new OpenAI({ 68 | apiKey: apiKey, 69 | baseURL: endpoint 70 | }) 71 | 72 | return { 73 | success: true, 74 | endpoint: endpoint, 75 | modelName: modelName 76 | } 77 | } catch (error) { 78 | return { success: false, error: error.message } 79 | } 80 | }) 81 | 82 | export async function sendMessage(messages) { 83 | try { 84 | if (!aiClient) { 85 | throw new Error('Client not initialized') 86 | } 87 | 88 | const stream = await aiClient.chat.completions.create({ 89 | model: modelName, 90 | messages: messages, 91 | stream: true 92 | }) 93 | 94 | for await (const chunk of stream) { 95 | const content = chunk.choices[0]?.delta?.content 96 | if (content) { 97 | mainWindow.webContents.send('chat-chunk', content) 98 | } 99 | } 100 | 101 | mainWindow.webContents.send('chat-complete') 102 | return { success: true } 103 | } catch (error) { 104 | return { success: false, error: error.message } 105 | } 106 | } 107 | 108 | // Window management 109 | async function createWindow() { 110 | // Dynamically import the preload script 111 | const __filename = fileURLToPath(import.meta.url) 112 | const __dirname = path.dirname(__filename) 113 | const preloadPath = path.join(__dirname, 'preload.cjs') 114 | 115 | mainWindow = new BrowserWindow({ 116 | width: 1024, 117 | height: 768, 118 | autoHideMenuBar: false, 119 | webPreferences: { 120 | allowRunningInsecureContent: true, 121 | nodeIntegration: false, 122 | contextIsolation: true, 123 | preload: preloadPath, 124 | enableRemoteModule: false, 125 | sandbox: false 126 | } 127 | }) 128 | 129 | Menu.setApplicationMenu(null) 130 | 131 | console.log("Creating chat window") 132 | mainWindow.loadFile('chat.html') 133 | 134 | // Send initial config to renderer 135 | mainWindow.webContents.on('did-finish-load', () => { 136 | // Initialize with cloud model after page loads 137 | mainWindow.webContents.send('initialize-with-cloud') 138 | }) 139 | 140 | return mainWindow 141 | } 142 | 143 | // App lifecycle handlers 144 | app.whenReady().then(() => { 145 | createWindow() 146 | 147 | app.on('activate', () => { 148 | if (BrowserWindow.getAllWindows().length === 0) { 149 | createWindow() 150 | } 151 | }) 152 | }) 153 | 154 | app.on('window-all-closed', () => { 155 | if (process.platform !== 'darwin') { 156 | app.quit() 157 | } 158 | }) 159 | -------------------------------------------------------------------------------- /samples/electron/foundry-chat/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "foundry-local-chat-demo", 3 | "version": "1.0.0", 4 | "description": "A simple Electron Chat application that can chat with cloud and local models", 5 | "main": "main.js", 6 | "type": "module", 7 | "scripts": { 8 | "start": "electron .", 9 | "build": "electron-builder", 10 | "build:win": "electron-builder --win" 11 | }, 12 | "author": "", 13 | "license": "ISC", 14 | "devDependencies": { 15 | "electron": "^28.1.0", 16 | "electron-builder": "^24.9.1" 17 | }, 18 | "dependencies": { 19 | "foundry-local-sdk": "^0.3.0", 20 | "openai": "^4.98.0" 21 | }, 22 | "build": { 23 | "appId": "com.microsoft.foundrylocalchatdemo", 24 | "productName": "Foundry Local - Chat Demo", 25 | "directories": { 26 | "output": "dist" 27 | }, 28 | "win": { 29 | "target": "nsis" 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /samples/electron/foundry-chat/preload.cjs: 
-------------------------------------------------------------------------------- 1 | const { contextBridge, ipcRenderer } = require('electron'); 2 | 3 | console.log('Preload script starting...'); 4 | console.log('Current directory:', __dirname); 5 | console.log('Module paths:', module.paths); 6 | console.log('contextBridge available:', !!contextBridge); 7 | console.log('ipcRenderer available:', !!ipcRenderer); 8 | 9 | try { 10 | console.log('Electron modules loaded'); 11 | 12 | contextBridge.exposeInMainWorld('versions', { 13 | node: () => process.versions.node, 14 | chrome: () => process.versions.chrome, 15 | electron: () => process.versions.electron 16 | }) 17 | 18 | console.log('Versions bridge exposed'); 19 | 20 | contextBridge.exposeInMainWorld('mainAPI', { 21 | sendMessage: (messages) => ipcRenderer.invoke('send-message', messages), 22 | onChatChunk: (callback) => ipcRenderer.on('chat-chunk', (_, chunk) => callback(chunk)), 23 | onChatComplete: (callback) => ipcRenderer.on('chat-complete', () => callback()), 24 | removeAllChatListeners: () => { 25 | ipcRenderer.removeAllListeners('chat-chunk'); 26 | ipcRenderer.removeAllListeners('chat-complete'); 27 | }, 28 | getLocalModels: () => ipcRenderer.invoke('get-local-models'), 29 | switchModel: (modelId) => ipcRenderer.invoke('switch-model', modelId), 30 | onInitializeWithCloud: (callback) => ipcRenderer.on('initialize-with-cloud', () => callback()) 31 | }) 32 | 33 | console.log('mainAPI bridge exposed'); 34 | console.log('Preload script completed successfully'); 35 | } catch (error) { 36 | console.error('Error in preload script:', error); 37 | console.error('Error stack:', error.stack); 38 | } -------------------------------------------------------------------------------- /samples/js/hello-foundry-local/README.md: -------------------------------------------------------------------------------- 1 | # Sample: Hello Foundry Local! 2 | 3 | This is a simple example of how to use the Foundry Local SDK to run a model locally and make requests to it. The example demonstrates how to set up the SDK, initialize a model, and make a request to the model. 4 | 5 | Install the Foundry Local SDK and OpenAI packages using npm: 6 | 7 | ```bash 8 | npm install foundry-local-sdk openai 9 | ``` 10 | 11 | Run the application using Node.js: 12 | 13 | ```bash 14 | node src/app.js 15 | ``` 16 | -------------------------------------------------------------------------------- /samples/js/hello-foundry-local/src/app.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | import { OpenAI } from "openai"; 5 | import { FoundryLocalManager } from "foundry-local-sdk"; 6 | 7 | // By using an alias, the most suitable model will be downloaded 8 | // to your end-user's device. 9 | // TIP: You can find a list of available models by running the 10 | // following command in your terminal: `foundry model list`. 11 | const alias = "phi-3.5-mini"; 12 | 13 | // Create a FoundryLocalManager instance. This will start the Foundry 14 | // Local service if it is not already running. 15 | const foundryLocalManager = new FoundryLocalManager() 16 | 17 | // Initialize the manager with a model. This will download the model 18 | // if it is not already present on the user's device. 
19 | const modelInfo = await foundryLocalManager.init(alias) 20 | console.log("Model Info:", modelInfo) 21 | 22 | const openai = new OpenAI({ 23 | baseURL: foundryLocalManager.endpoint, 24 | apiKey: foundryLocalManager.apiKey, 25 | }); 26 | 27 | async function streamCompletion() { 28 | const stream = await openai.chat.completions.create({ 29 | model: modelInfo.id, 30 | messages: [{ role: "user", content: "What is the golden ratio?" }], 31 | stream: true, 32 | }); 33 | 34 | for await (const chunk of stream) { 35 | if (chunk.choices[0]?.delta?.content) { 36 | process.stdout.write(chunk.choices[0].delta.content); 37 | } 38 | } 39 | } 40 | 41 | streamCompletion(); 42 | -------------------------------------------------------------------------------- /samples/python/hello-foundry-local/README.md: -------------------------------------------------------------------------------- 1 | # Sample: Hello Foundry Local! 2 | 3 | This is a simple example of how to use the Foundry Local SDK to run a model locally and make requests to it. The example demonstrates how to set up the SDK, initialize a model, and make a request to the model. 4 | 5 | Install the Foundry Local SDK and OpenAI packages using pip: 6 | 7 | ```bash 8 | pip install foundry-local-sdk openai 9 | ``` 10 | 11 | > [!TIP] 12 | > We recommend using a virtual environment to manage your Python packages using `venv` or `conda` to avoid conflicts with other packages. 13 | 14 | Run the application using Python: 15 | 16 | ```bash 17 | python src/app.py 18 | ``` 19 | -------------------------------------------------------------------------------- /samples/python/hello-foundry-local/src/app.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | 4 | import openai 5 | from foundry_local import FoundryLocalManager 6 | 7 | # By using an alias, the most suitable model will be downloaded 8 | # to your end-user's device. 9 | alias = "phi-3.5-mini" 10 | 11 | # Create a FoundryLocalManager instance. This will start the Foundry 12 | # Local service if it is not already running and load the specified model. 13 | manager = FoundryLocalManager(alias) 14 | 15 | # The remaining code uses the OpenAI Python SDK to interact with the local model. 
16 | 17 | # Configure the client to use the local Foundry service 18 | client = openai.OpenAI( 19 | base_url=manager.endpoint, 20 | api_key=manager.api_key, # API key is not required for local usage 21 | ) 22 | 23 | # Set the model to use and generate a streaming response 24 | stream = client.chat.completions.create( 25 | model=manager.get_model_info(alias).id, 26 | messages=[{"role": "user", "content": "What is the golden ratio?"}], 27 | stream=True, 28 | ) 29 | 30 | # Print the streaming response 31 | for chunk in stream: 32 | if chunk.choices[0].delta.content is not None: 33 | print(chunk.choices[0].delta.content, end="", flush=True) 34 | -------------------------------------------------------------------------------- /samples/python/summarize/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | 5 | { 6 | "name": "Python Debugger: Current File with Arguments", 7 | "type": "debugpy", 8 | "request": "launch", 9 | "program": "${file}", 10 | "console": "integratedTerminal", 11 | "args": "\"The quick brown fox jumps over the lazy dog, packing my box with five dozen liquor jugs, and then the dog chased the fox around the corner of the house.\" --text" 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /samples/python/summarize/README.md: -------------------------------------------------------------------------------- 1 | # Text Summarizer 2 | 3 | A simple command-line utility that uses Foundry Local to generate summaries of text files or direct text input. 4 | 5 | ## Setup 6 | 7 | 1. Install the required dependencies: 8 | ```bash 9 | pip install -r requirements.txt 10 | ``` 11 | 12 | ## Usage 13 | 14 | The utility can be used in two ways: 15 | 16 | 1. Summarize a text file: 17 | ```bash 18 | python summarize.py path/to/your/file.txt 19 | ``` 20 | 21 | 2. Summarize direct text input: 22 | ```bash 23 | python summarize.py "Your text to summarize here" --text 24 | ``` 25 | 26 | You can also specify which model to use with the `--model` parameter: 27 | ```bash 28 | python summarize.py path/to/your/file.txt --model "your-model-alias" 29 | ``` 30 | 31 | If the specified model is not found, the script will use the first available model. 
32 | 33 | ## Requirements 34 | 35 | - Python 3.6 or higher 36 | - Foundry Local Service 37 | - Required Python packages (see requirements.txt) 38 | 39 | -------------------------------------------------------------------------------- /samples/python/summarize/requirements.txt: -------------------------------------------------------------------------------- 1 | openai>=1.0.0 2 | python-dotenv>=0.19.0 3 | foundry-local-sdk>=0.3.1 4 | -------------------------------------------------------------------------------- /samples/python/summarize/summarize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import argparse 5 | from openai import OpenAI 6 | from foundry_local import FoundryLocalManager 7 | 8 | 9 | def read_file_content(file_path): 10 | """Read content from a file.""" 11 | try: 12 | with open(file_path, "r", encoding="utf-8") as file: 13 | return file.read() 14 | except Exception as e: 15 | print(f"Error reading file: {e}") 16 | sys.exit(1) 17 | 18 | 19 | def get_summary(text, client, model_name): 20 | """Get summary from OpenAI API.""" 21 | try: 22 | response = client.chat.completions.create( 23 | model=model_name, 24 | messages=[ 25 | { 26 | "role": "system", 27 | "content": "You are a helpful assistant that summarizes text. Provide a concise summary.", 28 | }, 29 | {"role": "user", "content": f"Please summarize the following text:\n\n{text}"}, 30 | ], 31 | ) 32 | return response.choices[0].message.content 33 | except Exception as e: 34 | print(f"Error getting summary from OpenAI: {e}") 35 | sys.exit(1) 36 | 37 | 38 | def main(): 39 | parser = argparse.ArgumentParser(description="Summarize text from a file or string using OpenAI.") 40 | parser.add_argument("input", help="File path or text string to summarize") 41 | parser.add_argument("--text", action="store_true", help="Treat input as direct text instead of a file path") 42 | parser.add_argument("--model", help="Model alias to use for summarization") 43 | args = parser.parse_args() 44 | 45 | fl_manager = FoundryLocalManager() 46 | 47 | fl_manager.start_service() 48 | 49 | model_list = fl_manager.list_cached_models() 50 | 51 | if not model_list: 52 | print("No downloaded models available") 53 | sys.exit(1) 54 | 55 | # Select model based on alias or use first one 56 | if args.model: 57 | selected_model = next((model for model in model_list if model.alias == args.model), None) 58 | if selected_model: 59 | model_name = selected_model.id 60 | else: 61 | model_name = model_list[0].id 62 | print(f"Model alias '{args.model}' not found, using default model: {model_name}") 63 | else: 64 | model_name = model_list[0].id 65 | 66 | print(f"Using model: {model_name}") 67 | 68 | # Initialize OpenAI client 69 | client = OpenAI(base_url=fl_manager.endpoint, api_key=fl_manager.api_key) 70 | 71 | # Get input text 72 | if args.text: 73 | text = args.input 74 | else: 75 | text = read_file_content(args.input) 76 | 77 | # Get and print summary 78 | summary = get_summary(text, client, model_name) 79 | print("\nSummary:") 80 | print("-" * 50) 81 | print(summary) 82 | print("-" * 50) 83 | 84 | 85 | if __name__ == "__main__": 86 | main() 87 | -------------------------------------------------------------------------------- /samples/rust/Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "hello-foundry-local" 4 | ] 5 | resolver = "2" 6 | 
-------------------------------------------------------------------------------- /samples/rust/README.md: -------------------------------------------------------------------------------- 1 | # Foundry Local Rust Samples 2 | 3 | This directory contains samples demonstrating how to use the Foundry Local Rust SDK. 4 | 5 | ## Prerequisites 6 | 7 | - Rust 1.70.0 or later 8 | - Foundry Local installed and available on PATH 9 | 10 | ## Samples 11 | 12 | ### [Hello Foundry Local](./hello-foundry-local) 13 | 14 | A simple example that demonstrates how to: 15 | - Start the Foundry Local service 16 | - Download and load a model 17 | - Send a prompt to the model using the OpenAI-compatible API 18 | - Display the response from the model -------------------------------------------------------------------------------- /samples/rust/hello-foundry-local/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hello-foundry-local" 3 | version = "0.1.0" 4 | edition = "2021" 5 | description = "A simple example of using the Foundry Local Rust SDK" 6 | 7 | [dependencies] 8 | foundry-local = { path = "../../../sdk/rust" } 9 | tokio = { version = "1", features = ["full"] } 10 | anyhow = "1.0" 11 | reqwest = { version = "0.11", features = ["json"] } 12 | serde_json = "1.0" 13 | env_logger = "0.10" -------------------------------------------------------------------------------- /samples/rust/hello-foundry-local/README.md: -------------------------------------------------------------------------------- 1 | # Hello Foundry Local (Rust) 2 | 3 | A simple example that demonstrates using the Foundry Local Rust SDK to interact with AI models locally. 4 | 5 | ## Prerequisites 6 | 7 | - Rust 1.70.0 or later 8 | - Foundry Local installed and available on PATH 9 | 10 | ## Running the Sample 11 | 12 | 1. Make sure Foundry Local is installed 13 | 2. Run the sample: 14 | 15 | ```bash 16 | cargo run 17 | ``` 18 | 19 | ## What This Sample Does 20 | 21 | 1. Creates a FoundryLocalManager instance 22 | 2. Starts the Foundry Local service if it's not already running 23 | 3. Downloads and loads the phi-3-mini-4k model 24 | 4. Sends a prompt to the model using the OpenAI-compatible API 25 | 5. Displays the response from the model 26 | 27 | ## Code Structure 28 | 29 | - `src/main.rs` - The main application code 30 | - `Cargo.toml` - Project configuration and dependencies -------------------------------------------------------------------------------- /samples/rust/hello-foundry-local/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use foundry_local::FoundryLocalManager; 3 | 4 | #[tokio::main] 5 | async fn main() -> Result<()> { 6 | // Set up logging 7 | env_logger::init_from_env( 8 | env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "info"), 9 | ); 10 | 11 | println!("Hello Foundry Local!"); 12 | println!("==================="); 13 | 14 | // For this example, we will use the "phi-3-mini-4k" model which is 2.181 GB in size. 
15 | let model_to_use: &str = "phi-3-mini-4k"; 16 | 17 | // Create a FoundryLocalManager instance using the builder pattern 18 | println!("\nInitializing Foundry Local manager..."); 19 | let mut manager = FoundryLocalManager::builder() 20 | // As an alternative to the checks below, you can specify the model to use directly during bootstrapping 21 | // .alias_or_model_id(model_to_use) 22 | .bootstrap(true) // Start the service if not running 23 | .build() 24 | .await?; 25 | 26 | // List all the models in the catalog 27 | println!("\nAvailable models in catalog:"); 28 | let models = manager.list_catalog_models().await?; 29 | let model_in_catalog = models.iter().any(|m| m.alias == model_to_use); 30 | for model in models { 31 | println!("- {model}"); 32 | } 33 | // Check if the model is in the catalog 34 | if !model_in_catalog { 35 | println!("Model '{model_to_use}' not found in catalog. Exiting."); 36 | return Ok(()); 37 | } 38 | 39 | // List available models in the local cache 40 | println!("\nAvailable models in local cache:"); 41 | let models = manager.list_cached_models().await?; 42 | let model_in_cache = models.iter().any(|m| m.alias == model_to_use); 43 | for model in models { 44 | println!("- {model}"); 45 | } 46 | 47 | // Check if the model is already cached and download if not 48 | if !model_in_cache { 49 | println!("Model '{model_to_use}' not found in local cache. Downloading..."); 50 | // Download the model if not in cache 51 | // NOTE if you've bootstrapped with `alias_or_model_id`, you can use that directly and skip this check 52 | manager.download_model(model_to_use, None, false).await?; 53 | println!("Model '{model_to_use}' downloaded successfully."); 54 | } 55 | 56 | // Get the model information 57 | let model_info = manager.get_model_info(model_to_use, true).await?; 58 | println!("\nUsing model: {model_info}"); 59 | 60 | // Build the prompt 61 | let prompt = "What is the golden ratio?"; 62 | println!("\nPrompt: {prompt}"); 63 | 64 | // Use the OpenAI compatible API to interact with the model 65 | let client = reqwest::Client::new(); 66 | let response = client 67 | .post(format!("{}/chat/completions", manager.endpoint()?)) 68 | .json(&serde_json::json!({ 69 | "model": model_info.id, 70 | "messages": [{"role": "user", "content": prompt}], 71 | })) 72 | .send() 73 | .await?; 74 | 75 | // Parse and display the response 76 | let result = response.json::<serde_json::Value>().await?; 77 | if let Some(content) = result["choices"][0]["message"]["content"].as_str() { 78 | println!("\nResponse:\n{content}"); 79 | } else { 80 | println!("\nError: Failed to extract response content from API result"); 81 | println!("Full API response: {result}"); 82 | } 83 | 84 | Ok(()) 85 | } 86 | -------------------------------------------------------------------------------- /sdk/cs/.gitignore: -------------------------------------------------------------------------------- 1 | # Custom 2 | .dotnet/ 3 | artifacts/ 4 | .build/ 5 | .vscode/ 6 | 7 | ## Ignore Visual Studio temporary files, build results, and 8 | ## files generated by popular Visual Studio add-ons.
9 | ## 10 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 11 | 12 | # User-specific files 13 | *.suo 14 | *.user 15 | *.userosscache 16 | *.sln.docstates 17 | 18 | # User-specific files (MonoDevelop/Xamarin Studio) 19 | *.userprefs 20 | 21 | # Build results 22 | [Dd]ebug/ 23 | [Dd]ebugPublic/ 24 | [Rr]elease/ 25 | [Rr]eleases/ 26 | x64/ 27 | x86/ 28 | bld/ 29 | [Bb]in/ 30 | [Oo]bj/ 31 | [Ll]og/ 32 | 33 | # Visual Studio 2015 cache/options directory 34 | .vs/ 35 | # Uncomment if you have tasks that create the project's static files in wwwroot 36 | #wwwroot/ 37 | 38 | # MSTest test Results 39 | [Tt]est[Rr]esult*/ 40 | [Bb]uild[Ll]og.* 41 | 42 | # NUNIT 43 | *.VisualState.xml 44 | TestResult.xml 45 | 46 | # Build Results of an ATL Project 47 | [Dd]ebugPS/ 48 | [Rr]eleasePS/ 49 | dlldata.c 50 | 51 | # .NET Core 52 | project.lock.json 53 | project.fragment.lock.json 54 | artifacts/ 55 | **/Properties/launchSettings.json 56 | 57 | *_i.c 58 | *_p.c 59 | *_i.h 60 | *.ilk 61 | *.meta 62 | *.obj 63 | *.pch 64 | *.pdb 65 | *.pgc 66 | *.pgd 67 | *.rsp 68 | *.sbr 69 | *.tlb 70 | *.tli 71 | *.tlh 72 | *.tmp 73 | *.tmp_proj 74 | *.log 75 | *.vspscc 76 | *.vssscc 77 | .builds 78 | *.pidb 79 | *.svclog 80 | *.scc 81 | 82 | # Chutzpah Test files 83 | _Chutzpah* 84 | 85 | # Visual C++ cache files 86 | ipch/ 87 | *.aps 88 | *.ncb 89 | *.opendb 90 | *.opensdf 91 | *.sdf 92 | *.cachefile 93 | *.VC.db 94 | *.VC.VC.opendb 95 | 96 | # Visual Studio profiler 97 | *.psess 98 | *.vsp 99 | *.vspx 100 | *.sap 101 | 102 | # TFS 2012 Local Workspace 103 | $tf/ 104 | 105 | # Guidance Automation Toolkit 106 | *.gpState 107 | 108 | # ReSharper is a .NET coding add-in 109 | _ReSharper*/ 110 | *.[Rr]e[Ss]harper 111 | *.DotSettings.user 112 | 113 | # JustCode is a .NET coding add-in 114 | .JustCode 115 | 116 | # TeamCity is a build add-in 117 | _TeamCity* 118 | 119 | # DotCover is a Code Coverage Tool 120 | *.dotCover 121 | 122 | # Visual Studio code coverage results 123 | *.coverage 124 | *.coveragexml 125 | 126 | # NCrunch 127 | _NCrunch_* 128 | .*crunch*.local.xml 129 | nCrunchTemp_* 130 | 131 | # MightyMoose 132 | *.mm.* 133 | AutoTest.Net/ 134 | 135 | # Web workbench (sass) 136 | .sass-cache/ 137 | 138 | # Installshield output folder 139 | [Ee]xpress/ 140 | 141 | # DocProject is a documentation generator add-in 142 | DocProject/buildhelp/ 143 | DocProject/Help/*.HxT 144 | DocProject/Help/*.HxC 145 | DocProject/Help/*.hhc 146 | DocProject/Help/*.hhk 147 | DocProject/Help/*.hhp 148 | DocProject/Help/Html2 149 | DocProject/Help/html 150 | 151 | # Click-Once directory 152 | publish/ 153 | 154 | # Publish Web Output 155 | *.[Pp]ublish.xml 156 | *.azurePubxml 157 | # TODO: Comment the next line if you want to checkin your web deploy settings 158 | # but database connection strings (with potential passwords) will be unencrypted 159 | *.pubxml 160 | *.publishproj 161 | 162 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 163 | # checkin your Azure Web App publish settings, but sensitive information contained 164 | # in these scripts will be unencrypted 165 | PublishScripts/ 166 | 167 | # NuGet Packages 168 | *.nupkg 169 | # The packages folder can be ignored because of Package Restore 170 | **/packages/* 171 | # except build/, which is used as an MSBuild target. 
172 | !**/packages/build/ 173 | # Uncomment if necessary however generally it will be regenerated when needed 174 | #!**/packages/repositories.config 175 | # NuGet v3's project.json files produces more ignorable files 176 | *.nuget.props 177 | *.nuget.targets 178 | 179 | # Microsoft Azure Build Output 180 | csx/ 181 | *.build.csdef 182 | 183 | # Microsoft Azure Emulator 184 | ecf/ 185 | rcf/ 186 | 187 | # Windows Store app package directories and files 188 | AppPackages/ 189 | BundleArtifacts/ 190 | Package.StoreAssociation.xml 191 | _pkginfo.txt 192 | 193 | # Visual Studio cache files 194 | # files ending in .cache can be ignored 195 | *.[Cc]ache 196 | # but keep track of directories ending in .cache 197 | !*.[Cc]ache/ 198 | 199 | # Others 200 | ClientBin/ 201 | ~$* 202 | *~ 203 | *.dbmdl 204 | *.dbproj.schemaview 205 | *.jfm 206 | *.pfx 207 | *.publishsettings 208 | orleans.codegen.cs 209 | 210 | # Since there are multiple workflows, uncomment next line to ignore bower_components 211 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 212 | #bower_components/ 213 | 214 | # RIA/Silverlight projects 215 | Generated_Code/ 216 | 217 | # Backup & report files from converting an old project file 218 | # to a newer Visual Studio version. Backup files are not needed, 219 | # because we have git ;-) 220 | _UpgradeReport_Files/ 221 | Backup*/ 222 | UpgradeLog*.XML 223 | UpgradeLog*.htm 224 | 225 | # SQL Server files 226 | *.mdf 227 | *.ldf 228 | 229 | # Business Intelligence projects 230 | *.rdl.data 231 | *.bim.layout 232 | *.bim_*.settings 233 | 234 | # Microsoft Fakes 235 | FakesAssemblies/ 236 | 237 | # GhostDoc plugin setting file 238 | *.GhostDoc.xml 239 | 240 | # Node.js Tools for Visual Studio 241 | .ntvs_analysis.dat 242 | node_modules/ 243 | 244 | # Typescript v1 declaration files 245 | typings/ 246 | 247 | # Visual Studio 6 build log 248 | *.plg 249 | 250 | # Visual Studio 6 workspace options file 251 | *.opt 252 | 253 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
254 | *.vbw 255 | 256 | # Visual Studio LightSwitch build output 257 | **/*.HTMLClient/GeneratedArtifacts 258 | **/*.DesktopClient/GeneratedArtifacts 259 | **/*.DesktopClient/ModelManifest.xml 260 | **/*.Server/GeneratedArtifacts 261 | **/*.Server/ModelManifest.xml 262 | _Pvt_Extensions 263 | 264 | # Paket dependency manager 265 | .paket/paket.exe 266 | paket-files/ 267 | 268 | # FAKE - F# Make 269 | .fake/ 270 | 271 | # JetBrains Rider 272 | .idea/ 273 | *.sln.iml 274 | 275 | # CodeRush 276 | .cr/ 277 | 278 | # Python Tools for Visual Studio (PTVS) 279 | __pycache__/ 280 | *.pyc 281 | 282 | # Cake - Uncomment if you are using it 283 | # tools/** 284 | # !tools/packages.config 285 | 286 | # Perfview trace 287 | *.etl.zip 288 | *.orig 289 | /src/BenchmarksDriver/results.md 290 | *.trace.zip 291 | /src/BenchmarksDriver/*.zip 292 | eventpipe.netperf 293 | *.netperf 294 | *.bench.json 295 | BenchmarkDotNet.Artifacts/ -------------------------------------------------------------------------------- /sdk/cs/FoundryLocal.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.0.31903.59 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.AI.Foundry.Local", "src\Microsoft.AI.Foundry.Local.csproj", "{247537D6-CBBA-C748-B91D-AA7B236563B4}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TestApp", "samples\TestApp\TestApp.csproj", "{91680EDB-3D1D-47D2-BF32-58C3544EDEEC}" 9 | EndProject 10 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{0C88DD14-F956-CE84-757C-A364CCF449FC}" 11 | EndProject 12 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FoundryLocal.Tests", "test\FoundryLocal.Tests\FoundryLocal.Tests.csproj", "{CD75C56B-0EB9-41F4-BEE0-9D7C674894CC}" 13 | EndProject 14 | Global 15 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 16 | Debug|Any CPU = Debug|Any CPU 17 | Debug|x64 = Debug|x64 18 | Debug|x86 = Debug|x86 19 | Release|Any CPU = Release|Any CPU 20 | Release|x64 = Release|x64 21 | Release|x86 = Release|x86 22 | EndGlobalSection 23 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 24 | {247537D6-CBBA-C748-B91D-AA7B236563B4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 25 | {247537D6-CBBA-C748-B91D-AA7B236563B4}.Debug|Any CPU.Build.0 = Debug|Any CPU 26 | {247537D6-CBBA-C748-B91D-AA7B236563B4}.Debug|x64.ActiveCfg = Debug|Any CPU 27 | {247537D6-CBBA-C748-B91D-AA7B236563B4}.Debug|x64.Build.0 = Debug|Any CPU 28 | {247537D6-CBBA-C748-B91D-AA7B236563B4}.Debug|x86.ActiveCfg = Debug|Any CPU 29 | {247537D6-CBBA-C748-B91D-AA7B236563B4}.Debug|x86.Build.0 = Debug|Any CPU 30 | {247537D6-CBBA-C748-B91D-AA7B236563B4}.Release|Any CPU.ActiveCfg = Release|Any CPU 31 | {247537D6-CBBA-C748-B91D-AA7B236563B4}.Release|Any CPU.Build.0 = Release|Any CPU 32 | {247537D6-CBBA-C748-B91D-AA7B236563B4}.Release|x64.ActiveCfg = Release|Any CPU 33 | {247537D6-CBBA-C748-B91D-AA7B236563B4}.Release|x64.Build.0 = Release|Any CPU 34 | {247537D6-CBBA-C748-B91D-AA7B236563B4}.Release|x86.ActiveCfg = Release|Any CPU 35 | {247537D6-CBBA-C748-B91D-AA7B236563B4}.Release|x86.Build.0 = Release|Any CPU 36 | {91680EDB-3D1D-47D2-BF32-58C3544EDEEC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 37 | {91680EDB-3D1D-47D2-BF32-58C3544EDEEC}.Debug|Any CPU.Build.0 = Debug|Any CPU 38 | {91680EDB-3D1D-47D2-BF32-58C3544EDEEC}.Debug|x64.ActiveCfg = Debug|Any CPU 39 | 
{91680EDB-3D1D-47D2-BF32-58C3544EDEEC}.Debug|x64.Build.0 = Debug|Any CPU 40 | {91680EDB-3D1D-47D2-BF32-58C3544EDEEC}.Debug|x86.ActiveCfg = Debug|Any CPU 41 | {91680EDB-3D1D-47D2-BF32-58C3544EDEEC}.Debug|x86.Build.0 = Debug|Any CPU 42 | {91680EDB-3D1D-47D2-BF32-58C3544EDEEC}.Release|Any CPU.ActiveCfg = Release|Any CPU 43 | {91680EDB-3D1D-47D2-BF32-58C3544EDEEC}.Release|Any CPU.Build.0 = Release|Any CPU 44 | {91680EDB-3D1D-47D2-BF32-58C3544EDEEC}.Release|x64.ActiveCfg = Release|Any CPU 45 | {91680EDB-3D1D-47D2-BF32-58C3544EDEEC}.Release|x64.Build.0 = Release|Any CPU 46 | {91680EDB-3D1D-47D2-BF32-58C3544EDEEC}.Release|x86.ActiveCfg = Release|Any CPU 47 | {91680EDB-3D1D-47D2-BF32-58C3544EDEEC}.Release|x86.Build.0 = Release|Any CPU 48 | {CD75C56B-0EB9-41F4-BEE0-9D7C674894CC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 49 | {CD75C56B-0EB9-41F4-BEE0-9D7C674894CC}.Debug|Any CPU.Build.0 = Debug|Any CPU 50 | {CD75C56B-0EB9-41F4-BEE0-9D7C674894CC}.Debug|x64.ActiveCfg = Debug|Any CPU 51 | {CD75C56B-0EB9-41F4-BEE0-9D7C674894CC}.Debug|x64.Build.0 = Debug|Any CPU 52 | {CD75C56B-0EB9-41F4-BEE0-9D7C674894CC}.Debug|x86.ActiveCfg = Debug|Any CPU 53 | {CD75C56B-0EB9-41F4-BEE0-9D7C674894CC}.Debug|x86.Build.0 = Debug|Any CPU 54 | {CD75C56B-0EB9-41F4-BEE0-9D7C674894CC}.Release|Any CPU.ActiveCfg = Release|Any CPU 55 | {CD75C56B-0EB9-41F4-BEE0-9D7C674894CC}.Release|Any CPU.Build.0 = Release|Any CPU 56 | {CD75C56B-0EB9-41F4-BEE0-9D7C674894CC}.Release|x64.ActiveCfg = Release|Any CPU 57 | {CD75C56B-0EB9-41F4-BEE0-9D7C674894CC}.Release|x64.Build.0 = Release|Any CPU 58 | {CD75C56B-0EB9-41F4-BEE0-9D7C674894CC}.Release|x86.ActiveCfg = Release|Any CPU 59 | {CD75C56B-0EB9-41F4-BEE0-9D7C674894CC}.Release|x86.Build.0 = Release|Any CPU 60 | EndGlobalSection 61 | GlobalSection(SolutionProperties) = preSolution 62 | HideSolutionNode = FALSE 63 | EndGlobalSection 64 | GlobalSection(NestedProjects) = preSolution 65 | {CD75C56B-0EB9-41F4-BEE0-9D7C674894CC} = {0C88DD14-F956-CE84-757C-A364CCF449FC} 66 | EndGlobalSection 67 | GlobalSection(ExtensibilityGlobals) = postSolution 68 | SolutionGuid = {0138DEC3-F200-43EC-A1A2-6FD8F2C609CB} 69 | EndGlobalSection 70 | EndGlobal 71 | -------------------------------------------------------------------------------- /sdk/cs/README.md: -------------------------------------------------------------------------------- 1 | # Foundry Local C# SDK (under active development) 2 | 3 | > This SDK is under active development and may not be fully functional. 4 | 5 | ## Installation 6 | 7 | To build the SDK, run the following command in your terminal: 8 | 9 | ```bash 10 | cd sdk/cs 11 | dotnet build 12 | ``` 13 | 14 | You can also load [FoundryLocal.sln](./FoundryLocal.sln) in Visual Studio 2022 or VSCode. Update your 15 | `nuget.config` to include the local path to the generated NuGet package: 16 | 17 | ```xml 18 | 19 | 20 | 21 | 22 | 23 | ``` 24 | 25 | Then, install the package using the following command: 26 | 27 | ```bash 28 | dotnet add package FoundryLocal --source foundry-local 29 | ``` 30 | 31 | An official NuGet package will be available soon. 
32 | 33 | ## Usage 34 | 35 | ```csharp 36 | using Microsoft.AI.Foundry.Local; 37 | using OpenAI; 38 | using OpenAI.Chat; 39 | using System.ClientModel; 40 | using System.Diagnostics.Metrics; 41 | 42 | var alias = "phi-3.5-mini"; 43 | 44 | var manager = await FoundryLocalManager.StartModelAsync(aliasOrModelId: alias); 45 | 46 | var model = await manager.GetModelInfoAsync(aliasOrModelId: alias); 47 | ApiKeyCredential key = new ApiKeyCredential(manager.ApiKey); 48 | OpenAIClient client = new OpenAIClient(key, new OpenAIClientOptions 49 | { 50 | Endpoint = manager.Endpoint 51 | }); 52 | 53 | var chatClient = client.GetChatClient(model?.ModelId); 54 | 55 | var completionUpdates = chatClient.CompleteChatStreaming("Why is the sky blue?"); 56 | 57 | Console.Write($"[ASSISTANT]: "); 58 | foreach (var completionUpdate in completionUpdates) 59 | { 60 | if (completionUpdate.ContentUpdate.Count > 0) 61 | { 62 | Console.Write(completionUpdate.ContentUpdate[0].Text); 63 | } 64 | } 65 | ``` 66 | -------------------------------------------------------------------------------- /sdk/cs/samples/TestApp/Program.cs: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Copyright (c) Microsoft. All rights reserved. 4 | // 5 | // -------------------------------------------------------------------------------------------------------------------- 6 | 7 | using System.ClientModel; 8 | 9 | using Microsoft.AI.Foundry.Local; 10 | 11 | using OpenAI; 12 | using OpenAI.Chat; 13 | 14 | public class TestApp 15 | { 16 | public static async Task Main(string[] args) 17 | { 18 | var app = new TestApp(); // Create an instance of TestApp 19 | 20 | Console.WriteLine(new string('=', 80)); // Separator for clarity 21 | Console.WriteLine("Testing catalog integration..."); 22 | await app.TestCatalog(); // Call the instance method 23 | 24 | Console.WriteLine(new string('=', 80)); // Separator for clarity 25 | Console.WriteLine("Testing cache operations..."); 26 | await app.TestCacheOperations(); // Call the instance method 27 | 28 | Console.WriteLine(new string('=', 80)); // Separator for clarity 29 | Console.WriteLine("Testing OpenAI integration (from stopped service)..."); 30 | using var manager = new FoundryLocalManager(); 31 | if (manager != null) 32 | { 33 | await manager.StopServiceAsync(); 34 | } 35 | await app.TestOpenAIIntegration("qwen2.5-0.5b"); 36 | 37 | Console.WriteLine(new string('=', 80)); // Separator for clarity 38 | Console.WriteLine("Testing OpenAI integration (test again service is started)..."); 39 | await app.TestOpenAIIntegration("qwen2.5-0.5b"); 40 | 41 | Console.WriteLine(new string('=', 80)); // Separator for clarity 42 | Console.WriteLine("Testing service operations"); 43 | await app.TestService(); // Call the instance method 44 | 45 | Console.WriteLine(new string('=', 80)); // Separator for clarity 46 | Console.WriteLine("Testing model (un)loading"); 47 | await app.TestModelLoadUnload("qwen2.5-0.5b"); // Call the instance method 48 | 49 | Console.WriteLine(new string('=', 80)); // Separator for clarity 50 | Console.WriteLine("Testing downloading"); 51 | await app.TestDownload("qwen2.5-0.5b"); // Call the instance method 52 | 53 | Console.WriteLine("Press any key to exit..."); 54 | Console.ReadKey(true); 55 | } 56 | 57 | private async Task TestCacheOperations() 58 | { 59 | using var manager = new FoundryLocalManager(); 60 | Console.WriteLine($"Model
cache location at {await manager.GetCacheLocationAsync()}"); 61 | // Print out models in the cache 62 | var models = await manager.ListCachedModelsAsync(); 63 | Console.WriteLine($"Found {models.Count} models in the cache:"); 64 | foreach (var m in models) 65 | { 66 | Console.WriteLine($"Model: {m.Alias} ({m.ModelId})"); 67 | } 68 | } 69 | 70 | private async Task TestService() 71 | { 72 | using var manager = new FoundryLocalManager(); 73 | await manager.StartServiceAsync(); 74 | // Print out whether the service is running 75 | Console.WriteLine($"Service running (should be true): {manager.IsServiceRunning}"); 76 | // Print out the service endpoint and API key 77 | Console.WriteLine($"Service Uri: {manager.ServiceUri}"); 78 | Console.WriteLine($"Endpoint {manager.Endpoint}"); 79 | Console.WriteLine($"ApiKey: {manager.ApiKey}"); 80 | // stop the service 81 | await manager.StopServiceAsync(); 82 | Console.WriteLine($"Service stopped"); 83 | Console.WriteLine($"Service running (should be false): {manager.IsServiceRunning}"); 84 | } 85 | 86 | private async Task TestCatalog() 87 | // First test catalog listing 88 | { 89 | using var manager = new FoundryLocalManager(); 90 | foreach (var m in await manager.ListCatalogModelsAsync()) 91 | { 92 | Console.WriteLine($"Model: {m.Alias} ({m.ModelId})"); 93 | } 94 | } 95 | 96 | private async Task TestOpenAIIntegration(string aliasOrModelId) 97 | { 98 | var manager = await FoundryLocalManager.StartModelAsync(aliasOrModelId); 99 | 100 | var model = await manager.GetModelInfoAsync(aliasOrModelId); 101 | var key = new ApiKeyCredential(manager.ApiKey); 102 | var client = new OpenAIClient(key, new OpenAIClientOptions 103 | { 104 | Endpoint = manager.Endpoint 105 | }); 106 | 107 | var chatClient = client.GetChatClient(model?.ModelId); 108 | 109 | CollectionResult completionUpdates = chatClient.CompleteChatStreaming("Why is the sky blue'"); 110 | 111 | Console.Write($"[ASSISTANT]: "); 112 | foreach (StreamingChatCompletionUpdate completionUpdate in completionUpdates) 113 | { 114 | if (completionUpdate.ContentUpdate.Count > 0) 115 | { 116 | Console.Write(completionUpdate.ContentUpdate[0].Text); 117 | } 118 | } 119 | } 120 | 121 | private async Task TestModelLoadUnload(string aliasOrModelId) 122 | { 123 | using var manager = new FoundryLocalManager(); 124 | // Load a model 125 | var model = await manager.LoadModelAsync(aliasOrModelId); 126 | Console.WriteLine($"Loaded model: {model.Alias} ({model.ModelId})"); 127 | // Unload the model 128 | await manager.UnloadModelAsync(aliasOrModelId); 129 | Console.WriteLine($"Unloaded model: {model.Alias} ({model.ModelId})"); 130 | } 131 | 132 | private async Task TestDownload(string aliasOrModelId) 133 | { 134 | using var manager = new FoundryLocalManager(); 135 | 136 | // Download a model 137 | var model = await manager.DownloadModelAsync(aliasOrModelId, force: true); 138 | 139 | // test that the model can be loaded 140 | Console.WriteLine($"Downloaded model: {model!.Alias} ({model.ModelId})"); 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /sdk/cs/samples/TestApp/TestApp.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Exe 5 | net9.0 6 | enable 7 | enable 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /sdk/cs/src/FoundryModelInfo.cs: -------------------------------------------------------------------------------- 1 | // 
-------------------------------------------------------------------------------------------------------------------- 2 | // 3 | // Copyright (c) Microsoft. All rights reserved. 4 | // 5 | // -------------------------------------------------------------------------------------------------------------------- 6 | 7 | namespace Microsoft.AI.Foundry.Local; 8 | 9 | using System.Text.Json; 10 | using System.Text.Json.Serialization; 11 | 12 | public record PromptTemplate 13 | { 14 | [JsonPropertyName("assistant")] 15 | public string Assistant { get; init; } = default!; 16 | 17 | [JsonPropertyName("prompt")] 18 | public string Prompt { get; init; } = default!; 19 | } 20 | 21 | public record Runtime 22 | { 23 | [JsonPropertyName("deviceType")] 24 | public DeviceType DeviceType { get; init; } = default!; 25 | 26 | [JsonPropertyName("executionProvider")] 27 | public ExecutionProvider ExecutionProvider { get; init; } = default!; 28 | } 29 | 30 | public record ModelSettings 31 | { 32 | // The sample shows an empty array; keep it open‑ended. 33 | [JsonPropertyName("parameters")] 34 | public List Parameters { get; init; } = []; 35 | } 36 | 37 | public record FoundryCachedModel(string Name, string? Id); 38 | 39 | public record FoundryDownloadResult(bool Success, string? ErrorMessage); 40 | 41 | internal sealed record FoundryModelDownload( 42 | string Name, 43 | string Uri, 44 | string Path, 45 | string ProviderType, 46 | PromptTemplate PromptTemplate); 47 | 48 | internal sealed record FoundryDownloadBody(FoundryModelDownload Model, bool IgnorePipeReport); 49 | 50 | public record ModelInfo 51 | { 52 | [JsonPropertyName("name")] 53 | public string ModelId { get; init; } = default!; 54 | 55 | [JsonPropertyName("displayName")] 56 | public string DisplayName { get; init; } = default!; 57 | 58 | [JsonPropertyName("providerType")] 59 | public string ProviderType { get; init; } = default!; 60 | 61 | [JsonPropertyName("uri")] 62 | public string Uri { get; init; } = default!; 63 | 64 | [JsonPropertyName("version")] 65 | public string Version { get; init; } = default!; 66 | 67 | [JsonPropertyName("modelType")] 68 | public string ModelType { get; init; } = default!; 69 | 70 | [JsonPropertyName("promptTemplate")] 71 | public PromptTemplate PromptTemplate { get; init; } = default!; 72 | 73 | [JsonPropertyName("publisher")] 74 | public string Publisher { get; init; } = default!; 75 | 76 | [JsonPropertyName("task")] 77 | public string Task { get; init; } = default!; 78 | 79 | [JsonPropertyName("runtime")] 80 | public Runtime Runtime { get; init; } = default!; 81 | 82 | [JsonPropertyName("fileSizeMb")] 83 | public long FileSizeMb { get; init; } 84 | 85 | [JsonPropertyName("modelSettings")] 86 | public ModelSettings ModelSettings { get; init; } = default!; 87 | 88 | [JsonPropertyName("alias")] 89 | public string Alias { get; init; } = default!; 90 | 91 | [JsonPropertyName("supportsToolCalling")] 92 | public bool SupportsToolCalling { get; init; } 93 | 94 | [JsonPropertyName("license")] 95 | public string License { get; init; } = default!; 96 | 97 | [JsonPropertyName("licenseDescription")] 98 | public string LicenseDescription { get; init; } = default!; 99 | 100 | [JsonPropertyName("parentModelUri")] 101 | public string ParentModelUri { get; init; } = default!; 102 | } 103 | 104 | internal sealed class DownloadRequest 105 | { 106 | internal sealed class ModelInfo 107 | { 108 | [JsonPropertyName("Name")] 109 | public required string Name { get; set; } 110 | [JsonPropertyName("Uri")] 111 | public required string Uri { get; 
set; } 112 | [JsonPropertyName("ProviderType")] 113 | public required string ProviderType { get; set; } 114 | [JsonPropertyName("PromptTemplate")] 115 | public required PromptTemplate PromptTemplate { get; set; } 116 | } 117 | 118 | [JsonPropertyName("Model")] 119 | public required ModelInfo Model { get; set; } 120 | 121 | [JsonPropertyName("token")] 122 | public required string Token { get; set; } 123 | 124 | [JsonPropertyName("IgnorePipeReport")] 125 | public required bool IgnorePipeReport { get; set; } 126 | 127 | } 128 | 129 | public record ModelDownloadProgress 130 | { 131 | public double Percentage { get; init; } 132 | public bool IsCompleted { get; init; } 133 | public ModelInfo? ModelInfo { get; init; } 134 | public string? ErrorMessage { get; init; } 135 | 136 | public static ModelDownloadProgress Progress(double percentage) => 137 | new() 138 | { Percentage = percentage, IsCompleted = false }; 139 | 140 | public static ModelDownloadProgress Completed(ModelInfo modelInfo) => 141 | new() 142 | { Percentage = 100, IsCompleted = true, ModelInfo = modelInfo }; 143 | 144 | public static ModelDownloadProgress Error(string errorMessage) => 145 | new() 146 | { IsCompleted = true, ErrorMessage = errorMessage }; 147 | } 148 | 149 | [JsonSerializable(typeof(ModelInfo))] 150 | [JsonSerializable(typeof(List))] 151 | [JsonSerializable(typeof(int))] 152 | [JsonSerializable(typeof(ModelDownloadProgress))] 153 | public partial class ModelGenerationContext : JsonSerializerContext 154 | { 155 | } 156 | -------------------------------------------------------------------------------- /sdk/cs/src/Microsoft.AI.Foundry.Local.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | net8.0;net9.0 5 | enable 6 | enable 7 | True 8 | 0.1.0-preview 9 | 10 | 11 | 12 | README.md 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /sdk/cs/src/Microsoft.AI.Foundry.Local.csproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | ProjectDebugger 5 | 6 | 7 | Microsoft.AI.Foundry.Local 8 | 9 | -------------------------------------------------------------------------------- /sdk/cs/test/FoundryLocal.Tests/FoundryLocal.Tests.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | net9.0 5 | enable 6 | enable 7 | false 8 | 9 | 10 | 11 | 12 | runtime; build; native; contentfiles; analyzers; buildtransitive 13 | all 14 | 15 | 16 | 17 | 18 | 19 | runtime; build; native; contentfiles; analyzers; buildtransitive 20 | all 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /sdk/js/.eslintrc.cjs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | 'use strict' 5 | 6 | module.exports = { 7 | root: true, 8 | ignorePatterns: ['node_modules/', 'dist/'], 9 | env: { 10 | commonjs: true, 11 | es2021: true, 12 | node: true, 13 | }, 14 | parser: '@typescript-eslint/parser', 15 | parserOptions: { 16 | ecmaVersion: 'latest', 17 | sourceType: 'module', 18 | project: './tsconfig.json', 19 | }, 20 | plugins: ['@typescript-eslint', 'header', 'import', 'jsdoc'], 21 | extends: [ 22 | 'eslint:recommended', 23 | 'plugin:@typescript-eslint/eslint-recommended', 24 | 'plugin:@typescript-eslint/recommended', 25 | ], 26 | rules: { 27 | 'header/header': [ 28 | 2, 29 | 'line', 30 | [' Copyright (c) Microsoft Corporation. All rights reserved.', ' Licensed under the MIT License.'], 31 | 2, 32 | ], 33 | 'import/no-extraneous-dependencies': ['error', { devDependencies: false }], 34 | 'import/no-unassigned-import': 'error', 35 | 'jsdoc/check-alignment': 'error', 36 | 'jsdoc/check-indentation': 'error', 37 | '@typescript-eslint/await-thenable': 'error', 38 | camelcase: 'error', 39 | curly: 'error', 40 | 'no-debugger': 'error', 41 | 'no-unused-vars': 'off', 42 | }, 43 | } 44 | -------------------------------------------------------------------------------- /sdk/js/.npmignore: -------------------------------------------------------------------------------- 1 | # Folders 2 | node_modules 3 | build 4 | src 5 | test 6 | .vscode 7 | 8 | # Files 9 | .eslintrc.cjs 10 | .gitignore 11 | package-lock.json 12 | .prettierrc.json 13 | .prettierignore 14 | tsconfig.json 15 | *.tgz -------------------------------------------------------------------------------- /sdk/js/.prettierignore: -------------------------------------------------------------------------------- 1 | # ignore dist folder 2 | dist/ 3 | -------------------------------------------------------------------------------- /sdk/js/.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "endOfLine": "auto", 3 | "printWidth": 120, 4 | "semi": false, 5 | "singleQuote": true, 6 | "trailingComma": "all" 7 | } 8 | -------------------------------------------------------------------------------- /sdk/js/LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /sdk/js/README.md: -------------------------------------------------------------------------------- 1 | # Foundry Local JavaScript SDK 2 | 3 | The Foundry Local SDK simplifies AI model management in local environments by providing control-plane operations separate from data-plane inferencing code. 4 | 5 | ## Prerequisites 6 | 7 | Foundry Local must be installed and findable in your PATH. 8 | 9 | ## Getting Started 10 | 11 | ```bash 12 | npm install foundry-local-sdk 13 | ``` 14 | 15 | ## Usage 16 | 17 | The SDK provides a simple interface to interact with the Foundry Local API. You can use it to manage models, check the status of the service, and make requests to the models. 18 | 19 | ### Bootstrapping 20 | 21 | The SDK can _bootstrap_ Foundry Local, which will initiate the following sequence: 22 | 23 | 1. Start the Foundry Local service, if it is not already running. 24 | 1. Automatically detect the hardware and software requirements for the model. 25 | 1. Download the most performant model for the detected hardware, if it is not already downloaded. 26 | 1. Load the model into memory. 27 | 28 | To use the SDK with bootstrapping, you can use the following code: 29 | 30 | ```js 31 | // foundry-local-sdk supports both CommonJS and ES module syntax 32 | // CommonJS 33 | const { FoundryLocalManager } = require('foundry-local-sdk') 34 | // ES module 35 | // import { FoundryLocalManager } from 'foundry-local-sdk' 36 | 37 | const alias = 'phi-3.5-mini' 38 | const foundryLocalManager = new FoundryLocalManager() 39 | 40 | // initialize the SDK with an optional alias or model ID 41 | const modelInfo = await foundryLocalManager.init(alias) 42 | console.log('Model Info:', modelInfo) 43 | 44 | // check that the service is running 45 | const isRunning = await foundryLocalManager.isServiceRunning() 46 | console.log(`Service running: ${isRunning}`) 47 | 48 | // list all available models in the catalog 49 | const catalogModels = await foundryLocalManager.listCatalogModels() 50 | console.log('Catalog Models:', catalogModels) 51 | 52 | // list all downloaded models 53 | const localModels = await foundryLocalManager.listCachedModels() 54 | console.log('Local Models:', localModels) 55 | ``` 56 | 57 | Alternatively, you can use the `FoundryLocalManager` class to manage the service and models manually. This is useful if you want to control the service and models without bootstrapping. For example, you may want to show the end user what is happening in the background. 58 | 59 | ```js 60 | const { FoundryLocalManager } = require('foundry-local-sdk') 61 | 62 | const alias = 'phi-3.5-mini' 63 | const foundryLocalManager = new FoundryLocalManager() 64 | 65 | // start the service 66 | await foundryLocalManager.startService() 67 | // or await foundryLocalManager.init() 68 | 69 | // download the model 70 | // the download api also accepts an optional event handler to track the download progress 71 | // it must be of the signature (progress: number) => void 72 | await foundryLocalManager.downloadModel(alias) 73 | 74 | // load the model 75 | const modelInfo = await foundryLocalManager.loadModel(alias) 76 | console.log('Model Info:', modelInfo) 77 | ``` 78 | 79 | ## Using the SDK with OpenAI API 80 | 81 | Use the Foundry Local endpoint with an OpenAI-compatible API client.
For example, install the `openai` package using npm: 82 | 83 | ```bash 84 | npm install openai 85 | ``` 86 | 87 | Then copy-and-paste the following code into a file called `app.js`: 88 | 89 | ```js 90 | import { OpenAI } from 'openai' 91 | import { FoundryLocalManager } from 'foundry-local-sdk' 92 | 93 | // By using an alias, the most suitable model will be downloaded 94 | // to your end-user's device. 95 | // TIP: You can find a list of available models by running the 96 | // following command in your terminal: `foundry model list`. 97 | const alias = 'phi-3.5-mini' 98 | 99 | // Create a FoundryLocalManager instance. This will start the Foundry 100 | // Local service if it is not already running. 101 | const foundryLocalManager = new FoundryLocalManager() 102 | 103 | // Initialize the manager with a model. This will download the model 104 | // if it is not already present on the user's device. 105 | const modelInfo = await foundryLocalManager.init(alias) 106 | console.log('Model Info:', modelInfo) 107 | 108 | const openai = new OpenAI({ 109 | baseURL: foundryLocalManager.endpoint, 110 | apiKey: foundryLocalManager.apiKey, 111 | }) 112 | 113 | async function streamCompletion() { 114 | const stream = await openai.chat.completions.create({ 115 | model: modelInfo.id, 116 | messages: [{ role: 'user', content: 'What is the golden ratio?' }], 117 | stream: true, 118 | }) 119 | 120 | for await (const chunk of stream) { 121 | if (chunk.choices[0]?.delta?.content) { 122 | process.stdout.write(chunk.choices[0].delta.content) 123 | } 124 | } 125 | } 126 | 127 | streamCompletion() 128 | ``` 129 | 130 | Run the application using Node.js: 131 | 132 | ```bash 133 | node app.js 134 | ``` 135 | 136 | ## Browser Usage 137 | 138 | The SDK also provides a browser-compatible version. However, it requires you to provide the service URL manually. You can use the `FoundryLocalManager` class in the browser as follows: 139 | 140 | ```js 141 | import { FoundryLocalManager } from 'foundry-local-sdk/browser' 142 | 143 | const foundryLocalManager = new FoundryLocalManager({ host: 'http://localhost:8080' }) 144 | 145 | // the rest of the code is the same as above other than the init, isServiceRunning, and startService methods 146 | // which are not available in the browser version. 
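// As an illustrative sketch (not part of the original sample), the catalog and cache
// methods documented above work the same way against the manually supplied host:
const catalogModels = await foundryLocalManager.listCatalogModels()
console.log('Catalog Models:', catalogModels)
const localModels = await foundryLocalManager.listCachedModels()
console.log('Local Models:', localModels)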
147 | ``` 148 | -------------------------------------------------------------------------------- /sdk/js/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "foundry-local-sdk", 3 | "version": "0.3.1", 4 | "description": "Foundry Local Manager Javascript SDK", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/microsoft/Foundry-Local.git" 8 | }, 9 | "author": "Microsoft Corporation ", 10 | "main": "dist/index.cjs", 11 | "module": "dist/index.mjs", 12 | "types": "dist/index.d.ts", 13 | "license": "MIT", 14 | "scripts": { 15 | "build": "unbuild", 16 | "format": "prettier --write .", 17 | "format:check": "prettier --check .", 18 | "lint": "eslint ./src/*", 19 | "test": "vitest --run" 20 | }, 21 | "exports": { 22 | ".": { 23 | "require": "./dist/index.cjs", 24 | "import": "./dist/index.mjs", 25 | "types": "./dist/index.d.ts" 26 | }, 27 | "./browser": { 28 | "require": "./dist/base.cjs", 29 | "import": "./dist/base.mjs", 30 | "types": "./dist/base.d.ts" 31 | } 32 | }, 33 | "dependencies": { 34 | "whatwg-fetch": "^3.6.20" 35 | }, 36 | "devDependencies": { 37 | "@types/node": "^18.14.6", 38 | "@types/whatwg-fetch": "^0.0.33", 39 | "@typescript-eslint/eslint-plugin": "^7.4.0", 40 | "@typescript-eslint/parser": "^7.4.0", 41 | "eslint": "^8.49.0", 42 | "eslint-plugin-header": "^3.1.1", 43 | "eslint-plugin-import": "^2.28.1", 44 | "eslint-plugin-jsdoc": "^46.8.2", 45 | "prettier": "^3.2.4", 46 | "typescript": "^5.2.2", 47 | "unbuild": "^3.5.0", 48 | "vitest": "^3.1.3" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /sdk/js/src/client.ts: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | import type { Fetch } from './types.js' 5 | 6 | /** 7 | * Handles fetch requests with error handling. 8 | * @param {Fetch} fetch - The fetch function to use. 9 | * @param {string} url - The URL to fetch. 10 | * @param {RequestInit} [options] - Optional request options. 11 | * @returns {Promise} The fetch response. 12 | * @throws {Error} If the fetch request fails or returns a non-OK status. 13 | */ 14 | async function fetchWithErrorHandling(fetch: Fetch, url: string, options?: RequestInit): Promise { 15 | try { 16 | const response = await fetch(url, options) 17 | if (!response.ok) { 18 | let responseContent = '' 19 | try { 20 | responseContent = await response.text() 21 | } catch (error) { 22 | responseContent = 'Unable to read response content' 23 | } 24 | return Promise.reject(new Error(`HTTP error! status: ${response.status}, response: ${responseContent}`)) 25 | } 26 | return response 27 | } catch (error) { 28 | const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred' 29 | throw new Error( 30 | `Network error! Please check if the foundry service is running and the host URL is correct. Error: ${errorMessage}`, 31 | ) 32 | } 33 | } 34 | 35 | /** 36 | * Sends a GET request to the specified host with optional query parameters. 37 | * @param {Fetch} fetch - The fetch function to use. 38 | * @param {string} host - The host URL. 39 | * @param {Record} [queryParams] - Optional query parameters. 40 | * @returns {Promise} The fetch response. 41 | */ 42 | export const get = async (fetch: Fetch, host: string, queryParams?: Record): Promise => { 43 | const endpoint = host + (queryParams ? '?' 
+ new URLSearchParams(queryParams).toString() : '') 44 | return await fetchWithErrorHandling(fetch, endpoint) 45 | } 46 | 47 | /** 48 | * Parses a percentage value from a string. 49 | * @param {string} line - The string containing the percentage value. 50 | * @returns {number | null} The parsed percentage value, or null if not found. 51 | */ 52 | function parsePercentage(line: string): number | null { 53 | const match = line.match(/(\d+(?:\.\d+)?)%/) 54 | return match ? Math.min(parseInt(match[1]), 100) : null 55 | } 56 | 57 | /** 58 | * Sends a POST request with progress updates. 59 | * @param {Fetch} fetch - The fetch function to use. 60 | * @param {string} host - The host URL. 61 | * @param {Record} [body] - The request body. 62 | * @param {(progress: number) => void} [onProgress] - Optional progress callback. 63 | * @returns {Promise>} The parsed response body. 64 | */ 65 | export const postWithProgress = async ( 66 | fetch: Fetch, 67 | host: string, 68 | body?: Record, 69 | onProgress?: (progress: number) => void, 70 | ): Promise> => { 71 | // Sending a POST request and getting a streamable response 72 | const response = await fetchWithErrorHandling(fetch, host, { 73 | method: 'POST', 74 | headers: { 'Content-Type': 'application/json' }, 75 | body: body ? JSON.stringify(body) : undefined, 76 | }) 77 | 78 | // Set up progress tracking 79 | const reader = response.body?.getReader() 80 | let finalJson = '' 81 | let prevPercent = 0 82 | 83 | // Read and process the response stream 84 | // eslint-disable-next-line no-constant-condition 85 | while (true) { 86 | const { done, value } = (await reader?.read()) ?? { 87 | done: true, 88 | value: new Uint8Array(), 89 | } 90 | 91 | if (done) { 92 | break 93 | } 94 | 95 | const line = new TextDecoder('utf-8').decode(value) 96 | 97 | // Accumulate the final JSON when we start receiving JSON data 98 | if (finalJson || line.startsWith('{')) { 99 | finalJson += line 100 | continue 101 | } 102 | 103 | // If onProgress is not provided, skip progress tracking 104 | if (!onProgress) { 105 | continue 106 | } 107 | 108 | // Parse progress percentage from the line 109 | const percent = parsePercentage(line) 110 | if (percent !== null && percent > prevPercent) { 111 | prevPercent = percent 112 | onProgress(percent) // Update the progress 113 | } 114 | } 115 | 116 | // Parse and return the final JSON response 117 | if (finalJson) { 118 | try { 119 | return JSON.parse(finalJson) 120 | } catch (error) { 121 | const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred' 122 | throw new Error(`Error parsing JSON response: ${errorMessage}`) 123 | } 124 | } else { 125 | throw new Error('No JSON data received!') 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /sdk/js/src/index.ts: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | import { FoundryLocalManager as FoundryLocalManagerBase } from './base.js' 5 | import * as service from './service.js' 6 | 7 | import type { FoundryModelInfo, Fetch } from './types.js' 8 | 9 | /** 10 | * Class representing the Foundry Local Manager. 11 | */ 12 | export class FoundryLocalManager extends FoundryLocalManagerBase { 13 | /** 14 | * The service URL for the Foundry service. 
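   * Remains null until startService() or isServiceRunning() populates it.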
15 | */ 16 | protected _serviceUrl: string | null = null 17 | 18 | /** 19 | * Constructs a new FoundryLocalManager instance 20 | * @param {Object} [options] - Optional configuration options for the FoundryLocalManager. 21 | * @param {Fetch} [options.fetch] - Optional custom fetch implementation to use for HTTP requests. 22 | * If not provided, the global fetch will be used. 23 | */ 24 | constructor({ fetch: overriddenFetch = fetch }: { fetch?: Fetch } = {}) { 25 | service.assertFoundryAvailable() 26 | super({ 27 | serviceUrl: '', 28 | fetch: overriddenFetch, 29 | }) 30 | } 31 | 32 | /** 33 | * Gets the service URL. 34 | * @throws {Error} If the service URL is not set. 35 | * @returns {string} The service URL. 36 | */ 37 | get serviceUrl(): string { 38 | if (this._serviceUrl) { 39 | return this._serviceUrl 40 | } 41 | throw new Error('Service URL is not set. Please start the service first.') 42 | } 43 | 44 | /** 45 | * Initializes the Foundry Local Manager with a model. 46 | * @param {string | null} aliasOrModelId - The alias or ID of the model to initialize with. 47 | * @returns {Promise} The model information if initialized, otherwise null. 48 | */ 49 | async init(aliasOrModelId: string | null): Promise { 50 | await this.startService() 51 | 52 | if (aliasOrModelId) { 53 | await this.downloadModel(aliasOrModelId) 54 | const modelInfo = await this.loadModel(aliasOrModelId) 55 | return modelInfo 56 | } 57 | return null 58 | } 59 | 60 | /** 61 | * Checks if the Foundry service is running. 62 | * @returns {Promise} True if the service is running, otherwise false. 63 | */ 64 | async isServiceRunning(): Promise { 65 | this._serviceUrl = await service.getServiceUrl() 66 | return this._serviceUrl !== null 67 | } 68 | 69 | /** 70 | * Starts the Foundry service. 71 | * @throws {Error} If the service fails to start. 72 | * @returns {Promise} Resolves when the service is successfully started. 73 | */ 74 | async startService(): Promise { 75 | this._serviceUrl = await service.startService() 76 | if (!this._serviceUrl) { 77 | throw new Error('Failed to start the service.') 78 | } 79 | } 80 | } 81 | 82 | export * from './types.js' 83 | -------------------------------------------------------------------------------- /sdk/js/src/service.ts: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | import { exec, execSync } from 'child_process' 5 | 6 | /** 7 | * Ensures that Foundry is available in the system's PATH. 8 | * Throws an error if Foundry is not installed or not found in the PATH. 9 | */ 10 | export function assertFoundryAvailable(): void { 11 | try { 12 | if (process.platform === 'win32') { 13 | execSync('where foundry') 14 | } else { 15 | execSync('which foundry') 16 | } 17 | } catch (error) { 18 | throw new Error('Foundry is not installed or not on PATH!') 19 | } 20 | } 21 | 22 | /** 23 | * Retrieves the service URL by executing the `foundry service status` command. 24 | * @returns {Promise} The service URL if found, otherwise null. 
25 | */ 26 | export async function getServiceUrl(): Promise { 27 | try { 28 | const stdout = await new Promise((resolve, reject) => { 29 | exec('foundry service status', (err: Error | null, stdout: string) => { 30 | if (err) { 31 | reject(err) 32 | return 33 | } 34 | resolve(stdout) 35 | }) 36 | }) 37 | const match = stdout.match(/http:\/\/(?:[a-zA-Z0-9.-]+|\d{1,3}(\.\d{1,3}){3}):\d+/) 38 | if (match) { 39 | return match[0] 40 | } 41 | return null 42 | } catch (error) { 43 | console.error('Error getting service status:', error) 44 | return null 45 | } 46 | } 47 | 48 | /** 49 | * Starts the Foundry service. 50 | * @returns {Promise} The service URL if successfully started, otherwise null. 51 | */ 52 | export async function startService(): Promise { 53 | let serviceUrl = await getServiceUrl() 54 | if (serviceUrl !== null) { 55 | return serviceUrl 56 | } 57 | 58 | // Start the service without waiting for stdout, it never completes 59 | exec('foundry service start', (err: Error | null) => { 60 | if (err) { 61 | console.error('Error starting service:', err.message) 62 | return null 63 | } 64 | }) 65 | 66 | // Retry loop to check for service URL availability 67 | let retries = 10 68 | while (retries > 0) { 69 | serviceUrl = await getServiceUrl() 70 | if (serviceUrl !== null) { 71 | return serviceUrl 72 | } 73 | 74 | // Wait 100ms before trying again 75 | await new Promise((resolve) => setTimeout(resolve, 100)) 76 | retries-- 77 | } 78 | 79 | console.warn('Foundry service did not start within the expected time. May not be running.') 80 | return null 81 | } 82 | -------------------------------------------------------------------------------- /sdk/js/src/types.ts: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | /** 5 | * Type alias for the fetch function. 6 | */ 7 | export type Fetch = typeof fetch 8 | 9 | /** 10 | * Enum representing the types of devices. 11 | */ 12 | export enum DeviceType { 13 | CPU = 'CPU', 14 | GPU = 'GPU', 15 | NPU = 'NPU', 16 | } 17 | 18 | /** 19 | * Enum representing the execution providers. 20 | */ 21 | export enum ExecutionProvider { 22 | CPU = 'CPUExecutionProvider', 23 | WEBGPU = 'WebGpuExecutionProvider', 24 | CUDA = 'CUDAExecutionProvider', 25 | QNN = 'QNNExecutionProvider', 26 | } 27 | 28 | /** 29 | * Interface representing the runtime configuration of a model. 30 | */ 31 | export interface ModelRuntime { 32 | /** 33 | * The type of device used for execution. 34 | */ 35 | deviceType: DeviceType 36 | 37 | /** 38 | * The execution provider used for running the model. 39 | */ 40 | executionProvider: ExecutionProvider 41 | } 42 | 43 | /** 44 | * Interface representing the response model for a Foundry list operation. 45 | */ 46 | export interface FoundryListResponseModel { 47 | /** 48 | * The name of the model. 49 | */ 50 | name: string 51 | 52 | /** 53 | * The display name of the model. 54 | */ 55 | displayName: string 56 | 57 | /** 58 | * The type of the model. 59 | */ 60 | modelType: string 61 | 62 | /** 63 | * The provider type of the model. 64 | */ 65 | providerType: string 66 | 67 | /** 68 | * The URI of the model. 69 | */ 70 | uri: string 71 | 72 | /** 73 | * The version of the model. 74 | */ 75 | version: string 76 | 77 | /** 78 | * The prompt template associated with the model. 79 | */ 80 | promptTemplate: Record 81 | 82 | /** 83 | * The publisher of the model. 
84 | */ 85 | publisher: string 86 | 87 | /** 88 | * The task the model is designed for. 89 | */ 90 | task: string 91 | 92 | /** 93 | * The runtime configuration of the model. 94 | */ 95 | runtime: ModelRuntime 96 | 97 | /** 98 | * The file size of the model in megabytes. 99 | */ 100 | fileSizeMb: number 101 | 102 | /** 103 | * The settings of the model. 104 | */ 105 | modelSettings: Record[]> 106 | 107 | /** 108 | * The alias of the model. 109 | */ 110 | alias: string 111 | 112 | /** 113 | * Indicates whether the model supports tool calling. 114 | */ 115 | supportsToolCalling: boolean 116 | 117 | /** 118 | * The license of the model. 119 | */ 120 | license: string 121 | 122 | /** 123 | * The description of the license. 124 | */ 125 | licenseDescription: string 126 | 127 | /** 128 | * The URI of the parent model. 129 | */ 130 | parentModelUri: string 131 | } 132 | 133 | /** 134 | * Interface representing information about a Foundry model. 135 | */ 136 | export interface FoundryModelInfo { 137 | /** 138 | * The alias of the model. 139 | */ 140 | alias: string 141 | 142 | /** 143 | * The unique identifier of the model. 144 | */ 145 | id: string 146 | 147 | /** 148 | * The version of the model. 149 | */ 150 | version: string 151 | 152 | /** 153 | * The execution provider used for the model. 154 | */ 155 | runtime: ExecutionProvider 156 | 157 | /** 158 | * The URI of the model. 159 | */ 160 | uri: string 161 | 162 | /** 163 | * The size of the model in megabytes. 164 | */ 165 | modelSize: number 166 | 167 | /** 168 | * The prompt template associated with the model. 169 | */ 170 | promptTemplate: Record 171 | 172 | /** 173 | * The provider of the model. 174 | */ 175 | provider: string 176 | 177 | /** 178 | * The publisher of the model. 179 | */ 180 | publisher: string 181 | 182 | /** 183 | * The license of the model. 184 | */ 185 | license: string 186 | 187 | /** 188 | * The task the model is designed for. 189 | */ 190 | task: string 191 | } 192 | 193 | /** 194 | * Interface representing the body of a download request. 195 | */ 196 | export interface DownloadBody { 197 | /** 198 | * The name of the model. 199 | */ 200 | Name: string 201 | 202 | /** 203 | * The URI of the model. 204 | */ 205 | Uri: string 206 | 207 | /** 208 | * The publisher of the model. 209 | */ 210 | Publisher: string 211 | 212 | /** 213 | * The provider type of the model. 214 | */ 215 | ProviderType: string 216 | 217 | /** 218 | * The prompt template associated with the model. 219 | */ 220 | PromptTemplate: Record 221 | } 222 | -------------------------------------------------------------------------------- /sdk/js/test/client.test.ts: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | import { describe, expect, it, vi, beforeEach } from 'vitest' 5 | import * as client from '../src/client' 6 | import type { Fetch } from '../src/types' 7 | 8 | describe('Client', () => { 9 | const mockFetch = vi.fn() as unknown as Fetch 10 | const mockResponse = { 11 | ok: true, 12 | json: vi.fn(), 13 | text: vi.fn(), 14 | body: { getReader: vi.fn() }, 15 | status: 200, 16 | } 17 | 18 | beforeEach(() => { 19 | vi.restoreAllMocks() 20 | mockFetch.mockReset() 21 | mockResponse.ok = true 22 | mockResponse.status = 200 23 | mockResponse.text.mockReset() 24 | mockResponse.json.mockReset() 25 | mockResponse.body.getReader.mockReset() 26 | mockFetch.mockResolvedValue(mockResponse as unknown as Response) 27 | }) 28 | 29 | describe('get', () => { 30 | it('calls fetch with correct URL', async () => { 31 | await client.get(mockFetch, 'http://example.com') 32 | expect(mockFetch).toHaveBeenCalledWith('http://example.com', undefined) 33 | }) 34 | 35 | it('appends query params', async () => { 36 | await client.get(mockFetch, 'http://example.com', { a: '1', b: '2' }) 37 | expect(mockFetch).toHaveBeenCalledWith('http://example.com?a=1&b=2', undefined) 38 | }) 39 | }) 40 | 41 | describe('postWithProgress', () => { 42 | let mockReader: any 43 | let onProgress: ReturnType 44 | 45 | beforeEach(() => { 46 | onProgress = vi.fn() 47 | mockReader = { read: vi.fn() } 48 | mockResponse.body.getReader.mockReturnValue(mockReader) 49 | }) 50 | 51 | it('sends POST with JSON', async () => { 52 | const body = { key: 'value' } 53 | 54 | mockReader.read 55 | .mockResolvedValueOnce({ done: false, value: new TextEncoder().encode('{"ok":true}') }) // valid JSON 56 | .mockResolvedValueOnce({ done: true, value: undefined }) 57 | 58 | const result = await client.postWithProgress(mockFetch, 'http://example.com', body) 59 | 60 | expect(result).toEqual({ ok: true }) 61 | expect(mockFetch).toHaveBeenCalledWith('http://example.com', { 62 | method: 'POST', 63 | headers: { 'Content-Type': 'application/json' }, 64 | body: JSON.stringify(body), 65 | }) 66 | }) 67 | 68 | it('reports progress if onProgress is given', async () => { 69 | const mockOnProgress = vi.fn() 70 | const progress1 = new TextEncoder().encode('Progress: 50%') 71 | const progress2 = new TextEncoder().encode('Progress: 100%') 72 | const finalJson1 = new TextEncoder().encode('{"done"') 73 | const finalJson2 = new TextEncoder().encode(':true}') 74 | 75 | mockReader.read 76 | .mockResolvedValueOnce({ done: false, value: progress1 }) 77 | .mockResolvedValueOnce({ done: false, value: progress2 }) 78 | .mockResolvedValueOnce({ done: false, value: finalJson1 }) 79 | .mockResolvedValueOnce({ done: false, value: finalJson2 }) 80 | .mockResolvedValueOnce({ done: true, value: undefined }) 81 | 82 | const result = await client.postWithProgress(mockFetch, 'http://example.com', {}, mockOnProgress) 83 | 84 | expect(mockOnProgress).toHaveBeenCalledWith(50) 85 | expect(mockOnProgress).toHaveBeenCalledWith(100) 86 | expect(result).toEqual({ done: true }) 87 | }) 88 | 89 | it('parses final JSON response', async () => { 90 | const jsonChunk = new TextEncoder().encode('{"message":"ok"}') 91 | mockReader.read 92 | .mockResolvedValueOnce({ done: false, value: jsonChunk }) 93 | .mockResolvedValueOnce({ done: true, value: undefined }) 94 | 95 | const result = await client.postWithProgress(mockFetch, 'http://example.com') 96 | expect(result).toEqual({ message: 'ok' }) 97 | }) 98 | 99 | it('throws on invalid JSON', async () => { 100 | mockReader.read 101 | .mockResolvedValueOnce({ done: 
false, value: new TextEncoder().encode('{invalid') }) 102 | .mockResolvedValueOnce({ done: true, value: undefined }) 103 | 104 | await expect(client.postWithProgress(mockFetch, 'http://example.com')).rejects.toThrow( 105 | /Error parsing JSON response/, 106 | ) 107 | }) 108 | 109 | it('throws if no JSON is received', async () => { 110 | mockReader.read.mockResolvedValueOnce({ done: true, value: undefined }) 111 | 112 | await expect(client.postWithProgress(mockFetch, 'http://example.com')).rejects.toThrow('No JSON data received!') 113 | }) 114 | }) 115 | }) 116 | -------------------------------------------------------------------------------- /sdk/js/test/index.test.ts: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | import { describe, expect, it, vi, beforeEach } from 'vitest' 5 | import { FoundryLocalManager } from '../src/index' 6 | import * as service from '../src/service' 7 | import { FoundryModelInfo, ExecutionProvider } from '../src/types' 8 | 9 | // Mock service module 10 | vi.mock('../src/service', () => ({ 11 | assertFoundryAvailable: vi.fn(), 12 | getServiceUrl: vi.fn(), 13 | startService: vi.fn(), 14 | })) 15 | 16 | // Mock the base class 17 | vi.mock('../src/base', () => { 18 | return { 19 | FoundryLocalManager: vi.fn().mockImplementation(() => ({ 20 | downloadModel: vi.fn(), 21 | loadModel: vi.fn(), 22 | })), 23 | } 24 | }) 25 | 26 | describe('FoundryLocalManager (index)', () => { 27 | let manager: FoundryLocalManager 28 | 29 | beforeEach(() => { 30 | vi.resetAllMocks() 31 | manager = new FoundryLocalManager() 32 | 33 | // Ensure these are spies in all tests that reference them 34 | manager.downloadModel = vi.fn() 35 | manager.loadModel = vi.fn() 36 | }) 37 | 38 | describe('constructor', () => { 39 | it('should check if foundry is available', () => { 40 | expect(service.assertFoundryAvailable).toHaveBeenCalled() 41 | }) 42 | }) 43 | 44 | describe('serviceUrl', () => { 45 | it('should return the service URL if set', () => { 46 | // Accessing private field for testing 47 | manager['_serviceUrl'] = 'http://localhost:5273' 48 | expect(manager.serviceUrl).toBe('http://localhost:5273') 49 | }) 50 | 51 | it('should throw error if service URL is not set', () => { 52 | manager['_serviceUrl'] = null 53 | expect(() => manager.serviceUrl).toThrow('Service URL is not set') 54 | }) 55 | }) 56 | 57 | describe('isServiceRunning', () => { 58 | it('should return true if service URL is found', async () => { 59 | vi.mocked(service.getServiceUrl).mockResolvedValue('http://localhost:5273') 60 | 61 | const result = await manager.isServiceRunning() 62 | 63 | expect(service.getServiceUrl).toHaveBeenCalled() 64 | expect(manager['_serviceUrl']).toBe('http://localhost:5273') 65 | expect(result).toBe(true) 66 | }) 67 | 68 | it('should return false if service URL is not found', async () => { 69 | vi.mocked(service.getServiceUrl).mockResolvedValue(null) 70 | 71 | const result = await manager.isServiceRunning() 72 | 73 | expect(service.getServiceUrl).toHaveBeenCalled() 74 | expect(manager['_serviceUrl']).toBeNull() 75 | expect(result).toBe(false) 76 | }) 77 | }) 78 | 79 | describe('startService', () => { 80 | it('should set service URL if service starts successfully', async () => { 81 | vi.mocked(service.startService).mockResolvedValue('http://localhost:5273') 82 | 83 | await manager.startService() 84 | 85 | expect(service.startService).toHaveBeenCalled() 86 | 
expect(manager['_serviceUrl']).toBe('http://localhost:5273') 87 | }) 88 | 89 | it('should throw if service fails to start', async () => { 90 | vi.mocked(service.startService).mockResolvedValue(null) 91 | 92 | await expect(manager.startService()).rejects.toThrow('Failed to start the service') 93 | }) 94 | }) 95 | 96 | describe('init', () => { 97 | it('should initialize with a model', async () => { 98 | vi.mocked(service.startService).mockResolvedValue('http://localhost:5273') 99 | 100 | const mockModelInfo: FoundryModelInfo = { 101 | alias: 'model_alias', 102 | id: 'model_name', 103 | version: '1', 104 | runtime: ExecutionProvider.CPU, 105 | uri: 'azureml://registries/azureml/models/model_name/versions/1', 106 | modelSize: 10403, 107 | promptTemplate: { prompt: '<|start|>{Content}<|end|>' }, 108 | provider: 'AzureFoundry', 109 | publisher: 'Microsoft', 110 | license: 'MIT', 111 | task: 'chat-completion', 112 | } 113 | 114 | vi.mocked(manager.downloadModel).mockResolvedValue(undefined) 115 | vi.mocked(manager.loadModel).mockResolvedValue(mockModelInfo) 116 | 117 | const result = await manager.init('model_alias') 118 | 119 | expect(service.startService).toHaveBeenCalled() 120 | expect(manager.downloadModel).toHaveBeenCalledWith('model_alias') 121 | expect(manager.loadModel).toHaveBeenCalledWith('model_alias') 122 | expect(result).toEqual(mockModelInfo) 123 | }) 124 | 125 | it('should initialize without a model', async () => { 126 | vi.mocked(service.startService).mockResolvedValue('http://localhost:5273') 127 | 128 | const result = await manager.init(null) 129 | 130 | expect(service.startService).toHaveBeenCalled() 131 | expect(manager.downloadModel).not.toHaveBeenCalled() 132 | expect(manager.loadModel).not.toHaveBeenCalled() 133 | expect(result).toBeNull() 134 | }) 135 | }) 136 | }) 137 | -------------------------------------------------------------------------------- /sdk/js/test/service.test.ts: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | import { describe, expect, it, vi, beforeEach } from 'vitest' 5 | import * as service from '../src/service' 6 | import * as child_process from 'child_process' 7 | 8 | vi.mock('child_process', () => ({ 9 | exec: vi.fn(), 10 | execSync: vi.fn(), 11 | })) 12 | 13 | describe('Service', () => { 14 | beforeEach(() => { 15 | vi.restoreAllMocks() 16 | vi.resetAllMocks() 17 | }) 18 | 19 | describe('assertFoundryAvailable', () => { 20 | it('should not throw error if foundry is available on Windows', () => { 21 | vi.spyOn(process, 'platform', 'get').mockReturnValue('win32') 22 | vi.mocked(child_process.execSync).mockReturnValue(Buffer.from('C:\\foundry.exe')) 23 | 24 | expect(() => service.assertFoundryAvailable()).not.toThrow() 25 | expect(child_process.execSync).toHaveBeenCalledWith('where foundry') 26 | }) 27 | 28 | it('should not throw error if foundry is available on non-Windows', () => { 29 | vi.spyOn(process, 'platform', 'get').mockReturnValue('darwin') 30 | vi.mocked(child_process.execSync).mockReturnValue(Buffer.from('/usr/local/bin/foundry')) 31 | 32 | expect(() => service.assertFoundryAvailable()).not.toThrow() 33 | expect(child_process.execSync).toHaveBeenCalledWith('which foundry') 34 | }) 35 | 36 | it('should throw error if foundry is not available', () => { 37 | vi.mocked(child_process.execSync).mockImplementation(() => { 38 | throw new Error('Command failed') 39 | }) 40 | 41 | expect(() => service.assertFoundryAvailable()).toThrow('Foundry is not installed or not on PATH') 42 | }) 43 | }) 44 | 45 | describe('getServiceUrl', () => { 46 | it('should extract service URL from command output', async () => { 47 | vi.mocked(child_process.exec).mockImplementation((cmd, callback) => { 48 | callback(null, 'Model management service is running on http://localhost:5273/openai/status') 49 | }) 50 | 51 | const url = await service.getServiceUrl() 52 | expect(url).toBe('http://localhost:5273') 53 | }) 54 | 55 | it('should handle IP address in service URL', async () => { 56 | vi.mocked(child_process.exec).mockImplementation((cmd, callback) => { 57 | callback(null, 'Model management service is running on http://127.0.0.1:5273/openai/status') 58 | }) 59 | 60 | const url = await service.getServiceUrl() 61 | expect(url).toBe('http://127.0.0.1:5273') 62 | }) 63 | 64 | it('should return null if no URL is found in output', async () => { 65 | vi.mocked(child_process.exec).mockImplementation((cmd, callback) => { 66 | callback(null, 'Model management service is not running!') 67 | }) 68 | 69 | const url = await service.getServiceUrl() 70 | expect(url).toBeNull() 71 | }) 72 | 73 | it('should return null if command fails', async () => { 74 | vi.spyOn(console, 'error').mockImplementation(() => {}) 75 | vi.mocked(child_process.exec).mockImplementation((cmd, callback) => { 76 | callback(new Error('Command failed'), '') 77 | }) 78 | 79 | const url = await service.getServiceUrl() 80 | expect(url).toBeNull() 81 | expect(console.error).toHaveBeenCalled() 82 | }) 83 | }) 84 | 85 | describe('startService', () => { 86 | it('should return existing service URL if service is already running', async () => { 87 | vi.mocked(child_process.exec).mockImplementation((cmd, callback) => { 88 | callback(null, 'Model management service is running on http://localhost:5273/openai/status') 89 | }) 90 | 91 | const url = await service.startService() 92 | 93 | expect(url).toBe('http://localhost:5273') 94 | expect(child_process.exec).not.toHaveBeenCalledWith('foundry service start', expect.any(Function)) 95 | }) 96 | 97 | it('should 
start service and return URL after retry', async () => { 98 | vi.useFakeTimers() 99 | 100 | let callCount = 0 101 | vi.mocked(child_process.exec).mockImplementation((cmd, callback) => { 102 | if (cmd === 'foundry service status') { 103 | if (callCount++ === 0) { 104 | callback(null, 'Model management service is not running!') 105 | } else { 106 | callback(null, 'Model management service is running on http://localhost:5273/openai/status') 107 | } 108 | } else if (cmd === 'foundry service start') { 109 | callback(null) 110 | } 111 | }) 112 | 113 | const promise = service.startService() 114 | await vi.advanceTimersByTimeAsync(100) 115 | const url = await promise 116 | 117 | expect(url).toBe('http://localhost:5273') 118 | expect(child_process.exec).toHaveBeenCalledWith('foundry service start', expect.any(Function)) 119 | 120 | vi.useRealTimers() 121 | }) 122 | 123 | it('should return null if service fails to start after retries', async () => { 124 | vi.useFakeTimers() 125 | vi.spyOn(console, 'warn').mockImplementation(() => {}) 126 | 127 | // Always return status output with no URL 128 | vi.mocked(child_process.exec).mockImplementation((cmd, callback) => { 129 | if (cmd === 'foundry service status') { 130 | callback(null, 'Model management service is not running!') 131 | } else if (cmd === 'foundry service start') { 132 | callback(null) 133 | } 134 | }) 135 | 136 | const promise = service.startService() 137 | await vi.advanceTimersByTimeAsync(1000) // 10 * 100ms retry delay 138 | const url = await promise 139 | 140 | expect(url).toBeNull() 141 | expect(child_process.exec).toHaveBeenCalledWith('foundry service start', expect.any(Function)) 142 | expect(console.warn).toHaveBeenCalledWith( 143 | 'Foundry service did not start within the expected time. May not be running.', 144 | ) 145 | 146 | vi.useRealTimers() 147 | }) 148 | }) 149 | }) 150 | -------------------------------------------------------------------------------- /sdk/js/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "noImplicitAny": false, 4 | "noImplicitThis": true, 5 | "strictNullChecks": true, 6 | "esModuleInterop": true, 7 | "declaration": true, 8 | "declarationMap": true, 9 | "skipLibCheck": true, 10 | "strict": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "moduleResolution": "node", 13 | "module": "ES2022", 14 | "outDir": "./dist", 15 | "target": "ES6", 16 | "lib": ["es6", "es2018.asyncgenerator", "dom"] 17 | }, 18 | 19 | "ts-node": { 20 | "swc": true, 21 | "esm": true 22 | }, 23 | 24 | "include": ["./src/**/*.ts"], 25 | 26 | "exclude": ["node_modules"] 27 | } 28 | -------------------------------------------------------------------------------- /sdk/python/.lintrunner.toml: -------------------------------------------------------------------------------- 1 | # Configuration for lintrunner https://github.com/suo/lintrunner 2 | # You can install the dependencies and initialize with 3 | # 4 | # ```sh 5 | # pip install lintrunner lintrunner-adapters 6 | # lintrunner init 7 | # ``` 8 | # 9 | # This will install lintrunner on your system and download all the necessary 10 | # dependencies to run linters locally. 11 | # If you want to see what lintrunner init will install, run 12 | # `lintrunner init --dry-run`. 
13 | # 14 | # To lint local changes: 15 | # 16 | # ```bash 17 | # lintrunner 18 | # ``` 19 | # 20 | # To lint all files: 21 | # 22 | # ```bash 23 | # lintrunner --all-files 24 | # ``` 25 | # 26 | # To format files: 27 | # 28 | # ```bash 29 | # lintrunner f --all-files 30 | # ``` 31 | # 32 | # To read more about lintrunner, see [wiki](https://github.com/pytorch/pytorch/wiki/lintrunner). 33 | 34 | merge_base_with = 'main' 35 | 36 | [[linter]] 37 | code = 'RUFF' 38 | include_patterns = [ 39 | '**/*.py', 40 | '**/*.pyi', 41 | ] 42 | exclude_patterns = [ 43 | ] 44 | command = [ 45 | 'python', 46 | '-m', 47 | 'lintrunner_adapters', 48 | 'run', 49 | 'ruff_linter', 50 | '--config=pyproject.toml', 51 | '@{{PATHSFILE}}' 52 | ] 53 | init_command = [ 54 | 'python', 55 | '-m', 56 | 'lintrunner_adapters', 57 | 'run', 58 | 'pip_init', 59 | '--dry-run={{DRYRUN}}', 60 | '--requirement=requirements-lintrunner.txt', 61 | ] 62 | is_formatter = true 63 | 64 | 65 | [[linter]] 66 | code = 'RUFF-FORMAT' 67 | include_patterns = [ 68 | '**/*.py', 69 | ] 70 | exclude_patterns = [ 71 | ] 72 | command = [ 73 | 'python', 74 | '-m', 75 | 'lintrunner_adapters', 76 | 'run', 77 | 'ruff_format_linter', 78 | '--', 79 | '@{{PATHSFILE}}' 80 | ] 81 | init_command = [ 82 | 'python', 83 | '-m', 84 | 'lintrunner_adapters', 85 | 'run', 86 | 'pip_init', 87 | '--dry-run={{DRYRUN}}', 88 | '--requirement=requirements-lintrunner.txt', 89 | ] 90 | is_formatter = true 91 | 92 | [[linter]] 93 | code = 'PYLINT' 94 | include_patterns = [ 95 | '**/*.py', 96 | ] 97 | exclude_patterns = [ 98 | ] 99 | command = [ 100 | 'python', 101 | '-m', 102 | 'lintrunner_adapters', 103 | 'run', 104 | 'pylint_linter', 105 | '--rcfile=pyproject.toml', 106 | '--show-disable', 107 | '--', 108 | '@{{PATHSFILE}}' 109 | ] 110 | init_command = [ 111 | 'python', 112 | '-m', 113 | 'lintrunner_adapters', 114 | 'run', 115 | 'pip_init', 116 | '--dry-run={{DRYRUN}}', 117 | '--requirement=requirements-lintrunner.txt', 118 | ] 119 | 120 | [[linter]] 121 | code = 'EDITORCONFIG-CHECKER' 122 | include_patterns = ['**'] 123 | exclude_patterns = [ 124 | ] 125 | command = [ 126 | 'python', 127 | '-m', 128 | 'lintrunner_adapters', 129 | 'run', 130 | 'editorconfig_checker_linter', 131 | '--', 132 | '@{{PATHSFILE}}' 133 | ] 134 | init_command = [ 135 | 'python', 136 | '-m', 137 | 'lintrunner_adapters', 138 | 'run', 139 | 'pip_init', 140 | '--dry-run={{DRYRUN}}', 141 | '--requirement=requirements-lintrunner.txt', 142 | ] 143 | -------------------------------------------------------------------------------- /sdk/python/LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /sdk/python/README.md: -------------------------------------------------------------------------------- 1 | # Foundry Local Python SDK 2 | The Foundry Local SDK simplifies AI model management in local environments by providing control-plane operations separate from data-plane inferencing code. 3 | 4 | ## Prerequisites 5 | Foundry Local must be installed and findable in your PATH. 6 | 7 | ## Getting Started 8 | ``` 9 | pip install foundry-local-sdk 10 | ``` 11 | 12 | ## Usage 13 | 14 | The SDK provides a simple interface to interact with the Foundry Local API. You can use it to manage models, check the status of the service, and make requests to the models. 15 | 16 | ### Bootstrapping 17 | 18 | The SDK can *bootstrap* Foundry Local, which will initiate the following sequence: 19 | 20 | 1. Start the Foundry Local service, if it is not already running. 21 | 1. Automatically detect the hardware and software requirements for the model. 22 | 1. Download the highest-performance model for the detected hardware, if it is not already downloaded. 23 | 1. Load the model into memory. 24 | 25 | To use the SDK with bootstrapping, you can use the following code: 26 | 27 | ```python 28 | from foundry_local import FoundryLocalManager 29 | 30 | # Provide the alias of the model you want to use 31 | # The alias is a string that identifies the model in the Foundry Local catalog. 32 | # The alias can be found in the Foundry Local catalog. 33 | # For example, "phi-3.5-mini" is the alias for the Phi 3.5 model. 34 | # Foundry local will automatically download the most suitable model for your hardware. 35 | alias = "phi-3.5-mini" 36 | fl_manager = FoundryLocalManager(alias) 37 | 38 | # check that the service is running 39 | print(fl_manager.is_service_running()) 40 | 41 | # list all available models in the catalog 42 | print(fl_manager.list_catalog_models()) 43 | 44 | # list all downloaded models 45 | print(fl_manager.list_cached_models()) 46 | 47 | # get information on the selected model 48 | model_info = fl_manager.get_model_info(alias) 49 | print(model_info) 50 | ``` 51 | 52 | Alternatively, you can use the `FoundryLocalManager` class to manage the service and models manually. This is useful if you want to control the service and models without bootstrapping. For example, you want to present to the end user what is happening in the background. 53 | 54 | ```python 55 | from foundry_local import FoundryLocalManager 56 | 57 | alias = "phi-3.5-mini" 58 | fl_manager = FoundryLocalManager() 59 | 60 | # start the service 61 | fl_manager.start_service() 62 | 63 | # download the model 64 | fl_manager.download_model(alias) 65 | 66 | # load the model 67 | model_info = fl_manager.load_model(alias) 68 | print(model_info) 69 | ``` 70 | 71 | Use the foundry local endpoint with an OpenAI compatible API client. For example, using the `openai` package: 72 | 73 | ```python 74 | import openai 75 | from foundry_local import FoundryLocalManager 76 | 77 | # By using an alias, the most suitable model will be downloaded 78 | # to your end-user's device. 79 | alias = "phi-3.5-mini" 80 | 81 | # Create a FoundryLocalManager instance. 
This will start the Foundry 82 | # Local service if it is not already running and load the specified model. 83 | manager = FoundryLocalManager(alias) 84 | 85 | # The remaining code uses the OpenAI Python SDK to interact with the local model. 86 | 87 | # Configure the client to use the local Foundry service 88 | client = openai.OpenAI( 89 | base_url=manager.endpoint, 90 | api_key=manager.api_key # API key is not required for local usage 91 | ) 92 | 93 | # Set the model to use and generate a streaming response 94 | stream = client.chat.completions.create( 95 | model=manager.get_model_info(alias).id, 96 | messages=[{"role": "user", "content": "What is the golden ratio?"}], 97 | stream=True 98 | ) 99 | 100 | # Print the streaming response 101 | for chunk in stream: 102 | if chunk.choices[0].delta.content is not None: 103 | print(chunk.choices[0].delta.content, end="", flush=True) 104 | ``` 105 | -------------------------------------------------------------------------------- /sdk/python/foundry_local/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import logging 6 | import sys 7 | 8 | from foundry_local.api import FoundryLocalManager 9 | 10 | _logger = logging.getLogger(__name__) 11 | _logger.setLevel(logging.WARNING) 12 | 13 | _sc = logging.StreamHandler(stream=sys.stdout) 14 | _formatter = logging.Formatter( 15 | "[foundry-local] | %(asctime)s | %(levelname)-8s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S" 16 | ) 17 | _sc.setFormatter(_formatter) 18 | _logger.addHandler(_sc) 19 | _logger.propagate = False 20 | 21 | __all__ = ["FoundryLocalManager"] 22 | 23 | __version__ = "0.3.1" 24 | -------------------------------------------------------------------------------- /sdk/python/foundry_local/client.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from __future__ import annotations 6 | 7 | import json 8 | import logging 9 | import re 10 | 11 | import httpx 12 | from tqdm import tqdm 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class HttpResponseError(Exception): 18 | pass 19 | 20 | 21 | class HttpxClient: 22 | """ 23 | Client for Foundry Local SDK. 24 | 25 | Attributes: 26 | _client (httpx.Client): HTTP client instance. 27 | """ 28 | 29 | def __init__(self, host: str, timeout: float | httpx.Timeout | None = None) -> None: 30 | """ 31 | Initialize the HttpxClient with the host. 32 | 33 | Args: 34 | host (str): Base URL of the host. 35 | timeout (float | httpx.Timeout | None): Timeout for the HTTP client. 36 | """ 37 | self._client = httpx.Client(base_url=host, timeout=timeout) 38 | 39 | def _request(self, *args, **kwargs) -> httpx.Response: 40 | """ 41 | Send an HTTP request. 42 | 43 | Args: 44 | *args: Positional arguments for the request. 45 | **kwargs: Keyword arguments for the request. 46 | 47 | Returns: 48 | httpx.Response: HTTP response object. 49 | 50 | Raises: 51 | HttpResponseError: If an HTTP error status is returned; ConnectionError if the service cannot be reached.
52 | """ 53 | try: 54 | response = self._client.request(*args, **kwargs) 55 | response.raise_for_status() 56 | except httpx.HTTPStatusError as e: 57 | raise HttpResponseError(f"{e.response.status_code} - {e.response.text}") from None 58 | except httpx.ConnectError: 59 | raise ConnectionError( 60 | "Could not connect to Foundry Local! Please check if the Foundry Local service is running and the host" 61 | " URL is correct." 62 | ) from None 63 | return response 64 | 65 | def get(self, path: str, query_params: dict[str, str] | None = None) -> dict | list | None: 66 | """ 67 | Send a GET request to the specified path with optional query parameters. 68 | 69 | Args: 70 | path (str): Path for the GET request. 71 | query_params (dict[str, str] | None): Query parameters for the request. 72 | 73 | Returns: 74 | dict | list | None: JSON response or None if no content. 75 | """ 76 | response = self._request("GET", path, params=query_params) 77 | return response.json() if response.text else None 78 | 79 | def post_with_progress(self, path: str, body: dict | None = None) -> dict: 80 | """ 81 | Send a POST request to the specified path with optional request body and show progress. 82 | 83 | Args: 84 | path (str): Path for the POST request. 85 | body (dict | None): Request body in JSON format. 86 | 87 | Returns: 88 | dict: JSON response. 89 | 90 | Raises: 91 | ValueError: If the JSON response is invalid. 92 | """ 93 | with self._client.stream("POST", path, json=body, timeout=None) as response: 94 | progress_bar = None 95 | prev_percent = 0.0 96 | if logger.isEnabledFor(logging.INFO): 97 | progress_bar = tqdm(total=100.0) 98 | final_json = "" 99 | for line in response.iter_lines(): 100 | if final_json or line.startswith("{"): 101 | final_json += line 102 | continue 103 | if not progress_bar: 104 | continue 105 | if match := re.search(r"(\d+(?:\.\d+)?)%", line): 106 | percent = min(float(match.group(1)), 100.0) 107 | delta = percent - prev_percent 108 | if delta > 0: 109 | progress_bar.update(delta) 110 | prev_percent = percent 111 | if progress_bar: 112 | progress_bar.close() 113 | 114 | if not final_json.endswith("}"): 115 | raise ValueError(f"Invalid JSON response: {final_json}") 116 | 117 | return json.loads(final_json) 118 | -------------------------------------------------------------------------------- /sdk/python/foundry_local/logging.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import logging 6 | 7 | 8 | def get_logger(): 9 | """ 10 | Get the logger for the sdk. 11 | 12 | Returns: 13 | logging.Logger: Logger instance for the module. 14 | """ 15 | return logging.getLogger(__name__.split(".", maxsplit=1)[0]) 16 | 17 | 18 | def set_verbosity(verbose): 19 | """ 20 | Set the verbosity level for the logger. 21 | 22 | Args: 23 | verbose (int): Verbosity level (e.g., logging.INFO, logging.DEBUG). 
24 | """ 25 | get_logger().setLevel(verbose) 26 | 27 | 28 | def set_verbosity_info(): 29 | """Set the verbosity level to INFO.""" 30 | set_verbosity(logging.INFO) 31 | 32 | 33 | def set_verbosity_warning(): 34 | """Set the verbosity level to WARNING.""" 35 | set_verbosity(logging.WARNING) 36 | 37 | 38 | def set_verbosity_debug(): 39 | """Set the verbosity level to DEBUG.""" 40 | set_verbosity(logging.DEBUG) 41 | 42 | 43 | def set_verbosity_error(): 44 | """Set the verbosity level to ERROR.""" 45 | set_verbosity(logging.ERROR) 46 | 47 | 48 | def set_verbosity_critical(): 49 | """Set the verbosity level to CRITICAL.""" 50 | set_verbosity(logging.CRITICAL) 51 | 52 | 53 | def get_verbosity() -> int: 54 | """ 55 | Get the current verbosity level of the logger. 56 | 57 | Returns: 58 | int: Verbosity level as an integer. 59 | """ 60 | return get_logger().getEffectiveLevel() 61 | 62 | 63 | def get_logger_level(level): 64 | """ 65 | Get Python logging level for the integer level. 66 | 67 | Args: 68 | level (int): Verbosity level (0: DEBUG, 1: INFO, 2: WARNING, 3: ERROR, 4: CRITICAL). 69 | 70 | Returns: 71 | int: Corresponding Python logging level. 72 | 73 | Raises: 74 | ValueError: If the level is invalid. 75 | """ 76 | level_map = {0: logging.DEBUG, 1: logging.INFO, 2: logging.WARNING, 3: logging.ERROR, 4: logging.CRITICAL} 77 | # check if level is valid 78 | if level not in level_map: 79 | raise ValueError(f"Invalid level {level}, should be one of {list(level_map.keys())}") 80 | 81 | return level_map[level] 82 | 83 | 84 | def set_default_logger_severity(level): 85 | """ 86 | Set the default log level for the logger. 87 | 88 | Args: 89 | level (int): Verbosity level (0: DEBUG, 1: INFO, 2: WARNING, 3: ERROR, 4: CRITICAL). 90 | """ 91 | # set logger level 92 | set_verbosity(get_logger_level(level)) 93 | -------------------------------------------------------------------------------- /sdk/python/foundry_local/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from __future__ import annotations 6 | 7 | import sys 8 | 9 | from pydantic import BaseModel, Field 10 | 11 | if sys.version_info >= (3, 11): 12 | from enum import StrEnum 13 | 14 | else: 15 | from enum import Enum 16 | 17 | class StrEnum(str, Enum): 18 | def __str__(self) -> str: 19 | return self.value 20 | 21 | 22 | # ruff: noqa: N815 23 | 24 | 25 | class DeviceType(StrEnum): 26 | """Enumeration of devices supported by the model.""" 27 | 28 | CPU = "CPU" 29 | GPU = "GPU" 30 | NPU = "NPU" 31 | 32 | 33 | class ExecutionProvider(StrEnum): 34 | """Enumeration of execution providers supported by the model.""" 35 | 36 | CPU = "CPUExecutionProvider" 37 | WEBGPU = "WebGpuExecutionProvider" 38 | CUDA = "CUDAExecutionProvider" 39 | QNN = "QNNExecutionProvider" 40 | 41 | def get_alias(self) -> str: 42 | """ 43 | Get the alias for the execution provider. 44 | 45 | Returns: 46 | str: Alias of the execution provider. 
47 | """ 48 | return self.value.replace("ExecutionProvider", "").lower() 49 | 50 | 51 | class ModelRuntime(BaseModel): 52 | """Model runtime information.""" 53 | 54 | deviceType: DeviceType = Field(..., description="Device type supported by the model") 55 | executionProvider: ExecutionProvider = Field( 56 | ..., 57 | description="Execution provider supported by the model", 58 | ) 59 | 60 | 61 | class FoundryListResponseModel(BaseModel): 62 | """Response model for listing models.""" 63 | 64 | name: str = Field(..., description="Name of the model") 65 | displayName: str = Field(..., description="Display name of the model") 66 | modelType: str = Field(..., description="Type of the model") 67 | providerType: str = Field(..., description="Provider type of the model") 68 | uri: str = Field(..., description="URI of the model") 69 | version: str = Field(..., description="Version of the model") 70 | promptTemplate: dict = Field(..., description="Prompt template for the model") 71 | publisher: str = Field(..., description="Publisher of the model") 72 | task: str = Field(..., description="Task of the model") 73 | runtime: ModelRuntime = Field(..., description="Runtime information of the model") 74 | fileSizeMb: int = Field(..., description="File size of the model in MB") 75 | modelSettings: dict = Field(..., description="Model settings") 76 | alias: str = Field(..., description="Alias name of the model") 77 | supportsToolCalling: bool = Field(..., description="Whether the model supports tool calling") 78 | license: str = Field(..., description="License of the model") 79 | licenseDescription: str = Field(..., description="License description of the model") 80 | parentModelUri: str = Field(..., description="Parent model URI of the model") 81 | 82 | 83 | class FoundryModelInfo(BaseModel): 84 | """Model information.""" 85 | 86 | alias: str = Field(..., description="Alias of the model") 87 | id: str = Field(..., description="Unique identifier of the model") 88 | version: str = Field(..., description="Version of the model") 89 | runtime: ExecutionProvider = Field(..., description="Execution provider of the model") 90 | uri: str = Field(..., description="URI of the model") 91 | file_size_mb: int = Field(..., description="Size of the model on disk in MB") 92 | prompt_template: dict = Field(..., description="Prompt template for the model") 93 | provider: str = Field(..., description="Provider of the model") 94 | publisher: str = Field(..., description="Publisher of the model") 95 | license: str = Field(..., description="License of the model") 96 | task: str = Field(..., description="Task of the model") 97 | 98 | def __repr__(self) -> str: 99 | return ( 100 | f"FoundryModelInfo(alias={self.alias}, id={self.id}, runtime={self.runtime.get_alias()}," 101 | f" file_size={self.file_size_mb} MB, license={self.license})" 102 | ) 103 | 104 | @classmethod 105 | def from_list_response(cls, response: dict | FoundryListResponseModel) -> FoundryModelInfo: 106 | """ 107 | Create a FoundryModelInfo object from a FoundryListResponseModel object. 108 | 109 | Args: 110 | response (dict | FoundryListResponseModel): Response data. 111 | 112 | Returns: 113 | FoundryModelInfo: Instance of FoundryModelInfo. 
114 | """ 115 | if isinstance(response, dict): 116 | response = FoundryListResponseModel.model_validate(response) 117 | return cls( 118 | alias=response.alias, 119 | id=response.name, 120 | version=response.version, 121 | runtime=response.runtime.executionProvider, 122 | uri=response.uri, 123 | file_size_mb=response.fileSizeMb, 124 | prompt_template=response.promptTemplate, 125 | provider=response.providerType, 126 | publisher=response.publisher, 127 | license=response.license, 128 | task=response.task, 129 | ) 130 | 131 | def to_download_body(self) -> dict: 132 | """ 133 | Convert the FoundryModelInfo object to a dictionary for download. 134 | 135 | Returns: 136 | dict: Dictionary representation for download. 137 | """ 138 | return { 139 | "Name": self.id, 140 | "Uri": self.uri, 141 | "Publisher": self.publisher, 142 | "ProviderType": f"{self.provider}Local" if self.provider == "AzureFoundry" else self.provider, 143 | "PromptTemplate": self.prompt_template, 144 | } 145 | -------------------------------------------------------------------------------- /sdk/python/foundry_local/service.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from __future__ import annotations 6 | 7 | import logging 8 | import re 9 | import shutil 10 | import subprocess 11 | import time 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | def assert_foundry_installed(): 17 | """ 18 | Assert that Foundry is installed. 19 | 20 | Raises: 21 | RuntimeError: If Foundry is not installed or not on PATH. 22 | """ 23 | if shutil.which("foundry") is None: 24 | raise RuntimeError("Foundry is not installed or not on PATH!") 25 | 26 | 27 | def get_service_uri() -> str | None: 28 | """ 29 | Get the service URI if the service is running. 30 | 31 | Returns: 32 | str | None: URI of the running Foundry service, or None if not running. 33 | """ 34 | with subprocess.Popen(["foundry", "service", "status"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc: 35 | stdout, _ = proc.communicate() 36 | match = re.search(r"http://(?:[a-zA-Z0-9.-]+|\d{1,3}(\.\d{1,3}){3}):\d+", stdout.decode()) 37 | if match: 38 | return match.group(0) 39 | return None 40 | 41 | 42 | def start_service() -> str | None: 43 | """ 44 | Start the Foundry service. 45 | 46 | Returns: 47 | str | None: URI of the started Foundry service, or None if it failed to start. 48 | """ 49 | if (service_url := get_service_uri()) is not None: 50 | logger.info("Foundry service is already running at %s", service_url) 51 | return service_url 52 | 53 | with subprocess.Popen(["foundry", "service", "start"]): 54 | # not checking the process output since it never finishes communication 55 | for _ in range(10): 56 | if (service_url := get_service_uri()) is not None: 57 | logger.info("Foundry service started successfully at %s", service_url) 58 | return service_url 59 | time.sleep(0.1) 60 | logger.warning("Foundry service did not start within the expected time. 
May not be running.") 61 | return None 62 | -------------------------------------------------------------------------------- /sdk/python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.pylint.messages_control] 2 | disable = [ 3 | "consider-using-from-import", 4 | "cyclic-import", 5 | "duplicate-code", 6 | "fixme", 7 | "format", 8 | "import-error", 9 | "invalid-name", 10 | "line-too-long", 11 | "missing-docstring", 12 | "no-else-return", 13 | "no-member", 14 | "no-name-in-module", 15 | "too-few-public-methods", 16 | "too-many-ancestors", 17 | "too-many-arguments", 18 | "too-many-branches", 19 | "too-many-instance-attributes", 20 | "too-many-lines", 21 | "too-many-locals", 22 | "too-many-positional-arguments", 23 | "too-many-public-methods", 24 | "too-many-return-statements", 25 | "too-many-statements", 26 | "unnecessary-ellipsis" 27 | ] 28 | 29 | [tool.ruff] 30 | line-length = 120 31 | target-version = "py39" 32 | 33 | [tool.ruff.lint] 34 | select = [ 35 | "B", # flake8-bugbear 36 | "C4", # flake8-comprehensions 37 | "D", # pydocstyle 38 | "E", # pycodestyle 39 | "F", # Pyflakes 40 | "G", # flake8-logging-format 41 | "I", # isort 42 | "ISC", # flake8-implicit-str-concat 43 | "LOG", # flake8-logging 44 | "N", # pep8-naming 45 | "NPY", # modern numpy 46 | "PERF", # Perflint 47 | "PIE", # flake8-pie 48 | "PYI", # flake8-pyi 49 | "RUF", # Ruff-specific rules 50 | "SIM", # flake8-simplify 51 | "SLOT", # flake8-slot 52 | "T10", # flake8-debugger 53 | "TID", # Disallow relative imports 54 | "TRY", # flake8-try-except-raise 55 | "UP", # pyupgrade 56 | "W", # pycodestyle 57 | "YTT", # flake8-2020 58 | ] 59 | ignore = [ 60 | "B9", # Opinionated bugbear rules 61 | "C408", # Sometimes it is preferable when we construct kwargs 62 | "D1", # D1 is for missing docstrings, which is not yet enforced. 63 | "D202", # D202 Too strict. "No blank lines allowed after function docstring" 64 | "D205", # D205 Too strict. "1 blank line required between summary line and description" 65 | "D212", 66 | "D400", 67 | "D401", # First line of docstring should be in imperative mood 68 | "D415", # D415 Not yet enforced. "First line should end with a period, question mark, or exclamation point" 69 | "E1", "E2", "E3", # Pycodestyle formatting rules that conflicts with the formatter 70 | "E501", # Line length. Not enforced because black will handle formatting 71 | "SIM103", # "Return the condition directly" obscures logic sometimes 72 | "N802", # Nxx: ONNX Script function sometimes use upper case for names. 
73 | "N803", 74 | "N806", 75 | "N999", # Invalid module name 76 | "NPY002", # We may not always need a generator 77 | "PERF203", # try-except in loops sometimes necessary 78 | "PERF401", # List comprehension is not always readable 79 | "PYI041", # int | float is more clear 80 | "RUF022", # We don't need to sort __all__ for elements to be grouped 81 | "RUF027", # This is used for creating rest API urls 82 | "RUF031", # Parentheses for tuple in subscripts is more readable 83 | "RUF052", # Variables with `_` prefix may not be dummy variables in all cases 84 | "SIM102", # Collapible if statements are not always more readable 85 | "SIM108", # We don't always encourage ternary operators 86 | "SIM114", # Don't always combine if branches for debugability 87 | "SIM116", # Don't use dict lookup to replace if-else 88 | "TRY002", # Create your own exception 89 | "TRY003", # Messages can be constructed in the exception 90 | "UP006", # keep-runtime-typing 91 | "UP007", # keep-runtime-typing 92 | "UP045", # TODO: Support new style type annotations 93 | ] 94 | ignore-init-module-imports = true 95 | 96 | [tool.ruff.lint.flake8-tidy-imports] 97 | # Disallow all relative imports. 98 | ban-relative-imports = "all" 99 | 100 | [tool.ruff.lint.pydocstyle] 101 | convention = "google" 102 | -------------------------------------------------------------------------------- /sdk/python/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | build 3 | coverage 4 | pytest 5 | -------------------------------------------------------------------------------- /sdk/python/requirements-lintrunner.txt: -------------------------------------------------------------------------------- 1 | lintrunner==0.12.7 2 | lintrunner-adapters>=0.8.0 3 | # RUFF, RUFF-FIX 4 | ruff==0.11.4 5 | # PYLINT 6 | pylint==3.3.6 7 | # EDITORCONFIG-CHECKER 8 | editorconfig-checker==3.2.0 -------------------------------------------------------------------------------- /sdk/python/requirements.txt: -------------------------------------------------------------------------------- 1 | httpx 2 | pydantic>=2.0.0 3 | tqdm 4 | -------------------------------------------------------------------------------- /sdk/python/setup.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from pathlib import Path 6 | 7 | from setuptools import find_packages, setup 8 | 9 | 10 | def read(rel_path: str) -> str: 11 | """Read a file and return its contents. 12 | 13 | Args: 14 | rel_path (str): Relative path to the file. 15 | 16 | Returns: 17 | str: Contents of the file. 
18 | """ 19 | this_dir = Path(__file__).parent 20 | with (this_dir / rel_path).open(encoding="utf-8") as fp: 21 | return fp.read() 22 | 23 | 24 | def get_version(rel_path): 25 | for line in read(rel_path).splitlines(): 26 | if line.startswith("__version__"): 27 | delim = '"' if '"' in line else "'" 28 | return line.split(delim)[1] 29 | raise RuntimeError("Unable to find version string.") 30 | 31 | 32 | VERSION = get_version("foundry_local/__init__.py") 33 | 34 | requirements = read("requirements.txt").splitlines() 35 | 36 | CLASSIFIERS = [ 37 | "Development Status :: 3 - Alpha", 38 | "Intended Audience :: Developers", 39 | "Topic :: Scientific/Engineering", 40 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 41 | "Topic :: Software Development", 42 | "Topic :: Software Development :: Libraries", 43 | "Topic :: Software Development :: Libraries :: Python Modules", 44 | "Programming Language :: Python", 45 | "Programming Language :: Python :: 3 :: Only", 46 | "Programming Language :: Python :: 3.9", 47 | "Programming Language :: Python :: 3.10", 48 | "Programming Language :: Python :: 3.11", 49 | "Programming Language :: Python :: 3.12", 50 | "Programming Language :: Python :: 3.13", 51 | ] 52 | 53 | description = "Foundry Local Manager Python SDK: Control-plane SDK for Foundry Local." 54 | 55 | long_description = read("README.md") 56 | 57 | setup( 58 | name="foundry-local-sdk", 59 | version=VERSION, 60 | description=description, 61 | long_description=long_description, 62 | long_description_content_type="text/markdown", 63 | author="Microsoft Corporation", 64 | author_email="foundrylocaldevs@microsoft.com", 65 | license="MIT License", 66 | license_files=["LICENSE.txt"], 67 | classifiers=CLASSIFIERS, 68 | url="https://github.com/microsoft/Foundry-Local", 69 | packages=find_packages(include=["foundry_local"]), 70 | python_requires=">=3.9", 71 | install_requires=requirements, 72 | include_package_data=False, 73 | ) 74 | -------------------------------------------------------------------------------- /sdk/python/test/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /sdk/python/test/test_client.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | from unittest import mock 6 | 7 | import httpx 8 | import pytest 9 | from foundry_local.client import HttpResponseError, HttpxClient 10 | 11 | 12 | def test_initialization(): 13 | """Test initialization of HttpxClient.""" 14 | with mock.patch("httpx.Client") as mock_client: 15 | HttpxClient("http://localhost:5273") 16 | mock_client.assert_called_once_with(base_url="http://localhost:5273", timeout=None) 17 | 18 | # Test with timeout 19 | HttpxClient("http://localhost:5273", timeout=30.0) 20 | mock_client.assert_called_with(base_url="http://localhost:5273", timeout=30.0) 21 | 22 | 23 | # pylint: disable=protected-access 24 | def test_request(): 25 | """Test _request method.""" 26 | with mock.patch("httpx.Client") as mock_client: 27 | mock_instance = mock_client.return_value 28 | client = HttpxClient("http://localhost:5273") 29 | 30 | # Test successful request 31 | mock_response = mock.MagicMock() 32 | mock_response.raise_for_status.return_value = None 33 | mock_instance.request.return_value = mock_response 34 | 35 | result = client._request("GET", "/test") 36 | mock_instance.request.assert_called_once_with("GET", "/test") 37 | assert result == mock_response 38 | 39 | # Test HTTP error 40 | mock_response = mock.MagicMock() 41 | mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( 42 | "Error", request=mock.MagicMock(), response=mock.MagicMock() 43 | ) 44 | mock_response.status_code = 404 45 | mock_response.text = "Not Found" 46 | mock_instance.request.return_value = mock_response 47 | 48 | with pytest.raises(HttpResponseError): 49 | client._request("GET", "/test") 50 | 51 | # Test connection error 52 | mock_instance.request.side_effect = httpx.ConnectError("Connection failed") 53 | 54 | with pytest.raises(ConnectionError, match="Could not connect to Foundry Local"): 55 | client._request("GET", "/test") 56 | 57 | 58 | def test_get(): 59 | """Test get method.""" 60 | with mock.patch.object(HttpxClient, "_request") as mock_request: 61 | client = HttpxClient("http://localhost:5273") 62 | 63 | # Test with JSON response 64 | mock_response = mock.MagicMock() 65 | mock_response.text = '{"key": "value"}' 66 | mock_response.json.return_value = {"key": "value"} 67 | mock_request.return_value = mock_response 68 | 69 | result = client.get("/test") 70 | mock_request.assert_called_once_with("GET", "/test", params=None) 71 | assert result == {"key": "value"} 72 | 73 | # Test with empty response 74 | mock_response.text = "" 75 | result = client.get("/test") 76 | assert result is None 77 | 78 | # Test with query parameters 79 | mock_response.text = '{"key": "value"}' 80 | result = client.get("/test", query_params={"param": "value"}) 81 | mock_request.assert_called_with("GET", "/test", params={"param": "value"}) 82 | assert result == {"key": "value"} 83 | 84 | 85 | def test_post_with_progress(): 86 | """Test post_with_progress method.""" 87 | with ( 88 | mock.patch("httpx.Client") as mock_client, 89 | mock.patch("foundry_local.client.tqdm") as mock_tqdm, 90 | mock.patch("foundry_local.client.logger") as mock_logger, 91 | ): 92 | mock_instance = mock_client.return_value 93 | mock_logger_instance = mock.MagicMock() 94 | mock_logger_instance.isEnabledFor.return_value = True 95 | mock_logger.return_value = mock_logger_instance 96 | 97 | # Mock the stream context manager 98 | mock_response = mock.MagicMock() 99 | mock_instance.stream.return_value.__enter__.return_value = mock_response 100 | 101 | # Mock tqdm progress 
bar 102 | mock_progress_bar = mock.MagicMock() 103 | mock_tqdm.return_value = mock_progress_bar 104 | 105 | # Mock iter_lines to return progress and JSON 106 | mock_response.iter_lines.return_value = [ 107 | "Progress: 10.1%", 108 | "Progress: 50%", 109 | "Progress: 100%", 110 | '{"status": "complete"}', 111 | ] 112 | 113 | client = HttpxClient("http://localhost:5273") 114 | result = client.post_with_progress("/test", body={"key": "value"}) 115 | 116 | mock_instance.stream.assert_called_once_with("POST", "/test", json={"key": "value"}, timeout=None) 117 | mock_progress_bar.update.assert_any_call(10.1) # 10.1% - 0% 118 | mock_progress_bar.update.assert_any_call(39.9) # 50% - 10.1% 119 | mock_progress_bar.update.assert_any_call(50.0) # 100% - 50% 120 | mock_progress_bar.close.assert_called_once() 121 | assert result == {"status": "complete"} 122 | 123 | # Test invalid JSON response 124 | mock_response.iter_lines.return_value = ["Progress: 100%", '{"status": "incomplete'] 125 | 126 | with pytest.raises(ValueError, match="Invalid JSON response"): 127 | client.post_with_progress("/test", body={"key": "value"}) 128 | 129 | # Test with no progress bar (logging disabled) 130 | mock_logger_instance.isEnabledFor.return_value = False 131 | mock_response.iter_lines.return_value = ['{"status": "complete"}'] 132 | 133 | result = client.post_with_progress("/test", body={"key": "value"}) 134 | assert result == {"status": "complete"} 135 | -------------------------------------------------------------------------------- /sdk/python/test/test_service.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | from unittest import mock 6 | 7 | import pytest 8 | from foundry_local.service import assert_foundry_installed, get_service_uri, start_service 9 | 10 | 11 | def test_assert_foundry_installed(): 12 | """Test checking if foundry is installed.""" 13 | # Test when foundry is installed 14 | with mock.patch("shutil.which") as mock_which: 15 | mock_which.return_value = "/usr/local/bin/foundry" 16 | assert_foundry_installed() # Should not raise an exception 17 | 18 | # Test when foundry is not installed 19 | with mock.patch("shutil.which") as mock_which: 20 | mock_which.return_value = None 21 | with pytest.raises(RuntimeError, match="Foundry is not installed or not on PATH!"): 22 | assert_foundry_installed() 23 | 24 | 25 | def test_get_service_uri(): 26 | """Test getting service URI.""" 27 | # Test when service is running 28 | with mock.patch("subprocess.Popen") as mock_popen: 29 | mock_process = mock.MagicMock() 30 | mock_process.communicate.return_value = ( 31 | b"Model management service is running on http://localhost:5273/openai/status", 32 | b"", 33 | ) 34 | mock_popen.return_value.__enter__.return_value = mock_process 35 | assert get_service_uri() == "http://localhost:5273" 36 | 37 | # Test when service is not running 38 | with mock.patch("subprocess.Popen") as mock_popen: 39 | mock_process = mock.MagicMock() 40 | mock_process.communicate.return_value = (b"Model management service is not running!", b"") 41 | mock_popen.return_value.__enter__.return_value = mock_process 42 | assert get_service_uri() is None 43 | 44 | # Test with IPv4 address 45 | with mock.patch("subprocess.Popen") as mock_popen: 46 | mock_process = mock.MagicMock() 47 | mock_process.communicate.return_value = ( 48 | b"Model management service is running on http://127.0.0.1:5273/openai/status", 49 | b"", 50 | ) 51 | mock_popen.return_value.__enter__.return_value = mock_process 52 | assert get_service_uri() == "http://127.0.0.1:5273" 53 | 54 | 55 | def test_start_service(): 56 | """Test starting the service.""" 57 | # Test when service is already running 58 | with mock.patch("foundry_local.service.get_service_uri") as mock_get_uri: 59 | mock_get_uri.return_value = "http://localhost:5273" 60 | assert start_service() == "http://localhost:5273" 61 | mock_get_uri.assert_called_once() 62 | 63 | # Test when service needs to be started and starts successfully 64 | with ( 65 | mock.patch("foundry_local.service.get_service_uri") as mock_get_uri, 66 | mock.patch("subprocess.Popen") as mock_popen, 67 | mock.patch("time.sleep") as mock_sleep, 68 | ): 69 | # First call returns None (not running), second call returns URI (started) 70 | mock_get_uri.side_effect = [None, "http://localhost:5273"] 71 | assert start_service() == "http://localhost:5273" 72 | assert mock_get_uri.call_count == 2 73 | mock_popen.assert_called_once_with(["foundry", "service", "start"]) 74 | mock_sleep.assert_not_called() 75 | 76 | # Test when service fails to start 77 | with ( 78 | mock.patch("foundry_local.service.get_service_uri") as mock_get_uri, 79 | mock.patch("subprocess.Popen") as mock_popen, 80 | mock.patch("foundry_local.service.time.sleep") as mock_sleep, 81 | ): 82 | # Always return None (never starts) 83 | num_sleeps = 10 84 | mock_get_uri.return_value = None 85 | assert start_service() is None 86 | assert mock_get_uri.call_count == num_sleeps + 1 87 | mock_popen.assert_called_once_with(["foundry", "service", "start"]) 88 | assert mock_sleep.call_count == num_sleeps 89 | 
-------------------------------------------------------------------------------- /sdk/rust/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "foundry-local" 3 | version = "0.1.0" 4 | edition = "2021" 5 | description = "SDK for Microsoft Foundry Local service" 6 | license = "MIT" 7 | repository = "https://github.com/microsoft/Foundry-Local" 8 | readme = "README.md" 9 | 10 | [features] 11 | integration-tests = [] 12 | 13 | [dependencies] 14 | reqwest = { version = "0.11", features = ["json", "stream", "rustls-tls"], default-features = false } 15 | tokio = { version = "1", features = ["full"] } 16 | serde = { version = "1.0", features = ["derive"] } 17 | serde_json = "1.0" 18 | thiserror = "1.0" 19 | log = "0.4" 20 | env_logger = "0.10" 21 | anyhow = "1.0" 22 | which = "4.4" 23 | regex = "1.9" 24 | dirs = "5.0" 25 | indicatif = "0.17" 26 | async-trait = "0.1" 27 | futures-util = "0.3" 28 | url = "=2.4.1" 29 | 30 | [dev-dependencies] 31 | axum = "0.7" 32 | tower = "0.4" 33 | tower-http = { version = "0.5", features = ["trace"] } 34 | -------------------------------------------------------------------------------- /sdk/rust/README.md: -------------------------------------------------------------------------------- 1 | # Foundry Local Rust SDK 2 | 3 | A Rust SDK for interacting with the Microsoft Foundry Local service. This SDK allows you to manage and use AI models locally on your device. 4 | 5 | ## Features 6 | - Start and manage the Foundry Local service 7 | - Download models from the Foundry catalog 8 | - Load and unload models 9 | - List available, cached, and loaded models 10 | - Interact with loaded models using a simple API 11 | 12 | ## Usage 13 | 14 | ### Prerequisites 15 | 16 | To use this SDK, ensure you have the following prerequisites: 17 | 18 | - Foundry Local must be installed and available on the PATH 19 | - Rust 1.70.0 or later 20 | 21 | ### Create a new Rust project: 22 | 23 | ```bash 24 | cargo new hello-foundry-local 25 | cd hello-foundry-local 26 | ``` 27 | 28 | ### Install crates 29 | 30 | Install the following Rust crates using Cargo: 31 | 32 | ```bash 33 | cargo add foundry-local anyhow env_logger serde_json 34 | cargo add reqwest --features json 35 | cargo add tokio --features full 36 | ``` 37 | 38 | Alternatively, you can add these dependencies manually to your `Cargo.toml` file. 
39 | 40 | ```toml 41 | [dependencies] 42 | anyhow = "1.0.98" 43 | env_logger = "0.11.8" 44 | foundry-local = "0.1.0" 45 | reqwest = { version = "0.12.19", features = ["json"] } 46 | serde_json = "1.0.140" 47 | tokio = { version = "1.45.1", features = ["full"] } 48 | ``` 49 | 50 | ### Update the `main.rs` file 51 | 52 | Replace the contents of `src/main.rs` with the following code to create a simple application that interacts with the Foundry Local service: 53 | 54 | ```rust 55 | use foundry_local::FoundryLocalManager; 56 | use anyhow::Result; 57 | 58 | #[tokio::main] 59 | async fn main() -> Result<()> { 60 | // Create a FoundryLocalManager instance with default options 61 | let mut manager = FoundryLocalManager::builder() 62 | .alias_or_model_id("phi-3.5-mini") // Specify the model to use 63 | .bootstrap(true) // Start the service if not running 64 | .build() 65 | .await?; 66 | 67 | // Use the OpenAI compatible API to interact with the model 68 | let client = reqwest::Client::new(); 69 | let endpoint = manager.endpoint()?; 70 | let response = client.post(&format!("{}/chat/completions", endpoint)) 71 | .header("Content-Type", "application/json") 72 | .header("Authorization", format!("Bearer {}", manager.api_key())) 73 | .json(&serde_json::json!({ 74 | "model": manager.get_model_info("phi-3.5-mini", true).await?.id, 75 | "messages": [{"role": "user", "content": "What is the golden ratio?"}], 76 | })) 77 | .send() 78 | .await?; 79 | 80 | let result = response.json::<serde_json::Value>().await?; 81 | println!("{}", result["choices"][0]["message"]["content"]); 82 | 83 | Ok(()) 84 | } 85 | ``` 86 | 87 | ### Run the application 88 | 89 | To run the application, execute the following command in your terminal: 90 | 91 | ```bash 92 | cargo run 93 | ``` 94 | 95 | ## License 96 | 97 | Licensed under the MIT License. -------------------------------------------------------------------------------- /sdk/rust/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # Foundry Local SDK 2 | //! 3 | //! A Rust SDK for interacting with the Microsoft Foundry Local service. 4 | //! This SDK allows you to manage and use AI models locally on your device. 5 | //! 6 | //! ## Features 7 | //! - Start and manage the Foundry Local service 8 | //! - Download models from the Foundry catalog 9 | //! - Load and unload models 10 | //! - List available, cached, and loaded models 11 | //! - Interact with loaded models using a simple API 12 | //! 13 | //! ## Example 14 | //! 15 | //! ```rust 16 | //! use foundry_local::FoundryLocalManager; 17 | //! use anyhow::Result; 18 | //! 19 | //! #[tokio::main] 20 | //! async fn main() -> Result<()> { 21 | //! // Create a FoundryLocalManager instance for a model with default options 22 | //! let mut manager = FoundryLocalManager::builder() 23 | //! .alias_or_model_id("phi-4-mini") 24 | //! .build() 25 | //! .await?; 26 | //! 27 | //! // Use the OpenAI compatible API to interact with the model 28 | //! let client = reqwest::Client::new(); 29 | //! let response = client.post(&format!("{}/chat/completions", manager.endpoint()?)) 30 | //! .header("Content-Type", "application/json") 31 | //! .header("Authorization", format!("Bearer {}", manager.api_key())) 32 | //! .json(&serde_json::json!({ 33 | //! "model": manager.get_model_info("phi-4-mini", true).await?.id, 34 | //! "messages": [{"role": "user", "content": "What is the golden ratio?"}], 35 | //! })) 36 | //! .send() 37 | //! .await?; 38 | //! 39 | //! let result = response.json::<serde_json::Value>().await?; 40 | //!
println!("{}", result["choices"][0]["message"]["content"]); 41 | //! 42 | //! Ok(()) 43 | //! } 44 | //! ``` 45 | 46 | pub mod api; 47 | mod client; 48 | pub mod models; 49 | mod service; 50 | 51 | pub use api::FoundryLocalManager; 52 | -------------------------------------------------------------------------------- /sdk/rust/src/models.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | use std::fmt; 3 | 4 | /// Enumeration of devices supported by the model. 5 | #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] 6 | #[serde(rename_all = "UPPERCASE")] 7 | pub enum DeviceType { 8 | CPU, 9 | GPU, 10 | NPU, 11 | } 12 | 13 | impl fmt::Display for DeviceType { 14 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 15 | match self { 16 | DeviceType::CPU => write!(f, "CPU"), 17 | DeviceType::GPU => write!(f, "GPU"), 18 | DeviceType::NPU => write!(f, "NPU"), 19 | } 20 | } 21 | } 22 | 23 | /// Enumeration of execution providers supported by the model. 24 | #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] 25 | pub enum ExecutionProvider { 26 | #[serde(rename = "CPUExecutionProvider")] 27 | CPU, 28 | #[serde(rename = "WebGpuExecutionProvider")] 29 | WebGPU, 30 | #[serde(rename = "CUDAExecutionProvider")] 31 | CUDA, 32 | #[serde(rename = "QNNExecutionProvider")] 33 | QNN, 34 | } 35 | 36 | impl ExecutionProvider { 37 | /// Get the alias for the execution provider. 38 | pub fn get_alias(&self) -> String { 39 | match self { 40 | ExecutionProvider::CPU => "cpu".to_string(), 41 | ExecutionProvider::WebGPU => "webgpu".to_string(), 42 | ExecutionProvider::CUDA => "cuda".to_string(), 43 | ExecutionProvider::QNN => "qnn".to_string(), 44 | } 45 | } 46 | } 47 | 48 | impl fmt::Display for ExecutionProvider { 49 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 50 | write!( 51 | f, 52 | "{}", 53 | match self { 54 | ExecutionProvider::CPU => "CPUExecutionProvider", 55 | ExecutionProvider::WebGPU => "WebGpuExecutionProvider", 56 | ExecutionProvider::CUDA => "CUDAExecutionProvider", 57 | ExecutionProvider::QNN => "QNNExecutionProvider", 58 | } 59 | ) 60 | } 61 | } 62 | 63 | /// Model runtime information. 64 | #[derive(Debug, Clone, Serialize, Deserialize)] 65 | pub struct ModelRuntime { 66 | #[serde(rename = "deviceType")] 67 | pub device_type: DeviceType, 68 | #[serde(rename = "executionProvider")] 69 | pub execution_provider: ExecutionProvider, 70 | } 71 | 72 | /// Response model for listing models. 
73 | #[derive(Debug, Clone, Serialize, Deserialize)] 74 | pub struct FoundryListResponseModel { 75 | pub name: String, 76 | #[serde(rename = "displayName")] 77 | pub display_name: String, 78 | #[serde(rename = "modelType")] 79 | pub model_type: String, 80 | #[serde(rename = "providerType")] 81 | pub provider_type: String, 82 | pub uri: String, 83 | pub version: String, 84 | #[serde(rename = "promptTemplate")] 85 | pub prompt_template: serde_json::Value, 86 | pub publisher: String, 87 | pub task: String, 88 | pub runtime: ModelRuntime, 89 | #[serde(rename = "fileSizeMb")] 90 | pub file_size_mb: i32, 91 | #[serde(rename = "modelSettings")] 92 | pub model_settings: serde_json::Value, 93 | pub alias: String, 94 | #[serde(rename = "supportsToolCalling")] 95 | pub supports_tool_calling: bool, 96 | pub license: String, 97 | #[serde(rename = "licenseDescription")] 98 | pub license_description: String, 99 | #[serde(rename = "parentModelUri")] 100 | pub parent_model_uri: String, 101 | } 102 | 103 | /// Model information. 104 | #[derive(Debug, Clone, Serialize, Deserialize)] 105 | pub struct FoundryModelInfo { 106 | pub alias: String, 107 | pub id: String, 108 | pub version: String, 109 | pub runtime: ExecutionProvider, 110 | pub uri: String, 111 | pub file_size_mb: i32, 112 | pub prompt_template: serde_json::Value, 113 | pub provider: String, 114 | pub publisher: String, 115 | pub license: String, 116 | pub task: String, 117 | } 118 | 119 | impl FoundryModelInfo { 120 | /// Create a FoundryModelInfo object from a FoundryListResponseModel object. 121 | pub fn from_list_response(response: &FoundryListResponseModel) -> Self { 122 | Self { 123 | alias: response.alias.clone(), 124 | id: response.name.clone(), 125 | version: response.version.clone(), 126 | runtime: response.runtime.execution_provider.clone(), 127 | uri: response.uri.clone(), 128 | file_size_mb: response.file_size_mb, 129 | prompt_template: response.prompt_template.clone(), 130 | provider: response.provider_type.clone(), 131 | publisher: response.publisher.clone(), 132 | license: response.license.clone(), 133 | task: response.task.clone(), 134 | } 135 | } 136 | 137 | /// Convert the FoundryModelInfo object to a dictionary for download. 138 | pub fn to_download_body(&self) -> serde_json::Value { 139 | let provider_type = if self.provider == "AzureFoundry" { 140 | format!("{}Local", self.provider) 141 | } else { 142 | self.provider.clone() 143 | }; 144 | 145 | serde_json::json!({ 146 | "model": { 147 | "Name": self.id, 148 | "Uri": self.uri, 149 | "Publisher": self.publisher, 150 | "ProviderType": provider_type, 151 | "PromptTemplate": self.prompt_template, 152 | }, 153 | "IgnorePipeReport": true 154 | }) 155 | } 156 | } 157 | 158 | impl fmt::Display for FoundryModelInfo { 159 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 160 | write!( 161 | f, 162 | "FoundryModelInfo(alias={}, id={}, runtime={}, file_size={} MB, license={})", 163 | self.alias, 164 | self.id, 165 | self.runtime.get_alias(), 166 | self.file_size_mb, 167 | self.license 168 | ) 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /sdk/rust/src/service.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{anyhow, Result}; 2 | use log::{info, warn}; 3 | use regex::Regex; 4 | use std::{ 5 | process::{Command, Stdio}, 6 | thread, 7 | time::Duration, 8 | }; 9 | use which::which; 10 | 11 | /// Checks that Foundry is installed. 
12 | /// 13 | /// # Errors 14 | /// 15 | /// Returns an error if Foundry is not installed or not on PATH. 16 | pub fn check_foundry_installed() -> Result<()> { 17 | which("foundry").map_err(|_| anyhow!("Foundry is not installed or not on PATH!"))?; 18 | Ok(()) 19 | } 20 | 21 | /// Get the service URI if the service is running. 22 | /// 23 | /// # Returns 24 | /// 25 | /// URI of the running Foundry service, or None if not running. 26 | pub fn get_service_uri() -> Option<String> { 27 | let output = Command::new("foundry") 28 | .args(["service", "status"]) 29 | .stdout(Stdio::piped()) 30 | .stderr(Stdio::piped()) 31 | .output() 32 | .ok()?; 33 | 34 | let stdout = String::from_utf8_lossy(&output.stdout); 35 | let re = Regex::new(r"http://(?:[a-zA-Z0-9.-]+|\d{1,3}(\.\d{1,3}){3}):\d+").ok()?; 36 | 37 | re.find(&stdout).map(|m| m.as_str().to_string()) 38 | } 39 | 40 | /// Start the Foundry service. 41 | /// 42 | /// # Returns 43 | /// 44 | /// URI of the started Foundry service, or None if it failed to start. 45 | pub fn start_service() -> Result<String> { 46 | if let Some(service_url) = get_service_uri() { 47 | info!("Foundry service is already running at {service_url}"); 48 | return Ok(service_url); 49 | } 50 | 51 | // Start the service in the background 52 | let _child = Command::new("foundry").args(["service", "start"]).spawn()?; 53 | 54 | // Check for service availability 55 | for _ in 0..10 { 56 | if let Some(service_url) = get_service_uri() { 57 | info!("Foundry service started successfully at {service_url}"); 58 | return Ok(service_url); 59 | } 60 | thread::sleep(Duration::from_millis(100)); 61 | } 62 | 63 | warn!("Foundry service did not start within the expected time. May not be running."); 64 | Err(anyhow!("Failed to start Foundry service")) 65 | } 66 | -------------------------------------------------------------------------------- /sdk/rust/tests/README.md: -------------------------------------------------------------------------------- 1 | # Foundry Local SDK Tests 2 | 3 | This directory contains tests for the Foundry Local SDK, with a focus on testing the API without requiring an actual Foundry Local service to be running. 4 | 5 | ## Mock Service 6 | 7 | The `mock_service.rs` file contains a mock implementation of the Foundry Local service that can be used for testing. The mock service provides endpoints that mimic the behavior of the real service, but with predictable responses and no actual model loading or execution. 8 | 9 | Key features: 10 | - In-memory state management for models, cache, etc. 11 | - HTTP endpoints that match the real service 12 | - Configurable responses 13 | 14 | ## Running Tests 15 | 16 | To run all tests: 17 | 18 | ```bash 19 | cargo test --package foundry-local 20 | ``` 21 | 22 | To run a specific test: 23 | 24 | ```bash 25 | cargo test --package foundry-local test_list_catalog_models 26 | ``` 27 | 28 | To run the integration tests: 29 | ```bash 30 | cargo test --features integration-tests 31 | ``` 32 | 33 | ## Adding New Tests 34 | 35 | When adding new tests: 36 | 37 | 1. Add test functions to `test_api.rs` 38 | 2. Make sure each test starts its own mock server and shuts it down when done 39 | 3.
Use the `TestableFoundryLocalManager` trait to create a manager that uses the mock server 40 | 41 | Example: 42 | 43 | ```rust 44 | #[tokio::test] 45 | async fn test_my_new_feature() { 46 | // Start the mock server 47 | let (server_uri, shutdown_tx) = start_mock_server().await; 48 | 49 | // Create a manager with the mock server URI 50 | let mut manager = FoundryLocalManager::with_test_uri(&server_uri); 51 | 52 | // Test your feature 53 | // ... 54 | 55 | // Shutdown the mock server 56 | shutdown_tx.send(()).unwrap(); 57 | } 58 | ``` 59 | 60 | ## Extending the Mock Service 61 | 62 | To add new endpoints or behaviors to the mock service: 63 | 64 | 1. Add new state fields to `MockState` if needed 65 | 2. Implement handler functions for the new endpoints 66 | 3. Add routes to the router in `start_mock_server` -------------------------------------------------------------------------------- /sdk/rust/tests/integration_tests.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "integration-tests")] 2 | mod integration_tests { 3 | use foundry_local::FoundryLocalManager; 4 | 5 | async fn setup_manager() -> FoundryLocalManager { 6 | FoundryLocalManager::builder() 7 | .bootstrap(true) 8 | .build() 9 | .await 10 | .expect("Failed to create manager") 11 | } 12 | 13 | #[tokio::test] 14 | async fn test_real_service_workflow() { 15 | // Create manager 16 | let mut manager = setup_manager().await; 17 | 18 | // List catalog models and get the first model ID 19 | let model_id = { 20 | let catalog_models = manager 21 | .list_catalog_models() 22 | .await 23 | .expect("Failed to list catalog models"); 24 | 25 | assert!(!catalog_models.is_empty(), "No models found in catalog"); 26 | 27 | let first_model = &catalog_models[0]; 28 | println!( 29 | "Testing with model: {} ({})", 30 | first_model.alias, first_model.id 31 | ); 32 | first_model.id.clone() 33 | }; 34 | 35 | // Download the model 36 | let model_info = manager 37 | .download_model(&model_id, None, false) 38 | .await 39 | .expect("Failed to download model"); 40 | assert_eq!(model_info.id, model_id); 41 | 42 | // Verify model is cached 43 | let cached_models = manager 44 | .list_cached_models() 45 | .await 46 | .expect("Failed to list cached models"); 47 | assert!( 48 | cached_models.iter().any(|m| m.id == model_id), 49 | "Downloaded model not found in cache" 50 | ); 51 | 52 | // Load the model 53 | let model_info = manager 54 | .load_model(&model_id, Some(300)) 55 | .await 56 | .expect("Failed to load model"); 57 | assert_eq!(model_info.id, model_id); 58 | 59 | // Verify model is loaded 60 | let loaded_models = manager 61 | .list_loaded_models() 62 | .await 63 | .expect("Failed to list loaded models"); 64 | assert!(loaded_models.iter().any(|m| m.id == model_id)); 65 | 66 | // Get the endpoint before making the request 67 | let endpoint = manager.endpoint().expect("Failed to get endpoint"); 68 | 69 | // Use the OpenAI compatible API to interact with the model 70 | let client = reqwest::Client::new(); 71 | let response = client 72 | .post(&format!("{}/chat/completions", endpoint)) 73 | .json(&serde_json::json!({ 74 | "model": model_info.id, 75 | "messages": [{"role": "user", "content": "What is 2+2?"}], 76 | })) 77 | .send() 78 | .await 79 | .expect("Failed to send request"); 80 | 81 | // Parse and display the response 82 | let result = response 83 | .json::<serde_json::Value>() 84 | .await 85 | .expect("Failed to parse response"); 86 | if let Some(content) = result["choices"][0]["message"]["content"].as_str() { 87 |
println!("\nResponse:\n{}", content); 88 | } else { 89 | println!("\nError: Failed to extract response content from API result"); 90 | println!("Full API response: {}", result); 91 | } 92 | 93 | // Unload the model 94 | manager 95 | .unload_model(&model_id, true) 96 | .await 97 | .expect("Failed to unload model"); 98 | 99 | // Verify model is unloaded 100 | let loaded_models = manager 101 | .list_loaded_models() 102 | .await 103 | .expect("Failed to list loaded models"); 104 | assert!(!loaded_models.iter().any(|m| m.id == model_id)); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /sdk/rust/tests/test_api.rs: -------------------------------------------------------------------------------- 1 | mod mock_service; 2 | 3 | use foundry_local::{models::ExecutionProvider, FoundryLocalManager}; 4 | use mock_service::start_mock_server; 5 | use std::env; 6 | 7 | // Helper trait to inject mock service URI into FoundryLocalManager for testing 8 | trait TestableFoundryLocalManager { 9 | async fn with_test_uri(uri: &str) -> Self; 10 | } 11 | 12 | impl TestableFoundryLocalManager for FoundryLocalManager { 13 | async fn with_test_uri(uri: &str) -> Self { 14 | let mut manager = FoundryLocalManager::builder().build().await.unwrap(); 15 | manager.set_test_service_uri(uri); 16 | manager 17 | } 18 | } 19 | 20 | #[tokio::test] 21 | async fn test_list_catalog_models() { 22 | // Start the mock server 23 | let (server_uri, shutdown_tx) = start_mock_server().await; 24 | 25 | // Create a manager with the mock server URI 26 | let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; 27 | 28 | // Test listing catalog models 29 | let catalog_models = manager.list_catalog_models().await.unwrap(); 30 | 31 | // Verify the result 32 | assert_eq!(catalog_models.len(), 2); 33 | assert_eq!(catalog_models[0].id, "mock-model-1"); 34 | assert_eq!(catalog_models[0].alias, "mock-small"); 35 | assert_eq!(catalog_models[0].runtime, ExecutionProvider::CPU); 36 | assert_eq!(catalog_models[1].id, "mock-model-2"); 37 | assert_eq!(catalog_models[1].alias, "mock-medium"); 38 | assert_eq!(catalog_models[1].runtime, ExecutionProvider::CUDA); 39 | 40 | // Shutdown the mock server 41 | shutdown_tx.send(()).unwrap(); 42 | } 43 | 44 | #[tokio::test] 45 | async fn test_get_model_info() { 46 | // Start the mock server 47 | let (server_uri, shutdown_tx) = start_mock_server().await; 48 | 49 | // Create a manager with the mock server URI 50 | let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; 51 | 52 | // Test getting model info by ID 53 | let model_info = manager.get_model_info("mock-model-1", false).await.unwrap(); 54 | assert_eq!(model_info.id, "mock-model-1"); 55 | assert_eq!(model_info.alias, "mock-small"); 56 | 57 | // Test getting model info by alias 58 | let model_info = manager.get_model_info("mock-small", false).await.unwrap(); 59 | assert_eq!(model_info.id, "mock-model-1"); 60 | assert_eq!(model_info.alias, "mock-small"); 61 | 62 | // Shutdown the mock server 63 | shutdown_tx.send(()).unwrap(); 64 | } 65 | 66 | #[tokio::test] 67 | async fn test_get_cache_location() { 68 | // Start the mock server 69 | let (server_uri, shutdown_tx) = start_mock_server().await; 70 | 71 | // Create a manager with the mock server URI 72 | let manager = FoundryLocalManager::with_test_uri(&server_uri).await; 73 | 74 | // Test getting cache location 75 | let cache_location = manager.get_cache_location().await.unwrap(); 76 | assert_eq!(cache_location, "/tmp/mock-cache"); 77 
| 78 | // Shutdown the mock server 79 | shutdown_tx.send(()).unwrap(); 80 | } 81 | 82 | #[tokio::test] 83 | async fn test_list_cached_models() { 84 | // Start the mock server 85 | let (server_uri, shutdown_tx) = start_mock_server().await; 86 | 87 | // Create a manager with the mock server URI 88 | let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; 89 | 90 | // Test listing cached models 91 | let cached_models = manager.list_cached_models().await.unwrap(); 92 | assert_eq!(cached_models.len(), 1); 93 | assert_eq!(cached_models[0].id, "mock-model-1"); 94 | 95 | // Shutdown the mock server 96 | shutdown_tx.send(()).unwrap(); 97 | } 98 | 99 | #[tokio::test] 100 | async fn test_download_model() { 101 | // Start the mock server 102 | let (server_uri, shutdown_tx) = start_mock_server().await; 103 | 104 | // Create a manager with the mock server URI 105 | let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; 106 | 107 | // Test downloading a model 108 | let model_info = manager 109 | .download_model("mock-model-2", None, false) 110 | .await 111 | .unwrap(); 112 | assert_eq!(model_info.id, "mock-model-2"); 113 | 114 | // Verify the model is now cached 115 | let cached_models = manager.list_cached_models().await.unwrap(); 116 | assert_eq!(cached_models.len(), 2); 117 | assert!(cached_models.iter().any(|m| m.id == "mock-model-1")); 118 | assert!(cached_models.iter().any(|m| m.id == "mock-model-2")); 119 | 120 | // Shutdown the mock server 121 | shutdown_tx.send(()).unwrap(); 122 | } 123 | 124 | #[tokio::test] 125 | async fn test_load_and_unload_model() { 126 | // Start the mock server 127 | let (server_uri, shutdown_tx) = start_mock_server().await; 128 | 129 | // Create a manager with the mock server URI 130 | let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; 131 | 132 | // Test loading a model 133 | let model_info = manager.load_model("mock-model-1", Some(300)).await.unwrap(); 134 | assert_eq!(model_info.id, "mock-model-1"); 135 | 136 | // Verify the model is loaded 137 | let loaded_models = manager.list_loaded_models().await.unwrap(); 138 | assert_eq!(loaded_models.len(), 1); 139 | assert_eq!(loaded_models[0].id, "mock-model-1"); 140 | 141 | // Test unloading the model 142 | manager.unload_model("mock-model-1", false).await.unwrap(); 143 | 144 | // Verify the model is unloaded 145 | let loaded_models = manager.list_loaded_models().await.unwrap(); 146 | assert_eq!(loaded_models.len(), 0); 147 | 148 | // Shutdown the mock server 149 | shutdown_tx.send(()).unwrap(); 150 | } 151 | 152 | #[tokio::test] 153 | async fn test_endpoint_and_api_key() { 154 | // Start the mock server 155 | let (server_uri, shutdown_tx) = start_mock_server().await; 156 | 157 | // Create a manager with the mock server URI 158 | let manager = FoundryLocalManager::with_test_uri(&server_uri).await; 159 | 160 | // Test getting endpoint 161 | let endpoint = manager.endpoint().unwrap(); 162 | assert_eq!(endpoint, format!("{}/v1", server_uri)); 163 | 164 | // Test getting API key (default) 165 | let api_key = manager.api_key(); 166 | assert_eq!(api_key, "OPENAI_API_KEY"); 167 | 168 | // Test getting API key (with env var) 169 | env::set_var("OPENAI_API_KEY", "test-api-key"); 170 | let api_key = manager.api_key(); 171 | assert_eq!(api_key, "test-api-key"); 172 | 173 | // Shutdown the mock server 174 | shutdown_tx.send(()).unwrap(); 175 | } 176 | --------------------------------------------------------------------------------
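A brief orientation for readers of the listings above: the Python package exposes the same control-plane building blocks that the Rust SDK wraps. The following is a minimal, illustrative sketch only, not a file from the repository; it assumes the `foundry-local-sdk` package built by the `setup.py` above is installed and that the `foundry` CLI is on PATH. It combines the helpers shown earlier in `foundry_local/logging.py` and `foundry_local/service.py`:

```python
# Illustrative sketch (not part of the repository), assuming foundry-local-sdk is installed.
from foundry_local.logging import set_verbosity_info
from foundry_local.service import assert_foundry_installed, start_service

set_verbosity_info()        # raise SDK logger verbosity to INFO (see logging.py above)
assert_foundry_installed()  # raises RuntimeError if the `foundry` CLI is not on PATH
uri = start_service()       # e.g. "http://localhost:5273", or None if startup failed
print(f"Foundry Local service URI: {uri}")
```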