44 |
45 | )
46 | }
47 |
--------------------------------------------------------------------------------
/docs/tailwind.config.ts:
--------------------------------------------------------------------------------
1 | import { type Config } from 'tailwindcss'
2 | import typographyStyles from './typography'
3 | import typographyPlugin from '@tailwindcss/typography'
4 | import headlessuiPlugin from '@headlessui/tailwindcss'
5 |
6 | export default {
7 | content: ['./src/**/*.{js,mjs,jsx,ts,tsx,mdx}'],
8 | darkMode: 'class',
9 | theme: {
10 | fontSize: {
11 | '2xs': ['0.75rem', { lineHeight: '1.25rem' }],
12 | xs: ['0.8125rem', { lineHeight: '1.5rem' }],
13 | sm: ['0.875rem', { lineHeight: '1.5rem' }],
14 | base: ['1rem', { lineHeight: '1.75rem' }],
15 | lg: ['1.125rem', { lineHeight: '1.75rem' }],
16 | xl: ['1.25rem', { lineHeight: '1.75rem' }],
17 | '2xl': ['1.5rem', { lineHeight: '2rem' }],
18 | '3xl': ['1.875rem', { lineHeight: '2.25rem' }],
19 | '4xl': ['2.25rem', { lineHeight: '2.5rem' }],
20 | '5xl': ['3rem', { lineHeight: '1' }],
21 | '6xl': ['3.75rem', { lineHeight: '1' }],
22 | '7xl': ['4.5rem', { lineHeight: '1' }],
23 | '8xl': ['6rem', { lineHeight: '1' }],
24 | '9xl': ['8rem', { lineHeight: '1' }],
25 | },
26 | typography: typographyStyles,
27 | extend: {
28 | boxShadow: {
29 | glow: '0 0 4px rgb(0 0 0 / 0.1)',
30 | },
31 | maxWidth: {
32 | lg: '33rem',
33 | '2xl': '40rem',
34 | '3xl': '50rem',
35 | '5xl': '66rem',
36 | },
37 | opacity: {
38 | 1: '0.01',
39 | 2.5: '0.025',
40 | 7.5: '0.075',
41 | 15: '0.15',
42 | },
43 |       fill: {
44 |         cl1: '#333333',
45 |         cl2: '#ffd000',
46 |       },
47 | screens: {
48 | 'clg': '1120px',
49 | },
50 | },
51 | },
52 | plugins: [typographyPlugin, headlessuiPlugin],
53 | } satisfies Config
54 |
--------------------------------------------------------------------------------
/docs/src/app/documentation/models/page.mdx:
--------------------------------------------------------------------------------
1 | export const metadata = {
2 | title: 'Models',
3 | description: 'Models supported by Edgen.',
4 | }
5 |
6 | # Models
7 |
8 | ## Chat Completions Endpoint
9 | For the chat completions endpoint, Edgen supports any model on Hugging Face that is tagged with the GGUF library. You can explore the available models on the [Hugging Face hub](https://huggingface.co/models?library=gguf).
10 |
11 | ## Audio Transcriptions Endpoint
12 | For the audio transcriptions endpoint, Edgen supports all whisper.cpp models:
13 |
14 | | Models | Model URL |
15 | |------------|-----------|
16 | | Whisper (all variants) | [ggerganov/whisper.cpp](https://huggingface.co/ggerganov/whisper.cpp) |
17 | | distil-whisper-small.en | [distil-whisper/distil-small.en](https://huggingface.co/distil-whisper/distil-small.en/resolve/main/ggml-distil-small.en.bin) |
18 | | distil-whisper-medium.en | [distil-whisper/distil-medium.en](https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/ggml-medium-32-2.en.bin) |
19 | | distil-whisper-large-v2 | [distil-whisper/distil-large-v2](https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/ggml-large-32-2.en.bin) |
20 |
21 | ## How to switch the active model
22 | Just change the configuration file! Check [Documentation » Configuration](/documentation/configuration); if Edgen cannot find the specified model locally, it will download it automatically from Hugging Face. See also [API Reference » Models](/api-reference/models).
23 |
24 | You can also download your model manually and copy it to the model directory. In this case, Edgen will not manage this model.
25 |
26 | The configured model can be overridden by the "model" parameter of endpoint requests. See the [API Reference](/api-reference) for details.
27 |
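28 | As an illustrative sketch, here is how a request can override the configured model using the Python client. The repo path below follows the repo-owner/repo-name/model-name format and reuses the default chat model from [Documentation » Configuration](/documentation/configuration); treat the exact names as an example:
29 |
30 | ```python
31 | from edgen import Edgen
32 |
33 | client = Edgen()
34 |
35 | # "default" resolves to the model set in the configuration file;
36 | # a full repo path is downloaded from Hugging Face on first use.
37 | completion = client.chat.completions.create(
38 |     model="TheBloke/neural-chat-7B-v3-3-GGUF/neural-chat-7b-v3-3.Q4_K_M.gguf",
39 |     messages=[{"role": "user", "content": "What is the result of 1+2?"}],
40 | )
41 | print(completion.choices[0].message.content)
42 | ```
43 |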
--------------------------------------------------------------------------------
/docs/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "edgen-docs",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "dev": "next dev",
7 | "build": "next build",
8 | "start": "next start",
9 | "lint": "next lint"
10 | },
11 | "browserslist": "defaults, not ie <= 11",
12 | "dependencies": {
13 | "@algolia/autocomplete-core": "^1.7.3",
14 | "@headlessui/react": "^1.7.15",
15 | "@headlessui/tailwindcss": "^0.2.0",
16 | "@mdx-js/loader": "^2.3.0",
17 | "@mdx-js/react": "^2.3.0",
18 | "@next/mdx": "13.4.16",
19 | "@sindresorhus/slugify": "^2.1.1",
20 | "@tailwindcss/typography": "^0.5.10",
21 | "@types/mdx": "^2.0.8",
22 | "@types/node": "20.4.7",
23 | "@types/react": "18.2.18",
24 | "@types/react-dom": "18.2.7",
25 | "@types/react-highlight-words": "^0.16.4",
26 | "acorn": "^8.8.1",
27 | "autoprefixer": "^10.4.7",
28 | "clsx": "^1.2.0",
29 | "fast-glob": "^3.3.0",
30 | "flexsearch": "^0.7.31",
31 | "framer-motion": "7.8.1",
32 | "mdast-util-to-string": "^3.2.0",
33 | "mdx-annotations": "^0.1.1",
34 | "next": "^14.0.5",
35 | "next-themes": "^0.2.1",
36 | "react": "18.2.0",
37 | "react-dom": "18.2.0",
38 | "react-highlight-words": "^0.20.0",
39 | "remark": "^14.0.2",
40 | "remark-gfm": "^3.0.1",
41 | "remark-mdx": "^2.3.0",
42 | "shiki": "^0.11.1",
43 | "simple-functional-loader": "^1.2.1",
44 | "tailwindcss": "^3.3.3",
45 | "typescript": "5.1.6",
46 | "unist-util-filter": "^4.0.1",
47 | "unist-util-visit": "^4.1.1",
48 | "zustand": "^4.3.2"
49 | },
50 | "devDependencies": {
51 | "eslint": "8.45.0",
52 | "eslint-config-next": "13.4.16",
53 | "prettier": "^3.0.1",
54 | "prettier-plugin-tailwindcss": "^0.5.2",
55 | "sharp": "^0.32.6"
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/docs/src/app/documentation/getting-started/page.mdx:
--------------------------------------------------------------------------------
1 | export const metadata = {
2 | title: 'Getting Started',
3 | description: 'Get started in Edgen development',
4 | }
5 |
6 | # Getting Started
7 |
8 | This section is dedicated to developers looking to contribute to Edgen or build it from source. {{ className: 'lead' }}
9 |
10 |
11 | If you just want to use Edgen with pre-built binaries, please check [Guides » Quickstart](/guides/quickstart).
12 |
13 |
14 | ## Prerequisites
15 |
16 | ### Windows
17 |
18 | {/* #### 1. Microsoft Visual Studio C++ Build Tools */}
19 | TODO
20 |
21 |
22 | ### macOS
23 |
24 | #### 1. Clang and macOS development dependencies
25 | ```bash
26 | xcode-select --install
27 | ```
28 | #### 2. Install Rust
29 | ```bash
30 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
31 | ```
32 |
33 | ### Linux
34 |
35 | #### 1. Install system dependencies
36 |
37 |
38 | ```bash {{ title: 'Ubuntu' }}
39 | sudo apt update
40 | sudo apt install libwebkit2gtk-4.0-dev \
41 | build-essential \
42 | curl \
43 | wget \
44 | file \
45 | libssl-dev \
46 | libgtk-3-dev \
47 | libayatana-appindicator3-dev \
48 | librsvg2-dev \
49 | cmake \
50 | llvm \
51 | clang
52 | ```
53 |
54 | ```bash {{ title: 'NixOS' }}
55 | # with flake.nix
56 | nix develop
57 | ```
58 |
59 |
60 |
61 | #### 2. Install Rust
62 | ```bash
63 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
64 | ```
65 |
66 | ## Build
67 |
68 | ### 1. Clone the repository
69 | ```bash
70 | git clone https://github.com/edgenai/edgen.git
71 | ```
72 |
73 | ### 2. Build and run
74 | ```bash
75 | cd edgen
76 | # or without --release for a debug build
77 | cargo run --release
78 | ```
79 | ### 2.1 Run without GUI
80 | ```bash
81 | cargo run --release -- --nogui
82 | ```
83 |
--------------------------------------------------------------------------------
/docs/src/images/logos/go.svg:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/tests/test_chat.py:
--------------------------------------------------------------------------------
1 |
2 | from edgen import Edgen, APIConnectionError
3 | import pytest
4 |
5 | client = Edgen()
6 |
7 | def test_completions_streaming():
8 | try:
9 | stream = client.chat.completions.create(
10 | model="default",
11 | messages=[
12 | {
13 | "role": "user",
14 | "content": "What is the result of 1+2?",
15 | },
16 | ],
17 | stream=True,
18 | )
19 | except APIConnectionError:
20 | pytest.fail("No connection. Is edgen running?")
21 |
22 |     assert stream.response.status_code == 200
23 |
24 | answer = ""
25 | for chunk in stream:
26 | if not chunk.choices:
27 | continue
28 |
29 |         answer += chunk.choices[0].delta.content or ""  # content may be None in some chunks
30 |
31 | # print(answer)
32 |     assert isinstance(answer, str)
33 |
34 | def test_completions():
35 | try:
36 | answer = client.chat.completions.create(
37 | model="default",
38 | messages=[
39 | {
40 | "role": "user",
41 | "content": "What is the result of 1+2?",
42 | },
43 | ]
44 | )
45 | except APIConnectionError:
46 | pytest.fail("No connection. Is edgen running?")
47 |
48 | content = answer.choices[0].message.content
49 | print(content)
50 |     assert isinstance(content, str)
51 |     assert "3" in content
52 |
53 | def test_completions_status():
54 | try:
55 | status = client.chat.completions.status.create()
56 | except APIConnectionError:
57 | pytest.fail("No connection. Is edgen running?")
58 |
59 | model = status.active_model
60 |     assert isinstance(model, str)
61 | print(model)
62 |
63 | if __name__ == "__main__":
64 |     test_completions_streaming()
65 |     test_completions()
66 |     test_completions_status()
67 |
--------------------------------------------------------------------------------
/edgen/README.md:
--------------------------------------------------------------------------------
1 | # Edgen
2 | A local GenAI API server: a drop-in replacement for OpenAI's API for local GenAI.
3 | - [Description](#description)
4 | - [Getting Started](#getting-started)
5 | - [Dependencies](#dependencies)
6 | - [Installing](#installing)
7 | - [Executing program](#executing-program)
8 | - [Documentation](#documentation)
9 | - [Help](#help)
10 | - [Running the Application Locally](#running-the-application-locally)
11 | - [License](#license)
12 |
13 | ## Description
14 |
15 | Edgen is a local, private GenAI server and an alternative to OpenAI. No GPU is required. Run AI models locally: LLMs (Llama 2, Mistral, Mixtral, ...), speech-to-text (Whisper), and many others.
16 |
17 | ## Getting Started
18 |
19 | ### Dependencies
20 |
21 | - [Rust](https://www.rust-lang.org/tools/install)
22 | - [Node.js](https://nodejs.org/en/download/)
23 | - [pnpm](https://pnpm.io/installation)
24 |
25 | ### Installing
26 |
27 | See the [releases](https://github.com/edgenai/edgen/releases) page for the latest binary. All major platforms are supported.
28 |
29 |
30 | ## Documentation
31 | See the [documentation page](https://docs.edgen.co) for help and support
32 |
33 | ## Help
34 | If you encounter an error when building the application locally, ensure the following system dependencies are met:
35 |
36 | 1. Minimum required Rust toolchain
37 | ```shell
38 | # use rustup to install the required toolchain
39 | rustup toolchain install beta-2023-11-21
40 | ```
41 |
42 | 2. Install cmake from [https://cmake.org/download/](https://cmake.org/download/)
43 |
44 | ## Running the Application Locally
45 | To run the application locally, ensure the dependencies above are met, then run:
46 | ```shell
47 | pnpm install
48 | pnpm tauri dev
49 | ```
50 |
51 |
52 |
53 | ## License
54 |
55 | This project is licensed under the Apache 2.0 License - see the [LICENSE](../LICENSE) file for details
56 |
--------------------------------------------------------------------------------
/edgen/public/EdgenAI_dark_colored.svg:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/docs/src/components/ButtonRow.tsx:
--------------------------------------------------------------------------------
1 | 'use client'
2 |
3 | import clsx from 'clsx'
4 | import React, { useState } from 'react'
5 | import { Col } from './mdx'
6 |
7 | export function ButtonRow({
8 | types,
9 | children
10 | }: {
11 | types: string[]
12 | children: React.ReactNode
13 | }) {
14 | const [currentType, setCurrentType] = useState(0)
15 |
16 | const onlyCurrent = (children: React.ReactNode ) => (
17 |
18 | {React.Children.toArray(children).map((child, i) => i == currentType ?
{child}
: )}
19 |
20 | );
21 |
22 | return (
23 | <>
24 |
25 |
26 | {types.map((t, i) => {
27 | return (
28 |
setCurrentType(i)}
38 | >
39 |
40 | {t}
41 |
42 |
43 | )
44 | })}
45 |
46 |
47 |
48 | {onlyCurrent(children)}
49 |
50 | >
51 | )
52 | }
53 |
--------------------------------------------------------------------------------
/docs/src/components/icons/CogIcon.tsx:
--------------------------------------------------------------------------------
1 | export function CogIcon(props: React.ComponentPropsWithoutRef<'svg'>) {
2 | return (
3 |
4 |
10 |
16 |
17 |
18 | )
19 | }
20 |
--------------------------------------------------------------------------------
/edgen/src/styles.css:
--------------------------------------------------------------------------------
1 | :root {
2 | font-family: Inter, Avenir, Helvetica, Arial, sans-serif;
3 | font-size: 16px;
4 | line-height: 24px;
5 | font-weight: 400;
6 |
7 | color: #0f0f0f;
8 | background-color: #f6f6f6;
9 |
10 | font-synthesis: none;
11 | text-rendering: optimizeLegibility;
12 | -webkit-font-smoothing: antialiased;
13 | -moz-osx-font-smoothing: grayscale;
14 | -webkit-text-size-adjust: 100%;
15 | }
16 |
17 | .container {
18 | margin: 0;
19 | padding-top: 10vh;
20 | display: flex;
21 | flex-direction: column;
22 | justify-content: center;
23 | text-align: center;
24 | }
25 |
26 | .logo {
27 | height: 6em;
28 | padding: 1.5em;
29 | will-change: filter;
30 | transition: 0.75s;
31 | }
32 |
33 | .logo:hover {
34 | filter: drop-shadow(0 0 2em #FFD000);
35 | }
36 |
37 | .row {
38 | display: flex;
39 | justify-content: center;
40 | }
41 |
42 | a {
43 | font-weight: 500;
44 | text-decoration: inherit;
45 | }
46 |
47 | /* a:hover {
48 | color: #535bf2;
49 | } */
50 |
51 | h1 {
52 | text-align: center;
53 | }
54 |
55 | input,
56 | button {
57 | border-radius: 8px;
58 | border: 1px solid transparent;
59 | padding: 0.6em 1.2em;
60 | font-size: 1em;
61 | font-weight: 500;
62 | font-family: inherit;
63 | color: #0f0f0f;
64 | background-color: #ffffff;
65 | transition: border-color 0.25s;
66 | box-shadow: 0 2px 2px rgba(0, 0, 0, 0.2);
67 | }
68 |
69 | button {
70 | cursor: pointer;
71 | }
72 |
73 | button:active {
74 | background-color: #e8e8e8;
75 | }
76 |
77 | input,
78 | button {
79 | outline: none;
80 | }
81 |
82 | #greet-input {
83 | margin-right: 5px;
84 | }
85 |
86 | @media (prefers-color-scheme: dark) {
87 | :root {
88 | color: #f6f6f6;
89 | background-color: #2f2f2f;
90 | }
91 |
92 | a:hover {
93 | color: #24c8db;
94 | }
95 |
96 | input,
97 | button {
98 | color: #ffffff;
99 | background-color: #0f0f0f98;
100 | }
101 | button:active {
102 | background-color: #0f0f0f69;
103 | }
104 | }
105 |
--------------------------------------------------------------------------------
/docs/src/components/Tag.tsx:
--------------------------------------------------------------------------------
1 | import clsx from 'clsx'
2 |
3 | const variantStyles = {
4 | small: '',
5 | medium: 'rounded-lg px-1.5 ring-1 ring-inset',
6 | }
7 |
8 | const colorStyles = {
9 | yellow: {
10 | small: 'text-yellow-500 dark:text-yellow-400',
11 | medium:
12 | 'ring-yellow-300 dark:ring-yellow-400/30 bg-yellow-400/10 text-yellow-500 dark:text-yellow-400',
13 | large:
14 | 'ring-yellow-300 dark:ring-yellow-400/30 bg-yellow-400/10 text-yellow-500 dark:text-yellow-400',
15 | },
16 | sky: {
17 | small: 'text-sky-500',
18 | medium:
19 | 'ring-sky-300 bg-sky-400/10 text-sky-500 dark:ring-sky-400/30 dark:bg-sky-400/10 dark:text-sky-400',
20 | large:
21 | 'ring-sky-300 bg-sky-400/10 text-sky-500 dark:ring-sky-400/30 dark:bg-sky-400/10 dark:text-sky-400',
22 | },
23 | amber: {
24 | small: 'text-amber-500',
25 | medium:
26 | 'ring-amber-300 bg-amber-400/10 text-amber-500 dark:ring-amber-400/30 dark:bg-amber-400/10 dark:text-amber-400',
27 | },
28 | rose: {
29 | small: 'text-red-500 dark:text-rose-500',
30 | medium:
31 | 'ring-rose-200 bg-rose-50 text-red-500 dark:ring-rose-500/20 dark:bg-rose-400/10 dark:text-rose-400',
32 | },
33 | zinc: {
34 | small: 'text-zinc-400 dark:text-zinc-500',
35 | medium:
36 | 'ring-zinc-200 bg-zinc-50 text-zinc-500 dark:ring-zinc-500/20 dark:bg-zinc-400/10 dark:text-zinc-400',
37 | },
38 | }
39 |
40 | const valueColorMap = {
41 | GET: 'yellow',
42 | POST: 'sky',
43 | PUT: 'amber',
44 | DELETE: 'rose',
45 | } as Record<string, keyof typeof colorStyles>
46 |
47 | export function Tag({
48 | children,
49 | variant = 'medium',
50 | color = valueColorMap[children] ?? 'yellow',
51 | }: {
52 | children: keyof typeof valueColorMap & (string | {})
53 | variant?: keyof typeof variantStyles
54 | color?: keyof typeof colorStyles
55 | }) {
56 | return (
57 |     <span
58 |       className={clsx(
59 |         'font-mono text-[0.625rem] font-semibold leading-6',
60 |         variantStyles[variant],
61 |         colorStyles[color][variant],
62 |       )}
63 |     >
64 |       {children}
65 |     </span>
66 | )
67 | }
68 |
--------------------------------------------------------------------------------
/crates/edgen_server/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "edgen_server"
3 | version = "0.1.5"
4 | edition = "2021"
5 |
6 | [dependencies]
7 | argh = { workspace = true }
8 | axum = { workspace = true, features = ["tokio", "multipart"] }
9 | axum_typed_multipart = "0.11.0"
10 | axum-test = "14.4.0"
11 | console-subscriber = { workspace = true }
12 | dashmap = { workspace = true }
13 | derive_more = { workspace = true }
14 | edgen_core = { path = "../edgen_core" }
15 | edgen_rt_chat_faker = { path = "../edgen_rt_chat_faker" }
16 | edgen_rt_llama_cpp = { path = "../edgen_rt_llama_cpp" }
17 | edgen_rt_image_generation_candle = { path = "../edgen_rt_image_generation_candle" }
18 | edgen_rt_whisper_cpp = { path = "../edgen_rt_whisper_cpp" }
19 | either = { workspace = true, features = ["serde"] }
20 | futures = { workspace = true }
21 | hf-hub = "0.3.2"
22 | hyper = { workspace = true }
23 | hyper-util = { workspace = true }
24 | once_cell = { workspace = true }
25 | pin-project = { workspace = true }
26 | rand = "0.8.5"
27 | reqwest = { workspace = true, features = ["blocking", "multipart", "json"] }
28 | reqwest-eventsource = "0.6.0"
29 | rubato = "0.15.0"
30 | serde = { workspace = true }
31 | serde_derive = { workspace = true }
32 | serde_json = { workspace = true }
33 | serde_yaml = { workspace = true }
34 | testcontainers = "0.15.0"
35 | time = { workspace = true }
36 | tinyvec = { workspace = true, features = ["serde"] }
37 | thiserror = { workspace = true }
38 | tokio = { workspace = true, features = ["full", "tracing"] }
39 | tokio-stream = { workspace = true }
40 | tokio-util = { workspace = true }
41 | toml_edit = { workspace = true }
42 | tower-http = { version = "0.5.1", features = ["cors"] }
43 | tracing = { workspace = true }
44 | tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
45 | utoipa = { workspace = true }
46 | uuid = { workspace = true, features = ["v4", "serde"] }
47 |
48 | [dev-dependencies]
49 | levenshtein = "1.0.5"
50 | tempfile = { workspace = true }
51 | copy_dir = "0.1.3"
52 |
53 | [features]
54 | llama_vulkan = ["edgen_rt_llama_cpp/vulkan"]
55 | llama_cuda = ["edgen_rt_llama_cpp/cuda"]
56 | llama_metal = ["edgen_rt_llama_cpp/metal"]
57 | whisper_cuda = ["edgen_rt_whisper_cpp/cuda"]
58 | candle_cuda = ["edgen_rt_image_generation_candle/cuda"]
59 |
60 | [[bin]]
61 | name = "chatter"
62 | test = false
63 | bench = false
64 |
--------------------------------------------------------------------------------
/edgen/src-tauri/src/main.rs:
--------------------------------------------------------------------------------
1 | /* Copyright 2023- The Binedge, Lda team. All rights reserved.
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | * http://www.apache.org/licenses/LICENSE-2.0
6 | * Unless required by applicable law or agreed to in writing, software
7 | * distributed under the License is distributed on an "AS IS" BASIS,
8 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | * See the License for the specific language governing permissions and
10 | * limitations under the License.
11 | */
12 |
13 | #![cfg_attr(windows, windows_subsystem = "windows")]
14 |
15 | #[cfg(not(feature = "no_gui"))]
16 | mod gui;
17 |
18 | use edgen_server;
19 | use edgen_server::{cli, start, EdgenResult};
20 |
21 | use once_cell::sync::Lazy;
22 |
23 | #[cfg(not(feature = "no_gui"))]
24 | fn main() -> EdgenResult {
25 | try_attach_terminal();
26 |
27 | Lazy::force(&cli::PARSED_COMMANDS);
28 |
29 | match &cli::PARSED_COMMANDS.subcommand {
30 | None => serve(&cli::PARSED_COMMANDS, true)?,
31 | Some(cli::Command::Serve(args)) => serve(&cli::PARSED_COMMANDS, !args.nogui)?,
32 | Some(_) => start(&cli::PARSED_COMMANDS)?,
33 | }
34 |
35 | Ok(())
36 | }
37 |
38 | #[cfg(feature = "no_gui")]
39 | fn main() -> EdgenResult {
40 | try_attach_terminal();
41 |
42 | Lazy::force(&cli::PARSED_COMMANDS);
43 | start(&cli::PARSED_COMMANDS)
44 | }
45 |
46 | #[cfg(not(feature = "no_gui"))]
47 | fn serve(command: &'static cli::TopLevel, start_gui: bool) -> EdgenResult {
48 | let handle = std::thread::spawn(|| match start(command) {
49 | Ok(()) => std::process::exit(0),
50 | Err(e) => {
51 | eprintln!("{:?}", e);
52 | std::process::exit(1);
53 | }
54 | });
55 |
56 | if start_gui {
57 | gui::run();
58 | }
59 |
60 | handle.join()?
61 | }
62 |
63 | /// On Windows, attempt to attach to a parent process terminal if not already attached.
64 | ///
65 | /// This is needed because this is a Windows subsystem binary.
66 | fn try_attach_terminal() {
67 | #[cfg(windows)]
68 | {
69 | use winapi::um::wincon;
70 | unsafe {
71 | if wincon::GetConsoleWindow().is_null() {
72 | wincon::AttachConsole(wincon::ATTACH_PARENT_PROCESS);
73 | }
74 | }
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/crates/edgen_server/src/graceful_shutdown.rs:
--------------------------------------------------------------------------------
1 | /* Copyright 2023- The Binedge, Lda team. All rights reserved.
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | * http://www.apache.org/licenses/LICENSE-2.0
6 | * Unless required by applicable law or agreed to in writing, software
7 | * distributed under the License is distributed on an "AS IS" BASIS,
8 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | * See the License for the specific language governing permissions and
10 | * limitations under the License.
11 | */
12 |
13 | //! Mechanisms for shutting down the application without destroying anything important.
14 |
15 | use time::{Duration, OffsetDateTime};
16 | use tokio::signal;
17 | use tokio::sync::OnceCell;
18 | use tracing::warn;
19 |
20 | /// The duration between [`global_shutdown_starts`] and [`global_shutdown_ends`].
21 | pub const SHUTDOWN_GRACE_PERIOD: Duration = Duration::seconds(30);
22 |
23 | static SHUTDOWN_INVOKED_AT: OnceCell<OffsetDateTime> = OnceCell::const_new();
24 |
25 | /// Listens for signals that cause the application to shut down; namely, `CTRL+C`.
26 | async fn signal_listener() -> OffsetDateTime {
27 | while signal::ctrl_c().await.is_err() { /* spin */ }
28 |
29 | warn!(
30 | "Global shutdown has been invoked at {}, and will result in a hard termination at {}",
31 | OffsetDateTime::now_utc(),
32 | OffsetDateTime::now_utc() + SHUTDOWN_GRACE_PERIOD
33 | );
34 |
35 | OffsetDateTime::now_utc()
36 | }
37 |
38 | /// Resolves when a global shutdown has started.
39 | ///
40 | /// All threads **should** start gracefully exiting by this time.
41 | pub async fn global_shutdown_starts() {
42 | yield_until(*SHUTDOWN_INVOKED_AT.get_or_init(signal_listener).await).await;
43 | }
44 |
45 | /// Resolves when the application is about to unconditionally shut down, following
46 | /// [`global_shutdown_starts`].
47 | ///
48 | /// This fires after a grace period of [`SHUTDOWN_GRACE_PERIOD`].
49 | pub async fn global_shutdown_ends() {
50 | yield_until(*SHUTDOWN_INVOKED_AT.get_or_init(signal_listener).await + SHUTDOWN_GRACE_PERIOD)
51 | .await;
52 | }
53 |
54 | /// Yields until a [`time`]-based [`OffsetDateTime`] has elapsed.
55 | pub async fn yield_until(t: OffsetDateTime) {
56 | let now = OffsetDateTime::now_utc();
57 |
58 | if t > now {
59 | tokio::time::sleep((t - OffsetDateTime::now_utc()).unsigned_abs()).await;
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/crates/edgen_server/src/llm.rs:
--------------------------------------------------------------------------------
1 | /* Copyright 2023- The Binedge, Lda team. All rights reserved.
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | * http://www.apache.org/licenses/LICENSE-2.0
6 | * Unless required by applicable law or agreed to in writing, software
7 | * distributed under the License is distributed on an "AS IS" BASIS,
8 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | * See the License for the specific language governing permissions and
10 | * limitations under the License.
11 | */
12 |
13 | use futures::Stream;
14 | use once_cell::sync::Lazy;
15 |
16 | use edgen_core::llm::{CompletionArgs, LLMEndpoint, LLMEndpointError};
17 | use edgen_rt_llama_cpp::LlamaCppEndpoint;
18 |
19 | use crate::model::Model;
20 | use crate::util::StoppingStream;
21 |
22 | static ENDPOINT: Lazy<LlamaCppEndpoint> = Lazy::new(Default::default);
23 |
24 | pub async fn chat_completion(
25 | model: Model,
26 | args: CompletionArgs,
27 | ) -> Result<String, LLMEndpointError> {
28 | ENDPOINT
29 | .chat_completions(
30 | model
31 | .file_path()
32 | .map_err(move |e| LLMEndpointError::Load(e.to_string()))?,
33 | args,
34 | )
35 | .await
36 | }
37 |
38 | pub async fn chat_completion_stream(
39 | model: Model,
40 | args: CompletionArgs,
41 | ) -> Result<StoppingStream<Box<dyn Stream<Item = String> + Unpin + Send>>, LLMEndpointError> {
42 | let stream = ENDPOINT
43 | .stream_chat_completions(
44 | model
45 | .file_path()
46 | .map_err(move |e| LLMEndpointError::Load(e.to_string()))?,
47 | args,
48 | )
49 | .await?;
50 |
51 | Ok(StoppingStream::wrap_with_stop_words(
52 | stream,
53 | vec![
54 | "<|ASSISTANT|>".to_string(),
55 | "<|USER|>".to_string(),
56 | "<|TOOL|>".to_string(),
57 | "<|SYSTEM|>".to_string(),
58 | ],
59 | ))
60 | }
61 |
62 | pub async fn embeddings(
63 | model: Model,
64 |     input: Vec<String>,
65 | ) -> Result<Vec<Vec<f32>>, LLMEndpointError> {
66 | ENDPOINT
67 | .embeddings(
68 | model
69 | .file_path()
70 | .map_err(move |e| LLMEndpointError::Load(e.to_string()))?,
71 | input,
72 | )
73 | .await
74 | }
75 |
76 | pub async fn reset_environment() {
77 | ENDPOINT.reset()
78 | }
79 |
--------------------------------------------------------------------------------
/docs/src/components/APIClients.tsx:
--------------------------------------------------------------------------------
1 | import Image from 'next/image'
2 |
3 | import { Button } from '@/components/Button'
4 | import { Heading } from '@/components/Heading'
5 | import logoGo from '@/images/logos/go.svg'
6 | import logoNode from '@/images/logos/node.svg'
7 | import logoPython from '@/images/logos/python.svg'
8 | import logoRust from '@/images/logos/rust.svg'
9 |
10 | const apiclients = [
11 | {
12 | href: 'https://github.com/edgenai/edgen-client-python',
13 | name: 'Python',
14 | description:
15 | 'Python is a programming language that lets you work quickly and integrate systems more effectively.',
16 | logo: logoPython,
17 | },
18 | {
19 | href: 'https://github.com/edgenai/edgen-client-node',
20 | name: 'Node.js',
21 | description:
22 | 'Node.js® is an open-source, cross-platform JavaScript runtime environment.',
23 | logo: logoNode,
24 | },
25 | // {
26 | // href: '#',
27 | // name: 'Rust',
28 | // description:
29 | // 'An open-source programming language that prioritizes memory safety and high performance.',
30 | // logo: logoRust,
31 | // },
32 | // {
33 | // href: '#',
34 | // name: 'Go',
35 | // description:
36 | // 'An open-source programming language supported by Google with built-in concurrency.',
37 | // logo: logoGo,
38 | // },
39 | ]
40 |
41 | export function APIClients() {
42 | return (
43 |
44 |
45 | Official API Clients
46 |
47 |
48 | {apiclients.map((library) => (
49 |
50 |
51 |
52 | {library.name}
53 |
54 |
55 | {library.description}
56 |
57 |
58 |
61 |
62 |
63 |
69 |
70 | ))}
71 |
72 |
73 | )
74 | }
75 |
--------------------------------------------------------------------------------
/crates/edgen_server/src/chat_faker.rs:
--------------------------------------------------------------------------------
1 | /* Copyright 2023- The Binedge, Lda team. All rights reserved.
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | * http://www.apache.org/licenses/LICENSE-2.0
6 | * Unless required by applicable law or agreed to in writing, software
7 | * distributed under the License is distributed on an "AS IS" BASIS,
8 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | * See the License for the specific language governing permissions and
10 | * limitations under the License.
11 | */
12 |
13 | //! Endpoint for the chat faker model RT
14 |
15 | use futures::Stream;
16 | use once_cell::sync::Lazy;
17 |
18 | use edgen_core::llm::{CompletionArgs, LLMEndpoint, LLMEndpointError};
19 | use edgen_rt_chat_faker::ChatFakerEndpoint;
20 |
21 | use crate::model::Model;
22 | use crate::util::StoppingStream;
23 |
24 | static ENDPOINT: Lazy<ChatFakerEndpoint> = Lazy::new(Default::default);
25 |
26 | pub async fn chat_completion(
27 | model: Model,
28 | args: CompletionArgs,
29 | ) -> Result<String, LLMEndpointError> {
30 | ENDPOINT
31 | .chat_completions(
32 | model
33 | .file_path()
34 | .map_err(move |e| LLMEndpointError::Load(e.to_string()))?,
35 | args,
36 | )
37 | .await
38 | }
39 |
40 | pub async fn chat_completion_stream(
41 | model: Model,
42 | args: CompletionArgs,
43 | ) -> Result<StoppingStream<Box<dyn Stream<Item = String> + Unpin + Send>>, LLMEndpointError> {
44 | let stream = ENDPOINT
45 | .stream_chat_completions(
46 | model
47 | .file_path()
48 | .map_err(move |e| LLMEndpointError::Load(e.to_string()))?,
49 | args,
50 | )
51 | .await?;
52 |
53 | Ok(StoppingStream::wrap_with_stop_words(
54 | stream,
55 | vec![
56 | "<|ASSISTANT|>".to_string(),
57 | "<|USER|>".to_string(),
58 | "<|TOOL|>".to_string(),
59 | "<|SYSTEM|>".to_string(),
60 | ],
61 | ))
62 | }
63 |
64 | pub async fn embeddings(
65 | model: Model,
66 |     input: Vec<String>,
67 | ) -> Result<Vec<Vec<f32>>, LLMEndpointError> {
68 | ENDPOINT
69 | .embeddings(
70 | model
71 | .file_path()
72 | .map_err(move |e| LLMEndpointError::Load(e.to_string()))?,
73 | input,
74 | )
75 | .await
76 | }
77 |
78 | // Not needed. Just for completeness.
79 | #[allow(dead_code)]
80 | pub async fn reset_environment() {
81 | ENDPOINT.reset()
82 | }
83 |
--------------------------------------------------------------------------------
/docs/src/app/documentation/errors/page.mdx:
--------------------------------------------------------------------------------
1 | export const metadata = {
2 | title: 'Errors',
3 | description:
4 | 'In this guide, we will talk about what happens when something goes wrong while you work with the API.',
5 | }
6 |
7 | # Errors
8 |
9 | In this guide, we will talk about what happens when something goes wrong while you work with the API. Mistakes happen, and mostly they will be yours, not ours. Let's look at some status codes and error types you might encounter. {{ className: 'lead' }}
10 |
11 | You can tell if your request was successful by checking the status code when receiving an API response. If a response comes back unsuccessful, you can use the error type and error message to figure out what has gone wrong and do some rudimentary debugging (before contacting support).
12 |
13 |
14 | Before reaching out to support with an error, please be aware that 99% of all
15 | reported errors are, in fact, user errors. Therefore, please carefully check
16 | your code before contacting Edgen support.
17 |
18 |
19 | ---
20 |
21 | ## Status codes
22 |
23 | Here is a list of the different categories of status codes returned by the Edgen API. Use these to understand if a request was successful.
24 |
25 |
26 |
27 | A 2xx status code indicates a successful response.
28 |
29 |
30 | A 4xx status code indicates a client error — this means it's a _you_
31 | problem.
32 |
33 |
34 | A 5xx status code indicates a server error — you won't be seeing these.
35 |
36 |
37 |
38 | ---
39 |
40 | ## Error types
41 |
42 |
43 |
44 |
45 | Whenever a request is unsuccessful, the Edgen API will return an error response with an error type and message. You can use this information to better understand what has gone wrong and how to fix it. Most of the error messages are pretty helpful and actionable.
46 |
47 | Here is a list of the two error types supported by the Edgen API; use these to understand what you have done wrong.
48 |
49 |
50 |
51 | This means that we made an error, which is highly speculative and unlikely.
52 |
53 |
54 | This means that you made an error, which is much more likely.
55 |
56 |
57 |
58 |
59 |
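60 | As a minimal sketch, here is how a connection failure surfaces in the Python client, mirroring how Edgen's own test suite handles it:
61 |
62 | ```python
63 | from edgen import Edgen, APIConnectionError
64 |
65 | client = Edgen()
66 |
67 | try:
68 |     completion = client.chat.completions.create(
69 |         model="default",
70 |         messages=[{"role": "user", "content": "Hello!"}],
71 |     )
72 | except APIConnectionError:
73 |     # The request never reached the server; is Edgen running?
74 |     print("No connection. Is Edgen running?")
75 | ```
76 |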
16 | Given one or more pieces of input text, generate embeddings.
17 |
18 | ### Required attributes
19 |
20 |
21 |
22 | One or multiple pieces of text from which embeddings will be generated. For each piece of text, one embedding is generated.
23 |
24 |
25 |
26 |
27 |
28 | The model used to generate the embeddings.
29 |
30 |
31 | If the model name is "default", the chat model from the configuration is used (see [Documentation » Configuration](/documentation/configuration) for details).
32 |
33 |
34 | If the model name follows the format repo-owner/repo-name/model-name, the indicated model is used and, if it is not present, it will be downloaded from [huggingface](https://huggingface.co/). If it cannot be downloaded, Edgen responds with an error. Example: "nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.f16.gguf".
35 |
36 |
37 | If the model name contains just a file name, e.g. "my-model.bin", Edgen will try to use the file with this name in the data directory as defined in the configuration. If the file does not exist there, Edgen responds with an error.
38 |
39 |
40 |
41 |
42 |
43 | ### Optional attributes
44 |
45 |
46 |
47 | The format to return the embeddings in. Can be either `float` or `base64`.
48 |
49 |
50 |
51 |
52 |
53 | The number of dimensions the resulting output embeddings should have. Only supported in some models.
54 |
55 |
56 |
57 |
58 |
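59 | As an illustrative sketch only: assuming the Python client exposes an OpenAI-style `embeddings.create` method (an assumption, not confirmed by this page), a request could look like:
60 |
61 | ```python
62 | from edgen import Edgen
63 |
64 | client = Edgen()
65 |
66 | # Hypothetical call: the method name and parameter names are assumed
67 | # from the attribute descriptions above.
68 | embeddings = client.embeddings.create(
69 |     model="nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.f16.gguf",
70 |     input=["The food was delicious", "The service was excellent"],
71 | )
72 | ```
73 |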
129 | )
130 | }
131 |
--------------------------------------------------------------------------------
/crates/edgen_server/src/whisper.rs:
--------------------------------------------------------------------------------
1 | /* Copyright 2023- The Binedge, Lda team. All rights reserved.
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | * http://www.apache.org/licenses/LICENSE-2.0
6 | * Unless required by applicable law or agreed to in writing, software
7 | * distributed under the License is distributed on an "AS IS" BASIS,
8 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | * See the License for the specific language governing permissions and
10 | * limitations under the License.
11 | */
12 |
13 | use once_cell::sync::Lazy;
14 | use uuid::Uuid;
15 |
16 | use edgen_core::whisper::{TranscriptionArgs, WhisperEndpoint, WhisperEndpointError};
17 | use edgen_rt_whisper_cpp::WhisperCppEndpoint;
18 |
19 | use crate::model::Model;
20 |
21 | static ENDPOINT: Lazy<WhisperCppEndpoint> = Lazy::new(Default::default);
22 |
23 | pub async fn create_transcription(
24 | file: &[u8],
25 | model: Model,
26 | language: Option<&str>,
27 | prompt: Option<&str>,
28 |     temperature: Option<f32>,
29 | create_session: bool,
30 |     session: Option<Uuid>,
31 | ) -> Result<(String, Option<Uuid>), WhisperEndpointError> {
32 | let args = TranscriptionArgs {
33 | file: file.to_vec(),
34 | language: language.map(move |s| s.to_string()),
35 | prompt: prompt.map(move |s| s.to_string()),
36 | temperature,
37 | create_session,
38 | session,
39 | };
40 |
41 | ENDPOINT
42 | .transcription(
43 | model
44 | .file_path()
45 | .map_err(move |e| WhisperEndpointError::Load(e.to_string()))?,
46 | args,
47 | )
48 | .await
49 | }
50 |
51 | pub async fn reset_environment() {
52 | ENDPOINT.reset()
53 | }
54 |
55 | #[cfg(test)]
56 | mod tests {
57 | use super::*;
58 | use crate::model::{Model, ModelKind};
59 | use crate::types::Endpoint;
60 | use edgen_core::settings::SETTINGS;
61 | use levenshtein;
62 | use std::path::PathBuf;
63 |
64 | async fn init_settings_for_test() {
65 | SETTINGS
66 | .write()
67 | .await
68 | .init()
69 | .await
70 | .expect("Failed to initialise settings");
71 | }
72 |
73 | fn frost() -> String {
74 | " The woods are lovely, dark and deep, \
75 | but I have promises to keep \
76 | and miles to go before I sleep, \
77 | and miles to go before I sleep."
78 | .to_string()
79 | }
80 |
81 | #[tokio::test]
82 | #[ignore] // this test hangs sometimes
83 | async fn test_create_transcription() {
84 | init_settings_for_test().await;
85 | let model_name = "ggml-distil-small.en.bin".to_string();
86 | let repo = "distil-whisper/distil-small.en".to_string();
87 | let dir = SETTINGS
88 | .read()
89 | .await
90 | .read()
91 | .await
92 | .audio_transcriptions_models_dir
93 | .to_string();
94 | let mut model = Model::new(ModelKind::Whisper, &model_name, &repo, &PathBuf::from(&dir));
95 | assert!(model.preload(Endpoint::AudioTranscriptions).await.is_ok());
96 |
97 | let sound = include_bytes!("../resources/frost.wav");
98 | let response = create_transcription(sound, model, None, None, None, true, None).await;
99 |
100 | assert!(response.is_ok(), "cannot create transcription");
101 |
102 | let expected_text = frost();
103 | let (actual_text, session) = response.unwrap();
104 |
105 | println!("{:?}", session);
106 |
107 | // Calculate Levenshtein distance
108 | let distance = levenshtein::levenshtein(&expected_text, &actual_text);
109 |
110 | // Calculate similarity percentage
111 | let similarity_percentage =
112 | 100.0 - ((distance as f64 / expected_text.len() as f64) * 100.0);
113 |
114 | // Assert that the similarity is at least 90%
115 | assert!(
116 | similarity_percentage >= 90.0,
117 | "Text similarity is less than 90%"
118 | );
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/docs/src/app/api-reference/image/page.mdx:
--------------------------------------------------------------------------------
1 | export const metadata = {
2 | title: 'Image',
3 | description: 'Generate images',
4 | }
5 |
6 | # Image
7 |
8 | Generate images from text. {{ className: 'lead' }}
9 |
10 | ---
11 |
12 |
13 |
14 |
15 | ## Create image {{tag:'POST', label:'http://localhost:33322/v1/image/generations'}}
16 |
17 | Given a text prompt, generate 1 or more images according to the prompt.
18 |
19 | ### Required attributes
20 |
21 |
22 |
23 | A description of the images to be generated.
24 |
25 |
26 |
27 |
28 |
29 | The model used for image generation. (WARNING: at the moment, only "stable-diffusion-2-1" is allowed.)
30 |
31 |
32 | If the model name is "default", the chat model from the configuration is used (see [Documentation » Configuration](/documentation/configuration) for details).
33 |
34 |
35 | If the model name is a valid model name recognized by Edgen, it is what is used.
36 |
37 |
38 |
39 |
40 |
41 |
42 | ### Optional attributes
43 |
44 |
45 |
46 | The width of the generated image.
47 |
48 |
49 |
50 |
51 |
52 | The height of the generated image.
53 |
54 |
55 |
56 |
57 |
58 | The optional unconditional prompt.
59 |
60 |
61 |
62 |
63 |
64 | The number of steps to be used in the diffusion process.
65 |
66 |
67 |
68 |
69 |
70 | The number of images to generate.
71 | Default: 1
72 |
73 |
74 |
75 |
76 |
77 | The random number generator seed to use for the generation.
78 | By default, a random seed is used.
79 |
80 |
81 |
82 |
83 |
84 | The guidance scale to use for generation, i.e., how closely the model should follow the prompt.
85 | Values below 1 disable guidance (the prompt is ignored).
86 |
87 |
88 |
89 |
90 |
91 | The Variational Auto-Encoder scale to use for generation.
92 | Required if `model` is not a pre-made descriptor name.
93 | This value should probably not be set if `model` is a pre-made descriptor name.
94 |
95 |
96 |
97 |
98 |
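99 | As a sketch, a request to this endpoint from Python could look like the following. The JSON field names (`model`, `prompt`, `width`, `height`, `steps`) are assumptions inferred from the attribute descriptions above, not confirmed by this page:
100 |
101 | ```python
102 | import requests
103 |
104 | # Hypothetical request body: field names are inferred from the
105 | # attributes described above.
106 | response = requests.post(
107 |     "http://localhost:33322/v1/image/generations",
108 |     json={
109 |         "model": "stable-diffusion-2-1",
110 |         "prompt": "A rusty robot holding a candle",
111 |         "width": 512,
112 |         "height": 512,
113 |         "steps": 30,
114 |     },
115 | )
116 | response.raise_for_status()
117 | ```
118 |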
63 | )}
64 | {children}
65 |
66 | )
67 | }
68 |
69 | export function Heading({
70 | children,
71 | tag,
72 | label,
73 | level,
74 | anchor = true,
75 | ...props
76 | }: React.ComponentPropsWithoutRef<`h${Level}`> & {
77 | id: string
78 | tag?: string
79 | label?: string
80 | level?: Level
81 | anchor?: boolean
82 | }) {
83 | level = level ?? (2 as Level)
84 | let Component = `h${level}` as 'h2' | 'h3'
85 | let ref = useRef(null)
86 | let registerHeading = useSectionStore((s) => s.registerHeading)
87 |
88 | let inView = useInView(ref, {
89 | margin: `${remToPx(-3.5)}px 0px 0px 0px`,
90 | amount: 'all',
91 | })
92 |
93 | useEffect(() => {
94 | if (level === 2) {
95 | registerHeading({ id: props.id, ref, offsetRem: tag || label ? 8 : 6 })
96 | }
97 | })
98 |
99 | return (
100 | <>
101 |
102 |
107 | {anchor ? (
108 |
109 | {children}
110 |
111 | ) : (
112 | children
113 | )}
114 |
115 | >
116 | )
117 | }
118 |
119 | export function HeadingNoEyebrow({
120 | children,
121 | tag,
122 | label,
123 | level,
124 | anchor = true,
125 | ...props
126 | }: React.ComponentPropsWithoutRef<`h${Level}`> & {
127 | id: string
128 | tag?: string
129 | label?: string
130 | level?: Level
131 | anchor?: boolean
132 | }) {
133 | level = level ?? (2 as Level)
134 | let Component = `h${level}` as 'h2' | 'h3'
135 | let ref = useRef(null)
136 | let registerHeading = useSectionStore((s) => s.registerHeading)
137 |
138 | let inView = useInView(ref, {
139 | margin: `${remToPx(-3.5)}px 0px 0px 0px`,
140 | amount: 'all',
141 | })
142 |
143 | useEffect(() => {
144 | if (level === 2) {
145 | registerHeading({ id: props.id, ref, offsetRem: tag || label ? 8 : 6 })
146 | }
147 | })
148 |
149 | return (
150 | <>
151 |
156 | {
157 | {children}
158 | }
159 |
160 | >
161 | )
162 | }
163 |
--------------------------------------------------------------------------------
/docs/src/components/EdgenAI_dark_colored.svg:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/docs/src/app/documentation/configuration/page.mdx:
--------------------------------------------------------------------------------
1 | export const metadata = {
2 | title: 'Configuration',
3 | description: 'Edgen configuration.',
4 | }
5 |
6 | # Configuration
7 | The Edgen configuration. It is read from a file where you can define your models' locations, select which model to use for each endpoint, the number of threads Edgen can use and more. {{ className: 'lead' }}
8 |
9 | | Config Name | Description | Default Value |
10 | | --------------------------------- | ------------------------------------------ | ------------------------------------------------ |
11 | | `threads`                         | Number of CPU threads for processing        | -1                                               |
12 | | `default_uri` | Default URI for communication | http://127.0.0.1:33322 |
13 | | `chat_completions_models_dir` | Directory for chat completions models | `/edgen/models/chat/completions` |
14 | | `chat_completions_model_name` | Name of chat completions model | neural-chat-7b-v3-3.Q4_K_M.gguf |
15 | | `chat_completions_model_repo` | HuggingFace repo for chat completions | TheBloke/neural-chat-7B-v3-3-GGUF |
16 | | `audio_transcriptions_models_dir` | Directory for audio transcriptions models | `/edgen/models/audio/transcriptions` |
17 | | `audio_transcriptions_model_name` | Name of audio transcriptions model | ggml-distil-small.en.bin |
18 | | `audio_transcriptions_model_repo` | HuggingFace repo for audio transcriptions | distil-whisper/distil-small.en |
19 | | `gpu_policy` | Policy to choose how a model gets loaded | !always_device |
20 | | `max_request_size` | Maximum size a request can have | 100 Megabytes |
21 |
22 | ## Configuration Paths for DATA_DIR
23 |
24 | | Platform | Value | Example |
25 | | -------- | ----------------------------------------------------------------- | ------------------------------------------------- |
26 | | Linux | `$XDG_DATA_HOME/_project_path_` or `$HOME/.local/share/_project_path_` | `/home/Alex/.local/share/edgen` |
27 | | macOS | `$HOME/Library/Application Support/_project_path_` | `/Users/Alex/Library/Application Support/com.EdgenAI.Edgen` |
28 | | Windows | `{FOLDERID_RoamingAppData}\_project_path_\data` | `C:\Users\Alex\AppData\Roaming\EdgenAI\Edgen\data` |
29 |
30 | ## Model Name and Repo
31 |
32 | Model name and repo define which model to use and how to obtain it automatically. If you download the model yourself, just copy it to the corresponding model directory and set the `model_name` setting to the file name. In this case, the repo is purely informative, for instance:
33 |
34 | | Config Name | Your Value |
35 | | --------------------------------- | ------------------------------------------ |
36 | | `chat_completions_models_dir` | `/edgen/models/chat/completions` |
37 | | `chat_completions_model_name` | my-fancy-model |
38 | | `chat_completions_model_repo` | ModelMaster/fancy-model-1.v1-1.GGUF |
39 |
40 | If you prefer to let Edgen manage your models, you need to fill in the correct model name and repo, e.g.
41 |
42 | | Config Name | Your Value |
43 | | --------------------------------- | ------------------------------------------ |
44 | | `chat_completions_models_dir` | `/edgen/models/chat/completions` |
45 | | `chat_completions_model_name` | fancy-model-1.v1-1.gguf |
46 | | `chat_completions_model_repo` | ModelMaster/fancy-model-1.v1-1.GGUF |
47 |
48 | In this case, if the model does not exist in the model directory, Edgen will automatically download it for you. You can use the model manager ([API Reference » Models](/api-reference/models)) to inspect and delete automatically downloaded models.
49 |
50 | ## GPU policies
51 |
52 | Edgen supports the following policies, each with their own sub-settings:
53 |
54 | - `!always_device` - Models will always get loaded to a GPU.
55 |   - `overflow_to_cpu` - If true, when a model can't be loaded to a GPU, it gets loaded to system memory instead. Otherwise, Edgen will free GPU memory until the model can be loaded. **WARNING**: neither of these systems is currently implemented.
56 | - `!always_cpu` - Models will always get loaded to system memory.
57 |   - `overflow_to_device` - If true, when a model can't be loaded to system memory, it gets loaded to a GPU instead. Otherwise, Edgen will free system memory until the model can be loaded. **WARNING**: neither of these systems is currently implemented.
58 |
59 |
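60 | ## Example
61 |
62 | The snippet below is a sketch of a configuration file built from the settings above. It assumes YAML syntax; the exact value format of `max_request_size` and the sub-setting layout of `gpu_policy` may differ in your generated file, so treat this as illustrative:
63 |
64 | ```yaml
65 | threads: -1
66 | default_uri: http://127.0.0.1:33322
67 | chat_completions_models_dir: /edgen/models/chat/completions
68 | chat_completions_model_name: neural-chat-7b-v3-3.Q4_K_M.gguf
69 | chat_completions_model_repo: TheBloke/neural-chat-7B-v3-3-GGUF
70 | audio_transcriptions_models_dir: /edgen/models/audio/transcriptions
71 | audio_transcriptions_model_name: ggml-distil-small.en.bin
72 | audio_transcriptions_model_repo: distil-whisper/distil-small.en
73 | gpu_policy: !always_device
74 |   overflow_to_cpu: true
75 | ```
76 |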
--------------------------------------------------------------------------------
/crates/edgen_async_compat/src/lib.rs:
--------------------------------------------------------------------------------
1 | /* Copyright 2023- The Binedge, Lda team. All rights reserved.
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | * http://www.apache.org/licenses/LICENSE-2.0
6 | * Unless required by applicable law or agreed to in writing, software
7 | * distributed under the License is distributed on an "AS IS" BASIS,
8 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | * See the License for the specific language governing permissions and
10 | * limitations under the License.
11 | */
12 |
13 | //! Shims around [`smol`][smol], [`tokio`][tokio], and [`glommio`] to provide a unified interface
14 | //! for asynchronous programming.
15 | //!
16 | //! This crate exports [`spawn`], [`spawn_local`], and [`unblock`] functions that will
17 | //! defer to the appropriate runtime depending on the feature flags enabled.
18 | //!
19 | //! The following feature flags are available and **mutually exclusive**:
20 | //!
21 | //! - `runtime-smol`: Use [`smol`] as the runtime.
22 | //! - `runtime-tokio`: Use [`tokio`] as the runtime.
23 | //! - `runtime-glommio`: Use [`glommio`] as the runtime.
24 | //!
25 | //! You must enable **exactly one** of these. Failing to enable any, or enabling more than one,
26 | //! will cause the crate to fail to compile.
27 | //!
28 | //! [smol]: https://docs.rs/smol
29 | //! [tokio]: https://docs.rs/tokio
30 | //! [glommio]: https://docs.rs/glommio
31 |
32 | #![forbid(unsafe_code)]
33 | #![warn(missing_docs)]
34 |
35 | use core::future::Future;
36 |
37 | static_assertions::assert_cfg!(
38 | any(
39 | all(feature = "runtime-smol", not(any(feature = "runtime-tokio", feature = "runtime-glommio"))),
40 | all(feature = "runtime-tokio", not(any(feature = "runtime-smol", feature = "runtime-glommio"))),
41 | all(feature = "runtime-glommio", not(any(feature = "runtime-smol", feature = "runtime-tokio"))),
42 | ),
43 | "You must enable exactly one of the `runtime-smol`, `runtime-tokio`, or `runtime-glommio` feature flags."
44 | );
45 |
46 | /// Spawns a future onto the current executor, causing it to start executing almost immediately.
47 | ///
48 | /// This will automatically select for `smol`, `tokio`, or `glommio` depending on the feature
49 | /// flags enabled.
50 | pub async fn spawn<F>(future: F)
51 | where
52 | F: Future