├── .gitignore ├── Cargo.toml ├── Dioxus.toml ├── LICENSE ├── README.md ├── index.html ├── input.css ├── models ├── flux │ ├── Cargo.toml │ └── src │ │ ├── flux.py │ │ └── main.rs ├── llava │ ├── Cargo.toml │ └── src │ │ ├── img.txt │ │ ├── llavanext.py │ │ ├── main.rs │ │ └── test.py ├── moonmodel │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── pyworker │ ├── Cargo.toml │ ├── build.rs │ └── src │ │ ├── main.rs │ │ └── qwen2.py └── qwen │ ├── Cargo.toml │ ├── src │ └── main.rs │ └── test.py ├── package.json ├── public ├── favicon.ico ├── header.svg ├── login.js ├── manifest.json ├── sw.js └── tailwind.css ├── server.config ├── src ├── authorization.rs ├── data.rs ├── ipc.rs ├── lib.rs ├── llama.rs ├── main.rs ├── master_server.rs ├── master_state.rs ├── model.rs ├── phi3.rs ├── token_output_stream.rs ├── web.rs ├── web_state.rs └── worker_server.rs ├── tailwind.config.js └── youtube--play.jpg /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | /dist/ 5 | /static/ 6 | /.dioxus/ 7 | /node_modules/ 8 | /.vscode/ 9 | /raw.html 10 | /out.html 11 | /Cargo.lock 12 | /package-lock.json 13 | **/target/ 14 | **/Cargo.lock 15 | **/__pycache__ 16 | # These are backup files generated by rustfmt 17 | **/*.rs.bk 18 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "moonweb" 3 | version = "0.1.0" 4 | authors = ["Lyn-liyuan "] 5 | edition = "2021" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | dioxus = { version = "0.5", features = ["web", "router"] } 11 | image-base64-wasm = "0.5.0" 12 | # Debug 13 | dioxus-logger = "0.5.1" 14 | manganis = "0.2.2" 15 | serde = {version = "1.0.203", features = ["derive"] } 16 | serde_json = "1.0" 17 | reqwest = {version = "0.12.5", features = ["json","stream"] } 18 | structopt = "0.3" 19 | clap = { version = "3.0", features = ["derive"] } 20 | lazy_static = "1.4" 21 | comrak = "0.24" 22 | web-sys ={ version = "0.3.69", features = ["Window", "Document", "Element","HtmlInputElement"] } 23 | eventsource-stream = "0.2.3" 24 | futures = "0.3.30" 25 | wasm-bindgen = "0.2.92" 26 | js-sys = "0.3.69" 27 | sqids = "0.4.1" 28 | md5 = "0.7.0" 29 | chrono = "0.4.38" 30 | headers = "0.4.0" 31 | axum-extra = { version = "0.9.3",features = ["typed-header"]} 32 | axum-auth = "0.7.0" 33 | 34 | 35 | [target.'cfg(not(target_arch = "wasm32"))'.dependencies] 36 | candle-core = { git = "https://github.com/huggingface/candle.git", features = ["cuda"] } 37 | candle-transformers = { git = "https://github.com/huggingface/candle.git", features = ["cuda"] } 38 | candle-nn = { git = "https://github.com/huggingface/candle.git",features = ["cuda"]} 39 | tokenizers = { version = "0.19.1", features = ["onig"] } 40 | cpal= { version = "0.15.2", optional = true } 41 | csv = "1.3.0" 42 | half = { version = "2.4.1", optional = true } 43 | hf-hub = {version = "0.3.2", features = ["tokio"]} 44 | anyhow = "1.0.86" 45 | tokio = { version = "1.38.0", features = ["full"] } 46 | axum = "0.7.5" 47 | dashmap = "5.5.3" 48 | signal-hook = "0.3.17" 49 | ipc-channel = "0.18.1" 50 | async-stream = "0.3.5" 51 | tokio-stream = "0.1.15" 52 | tower = "0.4.13" 53 | tower-http = {version = "0.5.2", features = ["fs"]} 54 | 55 | 56 | [dev-dependencies] 57 | clap = "*" 58 
| 59 | 60 | -------------------------------------------------------------------------------- /Dioxus.toml: -------------------------------------------------------------------------------- 1 | [application] 2 | 3 | # App (Project) Name 4 | name = "moonweb" 5 | 6 | # Dioxus App Default Platform 7 | # desktop, web 8 | default_platform = "web" 9 | 10 | # `build` & `serve` dist path 11 | out_dir = "dist" 12 | 13 | # resource (assets) file folder 14 | asset_dir = "public" 15 | 16 | [web.app] 17 | 18 | # HTML title tag content 19 | title = "moonweb" 20 | 21 | 22 | 23 | [web.watcher] 24 | 25 | # when watcher trigger, regenerate the `index.html` 26 | reload_html = true 27 | 28 | # which files or dirs will be watcher monitoring 29 | watch_path = ["src", "public"] 30 | 31 | # include `assets` in web platform 32 | [web.resource] 33 | 34 | # CSS style file 35 | 36 | style = ["/tailwind.css"] 37 | 38 | # Javascript code file 39 | script = ["/sw.js","/login.js"] 40 | 41 | [web.resource.dev] 42 | 43 | # Javascript code file 44 | # serve: [dev-server] only 45 | script = [] 46 | 47 | [[web.proxy]] 48 | backend = "http://127.0.0.1:3081/api/" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Yuan lI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Moonweb: LLM Chat Tool** 2 | 3 | Welcome to Moonweb, a web chat tool developed with Rust, Dioxus, and Candle frameworks that supports a variety of open-source Large Language Models (LLMs). This project aims to provide a dynamic and flexible platform for integrating and testing different LLMs. 4 | 5 | **Features** 6 | 7 | - **Multi-Model Support**: Seamless integration of various open-source LLMs. 8 | - **Dynamic Model Loading**: Supports dynamic loading and unloading of models at runtime. 9 | - **Independent Process Isolation**: Each model runs in an independent process, providing services through ipc_channel, ensuring stability and responsiveness. 10 | - **Web Interface**: A responsive and user-friendly web interface built with the Dioxus framework. It supports SSE(Server send event). 11 | - **Open Source**: Fully open source, encouraging community contributions and customization. 
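The process-isolation design is easiest to see from the worker side. Below is a minimal sketch of a worker's main loop, modeled on `models/moonmodel/src/main.rs` later in this repository; the argument parsing is simplified here and the reply text is a placeholder:

```rust
use moonweb::data::Request;
use moonweb::ipc::{accept, OutputStream};

fn main() {
    // The master spawns each worker with a one-shot IPC name to call back on
    // (the real workers take it as an --ipc-name flag parsed by clap).
    let ipc_name = std::env::args().nth(1).expect("missing ipc name");
    let (receiver, sender) = accept(ipc_name);
    loop {
        let msg = receiver.recv().unwrap();
        if let Ok(req) = serde_json::from_str::<Request>(msg.as_str()) {
            // The master stops a worker (the /unload command) with cmd = "QUIT".
            if req.cmd.eq("QUIT") {
                break;
            }
            // Stream the reply back chunk by chunk; end() closes the stream
            // (the Python workers send the <|endoftext|> sentinel explicitly).
            sender.write("placeholder reply".to_string()).unwrap();
            sender.end().unwrap();
        }
    }
}
```

The master relays these chunks to the browser over SSE, which is what makes token-by-token output appear in the chat window.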
12 |
13 | The model services that have been implemented are as follows:
14 |
15 | - **meta-llama/Meta-Llama-3-8B-Instruct**
16 | - **lmms-lab/llama3-llava-next-8b**
17 | - **Qwen/Qwen2-7B-Instruct**
18 | - **Qwen/Qwen2-1.5B-Instruct**
19 | - **microsoft/Phi-3-medium-4k-instruct**
20 |
21 |
22 | [![Moonweb Screen Recording Video](https://github.com/Lyn-liyuan/moonweb/blob/main/youtube--play.jpg?raw=true)](https://youtu.be/AfdswX82FOo "Moonweb Screen Recording Video")
23 |
24 | **Quick Start**
25 |
26 | 1. **Install Rust**: Ensure that Rust is installed on your system. Visit the [Rust official website](https://www.rust-lang.org/) for installation instructions.
27 | 2. **Install Dioxus**: Dioxus is a React- and Vue-like web framework. See the [Dioxus documentation](https://dioxuslabs.com/learn/0.5/getting_started).
28 | 3. **Clone the Repository**: Clone the Moonweb project to your local machine using Git.
29 |
30 |        git clone https://github.com/Lyn-liyuan/moonweb.git
31 |
32 | 4. **Build the Project**: Navigate to the project directory and build the project using Cargo.
33 |
34 | ```shell
35 | cd moonweb
36 | cargo build
37 | ```
38 | 5. **Run the Services**: Start the LLM model services.
39 |
40 |    To start model services from the models directory with the /load command, you must compile those services first: navigate to each directory containing a Cargo.toml file and run `cargo build --release`. After compilation completes, set the program field in the server.config file to the path of the compiled model-service executable.
41 |
42 |    Before compiling the pyworker model service, specify the Python interpreter that the embedded Python code should use via the PYO3_PYTHON environment variable. For example, activate your Python environment with `conda activate my_env`, then run `export PYO3_PYTHON=$(which python)`.
43 |
44 | ```shell
45 | cargo run --release -- --server master
46 | ```
47 | 6. **Build the Web**: Compile the Rust frontend to WASM.
48 | ```shell
49 | dx build --release
50 | ```
51 | **Architecture Overview**
52 |
53 | - **Frontend**: The web interface built with Dioxus, responsible for displaying chat content and user input.
54 | - **Backend**: Rust backend services that handle web requests and communicate with the LLM model services.
55 | - **Model Services**: Each LLM model runs as an independent process, communicating with the backend service via ipc_channel.
56 |
57 | **Model Integration**
58 |
59 | To integrate a new LLM model, follow these steps:
60 |
61 | 1. Create a model service process that implements the ipc_channel communication protocol.
62 | 2. Edit the server.config file and add the server's config to the servers field.
63 | 3. In the web interface, send `/load model_id` to the robot.
64 |
65 | **Update Records**
66 | - **June 25, 2024**: Implemented dynamic loading of model services. A model service can be an independent program; as long as it complies with the IPC communication specification, it can be started with the /load model_id command on the web page.
67 | - **July 2, 2024**: Added the Qwen2 model, added support for Python model services, and implemented the Qwen/Qwen2-7B-Instruct model service in Python.
68 | - **July 4, 2024**: Implemented the /unload command to stop a model service process. For example, entering /unload Qwen/Qwen2-1.5B-Instruct in the text box of the web interface stops the corresponding model process.
69 | - **July 6, 2024**: Switched the HTTP server to axum, so dx serve is no longer needed to start the server. Added syntax highlighting with the highlight.js library.
70 | - **July 11, 2024**: Implemented the llava model server.
71 | - **July 19, 2024**: Added conversation management, using the browser's local storage to store historical conversations.
72 | - **July 23, 2024**: Removed the temperature and top_p settings from the configuration dialog, so that each session can configure its own system prompt.
73 | - **July 29, 2024**: Implemented user login and conversation deletion.
74 | - **Aug 4, 2024**: Implemented the black-forest-labs/FLUX.1-schnell model service. FLUX.1 [schnell] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions.
75 |
76 | **Contributing**
77 |
78 | We welcome contributions in any form, including but not limited to:
79 |
80 | - Code submissions
81 | - Feature requests
82 | - Bug reports
83 | - Documentation improvements
84 |
85 |
86 | **License**
87 |
88 | This project is licensed under the "MIT License".
89 |
90 | **Contact**
91 |
92 | - Project Maintainer: [LYN]
93 | - Email: [yuanli13@asu.edu]
94 | - GitHub: [@LYN](https://github.com/Lyn-liyuan)
95 |
-------------------------------------------------------------------------------- /index.html: --------------------------------------------------------------------------------
1 |
2 |
3 |
4 | {app_title}
5 |
6 |
7 |
8 |
9 |
10 | {style_include}
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | 20 | 28 | {script_include} 29 | 30 | 31 | -------------------------------------------------------------------------------- /input.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; -------------------------------------------------------------------------------- /models/flux/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "flux" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | clap = { version = "3.0", features = ["derive"] } 10 | ipc-channel = "0.18.1" 11 | pyo3 = "0.21.1" 12 | moonweb ={ path = "../../"} 13 | 14 | [build-dependencies] 15 | pyo3-build-config = { git = "https://github.com/pyo3/pyo3", features = ["resolve-config"] } -------------------------------------------------------------------------------- /models/flux/src/flux.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from diffusers import FluxPipeline 3 | from PIL import Image 4 | import base64 5 | import io 6 | import requests 7 | import copy 8 | import torch 9 | import json 10 | import time 11 | import random 12 | import string 13 | 14 | from moonipc import IpcChannel; 15 | 16 | def generate_image_filename(): 17 | timestamp = str(int(time.time())) 18 | random_chars = ''.join(random.choices(string.ascii_uppercase + string.digits, k=6)) 19 | return f"{timestamp}{random_chars}.png" 20 | 21 | pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16) 22 | pipe.enable_model_cpu_offload() 23 | 24 | 25 | 26 | def run(ipc_name,model_id = "black-forest-labs/FLUX.1-schnell"): 27 | ipc = IpcChannel(ipc_name); 28 | print(f"{model_id} server start!") 29 | while True: 30 | request = json.loads(ipc.recv()) 31 | 32 | if request['cmd'] == "QUIT": 33 | break 34 | msg = request['msg_list'][-1] 35 | prompt = msg['content'] 36 | image = pipe( 37 | prompt, 38 | guidance_scale=0.0, 39 | output_type="pil", 40 | num_inference_steps=4, 41 | max_sequence_length=256, 42 | generator=torch.Generator("cpu").manual_seed(0) 43 | ).images[0] 44 | filename = generate_image_filename() 45 | image.save(f"dist/images/{filename}") 46 | ipc.send(f"![{prompt}](/images/{filename})") 47 | ipc.send("<|endoftext|>") 48 | 49 | -------------------------------------------------------------------------------- /models/flux/src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use ipc_channel::ipc::{IpcReceiver, IpcSender}; 3 | use pyo3::prelude::*; 4 | use moonweb::ipc::accept; 5 | 6 | 7 | #[pyclass] 8 | struct IpcChannel { 9 | sender: IpcSender, 10 | receiver: IpcReceiver, 11 | } 12 | #[pymethods] 13 | impl IpcChannel { 14 | #[new] 15 | fn new(ipc_name: String) -> Self { 16 | let (receiver, sender) = accept(ipc_name); 17 | IpcChannel { sender, receiver } 18 | } 19 | 20 | fn send(&self, msg: &str) -> PyResult<()> { 21 | self.sender 22 | .send(msg.to_string()) 23 | .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string())) 24 | } 25 | 26 | fn recv(&self) -> PyResult { 27 | self.receiver 28 | .recv() 29 | .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string())) 30 | } 31 | } 32 | 33 | #[pymodule] 34 | fn moonipc(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { 35 | m.add_class::()?; 36 | 
Ok(()) 37 | } 38 | 39 | #[derive(Parser, Debug)] 40 | #[clap(author, version, about)] 41 | struct Args { 42 | #[clap(short, long)] 43 | server: Option, 44 | #[clap(short, long)] 45 | ipc_name: Option, 46 | #[clap(short, long)] 47 | model_id: Option, 48 | 49 | #[clap(short = 'h', long)] 50 | temp: Option, 51 | 52 | #[clap(short = 't', long)] 53 | top_p: Option, 54 | } 55 | 56 | fn main() { 57 | let args = Args::parse(); 58 | let ipc_name = args.ipc_name.unwrap(); 59 | let model_id = args.model_id.unwrap(); 60 | 61 | let code = include_str!("flux.py"); 62 | 63 | pyo3::append_to_inittab!(moonipc); 64 | pyo3::prepare_freethreaded_python(); 65 | let args = (ipc_name.as_str(), model_id.as_str()); 66 | Python::with_gil(|py| { 67 | let activators = PyModule::from_code_bound(py, code, "flux.py", "flux").unwrap(); 68 | activators.getattr("run").unwrap().call1(args).unwrap(); 69 | }); 70 | } 71 | -------------------------------------------------------------------------------- /models/llava/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "llava" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | clap = { version = "3.0", features = ["derive"] } 10 | ipc-channel = "0.18.1" 11 | pyo3 = "0.21.1" 12 | moonweb ={ path = "../../"} 13 | 14 | [build-dependencies] 15 | pyo3-build-config = { git = "https://github.com/pyo3/pyo3", features = ["resolve-config"] } -------------------------------------------------------------------------------- /models/llava/src/llavanext.py: -------------------------------------------------------------------------------- 1 | from llava.model.builder import load_pretrained_model 2 | from llava.mm_utils import get_model_name_from_path, process_images, tokenizer_image_token 3 | from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN, IGNORE_INDEX 4 | from llava.conversation import conv_templates, SeparatorStyle 5 | 6 | from PIL import Image 7 | import base64 8 | import io 9 | import requests 10 | import copy 11 | import torch 12 | import json 13 | from transformers import TextStreamer,AutoTokenizer 14 | from moonipc import IpcChannel; 15 | 16 | 17 | class IpcStreamer(TextStreamer): 18 | def __init__( 19 | self, tokenizer: AutoTokenizer, skip_prompt: bool = False, ipc: IpcChannel = None, **decode_kwargs, 20 | ): 21 | super().__init__(tokenizer, skip_prompt, **decode_kwargs) 22 | self.ipc = ipc 23 | 24 | def on_finalized_text(self, text: str, stream_end: bool = False): 25 | self.ipc.send(text) 26 | if stream_end: 27 | self.ipc.send("<|endoftext|>") 28 | 29 | def run(ipc_name,model_id = "lmms-lab/llama3-llava-next-8b"): 30 | 31 | ipc = IpcChannel(ipc_name); 32 | model_name = "llava_llama3" 33 | device = "cuda" 34 | device_map = "auto" 35 | tokenizer, model, image_processor, max_length = load_pretrained_model(model_id, None, model_name, device_map=device_map,repeat_penalty=1.8,repeat_last_n=64) 36 | streamer = IpcStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True,ipc=ipc) 37 | conv_template = "llava_llama_3" 38 | 39 | conv = copy.deepcopy(conv_templates[conv_template]) 40 | 41 | print(f"{model_id} server start!") 42 | while True: 43 | request = json.loads(ipc.recv()) 44 | 45 | if request['cmd'] == "QUIT": 46 | break 47 | conv.system = request['system_prompt'] 48 | conv.messages.clear() 49 | image_list = [] 50 | has_image = False 
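        # Rebuild the llava conversation from this request: image attachments are
        # decoded from base64 data-URLs into PIL images, and the next user text
        # turn is prefixed with the <image> placeholder token so the model knows
        # where each picture belongs.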
51 | for msg in request['msg_list']: 52 | if msg['img'] is not None: 53 | base64_string = msg['img'] 54 | if base64_string.startswith('data:image'): 55 | base64_string = base64_string.split(',')[1] 56 | image_data = base64.b64decode(base64_string) 57 | image = Image.open(io.BytesIO(image_data)) 58 | image_list.append(image) 59 | has_image = True 60 | else: 61 | image_tag = "" 62 | if has_image: 63 | image_tag = DEFAULT_IMAGE_TOKEN+"\n" 64 | has_image = False 65 | if msg['role']=='User': 66 | conv.append_message(conv.roles[0], image_tag+msg['content']) 67 | else: 68 | conv.append_message(conv.roles[1], msg['content']) 69 | 70 | conv.append_message(conv.roles[1], None) 71 | prompt_question = conv.get_prompt() 72 | print(prompt_question) 73 | input_ids = tokenizer_image_token(prompt_question, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device) 74 | 75 | image_tensor = process_images(image_list, image_processor, model.config) 76 | image_tensor = [_image.to(dtype=torch.float16, device=device) for _image in image_tensor] 77 | image_sizes = [image.size for image in image_list] 78 | 79 | cont = model.generate( 80 | input_ids, 81 | images=image_tensor, 82 | image_sizes=image_sizes, 83 | do_sample=True, 84 | top_p=0.95, 85 | temperature=0.5, 86 | pad_token_id=tokenizer.eos_token_id, 87 | max_new_tokens=256, 88 | streamer = streamer, 89 | ) 90 | -------------------------------------------------------------------------------- /models/llava/src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::*; 2 | use ipc_channel::ipc::{IpcReceiver, IpcSender}; 3 | use pyo3::prelude::*; 4 | use moonweb::ipc::accept; 5 | 6 | 7 | #[pyclass] 8 | struct IpcChannel { 9 | sender: IpcSender, 10 | receiver: IpcReceiver, 11 | } 12 | 13 | #[pymethods] 14 | impl IpcChannel { 15 | #[new] 16 | fn new(ipc_name: String) -> Self { 17 | let (receiver, sender) = accept(ipc_name); 18 | IpcChannel { sender, receiver } 19 | } 20 | 21 | fn send(&self, msg: &str) -> PyResult<()> { 22 | self.sender 23 | .send(msg.to_string()) 24 | .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string())) 25 | } 26 | 27 | fn recv(&self) -> PyResult { 28 | self.receiver 29 | .recv() 30 | .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string())) 31 | } 32 | } 33 | 34 | #[pymodule] 35 | fn moonipc(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { 36 | m.add_class::()?; 37 | Ok(()) 38 | } 39 | 40 | #[derive(Parser, Debug)] 41 | #[clap(author, version, about)] 42 | struct Args { 43 | #[clap(short, long)] 44 | server: Option, 45 | #[clap(short, long)] 46 | ipc_name: Option, 47 | #[clap(short, long)] 48 | model_id: Option, 49 | 50 | #[clap(short = 'h', long)] 51 | temp: Option, 52 | 53 | #[clap(short = 't', long)] 54 | top_p: Option, 55 | } 56 | 57 | fn main() { 58 | let args = Args::parse(); 59 | let ipc_name = args.ipc_name.unwrap(); 60 | let model_id = args.model_id.unwrap(); 61 | 62 | let code = include_str!("llavanext.py"); 63 | 64 | pyo3::append_to_inittab!(moonipc); 65 | pyo3::prepare_freethreaded_python(); 66 | let args = (ipc_name.as_str(), model_id.as_str()); 67 | Python::with_gil(|py| { 68 | let activators = PyModule::from_code_bound(py, code, "llavanext.py", "llavanext").unwrap(); 69 | activators.getattr("run").unwrap().call1(args).unwrap(); 70 | }); 71 | } 72 | -------------------------------------------------------------------------------- /models/llava/src/test.py: 
-------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import base64 3 | import io 4 | 5 | with open("img.txt") as f : 6 | base64_string = f.read() 7 | if base64_string.startswith('data:image'): 8 | base64_string = base64_string.split(',')[1] 9 | image_data = base64.b64decode(base64_string) 10 | image = Image.open(io.BytesIO(image_data)) -------------------------------------------------------------------------------- /models/moonmodel/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "moonmodel" 3 | version = "0.1.0" 4 | authors = ["Lyn-liyuan "] 5 | edition = "2021" 6 | 7 | [dependencies] 8 | moonweb ={ path = "../../"} 9 | clap = { version = "3.0", features = ["derive"] } 10 | serde = {version = "1.0.203", features = ["derive"] } 11 | serde_json = "1.0" -------------------------------------------------------------------------------- /models/moonmodel/src/main.rs: -------------------------------------------------------------------------------- 1 | use moonweb::ipc::{accept,OutputStream}; 2 | use moonweb::data::Request; 3 | use clap::*; 4 | #[derive(Parser, Debug)] 5 | #[clap(author, version, about, long_about = None)] 6 | struct Args { 7 | #[clap(short, long)] 8 | server: Option, 9 | 10 | #[clap(short, long)] 11 | ipc_name: Option, 12 | 13 | #[clap(short, long)] 14 | model_id: Option, 15 | 16 | #[clap(short='h', long)] 17 | temp: Option, 18 | 19 | #[clap(short='t', long)] 20 | top_p: Option, 21 | 22 | } 23 | 24 | fn main() { 25 | let args = Args::parse(); 26 | let ipc_name = args.ipc_name.unwrap(); 27 | let model_id = args.model_id.unwrap(); 28 | let (receiver,sender) = accept(ipc_name); 29 | println!("{} server start!",model_id); 30 | loop { 31 | let msg = receiver.recv().unwrap(); 32 | if let Ok(req) = serde_json::from_str::(msg.as_str()) { 33 | if req.cmd.eq("QUIT") { 34 | break; 35 | } 36 | let response = format!("{} recv {:?}",model_id,req.msg_list); 37 | for char in response.chars() { 38 | sender.write(format!("{}",char)).unwrap(); 39 | } 40 | sender.end().unwrap(); 41 | } 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /models/pyworker/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pyworker" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | clap = { version = "3.0", features = ["derive"] } 10 | ipc-channel = "0.18.1" 11 | pyo3 = "0.21.1" 12 | 13 | [build-dependencies] 14 | pyo3-build-config = { git = "https://github.com/pyo3/pyo3", features = ["resolve-config"] } 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /models/pyworker/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | // Configure pyo3 to use the correct Python interpreter 3 | pyo3_build_config::use_pyo3_cfgs(); 4 | //println!("cargo:rustc-link-search=native=/home/lyn/miniconda3/envs/torch/lib"); 5 | } 6 | -------------------------------------------------------------------------------- /models/pyworker/src/main.rs: -------------------------------------------------------------------------------- 1 | 2 | use pyo3::prelude::*; 3 | use clap::*; 4 | 5 | use ipc_channel::ipc::{self,IpcSender, IpcReceiver}; 6 | 7 | fn accept(ipc_name: String) -> 
(IpcReceiver<String>, IpcSender<String>) {
8 |     let (client_sender, receiver): (IpcSender<String>, IpcReceiver<String>) = ipc::channel().unwrap();
9 |     let connector = IpcSender::connect(ipc_name.clone()).expect(format!("Failed to connect {}",ipc_name).as_str());
10 |     connector.send(client_sender).expect("Failed to send client sender");
11 |     let (sender, client_receiver): (IpcSender<String>, IpcReceiver<String>) = ipc::channel().unwrap();
12 |     let client_name = receiver.recv().expect("Failed to recv!");
13 |     let connector = IpcSender::connect(client_name.clone()).expect(format!("Failed to connect client: {}",client_name).as_str());
14 |     connector.send(client_receiver).expect("Failed to send client receiver");
15 |     (receiver, sender)
16 | }
17 |
18 | #[pyclass]
19 | struct IpcChannel {
20 |     sender: IpcSender<String>,
21 |     receiver: IpcReceiver<String>,
22 | }
23 |
24 | #[pymethods]
25 | impl IpcChannel {
26 |     #[new]
27 |     fn new(ipc_name: String) -> Self {
28 |         let (receiver, sender) = accept(ipc_name);
29 |         IpcChannel { sender, receiver }
30 |     }
31 |
32 |     fn send(&self, msg: &str) -> PyResult<()> {
33 |         self.sender.send(msg.to_string()).map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string()))
34 |     }
35 |
36 |     fn recv(&self) -> PyResult<String> {
37 |         self.receiver.recv().map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string()))
38 |     }
39 | }
40 |
41 | #[pymodule]
42 | fn moonipc(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
43 |     m.add_class::<IpcChannel>()?;
44 |     Ok(())
45 | }
46 |
47 | #[derive(Parser, Debug)]
48 | #[clap(author, version, about, long_about = None)]
49 | struct Args {
50 |     #[clap(short, long)]
51 |     server: Option<String>,
52 |
53 |     #[clap(short, long)]
54 |     ipc_name: Option<String>,
55 |
56 |     #[clap(short, long)]
57 |     model_id: Option<String>,
58 |
59 |     #[clap(short='h', long)]
60 |     temp: Option<f64>,
61 |
62 |     #[clap(short='t', long)]
63 |     top_p: Option<f64>,
64 |
65 | }
66 |
67 | fn main() {
68 |     let args = Args::parse();
69 |     let ipc_name = args.ipc_name.unwrap();
70 |     let model_id = args.model_id.unwrap();
71 |     let code = include_str!("qwen2.py");
72 |     pyo3::append_to_inittab!(moonipc);
73 |     pyo3::prepare_freethreaded_python();
74 |
75 |     let args = (ipc_name.as_str(), model_id.as_str());
76 |     Python::with_gil(|py| {
77 |         let activators = PyModule::from_code_bound(py, code, "qwen.py", "qwen").unwrap();
78 |         activators.getattr("run").unwrap().call1(args).unwrap();
79 |     });
80 | }
-------------------------------------------------------------------------------- /models/pyworker/src/qwen2.py: --------------------------------------------------------------------------------
1 | from moonipc import IpcChannel
2 | from transformers import TextStreamer
3 | from transformers import AutoModelForCausalLM, AutoTokenizer
4 | import json
5 | import torch
6 |
7 | class IpcStreamer(TextStreamer):
8 |     def __init__(
9 |         self, tokenizer: AutoTokenizer, skip_prompt: bool = False, ipc: IpcChannel = None, **decode_kwargs,
10 |     ):
11 |         super().__init__(tokenizer, skip_prompt, **decode_kwargs)
12 |         self.ipc = ipc
13 |
14 |     def on_finalized_text(self, text: str, stream_end: bool = False):
15 |         self.ipc.send(text)
16 |         if stream_end:
17 |             self.ipc.send("<|endoftext|>")
18 |
19 | def run(ipc_name, model_id):
20 |
21 |     ipc = IpcChannel(ipc_name)
22 |
23 |     device = "cuda"
24 |     model = AutoModelForCausalLM.from_pretrained(
25 |         model_id,
26 |         torch_dtype="auto",
27 |         device_map="auto",
28 |         attn_implementation="flash_attention_2",
29 |     )
30 |     tokenizer = AutoTokenizer.from_pretrained(model_id)
31 |     streamer = IpcStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, ipc=ipc)
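    # IpcStreamer (defined above) pushes each finalized text fragment over the
    # IPC channel as it is decoded, then appends the <|endoftext|> sentinel so
    # the master server can tell the stream is complete.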
32 | print(f"{model_id} server start!") 33 | while True: 34 | request = json.loads(ipc.recv()) 35 | 36 | if request['cmd'] == "QUIT": 37 | break 38 | messages = [{"role": "system", "content": request['system_prompt']}] 39 | for msg in request['msg_list'] : 40 | if msg['role']=='User': 41 | messages.append({"role":"user","content":msg['content']}) 42 | else: 43 | messages.append({"role":"assistant","content":msg['content']}) 44 | text = tokenizer.apply_chat_template( 45 | messages, 46 | tokenize=False, 47 | add_generation_prompt=True 48 | ) 49 | model_inputs = tokenizer([text], return_tensors="pt").to(device) 50 | 51 | print("model.generate!!") 52 | model.generate( 53 | model_inputs.input_ids, 54 | 55 | max_new_tokens=512, 56 | streamer=streamer, 57 | ) 58 | 59 | -------------------------------------------------------------------------------- /models/qwen/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "qwen" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | moonweb={path="../../"} 10 | candle-core = { git = "https://github.com/huggingface/candle.git", version ="0.6.0" ,features = ["cuda"] } 11 | candle-transformers = { git = "https://github.com/huggingface/candle.git", version ="0.6.0" ,features = ["cuda"] } 12 | candle-nn = { git = "https://github.com/huggingface/candle.git", version ="0.6.0" ,features = ["cuda"] } 13 | tokenizers = { version = "0.19.1", features = ["onig"] } 14 | 15 | half = { optional = true } 16 | hf-hub = { features = ["tokio"] } 17 | clap = { version = "4.5.7", features = ["derive"] } 18 | anyhow = "1.0.86" 19 | serde = {version = "1.0.203", features = ["derive"] } 20 | serde_json = "1.0" -------------------------------------------------------------------------------- /models/qwen/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Error as E, Result}; 2 | use candle_core::utils::cuda_is_available; 3 | use clap::Parser; 4 | 5 | use candle_transformers::models::qwen2::{Config as ConfigBase, ModelForCausalLM as ModelBase}; 6 | 7 | use candle_core::{DType, Device, Tensor}; 8 | use moonweb::token_output_stream::TokenOutputStream; 9 | use candle_nn::VarBuilder; 10 | use candle_transformers::generation::LogitsProcessor; 11 | use hf_hub::{api::sync::Api, Repo, RepoType}; 12 | use tokenizers::Tokenizer; 13 | use moonweb::ipc::{accept,OutputStream}; 14 | use moonweb::data::{Request,Message,Role}; 15 | 16 | struct TextGeneration { 17 | model: ModelBase, 18 | device: Device, 19 | tokenizer: TokenOutputStream, 20 | logits_processor: LogitsProcessor, 21 | repeat_penalty: f32, 22 | repeat_last_n: usize, 23 | } 24 | 25 | impl TextGeneration { 26 | #[allow(clippy::too_many_arguments)] 27 | fn new( 28 | model: ModelBase, 29 | tokenizer: Tokenizer, 30 | seed: u64, 31 | temp: Option, 32 | top_p: Option, 33 | repeat_penalty: f32, 34 | repeat_last_n: usize, 35 | device: &Device, 36 | ) -> Self { 37 | let logits_processor = LogitsProcessor::new(seed, temp, top_p); 38 | Self { 39 | model, 40 | tokenizer: TokenOutputStream::new(tokenizer), 41 | logits_processor, 42 | repeat_penalty, 43 | repeat_last_n, 44 | device: device.clone(), 45 | } 46 | } 47 | 48 | fn run(&mut self,output: &impl OutputStream,prompt: &str, sample_len: usize) -> Result<()> { 49 | self.model.clear_kv_cache(); 50 | self.tokenizer.clear(); 51 | let mut tokens = self 52 | 
.tokenizer 53 | .tokenizer() 54 | .encode(prompt, true) 55 | .map_err(E::msg)? 56 | .get_ids() 57 | .to_vec(); 58 | for &t in tokens.iter() { 59 | if let Some(t) = self.tokenizer.next_token(t)? { 60 | print!("{t}") 61 | } 62 | } 63 | 64 | 65 | let mut generated_tokens = 0usize; 66 | let eos_token = match self.tokenizer.get_token("<|endoftext|>") { 67 | Some(token) => token, 68 | None => anyhow::bail!("cannot find the <|endoftext|> token"), 69 | }; 70 | let start_gen = std::time::Instant::now(); 71 | for index in 0..sample_len { 72 | let context_size = if index > 0 { 1 } else { tokens.len() }; 73 | let start_pos = tokens.len().saturating_sub(context_size); 74 | let ctxt = &tokens[start_pos..]; 75 | let input = Tensor::new(ctxt, &self.device)?.unsqueeze(0)?; 76 | let logits = self.model.forward(&input, start_pos)?; 77 | let logits = logits.squeeze(0)?.squeeze(0)?.to_dtype(DType::F32)?; 78 | let logits = if self.repeat_penalty == 1. { 79 | logits 80 | } else { 81 | let start_at = tokens.len().saturating_sub(self.repeat_last_n); 82 | candle_transformers::utils::apply_repeat_penalty( 83 | &logits, 84 | self.repeat_penalty, 85 | &tokens[start_at..], 86 | )? 87 | }; 88 | 89 | let next_token = self.logits_processor.sample(&logits)?; 90 | tokens.push(next_token); 91 | generated_tokens += 1; 92 | if next_token == eos_token { 93 | break; 94 | } 95 | if let Some(t) = self.tokenizer.next_token(next_token)? { 96 | //print!("{t}"); 97 | //std::io::stdout().flush()?; 98 | output.write(format!("{t}")).unwrap(); 99 | } 100 | } 101 | output.end().unwrap(); 102 | let dt = start_gen.elapsed(); 103 | if let Some(rest) = self.tokenizer.decode_rest().map_err(E::msg)? { 104 | print!("{rest}"); 105 | } 106 | 107 | println!( 108 | "\n{generated_tokens} tokens generated ({:.2} token/s)", 109 | generated_tokens as f64 / dt.as_secs_f64(), 110 | ); 111 | Ok(()) 112 | } 113 | } 114 | 115 | 116 | #[derive(Parser, Debug)] 117 | #[command(author, version, about, long_about = None)] 118 | struct Args { 119 | 120 | #[clap(short, long)] 121 | server: String, 122 | 123 | #[clap(short, long)] 124 | ipc_name: String, 125 | 126 | #[arg(long)] 127 | temperature: Option, 128 | 129 | /// Nucleus sampling probability cutoff. 130 | #[arg(long)] 131 | top_p: Option, 132 | 133 | /// The seed to use when generating random samples. 134 | #[arg(long, default_value_t = 299792458)] 135 | seed: u64, 136 | 137 | /// The length of the sample to generate (in tokens). 138 | #[arg(long, short = 'n', default_value_t = 10000)] 139 | sample_len: usize, 140 | 141 | #[arg(long,default_value = "Qwen/Qwen2-7B")] 142 | model_id: Option, 143 | 144 | #[arg(long, default_value = "main")] 145 | revision: String, 146 | 147 | #[arg(long)] 148 | tokenizer_file: Option, 149 | 150 | #[arg(long)] 151 | weight_files: Option, 152 | 153 | /// Penalty to be applied for repeating tokens, 1. means no penalty. 154 | #[arg(long, default_value_t = 1.1)] 155 | repeat_penalty: f32, 156 | 157 | /// The context size to consider for the repeat penalty. 
158 | #[arg(long, default_value_t = 64)] 159 | repeat_last_n: usize, 160 | } 161 | 162 | 163 | 164 | fn messages_chat_template(msg_list: &Vec,system_prompt:&str)->String { 165 | let mut history = String::new(); 166 | history.push_str("<|im_start|>system\n"); 167 | history.push_str(system_prompt); 168 | history.push_str("<|im_end|>\n"); 169 | for msg in msg_list { 170 | history.push_str("<|im_start|>"); 171 | if msg.role == Role::User { 172 | history.push_str("user\n"); 173 | } else { 174 | history.push_str("assistant\n"); 175 | } 176 | history.push_str(msg.content.as_str()); 177 | history.push_str("<|im_end|>\n"); 178 | } 179 | history.push_str("<|im_start|>assistant\n"); 180 | history 181 | } 182 | 183 | fn main() -> Result<()> { 184 | 185 | let args = Args::parse(); 186 | let start = std::time::Instant::now(); 187 | let api = Api::new()?; 188 | let model_id = "Qwen/Qwen2-1.5B-Instruct".to_string(); 189 | let repo = api.repo(Repo::with_revision( 190 | model_id.clone(), 191 | RepoType::Model, 192 | "main".to_string(), 193 | )); 194 | let tokenizer_filename = repo.get("tokenizer.json")?; 195 | let filenames = vec![repo.get("model.safetensors")?]; 196 | println!("retrieved the files in {:?}", start.elapsed()); 197 | let tokenizer = Tokenizer::from_file(tokenizer_filename).map_err(E::msg)?; 198 | 199 | let start = std::time::Instant::now(); 200 | let config_file = repo.get("config.json")?; 201 | let device = if cuda_is_available() { 202 | Device::new_cuda(0).expect("create cuda device failed!") 203 | } else { 204 | Device::Cpu 205 | }; 206 | 207 | let dtype = if device.is_cuda() { 208 | DType::BF16 209 | } else { 210 | DType::F32 211 | }; 212 | let vb = unsafe { VarBuilder::from_mmaped_safetensors(&filenames, dtype, &device)? }; 213 | let model = { 214 | let config: ConfigBase = serde_json::from_str(&std::fs::read_to_string(config_file)?)?; 215 | ModelBase::new(&config, vb)? 
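        // This block evaluates to the loaded model: config.json supplies the
        // Qwen2 hyperparameters, and the VarBuilder above lazily maps the
        // memory-mapped safetensors weights onto the chosen device and dtype.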
216 | }; 217 | 218 | println!("loaded the model in {:?}", start.elapsed()); 219 | let temp = args.temperature.unwrap_or_else(|| 0.3f64); 220 | let top_p = args.top_p.unwrap_or_else(|| 0.95f64); 221 | let mut pipeline = TextGeneration::new( 222 | model, 223 | tokenizer, 224 | 299792458u64, 225 | Some(temp), 226 | Some(top_p), 227 | 1.8f32, 228 | 64usize, 229 | &device, 230 | ); 231 | let ipc_name = args.ipc_name; 232 | let (receiver,sender) = accept(ipc_name); 233 | println!("{} server start!",model_id); 234 | loop { 235 | let msg = receiver.recv().unwrap(); 236 | if let Ok(req) = serde_json::from_str::(msg.as_str()) { 237 | if req.cmd.eq("QUIT") { 238 | break; 239 | } 240 | let prompt = messages_chat_template(&req.msg_list,"你是源胖子开发的AI助手,你善于回答科普问题。"); 241 | 242 | pipeline.run(&sender,prompt.as_str(), 1000usize)?; 243 | } 244 | 245 | } 246 | 247 | Ok(()) 248 | } -------------------------------------------------------------------------------- /models/qwen/test.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoModelForCausalLM, AutoTokenizer 2 | device = "cuda" # the device to load the model onto 3 | 4 | model = AutoModelForCausalLM.from_pretrained( 5 | "Qwen/Qwen2-7B-Instruct", 6 | torch_dtype="auto", 7 | device_map="auto", 8 | attn_implementation="flash_attention_2", 9 | ) 10 | tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B-Instruct") 11 | 12 | prompt = "你好呀" 13 | messages = [ 14 | {"role": "system", "content": "You are a helpful assistant."}, 15 | {"role": "user", "content": prompt} 16 | ] 17 | text = tokenizer.apply_chat_template( 18 | messages, 19 | tokenize=False, 20 | add_generation_prompt=True 21 | ) 22 | model_inputs = tokenizer([text], return_tensors="pt").to(device) 23 | 24 | generated_ids = model.generate( 25 | model_inputs.input_ids, 26 | max_new_tokens=512 27 | ) 28 | generated_ids = [ 29 | output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids) 30 | ] 31 | 32 | response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] 33 | print(response) 34 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "flowbite": "^2.3.0" 4 | }, 5 | "devDependencies": { 6 | "tailwindcss": "^3.4.3" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lyn-liyuan/moonweb/dbe7593d9fd4689a24816b5d017dc9a3010dce48/public/favicon.ico -------------------------------------------------------------------------------- /public/header.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/login.js: -------------------------------------------------------------------------------- 1 | 2 | function showLogin(closeable=false) { 3 | const options = { 4 | placement: 'bottom-right', 5 | backdrop: 'dynamic', 6 | backdropClasses: 7 | 'bg-gray-900/50 dark:bg-gray-900/80 fixed inset-0 z-40', 8 | closable: closeable, 9 | }; 10 | 11 | const instanceOptions = { 12 | id: 'login-modal', 13 | override: true 14 | }; 15 | const $targetEl = document.getElementById('login-modal'); 16 | const modal = new Modal($targetEl, options, instanceOptions); 
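    // `Modal` comes from the Flowbite bundle declared in package.json; the Rust
    // frontend calls showLogin()/closeLogin() through js_sys::Function lookups
    // in src/authorization.rs.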
17 | modal.show(); 18 | } 19 | 20 | function closeLogin() { 21 | const options = { 22 | placement: 'bottom-right', 23 | backdrop: 'dynamic', 24 | backdropClasses: 25 | 'bg-gray-900/50 dark:bg-gray-900/80 fixed inset-0 z-40', 26 | closable: true, 27 | }; 28 | 29 | const instanceOptions = { 30 | id: 'login-modal', 31 | override: true 32 | }; 33 | const $targetEl = document.getElementById('login-modal'); 34 | const modal = new Modal($targetEl, options, instanceOptions); 35 | modal.hide(); 36 | } 37 | -------------------------------------------------------------------------------- /public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Dioxus", 3 | 4 | "start_url": "/", 5 | "id": "/", 6 | "display": "standalone", 7 | "display_override": ["window-control-overlay", "standalone"], 8 | "scope": "/", 9 | "theme_color": "#000000", 10 | "background_color": "#ffffff", 11 | "short_name": "Dioxus", 12 | "description": "Dioxus is a portable, performant, and ergonomic framework for building cross-platform user interfaces in Rust.", 13 | "dir": "ltr", 14 | "lang": "en", 15 | "orientation": "portrait" 16 | } -------------------------------------------------------------------------------- /public/sw.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | //console.log('WORKER: executing.'); 4 | 5 | /* A version number is useful when updating the worker logic, 6 | allowing you to remove outdated cache entries during the update. 7 | */ 8 | var version = 'v1.0.0::'; 9 | 10 | /* These resources will be downloaded and cached by the service worker 11 | during the installation process. If any resource fails to be downloaded, 12 | then the service worker won't be installed either. 13 | */ 14 | var offlineFundamentals = [ 15 | // add here the files you want to cache 16 | 'favicon.ico' 17 | ]; 18 | 19 | /* The install event fires when the service worker is first installed. 20 | You can use this event to prepare the service worker to be able to serve 21 | files while visitors are offline. 22 | */ 23 | self.addEventListener("install", function (event) { 24 | //console.log('WORKER: install event in progress.'); 25 | /* Using event.waitUntil(p) blocks the installation process on the provided 26 | promise. If the promise is rejected, the service worker won't be installed. 27 | */ 28 | event.waitUntil( 29 | /* The caches built-in is a promise-based API that helps you cache responses, 30 | as well as finding and deleting them. 31 | */ 32 | caches 33 | /* You can open a cache by name, and this method returns a promise. We use 34 | a versioned cache name here so that we can remove old cache entries in 35 | one fell swoop later, when phasing out an older service worker. 36 | */ 37 | .open(version + 'fundamentals') 38 | .then(function (cache) { 39 | /* After the cache is opened, we can fill it with the offline fundamentals. 40 | The method below will add all resources in `offlineFundamentals` to the 41 | cache, after making requests for them. 42 | */ 43 | return cache.addAll(offlineFundamentals); 44 | }) 45 | .then(function () { 46 | //console.log('WORKER: install completed'); 47 | }) 48 | ); 49 | }); 50 | 51 | /* The fetch event fires whenever a page controlled by this service worker requests 52 | a resource. This isn't limited to `fetch` or even XMLHttpRequest. Instead, it 53 | comprehends even the request for the HTML page on first load, as well as JS and 54 | CSS resources, fonts, any images, etc. 
55 | */ 56 | self.addEventListener("fetch", function (event) { 57 | //console.log('WORKER: fetch event in progress.'); 58 | 59 | /* We should only cache GET requests, and deal with the rest of method in the 60 | client-side, by handling failed POST,PUT,PATCH,etc. requests. 61 | */ 62 | if (event.request.method !== 'GET') { 63 | /* If we don't block the event as shown below, then the request will go to 64 | the network as usual. 65 | */ 66 | //console.log('WORKER: fetch event ignored.', event.request.method, event.request.url); 67 | return; 68 | } 69 | /* Similar to event.waitUntil in that it blocks the fetch event on a promise. 70 | Fulfillment result will be used as the response, and rejection will end in a 71 | HTTP response indicating failure. 72 | */ 73 | event.respondWith( 74 | caches 75 | /* This method returns a promise that resolves to a cache entry matching 76 | the request. Once the promise is settled, we can then provide a response 77 | to the fetch request. 78 | */ 79 | .match(event.request) 80 | .then(function (cached) { 81 | /* Even if the response is in our cache, we go to the network as well. 82 | This pattern is known for producing "eventually fresh" responses, 83 | where we return cached responses immediately, and meanwhile pull 84 | a network response and store that in the cache. 85 | 86 | Read more: 87 | https://ponyfoo.com/articles/progressive-networking-serviceworker 88 | */ 89 | var networked = fetch(event.request) 90 | // We handle the network request with success and failure scenarios. 91 | .then(fetchedFromNetwork, unableToResolve) 92 | // We should catch errors on the fetchedFromNetwork handler as well. 93 | .catch(unableToResolve); 94 | 95 | /* We return the cached response immediately if there is one, and fall 96 | back to waiting on the network as usual. 97 | */ 98 | //console.log('WORKER: fetch event', cached ? '(cached)' : '(network)', event.request.url); 99 | return cached || networked; 100 | 101 | function fetchedFromNetwork(response) { 102 | /* We copy the response before replying to the network request. 103 | This is the response that will be stored on the ServiceWorker cache. 104 | */ 105 | var cacheCopy = response.clone(); 106 | 107 | //console.log('WORKER: fetch response from network.', event.request.url); 108 | 109 | caches 110 | // We open a cache to store the response for this request. 111 | .open(version + 'pages') 112 | .then(function add(cache) { 113 | /* We store the response for this request. It'll later become 114 | available to caches.match(event.request) calls, when looking 115 | for cached responses. 116 | */ 117 | cache.put(event.request, cacheCopy); 118 | }) 119 | .then(function () { 120 | //console.log('WORKER: fetch response stored in cache.', event.request.url); 121 | }); 122 | 123 | // Return the response so that the promise is settled in fulfillment. 124 | return response; 125 | } 126 | 127 | /* When this method is called, it means we were unable to produce a response 128 | from either the cache or the network. This is our opportunity to produce 129 | a meaningful response even when all else fails. It's the last chance, so 130 | you probably want to display a "Service Unavailable" view or a generic 131 | error response. 132 | */ 133 | function unableToResolve() { 134 | /* There's a couple of things we can do here. 135 | - Test the Accept header and then return one of the `offlineFundamentals` 136 | e.g: `return caches.match('/some/cached/image.png')` 137 | - You should also consider the origin. 
It's easier to decide what 138 | "unavailable" means for requests against your origins than for requests 139 | against a third party, such as an ad provider. 140 | - Generate a Response programmaticaly, as shown below, and return that. 141 | */ 142 | 143 | //console.log('WORKER: fetch request failed in both cache and network.'); 144 | 145 | /* Here we're creating a response programmatically. The first parameter is the 146 | response body, and the second one defines the options for the response. 147 | */ 148 | return new Response('
<h1>Service Unavailable</h1>
', { 149 | status: 503, 150 | statusText: 'Service Unavailable', 151 | headers: new Headers({ 152 | 'Content-Type': 'text/html' 153 | }) 154 | }); 155 | } 156 | }) 157 | ); 158 | }); 159 | 160 | /* The activate event fires after a service worker has been successfully installed. 161 | It is most useful when phasing out an older version of a service worker, as at 162 | this point you know that the new worker was installed correctly. In this example, 163 | we delete old caches that don't match the version in the worker we just finished 164 | installing. 165 | */ 166 | self.addEventListener("activate", function (event) { 167 | /* Just like with the install event, event.waitUntil blocks activate on a promise. 168 | Activation will fail unless the promise is fulfilled. 169 | */ 170 | //console.log('WORKER: activate event in progress.'); 171 | 172 | event.waitUntil( 173 | caches 174 | /* This method returns a promise which will resolve to an array of available 175 | cache keys. 176 | */ 177 | .keys() 178 | .then(function (keys) { 179 | // We return a promise that settles when all outdated caches are deleted. 180 | return Promise.all( 181 | keys 182 | .filter(function (key) { 183 | // Filter by keys that don't start with the latest version prefix. 184 | return !key.startsWith(version); 185 | }) 186 | .map(function (key) { 187 | /* Return a promise that's fulfilled 188 | when each outdated cache is deleted. 189 | */ 190 | return caches.delete(key); 191 | }) 192 | ); 193 | }) 194 | .then(function () { 195 | //console.log('WORKER: activate completed.'); 196 | }) 197 | ); 198 | }); -------------------------------------------------------------------------------- /server.config: -------------------------------------------------------------------------------- 1 | { 2 | "ports": [ 3 | 11000, 4 | 11001, 5 | 11002 6 | ], 7 | "master_addr": "0.0.0.0:12081", 8 | "working_servers": [ 9 | { 10 | "model_id": "black-forest-labs/FLUX.1-schnell", 11 | "program": "/disk/lyn/workspace/moondream/moonweb/models/flux/target/release/flux", 12 | "temp": 0.6, 13 | "top_p": 0.9 14 | }, 15 | { 16 | "model_id": "Qwen/Qwen2-7B-Instruct", 17 | "program": "/disk/lyn/workspace/moondream/moonweb/models/pyworker/target/release/pyworker", 18 | "temp": 0.6, 19 | "top_p": 0.9 20 | } 21 | ], 22 | "servers": [ 23 | { 24 | "model_id": "black-forest-labs/FLUX.1-schnell", 25 | "program": "/disk/lyn/workspace/moondream/moonweb/models/flux/target/release/flux", 26 | "temp": 0.6, 27 | "top_p": 0.9 28 | }, 29 | { 30 | "model_id": "lmms-lab/llama3-llava-next-8b", 31 | "program": "/disk/lyn/workspace/moondream/moonweb/models/llava/target/release/llava", 32 | "temp": 0.6, 33 | "top_p": 0.9 34 | }, 35 | { 36 | "model_id": "Qwen/Qwen2-7B-Instruct", 37 | "program": "/disk/lyn/workspace/moondream/moonweb/models/pyworker/target/release/pyworker", 38 | "temp": 0.6, 39 | "top_p": 0.9 40 | }, 41 | { 42 | "model_id": "Qwen/Qwen2-1.5B-Instruct", 43 | "program": "/disk/lyn/workspace/moondream/moonweb/models/qwen/target/release/qwen", 44 | "temp": 0.6, 45 | "top_p": 0.9 46 | }, 47 | { 48 | "model_id": "meta-llama/Meta-Llama-3-8B-Instruct", 49 | "program": "self", 50 | "temp": 0.6, 51 | "top_p": 0.9 52 | }, 53 | { 54 | "model_id": "microsoft/Phi-3-medium-4k-instruct", 55 | "program": "self", 56 | "temp": 0.6, 57 | "top_p": 0.9 58 | }, 59 | { 60 | "model_id": "yuanli/moonmodel", 61 | "program": "/home/lyn/workspace/moondream/moonweb/models/moonmodel/target/release/moonmodel", 62 | "temp": 0.6, 63 | "top_p": 0.9 64 | } 65 | ] 66 | } 
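For reference, the layout of server.config corresponds to serde structures along the lines of the sketch below. The field names are read directly off the JSON above, but the struct and field definitions here are hypothetical; the real ones live in the master-server code and may differ:

```rust
use serde::Deserialize;

// Hypothetical mirror of server.config (shape inferred from the JSON above).
#[derive(Debug, Deserialize)]
struct ServerEntry {
    model_id: String,
    // Path to a worker executable, or "self" for models such as
    // Meta-Llama-3 and Phi-3 that the master process serves itself.
    program: String,
    temp: f64,
    top_p: f64,
}

#[derive(Debug, Deserialize)]
struct ServerConfig {
    ports: Vec<u16>,
    master_addr: String,
    working_servers: Vec<ServerEntry>,
    servers: Vec<ServerEntry>,
}

fn load_config(path: &str) -> anyhow::Result<ServerConfig> {
    Ok(serde_json::from_str(&std::fs::read_to_string(path)?)?)
}
```

To register a new model, append an entry to the servers array pointing at its worker executable, then start it from the chat box with /load model_id, as described in the README.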
-------------------------------------------------------------------------------- /src/authorization.rs: -------------------------------------------------------------------------------- 1 | use crate::data::{AuthResponse, WebUser}; 2 | use dioxus::prelude::*; 3 | use js_sys::Date; 4 | use js_sys::Reflect; 5 | use reqwest::header::CONTENT_TYPE; 6 | use reqwest::Client; 7 | use sqids::Sqids; 8 | use wasm_bindgen::prelude::*; 9 | use wasm_bindgen::JsCast; 10 | use web_sys::{window, Document, HtmlInputElement}; 11 | 12 | const SQIDS_ALPHABET: &str = "VRHIrU2je0gxcSGlzvMWBAkpufqDiyEoY931JLTC5wN6KbaQFPOdsXn48h7mZt"; 13 | const SALT: &str = "Akpu#fqDiy@EoY931J_VRHIrU2"; 14 | 15 | fn get_input_element_by_id(document: &Document, id: &str) -> Result { 16 | document 17 | .get_element_by_id(id) 18 | .ok_or_else(|| JsValue::from_str("element not found")) 19 | .and_then(|element| { 20 | element 21 | .dyn_into::() 22 | .map_err(|_| JsValue::from_str("element is not an HtmlInputElement")) 23 | }) 24 | } 25 | 26 | pub fn get_user() -> Option { 27 | if let Some(window) = window() { 28 | if let Ok(Some(storage)) = window.local_storage() { 29 | if let Ok(Some(value)) = storage.get_item("auth_user") { 30 | if let Ok(user) = serde_json::from_str::(value.as_str()) { 31 | return Some(user); 32 | } 33 | } 34 | } 35 | } 36 | return None; 37 | } 38 | 39 | async fn do_login(endpoint: Signal, mut logined: Signal,mut login_failed: Signal) { 40 | if let Some(window) = window() { 41 | if let Some(document) = window.document() { 42 | let role = if let Ok(user) = get_input_element_by_id(&document, "role-1") { 43 | if user.checked() { 44 | Some("User") 45 | } else { 46 | None 47 | } 48 | } else { 49 | None 50 | }; 51 | let role = match role { 52 | Some(r) => r, 53 | None => { 54 | if let Ok(user) = get_input_element_by_id(&document, "role-2") { 55 | if user.checked() { 56 | "Administrator" 57 | } else { 58 | "User" 59 | } 60 | } else { 61 | "User" 62 | } 63 | } 64 | }; 65 | let token = if let Ok(key) = get_input_element_by_id(&document, "key") { 66 | key.value() 67 | } else { 68 | String::new() 69 | }; 70 | if token != "" { 71 | let token_digest = md5::compute(format!("{}{}", SALT, token).as_bytes()); 72 | let response = Client::new() 73 | .post(format!("{}signin", endpoint())) 74 | .header(CONTENT_TYPE, "application/json") 75 | .body(format!( 76 | "{{\"role\":\"{}\",\"token\":\"{:x}\"}}", 77 | role, token_digest 78 | )) 79 | .send() 80 | .await 81 | .unwrap() 82 | .json::() 83 | .await 84 | .unwrap(); 85 | if response.success { 86 | logined.set(true); 87 | login_failed.set(false); 88 | let user = 89 | WebUser::make(role.parse().unwrap(), response.auth_key, response.expire); 90 | if let Ok(Some(storage)) = window.local_storage() { 91 | storage 92 | .set_item("auth_user", serde_json::json!(user).to_string().as_str()) 93 | .unwrap(); 94 | } 95 | } else { 96 | logined.set(false); 97 | if let Ok(Some(storage)) = window.local_storage() { 98 | storage.delete("auth_user").unwrap(); 99 | } 100 | login_failed.set(true); 101 | } 102 | } 103 | } 104 | } 105 | } 106 | 107 | fn is_signin() -> bool { 108 | let sqids = Sqids::builder() 109 | .alphabet(SQIDS_ALPHABET.chars().collect()) 110 | .build() 111 | .unwrap(); 112 | 113 | if let Some(user) = get_user() { 114 | if let Some(expire) = user.expire { 115 | let expire_raw = sqids.decode(expire.as_str()); 116 | let expire = Date::new_with_year_month_day( 117 | expire_raw[0] as u32, 118 | expire_raw[1] as i32, 119 | expire_raw[2] as i32, 120 | ); 121 | let now = Date::new_0(); 122 | if 
now.value_of() <= expire.value_of() { 123 | return true; 124 | } 125 | } 126 | } 127 | 128 | return false; 129 | } 130 | 131 | pub fn show_login(closeable:bool) { 132 | if let Some(window) = window() { 133 | let show_login_js = Reflect::get(&window, &JsValue::from_str("showLogin")) 134 | .unwrap() 135 | .dyn_into::() 136 | .unwrap(); 137 | show_login_js.call0(&JsValue::from_bool(closeable)).unwrap(); 138 | } 139 | } 140 | 141 | pub fn close_login() { 142 | if let Some(window) = window() { 143 | let close_login_js = Reflect::get(&window, &JsValue::from_str("closeLogin")) 144 | .unwrap() 145 | .dyn_into::() 146 | .unwrap(); 147 | close_login_js.call0(&JsValue::NULL).unwrap(); 148 | } 149 | } 150 | 151 | 152 | #[component] 153 | pub fn LoginBox(endpoint: Signal) -> Element { 154 | let logined = use_signal(|| is_signin()); 155 | let login_failed = use_signal(|| false); 156 | use_effect(move || { 157 | if logined() { 158 | close_login(); 159 | } else { 160 | show_login(false); 161 | } 162 | }); 163 | rsx! { 164 | div { 165 | tabindex: "-1", 166 | "aria-hidden": "true", 167 | class: "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-full max-h-full", 168 | id: "login-modal", 169 | div { class: "relative p-4 w-full max-w-md max-h-full", 170 | div { class: "relative bg-white rounded-lg shadow dark:bg-gray-700", 171 | div { class: "flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600", 172 | h3 { class: "text-lg font-semibold text-gray-900 dark:text-white", 173 | "\n User Sign In\n " 174 | } 175 | if logined() { 176 | button { 177 | r#type: "button", 178 | class: "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white", 179 | onclick: |_| { 180 | close_login(); 181 | }, 182 | svg { 183 | "xmlns": "http://www.w3.org/2000/svg", 184 | "fill": "none", 185 | "aria-hidden": "true", 186 | "viewBox": "0 0 14 14", 187 | class: "w-3 h-3", 188 | path { 189 | "d": "m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6", 190 | "stroke": "currentColor", 191 | "stroke-linejoin": "round", 192 | "stroke-linecap": "round", 193 | "stroke-width": "2" 194 | } 195 | } 196 | span { class: "sr-only", "Remove Conversation" } 197 | } 198 | } 199 | } 200 | div { class: "p-4 md:p-5", 201 | p { class: "text-gray-500 dark:text-gray-400 mb-4", 202 | "Select your desired position:" 203 | } 204 | ul { class: "space-y-4 mb-4", 205 | li { 206 | input { 207 | name: "role", 208 | required: true, 209 | value: "user", 210 | r#type: "radio", 211 | class: "hidden peer", 212 | id: "role-1", 213 | checked:true, 214 | } 215 | label { 216 | r#for: "role-1", 217 | class: "inline-flex items-center justify-between w-full p-5 text-gray-900 bg-white border border-gray-200 rounded-lg cursor-pointer dark:hover:text-gray-300 dark:border-gray-500 dark:peer-checked:text-blue-500 peer-checked:border-blue-600 peer-checked:text-blue-600 hover:text-gray-900 hover:bg-gray-100 dark:text-white dark:bg-gray-600 dark:hover:bg-gray-500", 218 | div { class: "block", 219 | div { class: "w-full text-base font-semibold", "User" } 220 | } 221 | 222 | } 223 | } 224 | li { 225 | input { 226 | name: "role", 227 | required: true, 228 | value: "administrator", 229 | r#type: "radio", 230 | class: "hidden peer", 231 | id: "role-2" 232 | } 233 | label { 234 | r#for: "role-2", 235 | class: "inline-flex items-center justify-between w-full p-5 text-gray-900 bg-white 
border border-gray-200 rounded-lg cursor-pointer dark:hover:text-gray-300 dark:border-gray-500 dark:peer-checked:text-blue-500 peer-checked:border-blue-600 peer-checked:text-blue-600 hover:text-gray-900 hover:bg-gray-100 dark:text-white dark:bg-gray-600 dark:hover:bg-gray-500", 236 | div { class: "block", 237 | div { class: "w-full text-base font-semibold", "Administrator" } 238 | 239 | } 240 | 241 | } 242 | } 243 | } 244 | if !login_failed() { 245 | div { class: "space-y-4 mb-4", 246 | label { 247 | r#for: "key", 248 | class: "block mb-2 text-gray-500 dark:text-white", 249 | "Authentication Key:" 250 | } 251 | input { 252 | r#type: "text", 253 | placeholder: "Authentication Key", 254 | required: true, 255 | name: "key", 256 | class: "bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-primary-600 focus:border-primary-600 block w-full p-2.5 dark:bg-gray-600 dark:border-gray-500 dark:placeholder-gray-400 dark:text-white dark:focus:ring-primary-500 dark:focus:border-primary-500", 257 | id: "key" 258 | } 259 | } 260 | } else { 261 | div { class: "space-y-4 mb-4", 262 | label { 263 | r#for: "key", 264 | class: "block mb-2 text-red-700 dark:text-white", 265 | "Authentication Key:" 266 | } 267 | input { 268 | r#type: "text", 269 | placeholder: "Authentication Key", 270 | required: "true", 271 | name: "key", 272 | class: "bg-gray-50 border border-red-500 text-red-900 placeholder-red-700 text-sm rounded-lg focus:ring-primary-600 focus:border-primary-600 block w-full p-2.5 dark:bg-gray-600 dark:border-gray-500 dark:placeholder-gray-400 dark:text-white dark:focus:ring-primary-500 dark:focus:border-primary-500", 273 | id: "key" 274 | } 275 | p { 276 | class:"mt-2 text-sm text-red-600 dark:text-red-500", 277 | span { 278 | class:"font-medium", 279 | "Oops! " 280 | } 281 | " Authentication Key Error!" 
282 | } 283 | } 284 | } 285 | button { class: "text-white inline-flex w-full justify-center bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800", 286 | onclick: move |_| async move { 287 | do_login(endpoint,logined,login_failed).await; 288 | }, 289 | "\n Sign In\n " 290 | } 291 | } 292 | } 293 | } 294 | } 295 | if logined() { 296 | span { dangerous_inner_html: " "} 297 | } 298 | } 299 | } 300 | -------------------------------------------------------------------------------- /src/data.rs: -------------------------------------------------------------------------------- 1 | use dioxus::prelude::*; 2 | use serde::{Deserialize, Serialize}; 3 | 4 | #[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] 5 | pub enum Role { 6 | Robot, 7 | User, 8 | Administrator, 9 | } 10 | 11 | impl std::str::FromStr for Role { 12 | type Err = String; 13 | fn from_str(s: &str) -> Result { 14 | match s { 15 | "User" => Ok(Role::User), 16 | "Robot" => Ok(Role::Robot), 17 | "Administrator" => Ok(Role::Administrator), 18 | _ => Err(format!("'{}' is not a valid value for Role", s)), 19 | } 20 | } 21 | } 22 | 23 | #[derive(Props, Clone, PartialEq, Debug, Serialize, Deserialize)] 24 | pub struct Message { 25 | pub id: usize, 26 | pub role: Role, 27 | pub content: String, 28 | pub img: Option, 29 | pub loading: bool, 30 | } 31 | #[derive(Debug, Serialize, Deserialize)] 32 | pub struct Request { 33 | pub cmd:String, 34 | pub system_prompt:String, 35 | pub msg_list:Vec, 36 | } 37 | 38 | #[derive(Props, Clone, PartialEq, Debug, Serialize, Deserialize)] 39 | pub struct SelectOption { 40 | pub text:String, 41 | pub selected: bool, 42 | pub value: String, 43 | } 44 | 45 | #[derive(Props, Clone, PartialEq, Debug, Serialize, Deserialize)] 46 | pub struct AuthRequest { 47 | pub role: Role, 48 | pub token: String, 49 | } 50 | 51 | #[derive(Props, Clone, PartialEq, Debug, Serialize, Deserialize)] 52 | pub struct AuthResponse { 53 | pub success: bool, 54 | pub auth_key: String, 55 | pub expire: String, 56 | } 57 | 58 | #[derive(Props, Clone, PartialEq, Debug, Serialize, Deserialize)] 59 | pub struct WebUser { 60 | pub role: Role, 61 | pub auth_key: Option, 62 | pub expire: Option, 63 | } 64 | 65 | impl WebUser { 66 | pub fn new() ->Self { 67 | WebUser { 68 | role: Role::User, 69 | auth_key: None, 70 | expire: None, 71 | } 72 | } 73 | pub fn make(role:Role,key:String,expire: String) -> Self { 74 | WebUser { 75 | role: role, 76 | auth_key: Some(key), 77 | expire: Some(expire), 78 | } 79 | } 80 | } 81 | 82 | 83 | -------------------------------------------------------------------------------- /src/ipc.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Error, Result}; 2 | 3 | 4 | use ipc_channel::ipc::{self, IpcSender, IpcReceiver}; 5 | 6 | pub fn accept(ipc_name: String) -> (IpcReceiver, IpcSender) { 7 | let (client_sender, receiver): (IpcSender, IpcReceiver) = ipc::channel().unwrap(); 8 | let connector = IpcSender::connect(ipc_name.clone()).expect(format!("Failed to connect {}",ipc_name).as_str()); 9 | connector.send(client_sender).expect("Failed to send client sender"); 10 | let (sender, client_receiver): (IpcSender, IpcReceiver) = ipc::channel().unwrap(); 11 | let client_name = receiver.recv().expect("Failed to recv!"); 12 | let connector = IpcSender::connect(client_name.clone()).expect(format!("Failed to connect client: 
{}",client_name).as_str()); 13 | connector.send(client_receiver).expect("Failed to send client receive"); 14 | (receiver, sender) 15 | } 16 | 17 | pub trait OutputStream { 18 | fn write(&self, text: String) -> Result<(), Error>; 19 | fn end(&self) -> Result<(), Error>; 20 | } 21 | 22 | impl OutputStream for IpcSender { 23 | fn write(&self, text: String) -> Result<(), Error> { 24 | self.send(text)?; 25 | Ok(()) 26 | } 27 | 28 | fn end(&self) -> Result<(), Error> { 29 | self.send("<|endoftext|>".to_string())?; 30 | Ok(()) 31 | } 32 | } 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #[cfg(not(target_arch = "wasm32"))] 2 | pub mod token_output_stream; 3 | #[cfg(not(target_arch = "wasm32"))] 4 | pub mod model; 5 | #[cfg(not(target_arch = "wasm32"))] 6 | pub mod llama; 7 | #[cfg(not(target_arch = "wasm32"))] 8 | pub mod phi3; 9 | pub mod data; 10 | pub mod web; 11 | #[cfg(not(target_arch = "wasm32"))] 12 | pub mod ipc; 13 | #[cfg(not(target_arch = "wasm32"))] 14 | pub mod worker_server; 15 | #[cfg(not(target_arch = "wasm32"))] 16 | mod master_state; 17 | #[cfg(not(target_arch = "wasm32"))] 18 | pub mod master_server; 19 | pub mod web_state; 20 | pub mod authorization; 21 | -------------------------------------------------------------------------------- /src/llama.rs: -------------------------------------------------------------------------------- 1 | use core::str; 2 | 3 | use anyhow::{Error, Result}; 4 | use crate::data::{Role,Message}; 5 | use candle_core::utils::cuda_is_available; 6 | use candle_core::{DType, Device, Tensor}; 7 | use candle_nn::VarBuilder; 8 | use candle_transformers::generation::{LogitsProcessor, Sampling}; 9 | use hf_hub::{api::sync::Api, Repo, RepoType}; 10 | 11 | use candle_transformers::models::llama as model; 12 | use model::{Llama, LlamaConfig, Config}; 13 | 14 | use crate::token_output_stream::TokenOutputStream; 15 | use crate::model::TextGenModel; 16 | use crate::ipc::OutputStream; 17 | use tokenizers::Tokenizer; 18 | 19 | 20 | const EOS_TOKEN: &str = "<|eot_id|>"; 21 | 22 | pub struct TextGeneration { 23 | model: Llama, 24 | device: Device, 25 | tokenizer: TokenOutputStream, 26 | logits_processor: LogitsProcessor, 27 | eos_token_id: Option, 28 | config: Config, 29 | repeat_penalty: f32, 30 | repeat_last_n: usize, 31 | } 32 | impl TextGeneration { 33 | #[allow(clippy::too_many_arguments)] 34 | fn new( 35 | model: Llama, 36 | tokenizer: Tokenizer, 37 | eos_token_id: Option, 38 | config: Config, 39 | seed: u64, 40 | temp: f64, 41 | top_p: f64, 42 | repeat_penalty: f32, 43 | repeat_last_n: usize, 44 | device: &Device, 45 | ) -> Self { 46 | let logits_processor = LogitsProcessor::from_sampling( 47 | seed, 48 | Sampling::TopP { 49 | p: top_p, 50 | temperature: temp, 51 | }, 52 | ); 53 | Self { 54 | model, 55 | tokenizer: TokenOutputStream::new(tokenizer), 56 | eos_token_id: eos_token_id, 57 | config: config, 58 | logits_processor, 59 | repeat_penalty, 60 | repeat_last_n, 61 | device: device.clone(), 62 | } 63 | } 64 | } 65 | 66 | impl TextGenModel for TextGeneration { 67 | fn run(&mut self, output:&dyn OutputStream, prompt: &str, sample_len: usize) -> Result<(), Error> { 68 | self.tokenizer.clear(); 69 | let mut tokens = self 70 | .tokenizer 71 | .tokenizer() 72 | .encode(prompt, true) 73 | .map_err(Error::msg)? 
74 | .get_ids() 75 | .to_vec(); 76 | let mut cache = model::Cache::new(true, DType::F32, &self.config, &self.device)?; 77 | println!("starting the inference loop"); 78 | print!("{prompt}"); 79 | let mut start_gen = std::time::Instant::now(); 80 | let mut index_pos = 0; 81 | let mut token_generated = 0; 82 | 83 | for index in 0..sample_len { 84 | let (context_size, context_index) = if cache.use_kv_cache && index > 0 { 85 | (1, index_pos) 86 | } else { 87 | (tokens.len(), 0) 88 | }; 89 | if index == 1 { 90 | start_gen = std::time::Instant::now() 91 | } 92 | let ctxt = &tokens[tokens.len().saturating_sub(context_size)..]; 93 | let input = Tensor::new(ctxt, &self.device)?.unsqueeze(0)?; 94 | let logits = self.model.forward(&input, context_index, &mut cache)?; 95 | let logits = logits.squeeze(0)?; 96 | let logits = if self.repeat_penalty == 1. { 97 | logits 98 | } else { 99 | let start_at = tokens.len().saturating_sub(self.repeat_last_n); 100 | candle_transformers::utils::apply_repeat_penalty( 101 | &logits, 102 | self.repeat_penalty, 103 | &tokens[start_at..], 104 | )? 105 | }; 106 | index_pos += ctxt.len(); 107 | 108 | let next_token = self.logits_processor.sample(&logits)?; 109 | token_generated += 1; 110 | tokens.push(next_token); 111 | 112 | if Some(next_token) == self.eos_token_id { 113 | break; 114 | } 115 | 116 | if let Some(t) = self.tokenizer.next_token(next_token)? { 117 | output.write(t)?; 118 | } 119 | 120 | } 121 | output.end().unwrap(); 122 | if let Some(rest) = self.tokenizer.decode_rest().map_err(Error::msg)? { 123 | print!("{rest}"); 124 | } 125 | let dt = start_gen.elapsed(); 126 | println!( 127 | "\n\n{} tokens generated ({} token/s)\n", 128 | token_generated, 129 | (token_generated - 1) as f64 / dt.as_secs_f64(), 130 | ); 131 | Ok(()) 132 | } 133 | fn messages_chat_template(&self,msg_list: &Vec,system_prompt:&str)->String { 134 | let mut history = String::new(); 135 | history.push_str("<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"); 136 | history.push_str(format!("{}<|eot_id|>",system_prompt).as_str()); 137 | 138 | for msg in msg_list { 139 | if msg.role == Role::User { 140 | history.push_str("<|start_header_id|>user<|end_header_id|>\n\n"); 141 | } else { 142 | history.push_str("<|start_header_id|>assistant<|end_header_id|>\n\n"); 143 | } 144 | history.push_str(msg.content.as_str()); 145 | history.push_str("<|eot_id|>\n"); 146 | 147 | } 148 | history.push_str("<|start_header_id|>assistant<|end_header_id|>\n\n"); 149 | history 150 | } 151 | } 152 | 153 | 154 | 155 | fn hub_load_safetensors( 156 | repo: &hf_hub::api::sync::ApiRepo, 157 | json_file: &str, 158 | ) -> Result> { 159 | let json_file = repo.get(json_file).map_err(candle_core::Error::wrap)?; 160 | let json_file = std::fs::File::open(json_file)?; 161 | let json: serde_json::Value = 162 | serde_json::from_reader(&json_file).map_err(candle_core::Error::wrap)?; 163 | let weight_map = match json.get("weight_map") { 164 | None => anyhow::bail!("no weight map in {json_file:?}"), 165 | Some(serde_json::Value::Object(map)) => map, 166 | Some(_) => anyhow::bail!("weight map in {json_file:?} is not a map"), 167 | }; 168 | let mut safetensors_files = std::collections::HashSet::new(); 169 | for value in weight_map.values() { 170 | if let Some(file) = value.as_str() { 171 | safetensors_files.insert(file.to_string()); 172 | } 173 | } 174 | let safetensors_files = safetensors_files 175 | .iter() 176 | .map(|v| repo.get(v).map_err(Error::new)) 177 | .collect::>>()?; 178 | Ok(safetensors_files) 179 | } 180 | 181 | 
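// Illustrative sketch (not part of the original file): roughly how a worker can
// drive `TextGeneration` end to end. The function name, the system prompt, and
// the sample length of 1024 are assumptions; the real wiring lives in
// worker_server.rs. `IpcSender<String>` already implements `OutputStream`
// (see ipc.rs), so generated fragments stream straight back to the master.
#[allow(dead_code)]
fn example_answer(
    sender: ipc_channel::ipc::IpcSender<String>,
    msg_list: &Vec<Message>,
) -> Result<()> {
    let mut model = load_model("meta-llama/Meta-Llama-3-8B-Instruct", 0.6f64, 0.9f64);
    // Render the chat history into the Llama 3 template defined above.
    let prompt = model.messages_chat_template(msg_list, "You are a helpful assistant!");
    // Generate up to 1024 tokens, writing each decoded fragment to the IPC channel.
    model.run(&sender, prompt.as_str(), 1024)
}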
// pub fn load()->TextGeneration { 182 | // load_model("meta-llama/Meta-Llama-3-8B-Instruct",0.6f64,0.9f64) 183 | // } 184 | 185 | 186 | 187 | pub fn load_model(model_id:&str, temp: f64, 188 | top_p: f64,) -> impl TextGenModel { 189 | 190 | let revision = String::from("main"); 191 | 192 | let device = if cuda_is_available() { 193 | Device::new_cuda(0).expect("create cuda device failed!") 194 | } else { 195 | Device::Cpu 196 | }; 197 | 198 | let dtype = DType::F32; 199 | let api = Api::new().expect("create Api failed!"); 200 | let api = api.repo(Repo::with_revision(model_id.to_string(), RepoType::Model, revision)); 201 | 202 | let tokenizer_filename = api 203 | .get("tokenizer.json") 204 | .expect("get tokenizer.json failed!"); 205 | let config_filename = api.get("config.json").expect("get config.json failed!"); 206 | let config: LlamaConfig = 207 | serde_json::from_slice(&std::fs::read(config_filename).expect("read config file failed!")) 208 | .expect("serde_json from slice config file failed!"); 209 | let config = config.into_config(false); 210 | let filenames = hub_load_safetensors(&api, "model.safetensors.index.json") 211 | .expect("hub_load_safetensors failed!"); 212 | 213 | let vb = unsafe { 214 | VarBuilder::from_mmaped_safetensors(&filenames, dtype, &device) 215 | .expect("var builder failed!") 216 | }; 217 | let llama = Llama::load(vb, &config).expect("llama load failed!"); 218 | let tokenizer = Tokenizer::from_file(tokenizer_filename) 219 | .map_err(Error::msg) 220 | .expect("load tokenizer failed!"); 221 | 222 | let eos_token_id = tokenizer.token_to_id(EOS_TOKEN); 223 | 224 | 225 | TextGeneration::new( 226 | llama, 227 | tokenizer, 228 | eos_token_id, 229 | config, 230 | 299792458u64, 231 | temp, 232 | top_p, 233 | 1.8f32, 234 | 16usize, 235 | &device, 236 | ) 237 | } -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case, unused)] 2 | 3 | use clap::*; 4 | use dioxus::prelude::*; 5 | use dioxus_logger::tracing::{info, Level}; 6 | use moonweb::web::app; 7 | use std::str::FromStr; 8 | 9 | #[cfg(not(target_arch = "wasm32"))] 10 | use moonweb::master_server::master_server; 11 | #[cfg(not(target_arch = "wasm32"))] 12 | use moonweb::worker_server::worker_server; 13 | 14 | // Urls are relative to your Cargo.toml file 15 | const _TAILWIND_URL: &str = manganis::mg!(file("public/tailwind.css")); 16 | #[derive(Debug)] 17 | enum ServerNode { 18 | Master, 19 | Worker, 20 | Web, 21 | } 22 | 23 | impl FromStr for ServerNode { 24 | type Err = String; 25 | 26 | fn from_str(s: &str) -> std::result::Result<Self, Self::Err> { 27 | match s.to_lowercase().as_str() { 28 | "master" => Ok(ServerNode::Master), 29 | "worker" => Ok(ServerNode::Worker), 30 | "web" => Ok(ServerNode::Web), 31 | _ => Err(format!("'{}' is not a valid ServerNode", s)), 32 | } 33 | } 34 | } 35 | 36 | #[derive(Parser, Debug)] 37 | #[clap(author, version, about, long_about = None)] 38 | struct Args { 39 | #[clap(short, long)] 40 | server: Option<ServerNode>, 41 | 42 | #[clap(short, long)] 43 | ipc_name: Option<String>, 44 | 45 | #[clap(short, long)] 46 | model_id: Option<String>, 47 | 48 | #[clap(long)] // no short flag: -h is reserved for --help 49 | temp: Option<f64>, 50 | 51 | #[clap(short='t', long)] 52 | top_p: Option<f64>, 53 | 54 | #[clap(short='e', long)] 55 | master_port: Option<u16>, 56 | 57 | } 58 | 59 | fn main() { 60 | let args = Args::parse(); 61 | let server_type = args.server.unwrap_or_else(|| ServerNode::Web); 62 | 63 | match server_type { 64 | 
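// Example invocations (assumed; the binary name follows Cargo.toml's
// package name "moonweb"):
//   moonweb --server web      # default: launch the Dioxus chat front end
//   moonweb --server master   # HTTP master; loads and supervises workers
//   moonweb --server worker --model-id <id> --ipc-name <name>
// The worker form is normally spawned by the master itself (see
// launch_worker in master_server.rs), not typed by hand.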
ServerNode::Web => { 65 | dioxus_logger::init(Level::INFO).expect("logger failed to init"); 66 | 67 | launch(app); 68 | } 69 | ServerNode::Master => { 70 | #[cfg(not(target_arch = "wasm32"))] 71 | { 72 | let runtime = tokio::runtime::Runtime::new().expect("Create runtime failed!"); 73 | runtime.block_on(master_server()); 74 | } 75 | 76 | } 77 | ServerNode::Worker => { 78 | #[cfg(not(target_arch = "wasm32"))] 79 | { 80 | let model_id = args 81 | .model_id 82 | .unwrap_or_else(|| "meta-llama/Meta-Llama-3-8B-Instruct".into()); 83 | let temp = args.temp.unwrap_or_else(|| 0.6f64); 84 | let top_p = args.top_p.unwrap_or_else(|| 0.9f64); 85 | let ipc_name = args.ipc_name.unwrap(); 86 | let runtime = tokio::runtime::Runtime::new().expect("Create runtime failed!"); 87 | runtime.block_on(worker_server( 88 | ipc_name, 89 | model_id.clone(), 90 | temp, 91 | top_p, 92 | )); 93 | } 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/master_server.rs: -------------------------------------------------------------------------------- 1 | use crate::data::{AuthRequest, AuthResponse, Message, Request, Role}; 2 | 3 | use crate::master_state::{ 4 | get_master_addr, get_program, get_servers, get_working_servers, new_working_server, 5 | remove_working_server, 6 | }; 7 | use axum::{ 8 | self, 9 | extract::DefaultBodyLimit, 10 | response::sse::{Event, Sse}, 11 | routing::{get, post}, 12 | Json, Router, 13 | }; 14 | 15 | use axum_auth::AuthBearer; 16 | 17 | use chrono::{Datelike, Utc}; 18 | use dashmap::DashMap; 19 | use futures::stream::Stream; 20 | use ipc_channel::ipc::{IpcOneShotServer, IpcReceiver, IpcSender}; 21 | use lazy_static::lazy_static; 22 | use sqids::Sqids; 23 | use std::convert::Infallible; 24 | use std::path::PathBuf; 25 | use std::process; 26 | use std::process::Command; 27 | use tokio::signal; 28 | #[cfg(unix)] 29 | use tokio::signal::unix::SignalKind; 30 | use tokio::sync::mpsc::{self, Receiver, Sender}; 31 | use tokio::time::Duration; 32 | use tower_http::services::{ServeDir, ServeFile}; 33 | 34 | const SQIDS_ALPHABET: &str = "VRHIrU2je0gxcSGlzvMWBAkpufqDiyEoY931JLTC5wN6KbaQFPOdsXn48h7mZt"; 35 | const ADMIN_TOKEN: &str = "5bfc427e-c7ca-4612-a445-76d7d141eds"; 36 | const USER_TOKEN: &str = "d272c71d-3579-4c85-b6cd-1f144190c98a"; 37 | const SALT: &str = "Akpu#fqDiy@EoY931J_VRHIrU2"; 38 | 39 | lazy_static! { 40 | static ref WORKER_HUB: DashMap = DashMap::::new(); 41 | } 42 | pub struct Worker { 43 | pub model_id: String, 44 | pub sender: Sender<(Option>, Request)>, 45 | } 46 | 47 | async fn modal_actor( 48 | sender: IpcSender, 49 | receiver: IpcReceiver, 50 | mut rx: Receiver<(Option>, Request)>, 51 | ) { 52 | loop { 53 | if let Some((response_tx, request_data)) = rx.recv().await { 54 | let data = serde_json::json!(request_data).to_string(); 55 | sender 56 | .send(data) 57 | .expect("Failed to send request to worker process!"); 58 | if request_data.cmd == "QUIT" { 59 | break; 60 | } 61 | loop { 62 | if let Ok(response) = receiver.recv() { 63 | if response == "<|endoftext|>" { 64 | break; 65 | } 66 | if response_tx.clone().unwrap().send(response).await.is_err() { 67 | break; 68 | } 69 | } else { 70 | break; 71 | } 72 | } 73 | }; 74 | } 75 | } 76 | 77 | pub async fn call_worker( 78 | AuthBearer(token): AuthBearer, 79 | Json(request): Json, 80 | ) -> Sse>> { 81 | println!("call_worker!! 
{}", request.cmd); 82 | let model_id = request.cmd; 83 | 84 | let mut receiver = if valid_token(token.as_str()) { 85 | if let Some(worker) = WORKER_HUB.get(&model_id) { 86 | let (response_tx, response_rx) = mpsc::channel::(1); 87 | let req = Request { 88 | cmd: "chat".to_string(), 89 | system_prompt: request.system_prompt, 90 | msg_list: request.msg_list, 91 | }; 92 | worker 93 | .sender 94 | .send((Some(response_tx), req)) 95 | .await 96 | .expect("Failed to send to worker"); 97 | Some(response_rx) 98 | } else { 99 | None 100 | } 101 | } else { 102 | None 103 | }; 104 | use tokio_stream::StreamExt as _; 105 | 106 | let stream = async_stream::stream! { 107 | match receiver { 108 | Some(ref mut rx)=> loop { 109 | let msg = match rx.recv().await { 110 | Some(text) => Event::default().data(text), 111 | None => { 112 | break; 113 | } 114 | }; 115 | yield msg; 116 | }, 117 | None => loop { 118 | println!("worker is None!!!!"); 119 | yield Event::default().data("[DONE]"); 120 | break; 121 | } 122 | } 123 | } 124 | .map(Ok); 125 | 126 | Sse::new(stream).keep_alive( 127 | axum::response::sse::KeepAlive::new() 128 | .interval(Duration::from_secs(2)) 129 | .text("keep-alive-text"), 130 | ) 131 | } 132 | 133 | #[cfg(unix)] 134 | async fn handle_unix_signals() { 135 | let mut sigterm = signal::unix::signal(SignalKind::terminate()).unwrap(); 136 | let mut sigint = signal::unix::signal(SignalKind::interrupt()).unwrap(); 137 | 138 | tokio::select! { 139 | _ = sigterm.recv() => { 140 | println!("Received SIGTERM"); 141 | }, 142 | _ = sigint.recv() => { 143 | println!("Received SIGINT"); 144 | }, 145 | } 146 | for mut kv in WORKER_HUB.iter_mut() { 147 | let worker = kv.value_mut(); 148 | let req = Request { 149 | cmd: "QUIT".to_string(), 150 | system_prompt: "".to_string(), 151 | msg_list: Vec::::new(), 152 | }; 153 | 154 | let _ = worker.sender.send((None, req)).await.is_err_and(|x| { 155 | println!("{:?}", x); 156 | process::exit(0) 157 | }); 158 | } 159 | process::exit(0); 160 | } 161 | 162 | pub async fn master_server() { 163 | for server in get_working_servers().await.iter() { 164 | let model_id = &server.model_id; 165 | let program = get_program(server); 166 | launch_worker(&program, model_id); 167 | } 168 | 169 | #[cfg(unix)] 170 | let rt = tokio::runtime::Runtime::new().expect("Create runtime failed!"); 171 | #[cfg(unix)] 172 | rt.spawn(handle_unix_signals()); 173 | 174 | let serve_dir = ServeDir::new("dist").not_found_service(ServeFile::new("dist/index.html")); 175 | 176 | let app = Router::new() 177 | .route("/api/chat", post(call_worker)) 178 | .route("/api/load", post(call_command)) 179 | 180 | .route("/api/models", get(modal_list)) 181 | .route("/api/signin", post(signin)) 182 | .layer(DefaultBodyLimit::disable()) 183 | .nest_service("/", serve_dir.clone()) 184 | .fallback_service(serve_dir); 185 | let addr = get_master_addr().await; 186 | let listener = tokio::net::TcpListener::bind(addr.clone()).await.unwrap(); 187 | println!("listenning in {}", addr); 188 | axum::serve(listener, app).await.unwrap(); 189 | } 190 | 191 | fn launch_worker(program: &PathBuf, model_id: &String) { 192 | let (one_shot_serv, ipc_name) = IpcOneShotServer::new().expect("Failed to ipc one shot server"); 193 | let e = Command::new(program.as_os_str()) 194 | .arg("--server") 195 | .arg("Worker") 196 | .arg("--model-id") 197 | .arg(model_id.as_str()) 198 | .arg("--ipc-name") 199 | .arg(ipc_name.as_str()) 200 | .spawn(); 201 | if e.is_err() { 202 | println!("Worker server {} failed to start", model_id); 203 | return; 204 
| } 205 | 206 | let (_, sender): (_, IpcSender) = 207 | one_shot_serv.accept().expect("Failed to accept sender!"); 208 | let (one_shot_serv, ipc_name) = IpcOneShotServer::new().unwrap(); 209 | sender.send(ipc_name).expect("Failed to send ipc name"); 210 | let (_, receiver): (_, IpcReceiver) = 211 | one_shot_serv.accept().expect("Failed to accept receiver!"); 212 | let (tx, rx) = mpsc::channel::<(Option>, Request)>(1); 213 | WORKER_HUB.insert( 214 | model_id.clone(), 215 | Worker { 216 | model_id: model_id.clone(), 217 | sender: tx, 218 | }, 219 | ); 220 | tokio::spawn(modal_actor(sender, receiver, rx)); 221 | } 222 | 223 | pub async fn modal_list() -> Json> { 224 | let list: Vec = get_working_servers() 225 | .await 226 | .iter() 227 | .map(|serv| serv.model_id.clone()) 228 | .collect(); 229 | Json::from(list) 230 | } 231 | 232 | fn get_expire() -> String { 233 | let now = Utc::now().date_naive(); 234 | let one_month_later = now + chrono::Duration::days(30); 235 | let sqids = Sqids::builder() 236 | .alphabet(SQIDS_ALPHABET.chars().collect()) 237 | .build() 238 | .unwrap(); 239 | let expire_raw = [ 240 | one_month_later.year() as u64, 241 | one_month_later.month() as u64, 242 | one_month_later.day() as u64, 243 | ]; 244 | sqids.encode(&expire_raw).unwrap() 245 | } 246 | 247 | fn valid_token(token: &str) -> bool { 248 | if token 249 | == format!( 250 | "{:x}", 251 | md5::compute(format!("{}{}", SALT, USER_TOKEN).as_bytes()) 252 | ) 253 | || token 254 | == format!( 255 | "{:x}", 256 | md5::compute(format!("{}{}", SALT, ADMIN_TOKEN).as_bytes()) 257 | ) 258 | { 259 | true 260 | } else { 261 | false 262 | } 263 | } 264 | 265 | fn valid_admin_token(token: &str) -> bool { 266 | if token 267 | == format!( 268 | "{:x}", 269 | md5::compute(format!("{}{}", SALT, ADMIN_TOKEN).as_bytes()) 270 | ) 271 | { 272 | true 273 | } else { 274 | false 275 | } 276 | } 277 | 278 | pub async fn signin(Json(request): Json) -> Json { 279 | let response_of_failed = AuthResponse { 280 | success: false, 281 | auth_key: String::new(), 282 | expire: String::new(), 283 | }; 284 | let response = match request.role { 285 | Role::User => { 286 | if request.token 287 | == format!( 288 | "{:x}", 289 | md5::compute(format!("{}{}", SALT, USER_TOKEN).as_bytes()) 290 | ) 291 | { 292 | AuthResponse { 293 | success: true, 294 | auth_key: request.token, 295 | expire: get_expire(), 296 | } 297 | } else { 298 | response_of_failed 299 | } 300 | } 301 | Role::Administrator => { 302 | if request.token 303 | == format!( 304 | "{:x}", 305 | md5::compute(format!("{}{}", SALT, ADMIN_TOKEN).as_bytes()) 306 | ) 307 | { 308 | AuthResponse { 309 | success: true, 310 | auth_key: request.token, 311 | expire: get_expire(), 312 | } 313 | } else { 314 | response_of_failed 315 | } 316 | } 317 | Role::Robot => response_of_failed, 318 | }; 319 | Json::from(response) 320 | } 321 | 322 | pub async fn call_command(AuthBearer(token): AuthBearer,cmd: String) -> String { 323 | if valid_admin_token(token.as_str()) { 324 | let commands: Vec<&str> = cmd 325 | .split(|c: char| c.is_whitespace()) 326 | .filter(|&s| !s.is_empty()) 327 | .collect(); 328 | if commands.len() > 1 { 329 | match commands[0] { 330 | "/load" => { 331 | let model_id = commands[1].to_string(); 332 | 333 | match get_working_servers() 334 | .await 335 | .iter() 336 | .find(|ser| ser.model_id == model_id) 337 | { 338 | None => { 339 | if let Some(server) = get_servers() 340 | .await 341 | .iter() 342 | .find(|ser| ser.model_id == model_id) 343 | { 344 | let program = get_program(server); 345 | 
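// launch_worker below forks this same binary as a worker process and
// completes the two IpcOneShotServer handshakes to obtain a sender/receiver
// pair, registering the worker in WORKER_HUB; new_working_server then
// persists the entry to server.config so the model is relaunched
// automatically on the next master start.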
launch_worker(&program, &model_id); 346 | new_working_server(server.clone()).await; 347 | format!("{} server started!", model_id) 348 | } else { 349 | format!("{} does not exist!", model_id) 350 | } 351 | } 352 | Some(_) => format!("{} server is already running!", model_id), 353 | } 354 | } 355 | "/unload" => { 356 | let model_id = commands[1].to_string(); 357 | if let Some((_, server)) = WORKER_HUB.remove(model_id.as_str()) { 358 | let req = Request { 359 | cmd: "QUIT".to_string(), 360 | system_prompt: "".to_string(), 361 | msg_list: Vec::<Message>::new(), 362 | }; 363 | server.sender.send((None, req)).await.unwrap(); 364 | remove_working_server(model_id.as_str()).await; 365 | format!("{} server stopped!", model_id) 366 | } else { 367 | format!("{} server is not running", model_id) 368 | } 369 | } 370 | _ => { 371 | format!("Command {} does not exist", commands[0]) 372 | } 373 | } 374 | } else { 375 | format!("{} is not a valid command!", cmd) 376 | } 377 | } else { 378 | String::from("Authentication failed. Only administrators can execute commands.") 379 | } 380 | } 381 | -------------------------------------------------------------------------------- /src/master_state.rs: -------------------------------------------------------------------------------- 1 | use lazy_static::lazy_static; 2 | use std::fs; 3 | use std::io::Read; 4 | use std::sync::Arc; 5 | use tokio::sync::RwLock; 6 | use serde::{Deserialize, Serialize}; 7 | use std::env; 8 | use std::path::PathBuf; 9 | 10 | #[derive(Deserialize, Serialize, Debug, Clone)] 11 | pub(crate) struct WorkerServer { 12 | pub model_id: String, 13 | pub program: String, 14 | pub temp: f64, 15 | pub top_p: f64, 16 | } 17 | 18 | #[derive(Deserialize, Serialize, Debug, Clone)] 19 | struct ServerConfig { 20 | pub ports: Vec, 21 | pub master_addr: String, 22 | pub working_servers: Vec<WorkerServer>, 23 | pub servers: Vec<WorkerServer>, 24 | } 25 | 26 | lazy_static! 
{ 27 | static ref CONFIG:Arc> = Arc::new(RwLock::new(load_config())); 28 | } 29 | 30 | fn load_config() -> ServerConfig { 31 | let mut file = fs::File::open("server.config").expect("Failed to read server.config!"); 32 | let mut contents = String::new(); 33 | file.read_to_string(&mut contents) 34 | .expect("Failed to read server.config to string!"); 35 | let config = 36 | serde_json::from_str::(contents.as_str()).expect("Failed to deseriablize."); 37 | config 38 | } 39 | fn save_config(config:&ServerConfig) { 40 | let content = serde_json::to_string_pretty(config).unwrap(); 41 | fs::write("server.config", content.as_bytes()).unwrap(); 42 | } 43 | 44 | pub(crate) fn get_program(server: &WorkerServer) -> PathBuf { 45 | let program = if server.program == "self" { 46 | env::current_exe().expect("Failed to determine the current executable path") 47 | } else { 48 | PathBuf::from(server.program.clone()) 49 | }; 50 | program 51 | } 52 | 53 | pub(crate) async fn get_working_servers()->Vec { 54 | CONFIG.read().await.working_servers.clone() 55 | } 56 | 57 | pub(crate) async fn get_master_addr() -> String { 58 | CONFIG.read().await.master_addr.clone() 59 | } 60 | 61 | pub(crate) async fn get_servers()->Vec { 62 | CONFIG.read().await.servers.clone() 63 | } 64 | 65 | pub(crate) async fn new_working_server(server: WorkerServer) { 66 | let new_config = { 67 | let mut config = CONFIG.write().await; 68 | config.working_servers.push(server); 69 | config 70 | }; 71 | save_config(&new_config); 72 | } 73 | 74 | pub(crate) async fn remove_working_server(model_id: &str) { 75 | let new_config = { 76 | let mut config = CONFIG.write().await; 77 | config.working_servers.retain(|s| s.model_id!=model_id.to_string()); 78 | config 79 | }; 80 | save_config(&new_config); 81 | } 82 | 83 | 84 | -------------------------------------------------------------------------------- /src/model.rs: -------------------------------------------------------------------------------- 1 | use core::str; 2 | 3 | use crate::data::Message; 4 | use crate::llama; 5 | use anyhow::{Error, Result}; 6 | use crate::ipc::OutputStream; 7 | 8 | 9 | pub trait TextGenModel { 10 | fn run(&mut self, output:&dyn OutputStream,prompt: &str, sample_len: usize) -> Result<(), Error>; 11 | fn messages_chat_template(&self, msg_list: &Vec, system_prompt: &str) -> String; 12 | } 13 | 14 | pub fn load(model_id: &str, temp: f64, top_p: f64) -> Option> { 15 | match model_id { 16 | "meta-llama/Meta-Llama-3-8B-Instruct" => Some(Box::new(llama::load_model(model_id, temp, top_p))), 17 | "microsoft/Phi-3-medium-4k-instruct" => Some(Box::new(crate::phi3::load())), 18 | _ => None, 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/phi3.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Error, Result}; 2 | 3 | use crate::token_output_stream::TokenOutputStream; 4 | 5 | use candle_transformers::models::phi3::{Config as Phi3Config, Model as Phi3}; 6 | 7 | 8 | use candle_core::{DType, Device, IndexOp, Tensor}; 9 | use candle_core::utils::cuda_is_available; 10 | use candle_nn::VarBuilder; 11 | use candle_transformers::generation::LogitsProcessor; 12 | use hf_hub::{api::sync::Api, Repo, RepoType}; 13 | use tokenizers::Tokenizer; 14 | use crate::model::TextGenModel; 15 | use crate::data::{Message,Role}; 16 | use crate::ipc::OutputStream; 17 | 18 | 19 | pub struct TextGeneration { 20 | model: Phi3, 21 | device: Device, 22 | tokenizer: TokenOutputStream, 23 | logits_processor: 
LogitsProcessor, 24 | repeat_penalty: f32, 25 | repeat_last_n: usize, 26 | } 27 | 28 | impl TextGeneration { 29 | #[allow(clippy::too_many_arguments)] 30 | fn new( 31 | model: Phi3, 32 | tokenizer: Tokenizer, 33 | seed: u64, 34 | temp: Option, 35 | top_p: Option, 36 | repeat_penalty: f32, 37 | repeat_last_n: usize, 38 | device: &Device, 39 | ) -> Self { 40 | let logits_processor = LogitsProcessor::new(seed, temp, top_p); 41 | Self { 42 | model, 43 | tokenizer: TokenOutputStream::new(tokenizer), 44 | logits_processor, 45 | repeat_penalty, 46 | repeat_last_n, 47 | device: device.clone(), 48 | } 49 | } 50 | } 51 | 52 | impl TextGenModel for TextGeneration { 53 | 54 | 55 | fn run(&mut self,output:&dyn OutputStream, prompt: &str, sample_len: usize) -> Result<(), Error> { 56 | 57 | println!("starting the inference loop"); 58 | self.model.clear_kv_cache(); 59 | self.tokenizer.clear(); 60 | let tokens = self.tokenizer 61 | .tokenizer() 62 | .encode(prompt, true) 63 | .map_err(Error::msg)?; 64 | if tokens.is_empty() { 65 | anyhow::bail!("Empty prompts are not supported in the phi model.") 66 | } 67 | 68 | let mut tokens = tokens.get_ids().to_vec(); 69 | let mut generated_tokens = 0usize; 70 | let eos_token = match self.tokenizer.get_token("<|end|>") { 71 | Some(token) => token, 72 | None => anyhow::bail!("cannot find the endoftext token"), 73 | }; 74 | 75 | let start_gen = std::time::Instant::now(); 76 | let mut pos = 0; 77 | //let mut content = String::new(); 78 | for index in 0..sample_len { 79 | let context_size = if index > 0 { 1 } else { tokens.len() }; 80 | let ctxt = &tokens[tokens.len().saturating_sub(context_size)..]; 81 | let input = Tensor::new(ctxt, &self.device).expect("create input tensor failed!").unsqueeze(0).expect("unsqueeze failed!"); 82 | let logits =self.model.forward(&input, pos).expect(format!("model forward failed at {}",index).as_str()).i((.., 0, ..)).expect("i failed!"); 83 | let logits = logits.squeeze(0).expect("logits.squeeze failed!").to_dtype(DType::F32).expect("to dtype failed!"); 84 | let logits = if self.repeat_penalty == 1. { 85 | logits 86 | } else { 87 | let start_at = tokens.len().saturating_sub(self.repeat_last_n); 88 | candle_transformers::utils::apply_repeat_penalty( 89 | &logits, 90 | self.repeat_penalty, 91 | &tokens[start_at..], 92 | )? 93 | }; 94 | 95 | let next_token = self.logits_processor.sample(&logits).expect("logits processor sample failed!"); 96 | tokens.push(next_token); 97 | generated_tokens += 1; 98 | if next_token == eos_token { 99 | break; 100 | } 101 | 102 | if let Some(t) = self.tokenizer.next_token(next_token).expect("tokenizer netx_token failed!") { 103 | output.write(t)? 
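// note: after the first iteration only the newest token is fed
// (context_size == 1), so `pos` below advances by exactly the number of
// tokens submitted this step, keeping the KV-cache offset aligned.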
104 | } 105 | pos += context_size; 106 | } 107 | output.end().unwrap(); 108 | let dt = start_gen.elapsed(); 109 | println!( 110 | "\n{generated_tokens} tokens generated ({:.2} token/s)", 111 | generated_tokens as f64 / dt.as_secs_f64(), 112 | ); 113 | Ok(()) 114 | } 115 | 116 | fn messages_chat_template(&self,msg_list: &Vec,system_prompt:&str)->String { 117 | let mut history = String::new(); 118 | history.push_str(system_prompt); 119 | history.push_str("\n"); 120 | for msg in msg_list { 121 | if msg.role == Role::User { 122 | history.push_str("<|user|>\n"); 123 | } else { 124 | history.push_str("<|assistant|>\n"); 125 | } 126 | history.push_str(msg.content.as_str()); 127 | history.push_str("<|end|>\n"); 128 | } 129 | history.push_str("<|assistant|>\n"); 130 | history 131 | } 132 | } 133 | 134 | fn hub_load_safetensors( 135 | repo: &hf_hub::api::sync::ApiRepo, 136 | json_file: &str, 137 | ) -> Result> { 138 | let json_file = repo.get(json_file).map_err(candle_core::Error::wrap)?; 139 | let json_file = std::fs::File::open(json_file)?; 140 | let json: serde_json::Value = 141 | serde_json::from_reader(&json_file).map_err(candle_core::Error::wrap)?; 142 | let weight_map = match json.get("weight_map") { 143 | None => anyhow::bail!("no weight map in {json_file:?}"), 144 | Some(serde_json::Value::Object(map)) => map, 145 | Some(_) => anyhow::bail!("weight map in {json_file:?} is not a map"), 146 | }; 147 | let mut safetensors_files = std::collections::HashSet::new(); 148 | for value in weight_map.values() { 149 | if let Some(file) = value.as_str() { 150 | safetensors_files.insert(file.to_string()); 151 | } 152 | } 153 | let safetensors_files = safetensors_files 154 | .iter() 155 | .map(|v| repo.get(v).map_err(Error::new)) 156 | .collect::>>()?; 157 | Ok(safetensors_files) 158 | } 159 | 160 | pub fn load() -> impl TextGenModel { 161 | let model_id = String::from("microsoft/Phi-3-medium-4k-instruct"); 162 | let revision = String::from("main"); 163 | let api = Api::new().expect("hf_hub api load failed!"); 164 | let repo = api.repo(Repo::with_revision(model_id, RepoType::Model, revision)); 165 | let tokenizer_filename = repo.get("tokenizer.json").expect("load tokenizer.json failed !"); 166 | let filenames = hub_load_safetensors( 167 | &repo, 168 | "model.safetensors.index.json", 169 | ).expect("hub_load_safetensors failed!"); 170 | 171 | let tokenizer = Tokenizer::from_file(tokenizer_filename).map_err(Error::msg).expect("Tokenizer from file failed!"); 172 | let device = if cuda_is_available() { Device::new_cuda(0).expect("create cuda device failed!") } else { Device::Cpu }; 173 | let dtype = if device.is_cuda() {DType::BF16} else {DType::F32}; 174 | let vb = unsafe { VarBuilder::from_mmaped_safetensors(&filenames, dtype, &device).expect("var builder failed!") }; 175 | let config_filename = repo.get("config.json").expect("get config filename failed!"); 176 | let config = std::fs::read_to_string(config_filename).expect("Read to string failed!"); 177 | let config: Phi3Config = serde_json::from_str(&config).expect("load Phi3Config failed!"); 178 | let phi3 = Phi3::new(&config, vb).expect("create Phi3 failed!"); 179 | 180 | TextGeneration::new( 181 | phi3, 182 | tokenizer, 183 | 299792458u64, 184 | Some(0.7f64), 185 | Some(0.95f64), 186 | 2.8f32, 187 | 16usize, 188 | &device) 189 | 190 | } -------------------------------------------------------------------------------- /src/token_output_stream.rs: -------------------------------------------------------------------------------- 1 | use candle_core::Result; 
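// Minimal usage sketch (illustrative, not part of the original file): feed
// token ids one at a time and collect fragments as soon as they decode to
// complete text. The ids would normally come from the model's sampler.
#[allow(dead_code)]
fn example_streaming(tokenizer: tokenizers::Tokenizer, ids: &[u32]) -> Result<String> {
    let mut stream = TokenOutputStream::new(tokenizer);
    let mut out = String::new();
    for &id in ids {
        // `next_token` yields Some(text) only once the pending tokens decode
        // to a string ending in an alphanumeric char, avoiding partial UTF-8.
        if let Some(piece) = stream.next_token(id)? {
            out.push_str(&piece);
        }
    }
    // Flush whatever is still buffered at the end of generation.
    if let Some(rest) = stream.decode_rest()? {
        out.push_str(&rest);
    }
    Ok(out)
}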
2 | 3 | /// This is a wrapper around a tokenizer to ensure that tokens can be returned to the user in a 4 | /// streaming way rather than having to wait for the full decoding. 5 | pub struct TokenOutputStream { 6 | tokenizer: tokenizers::Tokenizer, 7 | tokens: Vec, 8 | prev_index: usize, 9 | current_index: usize, 10 | } 11 | 12 | impl TokenOutputStream { 13 | pub fn new(tokenizer: tokenizers::Tokenizer) -> Self { 14 | Self { 15 | tokenizer, 16 | tokens: Vec::new(), 17 | prev_index: 0, 18 | current_index: 0, 19 | } 20 | } 21 | 22 | pub fn into_inner(self) -> tokenizers::Tokenizer { 23 | self.tokenizer 24 | } 25 | 26 | fn decode(&self, tokens: &[u32]) -> Result { 27 | match self.tokenizer.decode(tokens, true) { 28 | Ok(str) => Ok(str), 29 | Err(err) => candle_core::bail!("cannot decode: {err}"), 30 | } 31 | } 32 | 33 | pub fn put_token(&mut self, token: u32) { 34 | self.tokens.push(token); 35 | } 36 | 37 | pub fn next_token(&mut self, token: u32) -> Result> { 38 | let prev_text = if self.tokens.is_empty() { 39 | String::new() 40 | } else { 41 | let tokens = &self.tokens[self.prev_index..self.current_index]; 42 | self.decode(tokens)? 43 | }; 44 | self.tokens.push(token); 45 | let text = self.decode(&self.tokens[self.prev_index..])?; 46 | if text.len() > prev_text.len() && text.chars().last().unwrap().is_alphanumeric() { 47 | let text = text.split_at(prev_text.len()); 48 | self.prev_index = self.current_index; 49 | self.current_index = self.tokens.len(); 50 | Ok(Some(text.1.to_string())) 51 | } else { 52 | Ok(None) 53 | } 54 | } 55 | 56 | pub fn decode_rest(&self) -> Result> { 57 | let prev_text = if self.tokens.is_empty() { 58 | String::new() 59 | } else { 60 | let tokens = &self.tokens[self.prev_index..self.current_index]; 61 | self.decode(tokens)? 62 | }; 63 | let text = self.decode(&self.tokens[self.prev_index..])?; 64 | if text.len() > prev_text.len() { 65 | let text = text.split_at(prev_text.len()); 66 | Ok(Some(text.1.to_string())) 67 | } else { 68 | Ok(None) 69 | } 70 | } 71 | 72 | pub fn decode_all(&self) -> Result { 73 | self.decode(&self.tokens) 74 | } 75 | 76 | pub fn decode_text(&self) -> Result> { 77 | let mut pure_text = String::new(); 78 | let text = self.decode(&self.tokens).expect("decode failed!"); 79 | for ch in text.chars() { 80 | if ch.is_alphanumeric() { 81 | pure_text.push(ch) 82 | } 83 | } 84 | if pure_text.len()>0 { 85 | Ok(Some(pure_text)) 86 | } else { 87 | Ok(None) 88 | } 89 | 90 | } 91 | 92 | pub fn get_token(&self, token_s: &str) -> Option { 93 | self.tokenizer.get_vocab(true).get(token_s).copied() 94 | } 95 | 96 | pub fn tokenizer(&self) -> &tokenizers::Tokenizer { 97 | &self.tokenizer 98 | } 99 | 100 | pub fn clear(&mut self) { 101 | self.tokens.clear(); 102 | self.prev_index = 0; 103 | self.current_index = 0; 104 | } 105 | } -------------------------------------------------------------------------------- /src/web.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case, unused)] 2 | extern crate image_base64_wasm; 3 | 4 | use crate::data::{Message, Role, SelectOption, WebUser}; 5 | use crate::web_state::{Session, Store, TempSession}; 6 | use crate::authorization::{LoginBox,get_user,show_login}; 7 | use dioxus::prelude::*; 8 | use dioxus_logger::tracing::{info, Level}; 9 | use futures::StreamExt; 10 | use js_sys::Reflect; 11 | use serde::{Deserialize, Serialize}; 12 | use wasm_bindgen::prelude::*; 13 | use wasm_bindgen::JsValue; 14 | use web_sys::window; 15 | 16 | #[component] 17 | fn Pulse() -> Element { 
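// Animated skeleton placeholder shown while a reply is still streaming;
// ShowMessage renders it for any message whose `loading` flag is set.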
18 | rsx!( 19 | div { class: "border w-80 h-full border-blue-300 shadow rounded-md p-4 max-w-sm mx-auto", 20 | div { class: "animate-pulse flex space-x-4", 21 | div { class: "flex-1 space-y-6 py-1", 22 | div { class: "h-2 bg-slate-200 rounded" } 23 | div { class: "space-y-3", 24 | div { class: "grid grid-cols-3 gap-4", 25 | div { class: "h-2 bg-slate-200 rounded col-span-2" } 26 | div { class: "h-2 bg-slate-200 rounded col-span-1" } 27 | } 28 | div { class: "h-2 bg-slate-200 rounded" } 29 | } 30 | } 31 | } 32 | } 33 | ) 34 | } 35 | 36 | #[component] 37 | fn ModelConfig( 38 | model_id: Signal, 39 | modelOptions: Signal>, 40 | mut system_prompt: Signal, 41 | ) -> Element { 42 | let mut session = use_context::>(); 43 | rsx!( 44 | div { 45 | "aria-hidden": "true", 46 | tabindex: "-1", 47 | class: "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full", 48 | id: "model-config", 49 | div { class: "relative p-4 w-full max-w-md max-h-full", 50 | div { class: "relative bg-white rounded-lg shadow dark:bg-gray-700", 51 | div { class: "flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600", 52 | h3 { class: "text-lg font-semibold text-gray-900 dark:text-white", 53 | "\n Model Setting\n " 54 | } 55 | button { 56 | "data-modal-toggle": "model-config", 57 | r#type: "button", 58 | class: "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white", 59 | svg { 60 | "xmlns": "http://www.w3.org/2000/svg", 61 | "fill": "none", 62 | "aria-hidden": "true", 63 | "viewBox": "0 0 14 14", 64 | class: "w-3 h-3", 65 | path { 66 | "d": "m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6", 67 | "stroke": "currentColor", 68 | "stroke-linejoin": "round", 69 | "stroke-linecap": "round", 70 | "stroke-width": "2" 71 | } 72 | } 73 | span { class: "sr-only", "Close modal" } 74 | } 75 | } 76 | form { class: "p-4 md:p-5", 77 | div { class: "grid gap-4 mb-4 grid-cols-2", 78 | div { class: "col-span-2", 79 | label { 80 | r#for: "model", 81 | class: "block mb-2 text-sm font-medium text-gray-900 dark:text-white", 82 | "Model" 83 | } 84 | select { 85 | class: "bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-primary-500 focus:border-primary-500 block w-full p-2.5 dark:bg-gray-600 dark:border-gray-500 dark:placeholder-gray-400 dark:text-white dark:focus:ring-primary-500 dark:focus:border-primary-500", 86 | id: "model", 87 | value: "{model_id}", 88 | onchange: move |event| { 89 | let value = event.value(); 90 | model_id.set(value.clone()); 91 | let mut sess = session.write(); 92 | sess.mode_id = value; 93 | }, 94 | option { "Select model" } 95 | for model in modelOptions() { 96 | option { 97 | value: "{model.value}", 98 | selected: {model.selected}, 99 | "{model.text}" 100 | } 101 | } 102 | } 103 | } 104 | div { class: "col-span-2", 105 | label { 106 | r#for: "description", 107 | class: "block mb-2 text-sm font-medium text-gray-900 dark:text-white", 108 | "System prompt" 109 | } 110 | textarea { 111 | rows: "4", 112 | placeholder: "Write System Prompt", 113 | class: "block p-2.5 w-full text-sm text-gray-900 bg-gray-50 rounded-lg border border-gray-300 focus:ring-blue-500 focus:border-blue-500 dark:bg-gray-600 dark:border-gray-500 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500", 114 | value: "{system_prompt}", 115 | id: "System 
Prompt", 116 | onchange: move |evt| { 117 | let value = evt.value(); 118 | system_prompt.set(value.clone()); 119 | let mut sess = session.write(); 120 | sess.system_prompt = value; 121 | } 122 | 123 | } 124 | } 125 | } 126 | button { 127 | "data-modal-toggle": "model-config", 128 | r#type: "button", 129 | class: "text-white inline-flex items-center bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800", 130 | "\n OK\n " 131 | } 132 | } 133 | } 134 | } 135 | } 136 | ) 137 | } 138 | 139 | #[component] 140 | fn ShowMessage(msg: Message) -> Element { 141 | use comrak::{markdown_to_html, ExtensionOptions, Options}; 142 | let mut options = Options::default(); 143 | options.extension.table = true; 144 | options.extension.math_code = true; 145 | options.extension.multiline_block_quotes = true; 146 | let html = markdown_to_html(msg.content.as_str(), &options); 147 | rsx!(if msg.role == Role::User { 148 | div { class: "flex justify-end mb-4", 149 | div { class: "bg-blue-500 text-white p-3 rounded-l-lg rounded-br-lg", 150 | p { dangerous_inner_html: "{html}" } 151 | if let Some(img) = msg.img.clone() { 152 | img { class:"rounded-lg", src:"{img}"} 153 | } 154 | } 155 | } 156 | 157 | } else if msg.role == Role::Robot { 158 | div { class: "flex mb-4", 159 | div { class: "bg-gray-300 p-3 rounded-r-lg rounded-bl-lg", 160 | if msg.loading { 161 | Pulse {} 162 | } else { 163 | p { dangerous_inner_html: "{html}" } 164 | } 165 | } 166 | } 167 | } else { 168 | div { class: "flex mb-4", 169 | div { class: "bg-blue-300 p-3 rounded-r-lg rounded-bl-lg", 170 | if msg.loading { 171 | Pulse {} 172 | } else { 173 | p { dangerous_inner_html: "{html}" } 174 | } 175 | } 176 | } 177 | } 178 | 179 | ) 180 | } 181 | 182 | fn sendMsg( 183 | msg: String, 184 | model_id: String, 185 | url: String, 186 | system_prompt: String, 187 | mut modelOptions: Signal>, 188 | mut send_disabled: Signal, 189 | ) { 190 | 191 | if msg != "" { 192 | use reqwest::Client; 193 | let token = match get_user() { 194 | Some(user)=> match user.auth_key { 195 | Some(key) => key, 196 | None => "".to_string(), 197 | }, 198 | None => "".to_string() 199 | }; 200 | let mut history = use_context::>>(); 201 | let id = history().len(); 202 | history.write().push(Message { 203 | id: id, 204 | role: Role::User, 205 | content: msg.clone(), 206 | img: None, 207 | loading: false, 208 | }); 209 | 210 | let id = history().len(); 211 | if msg.starts_with("/load") || msg.starts_with("/unload") { 212 | history.write().push(Message { 213 | id: id, 214 | role: Role::Administrator, 215 | content: String::new(), 216 | img: None, 217 | loading: true, 218 | }); 219 | let history_clone = history.read()[..id].to_owned(); 220 | spawn(async move { 221 | let response = Client::new() 222 | .post(format!("{}load", url)) 223 | .bearer_auth(token) 224 | .body(msg) 225 | .send() 226 | .await 227 | .expect("Failed to post command!"); 228 | let text = response 229 | .text() 230 | .await 231 | .expect("Failed to get text from response!"); 232 | let mut message = &mut history.write()[id]; 233 | message.content.push_str(text.as_str()); 234 | message.loading = false; 235 | 236 | let response = Client::new() 237 | .get(format!("{}models", url)) 238 | .send() 239 | .await 240 | .unwrap() 241 | .json::>() 242 | .await 243 | .unwrap(); 244 | let mut options: Vec = response 245 | .iter() 246 | .map(|model| SelectOption { 247 | text: model.clone(), 248 
| value: model.clone(), 249 | selected: model_id == model.clone(), 250 | }) 251 | .collect(); 252 | modelOptions.write().clear(); 253 | modelOptions.write().append(&mut options); 254 | send_disabled.set(false); 255 | }); 256 | } else { 257 | history.write().push(Message { 258 | id: id, 259 | role: Role::Robot, 260 | content: String::new(), 261 | img: None, 262 | loading: true, 263 | }); 264 | let history_clone = history.read()[..id].to_owned(); 265 | 266 | spawn(async move { 267 | use crate::data::Request; 268 | use eventsource_stream::Eventsource; 269 | 270 | let mut stream = Client::new() 271 | .post(format!("{}chat", url)) 272 | .bearer_auth(token) 273 | .json(&Request { 274 | cmd: model_id.clone(), 275 | system_prompt: system_prompt, 276 | msg_list: history_clone, 277 | }) 278 | .send() 279 | .await 280 | .unwrap() 281 | .bytes_stream() 282 | .eventsource(); 283 | let mut history = use_context::>>(); 284 | let mut first_event = true; 285 | 286 | while let Some(event) = futures::StreamExt::next(&mut stream).await { 287 | match event { 288 | Ok(event) => { 289 | let mut message = &mut history.write()[id]; 290 | if event.data == "[DONE]" { 291 | if first_event { 292 | message.content.push_str( 293 | format!("Failed to find {} model server", model_id) 294 | .as_str(), 295 | ); 296 | message.loading = false; 297 | } 298 | 299 | break; 300 | } 301 | message.content.push_str(event.data.as_str()); 302 | } 303 | Err(_) => { 304 | panic!("Error in event stream") 305 | } 306 | } 307 | let mut message = &mut history.write()[id]; 308 | message.loading = false; 309 | first_event = false; 310 | } 311 | send_disabled.set(false); 312 | }); 313 | } 314 | } 315 | } 316 | 317 | pub fn switch_session(id: &str, mut model_id: Signal,mut system_prompt: Signal) { 318 | let mut session = use_context::>(); 319 | let mut temp_session = use_context::>(); 320 | let mut messages = use_context::>>(); 321 | let current_id = session.read().id.clone(); 322 | if current_id != id { 323 | let mut store = Store::new().unwrap(); 324 | // current session don't store. 
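// i.e. if the outgoing session was never persisted, stash it in the
// TempSession context so its draft state survives the switch back.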
325 | if store.get_session(current_id.as_str()).is_none() { 326 | temp_session.set(TempSession::new(&session())); 327 | } 328 | // load target session from store 329 | if let Some(new_session) = store.get_session(id) { 330 | if let Some(ref history) = new_session.history { 331 | messages.set(history.clone()); 332 | } 333 | session.set(Session { 334 | id: new_session.id, 335 | name: new_session.name, 336 | mode_id: new_session.mode_id.clone(), 337 | system_prompt: new_session.system_prompt.clone(), 338 | history: if let Some(ref history) = new_session.history { 339 | Some(history.clone()) 340 | } else { 341 | None 342 | }, 343 | }); 344 | info!(new_session.system_prompt); 345 | model_id.set(new_session.mode_id); 346 | system_prompt.set(new_session.system_prompt); 347 | } else { 348 | let new_session = Session { 349 | id: temp_session.read().id.clone(), 350 | name: temp_session.read().name.clone(), 351 | mode_id: temp_session.read().mode_id.clone(), 352 | system_prompt: temp_session.read().system_prompt.clone(), 353 | history: Some(Vec::::new()), 354 | }; 355 | messages.set(Vec::::new()); 356 | model_id.set(new_session.mode_id.clone()); 357 | info!(new_session.system_prompt); 358 | system_prompt.set(new_session.system_prompt.clone()); 359 | session.set(new_session); 360 | } 361 | } 362 | 363 | } 364 | 365 | #[component] 366 | pub fn Conversations(mut model_id: Signal, mut system_prompt: Signal, send_disabled: Signal) -> Element { 367 | let mut do_delete_conv = use_signal(|| false); 368 | let session = use_context::>(); 369 | let session_value = session(); 370 | let mut store = Store::new().unwrap(); 371 | 372 | let current_id = session_value.id.clone(); 373 | let temp_session = use_context::>(); 374 | let mut session_list = store.fetch_all_session(); 375 | if store.get_session(current_id.as_str()).is_none() { 376 | session_list.push(session_value.clone()); 377 | } else { 378 | if store.get_session(temp_session.read().id.as_str()).is_none() { 379 | session_list.push(Session { 380 | id: temp_session.read().id.clone(), 381 | name: temp_session.read().name.clone(), 382 | mode_id: temp_session.read().mode_id.clone(), 383 | system_prompt: temp_session.read().system_prompt.clone(), 384 | history: Some(Vec::::new()), 385 | }) 386 | } 387 | } 388 | session_list.reverse(); 389 | let items: Vec<_> = session_list.iter().map(|sess| {(sess.id.clone(),sess.id.clone(),sess.name.clone())}).collect(); 390 | rsx!( 391 | div { class: "w-1/5 shadow-lg rounded-lg text-sm font-medium text-gray-500 md:me-4 mb-4 md:mb-0", 392 | style: "height:98%;", 393 | div { class: "border-b px-4 py-2 bg-gray-200", 394 | h1 { class: "text-lg font-semibold", "Conversations" } 395 | } 396 | 397 | div { class: "flex-column mt-4 space-y space-y-4 text-sm overflow-y-auto font-medium text-gray-500 dark:text-gray-400 mb-4 md:me-2 md:ms-2 md:mb-0", 398 | style: "height:90%;", 399 | for (id_for_switch,id_for_delete,name) in items { 400 | if id_for_switch == current_id.clone() { 401 | div { class:"flex items-center px-4 py-3 text-white bg-blue-700 rounded-lg active w-full dark:bg-blue-600", 402 | a { 403 | href: "#", 404 | "aria-current": "page", 405 | class: "inline-flex w-full", 406 | onclick: move |evt| { 407 | if !send_disabled() { 408 | switch_session(id_for_switch.as_str(),model_id,system_prompt); 409 | } 410 | }, 411 | "{name}" 412 | } 413 | 414 | } 415 | } else { 416 | div { class:"flex items-center px-4 py-3 rounded-lg hover:text-gray-900 bg-gray-50 hover:bg-gray-100 w-full dark:bg-gray-800 dark:hover:bg-gray-700 
dark:hover:text-white", 417 | a { 418 | href: "#", 419 | class: "inline-flex w-full", 420 | onclick: move |evt| { 421 | if !send_disabled() { 422 | switch_session(id_for_switch.as_str(),model_id,system_prompt); 423 | } 424 | }, 425 | "{name}" 426 | } 427 | 428 | button { 429 | r#type: "button", 430 | class: "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white", 431 | onclick: move |evt| { 432 | let mut store = Store::new().unwrap(); 433 | if !send_disabled() { 434 | store.remove_session(id_for_delete.clone().as_str()); 435 | do_delete_conv.set(true); 436 | } 437 | }, 438 | svg { 439 | "xmlns": "http://www.w3.org/2000/svg", 440 | "fill": "none", 441 | "aria-hidden": "true", 442 | "viewBox": "0 0 14 14", 443 | class: "w-3 h-3", 444 | path { 445 | "d": "m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6", 446 | "stroke": "currentColor", 447 | "stroke-linejoin": "round", 448 | "stroke-linecap": "round", 449 | "stroke-width": "2" 450 | } 451 | } 452 | span { class: "sr-only", "Remove Conversation" } 453 | } 454 | } 455 | } 456 | } 457 | 458 | 459 | } 460 | } 461 | if do_delete_conv() { 462 | span { dangerous_inner_html: " "} 463 | } 464 | ) 465 | } 466 | 467 | fn new_conversation(mut session: Signal,mut messages: Signal>) { 468 | let mut store = Store::new().unwrap(); 469 | 470 | if store.get_session(session().id.as_str()).is_some() { 471 | session.set(store.new_session()); 472 | messages.set(Vec::::new()); 473 | } 474 | } 475 | 476 | pub fn app() -> Element { 477 | use_context_provider(|| Signal::new(Vec::::new())); 478 | 479 | let href = if let Some(window) = window() { 480 | if let Some(document) = window.document() { 481 | if let Some(location) = document.location() { 482 | Some(format!( 483 | "{}//{}/api/", 484 | location.protocol().unwrap(), 485 | location.host().unwrap() 486 | )) 487 | } else { 488 | None 489 | } 490 | } else { 491 | None 492 | } 493 | } else { 494 | None 495 | }; 496 | 497 | let mut model_id = use_signal(|| String::from("meta-llama/Meta-Llama-3-8B-Instruct")); 498 | let mut endpoint = match href { 499 | Some(url) => use_signal(|| String::from(url)), 500 | None => use_signal(|| String::from("http://localhost:10201/api/")), 501 | }; 502 | let mut system_prompt = use_signal(|| String::from("You are helpful assistant!")); 503 | let mut new_msg = use_signal(String::new); 504 | let mut send_disabled = use_signal(|| false); 505 | let mut modelOptions = use_signal(Vec::::new); 506 | 507 | let mut store = Store::new().unwrap(); 508 | use_context_provider(|| Signal::new(store.new_session())); 509 | let mut session = use_context::>(); 510 | use_context_provider(|| Signal::new(TempSession::new(&session()))); 511 | 512 | use_effect(move || { 513 | let messages = use_context::>>(); 514 | if let Some(window) = window() { 515 | if let Some(document) = window.document() { 516 | if let Some(scrollable_div) = document.get_element_by_id("list") { 517 | scrollable_div.set_scroll_top(scrollable_div.scroll_height()); 518 | } 519 | if let Some(last_msg) = messages().last() { 520 | if !last_msg.loading { 521 | if messages().len() > 1 && !send_disabled() { 522 | let mut sess = session.write(); 523 | sess.mode_id = model_id(); 524 | sess.history = Some(messages().clone()); 525 | sess.system_prompt = system_prompt(); 526 | store.save_session(&sess); 527 | } 528 | } 529 | } 530 | 531 | let hljs = Reflect::get(&window, &JsValue::from_str("hljs")).unwrap(); 532 | 533 | let 
534 |                     .unwrap()
535 |                     .dyn_into::<js_sys::Function>()
536 |                     .unwrap();
537 | 
538 |                 highlight_all.call0(&JsValue::NULL).unwrap();
539 |             }
540 |         }
541 |     });
542 | 
543 |     let mut messages = use_context::<Signal<Vec<Message>>>();
544 |     let mut send = move || {
545 |         info!("try send message");
546 |         if !send_disabled() {
547 |             send_disabled.set(true);
548 |             info!("send message");
549 |             sendMsg(
550 |                 new_msg(),
551 |                 model_id(),
552 |                 endpoint(),
553 |                 system_prompt(),
554 |                 modelOptions,
555 |                 send_disabled,
556 |             );
557 |             new_msg.set(String::new());
558 |         }
559 |     };
560 | 
561 |     rsx!(
562 | 
563 |         Conversations { model_id, system_prompt, send_disabled }
564 | 
565 |         div { class: "w-4/5 bg-white shadow-lg rounded-lg overflow-hidden flex flex-col",
566 |             style: "height:98%;",
567 |             div { class: "flex border-b px-4 py-2 bg-gray-200",
568 |                 h1 { class: "text-lg font-semibold text-gray-500", "Chat Robot" }
569 |                 button {
570 |                     r#type: "button",
571 |                     class: "text-gray-400 bg-transparent hover:bg-gray-300 hover:text-gray-600 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white",
572 |                     onclick: move |_| {
573 |                         show_login(true);
574 |                     },
575 |                     svg {
576 |                         "fill": "none",
577 |                         "xmlns": "http://www.w3.org/2000/svg",
578 |                         height: "24",
579 |                         "viewBox": "0 0 24 24",
580 |                         "aria-hidden": "true",
581 |                         width: "24",
582 |                         class: "w-6 h-6 text-gray-500 dark:text-white",
583 |                         path {
584 |                             "stroke": "currentColor",
585 |                             "stroke-width": "2",
586 |                             "stroke-linecap": "round",
587 |                             "stroke-linejoin": "round",
588 |                             "d": "M12 21a9 9 0 1 0 0-18 9 9 0 0 0 0 18Zm0 0a8.949 8.949 0 0 0 4.951-1.488A3.987 3.987 0 0 0 13 16h-2a3.987 3.987 0 0 0-3.951 3.512A8.948 8.948 0 0 0 12 21Zm3-11a3 3 0 1 1-6 0 3 3 0 0 1 6 0Z"
589 |                         }
590 |                     }
591 |                     span { class: "sr-only", "Sign In or Sign Out" }
592 |                 }
593 |             }
594 |             div { class: "flex-1 p-4 overflow-y-auto", id: "list",
595 |                 for msg in messages() {
596 |                     ShowMessage { msg }
597 |                 }
598 |             }
599 |             div { class: "border-t px-4 py-2 bg-gray-200 flex items-center",
600 |                 input {
601 |                     accept: "image/*",
602 |                     r#type: "file",
603 |                     class: "hidden",
604 |                     id: "image-input",
605 |                     onchange: move |evt| {
606 |                         async move {
607 |                             if let Some(file_engine) = &evt.files() {
608 |                                 let files = file_engine.files();
609 |                                 for file_name in files {
610 |                                     if let Some(file) = file_engine.read_file(&file_name).await {
611 |                                         let id = messages().len();
612 |                                         messages.write().push(Message {
613 |                                             id,
614 |                                             role: Role::User,
615 |                                             content: String::new(),
616 |                                             img: Some(image_base64_wasm::vec_to_base64(file)),
617 |                                             loading: false,
618 |                                         });
619 |                                     }
620 |                                 }
621 |                             }
622 |                         }
623 |                     }
624 |                 }
625 |                 button {
626 |                     class: "cursor-pointer bg-gray-300 p-2 mr-4 rounded-lg hover:bg-gray-400",
627 |                     onclick: move |_| {
628 |                         async move {
629 |                             use reqwest::Client;
630 |                             let response = Client::new().get(format!("{}models", endpoint())).send().await.unwrap().json::<Vec<String>>().await.unwrap();
631 |                             let mut options: Vec<SelectOption> = response.iter().map(|model| SelectOption { text: model.clone(), value: model.clone(), selected: model_id() == model.clone() }).collect();
632 |                             modelOptions.write().clear();
633 |                             modelOptions.write().append(&mut options);
634 |                         }
635 |                     },
636 |                     "data-modal-target": "model-config",
637 |                     "data-modal-toggle": "model-config",
638 |                     svg { class: "w-6 h-6 text-gray-800 dark:text-white", "aria-hidden": "true", "xmlns": "http://www.w3.org/2000/svg",
639 |                         "width": "24", "height": "24", "fill": "none",
640 |                         "viewBox": "0 0 24 24",
641 |                         path {
642 |                             "stroke": "currentColor",
643 |                             "stroke-linecap": "round",
644 |                             "stroke-linejoin": "round",
645 |                             "stroke-width": "2",
646 |                             "d": "M21 13v-2a1 1 0 0 0-1-1h-.757l-.707-1.707.535-.536a1 1 0 0 0 0-1.414l-1.414-1.414a1 1 0 0 0-1.414 0l-.536.535L14 4.757V4a1 1 0 0 0-1-1h-2a1 1 0 0 0-1 1v.757l-1.707.707-.536-.535a1 1 0 0 0-1.414 0L4.929 6.343a1 1 0 0 0 0 1.414l.536.536L4.757 10H4a1 1 0 0 0-1 1v2a1 1 0 0 0 1 1h.757l.707 1.707-.535.536a1 1 0 0 0 0 1.414l1.414 1.414a1 1 0 0 0 1.414 0l.536-.535 1.707.707V20a1 1 0 0 0 1 1h2a1 1 0 0 0 1-1v-.757l1.707-.708.536.536a1 1 0 0 0 1.414 0l1.414-1.414a1 1 0 0 0 0-1.414l-.535-.536.707-1.707H20a1 1 0 0 0 1-1Z"
647 |                         }
648 |                         path { "stroke": "currentColor",
649 |                             "stroke-linecap": "round",
650 |                             "stroke-linejoin": "round",
651 |                             "stroke-width": "2",
652 |                             "d": "M12 15a3 3 0 1 0 0-6 3 3 0 0 0 0 6Z"
653 |                         }
654 |                     }
655 |                 }
656 |                 label {
657 |                     class: "cursor-pointer bg-gray-300 p-2 mr-4 rounded-lg hover:bg-gray-400",
658 |                     onclick: move |_| {
659 |                         new_conversation(session, messages);
660 |                         let new_session = session.read();
661 |                         model_id.set(new_session.mode_id.clone());
662 |                         system_prompt.set(new_session.system_prompt.clone());
663 | 
664 |                     },
665 |                     svg {
666 |                         "stroke": "currentColor",
667 |                         "fill": "none",
668 |                         "xmlns": "http://www.w3.org/2000/svg",
669 |                         "viewBox": "0 0 24 24",
670 |                         class: "h-6 w-6 text-gray-600",
671 |                         path {
672 |                             "stroke-width": "2",
673 |                             "d": "M12 4v16m8-8H4",
674 |                             "stroke-linecap": "round",
675 |                             "stroke-linejoin": "round"
676 |                         }
677 |                     }
678 |                 }
679 |                 input {
680 |                     placeholder: "Type a message...",
681 |                     r#type: "text",
682 |                     class: "flex-1 px-4 py-2 mr-4 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-400",
683 |                     value: "{new_msg}",
684 |                     oninput: move |event| new_msg.set(event.value()),
685 |                     onkeyup: move |event| if event.key() == Key::Enter { send(); }
686 |                 }
687 |                 label {
688 |                     r#for: "image-input",
689 |                     class: "cursor-pointer bg-gray-300 p-2 mr-4 rounded-lg hover:bg-gray-400",
690 |                     svg {
691 |                         "aria-hidden": "true",
692 |                         "fill": "none",
693 |                         width: "24",
694 |                         "viewBox": "0 0 24 24",
695 |                         height: "24",
696 |                         "xmlns": "http://www.w3.org/2000/svg",
697 |                         class: "w-6 h-6 text-gray-800 dark:text-white",
698 |                         path {
699 |                             "stroke-linejoin": "round",
700 |                             "stroke": "currentColor",
701 |                             "d": "m3 16 5-7 6 6.5m6.5 2.5L16 13l-4.286 6M14 10h.01M4 19h16a1 1 0 0 0 1-1V6a1 1 0 0 0-1-1H4a1 1 0 0 0-1 1v12a1 1 0 0 0 1 1Z",
702 |                             "stroke-width": "2",
703 |                             "stroke-linecap": "round"
704 |                         }
705 |                     }
706 |                 }
707 |                 button { class: "bg-blue-500 text-white px-4 py-2 rounded-lg", disabled: "{send_disabled}",
708 |                     onclick: move |_| { info!("send message"); send() },
709 |                     "Send",
710 |                 }
711 |             }
712 |         }
713 |         ModelConfig { model_id, modelOptions, system_prompt }
714 |         LoginBox { endpoint }
715 |         script {
716 |             "initFlowbite();"
717 |         }
718 | 
719 |     )
720 | }
721 | 
--------------------------------------------------------------------------------
/src/web_state.rs:
--------------------------------------------------------------------------------
1 | use crate::data::Message;
2 | use dioxus::prelude::*;
3 | use serde::{Deserialize, Serialize};
4 | use js_sys::Date;
5 | use web_sys::{window, Storage};
6 | 
7 | #[derive(Props, Clone, PartialEq, Debug, Serialize, Deserialize)]
8 | pub struct Session {
9 |     pub id: String,
10 |     pub name: String,
11 |     pub mode_id: String,
12 |     pub system_prompt: String,
13 |     pub history: Option<Vec<Message>>,
14 | }
15 | 
16 | pub struct TempSession {
17 |     pub id: String,
18 |     pub name: String,
19 |     pub mode_id: String,
20 |     pub system_prompt: String,
21 | }
22 | 
23 | impl TempSession {
24 |     pub fn new(session: &Session) -> TempSession {
25 |         TempSession {
26 |             id: session.id.clone(),
27 |             name: session.name.clone(),
28 |             mode_id: session.mode_id.clone(),
29 |             system_prompt: session.system_prompt.clone(),
30 |         }
31 |     }
32 | }
33 | 
34 | pub struct Store { // session persistence backed by the browser's localStorage
35 |     local_storage: Storage,
36 |     session_list: Vec<String>,
37 | }
38 | 
39 | impl Store {
40 |     pub fn new() -> Option<Store> {
41 |         let window = window()?;
42 |         if let Ok(Some(local_storage)) = window.local_storage() {
43 |             let session_list = if let Ok(Some(value)) = local_storage.get_item("session_list") {
44 |                 if let Ok(list) = serde_json::from_str::<Vec<String>>(value.as_str()) {
45 |                     list
46 |                 } else {
47 |                     local_storage.set_item("session_list", "[]").unwrap();
48 |                     Vec::<String>::new()
49 |                 }
50 |             } else {
51 |                 local_storage.set_item("session_list", "[]").unwrap();
52 |                 let list = Vec::<String>::new();
53 |                 list
54 |             };
55 |             let store = Store {
56 |                 local_storage,
57 |                 session_list,
58 |             };
59 |             Some(store)
60 |         } else {
61 |             None
62 |         }
63 |     }
64 | 
65 |     pub fn new_session(&mut self) -> Session {
66 |         let time = Date::new_0();
67 |         let id = format!("id_{}_", Date::now());
68 |         let session = Session {
69 |             id: id.clone(),
70 |             name: format!("{}/{}/{} {}:{}", time.get_full_year() - 2000, time.get_month() + 1, time.get_date(), time.get_hours(), time.get_minutes()),
71 |             system_prompt: "".to_string(),
72 |             mode_id: "meta-llama/Meta-Llama-3-8B-Instruct".to_string(),
73 |             history: None,
74 |         };
75 |         session
76 |     }
77 | 
78 |     pub fn save_session(&mut self, session: &Session) {
79 |         let id = session.id.clone();
80 |         let exist = if let Ok(None) = self.local_storage.get_item(id.as_str()) {
81 |             false
82 |         } else {
83 |             true
84 |         };
85 |         let binding = serde_json::json!(session).to_string();
86 |         let value = binding.as_str();
87 |         self.local_storage.set_item(id.as_str(), value).unwrap();
88 |         if !exist {
89 |             self.session_list.push(id.clone());
90 |             self.local_storage
91 |                 .set_item(
92 |                     "session_list",
93 |                     serde_json::json!(self.session_list).to_string().as_str(),
94 |                 )
95 |                 .expect("Failed to save list!");
96 |         }
97 |     }
98 | 
99 |     pub fn fetch_all_session(&mut self) -> Vec<Session> {
100 |         let list = self.session_list.clone();
101 |         list.iter()
102 |             .map(|id| {
103 |                 let session = if let Ok(Some(value)) = self.local_storage.get_item(id) {
104 |                     if let Ok(sess) = serde_json::from_str::<Session>(value.as_str()) {
105 |                         sess
106 |                     } else {
107 |                         self.new_session()
108 |                     }
109 |                 } else {
110 |                     self.new_session()
111 |                 };
112 |                 session
113 |             })
114 |             .collect()
115 |     }
116 | 
117 |     pub fn get_session(&self, id: &str) -> Option<Session> {
118 |         let session = if let Ok(Some(value)) = self.local_storage.get_item(id) {
119 |             if let Ok(sess) = serde_json::from_str::<Session>(value.as_str()) {
120 |                 Some(sess)
121 |             } else {
122 |                 None
123 |             }
124 |         } else {
125 |             None
126 |         };
127 |         session
128 |     }
129 | 
130 |     pub fn remove_session(&mut self, id: &str) -> Result<(), &'static str> {
131 |         if let Ok(()) = self.local_storage.delete(id) {
132 |             self.session_list.retain(|sid| sid != id);
133 |             self.local_storage
134 |                 .set_item(
135 |                     "session_list",
136 |                     serde_json::json!(self.session_list).to_string().as_str(),
137 |                 )
138 |                 .expect("Failed to save list!");
139 |             Ok(())
140 |         } else {
141 |             Err("delete failed!")
142 |         }
143 |     }
144 | }
--------------------------------------------------------------------------------
/src/worker_server.rs:
--------------------------------------------------------------------------------
1 | 
2 | use crate::data::{Request, Role, Message};
3 | use crate::model::load;
4 | use crate::ipc::accept;
5 | use std::process;
6 | 
7 | pub async fn worker_server(ipc_name: String, model_id: String, temp: f64, top_p: f64) {
8 | 
9 |     let (receiver, sender) = accept(ipc_name); // set up the IPC channel used to exchange requests and responses
10 | 
11 |     let mut pipeline = load(&model_id, temp, top_p).expect("Failed to load model!");
12 |     println!("model {} server start!", model_id);
13 |     loop {
14 |         let request: String = receiver.recv().expect("Failed to recv!");
15 |         if let Ok(req) = serde_json::from_str::<Request>(request.as_str()) {
16 |             if req.cmd.eq("QUIT") {
17 |                 break;
18 |             }
19 |             let msg_list: Vec<Message> = req.msg_list.into_iter().filter(|msg| msg.role != Role::Administrator).collect();
20 |             let history =
21 |                 pipeline.messages_chat_template(&msg_list, req.system_prompt.as_str());
22 |             pipeline.run(&sender, history.as_str(), 1000usize).unwrap(); // generate a response and stream it back through the sender
23 |         }
24 |     }
25 |     process::exit(0);
26 | }
27 | 
28 | 
--------------------------------------------------------------------------------
/tailwind.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   mode: "all",
3 |   content: [
4 |     // include all rust, html and css files in the src directory
5 |     "./src/**/*.{rs,html,css}",
6 |     // include all html files in the output (dist) directory
7 |     "./dist/**/*.html",
8 |     "./node_modules/flowbite/**/*.js",
9 |   ],
10 |   theme: {
11 |     extend: {},
12 |   },
13 |   plugins: [
14 |     require('flowbite/plugin')
15 |   ]
16 | 
17 | }
18 | 
--------------------------------------------------------------------------------
/youtube--play.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lyn-liyuan/moonweb/dbe7593d9fd4689a24816b5d017dc9a3010dce48/youtube--play.jpg
--------------------------------------------------------------------------------