├── Homepage.py ├── README.md ├── configs └── config.yaml ├── container ├── Dockerfile ├── companion │ ├── Dockerfile │ └── run.sh ├── complete │ ├── Dockerfile │ └── run.sh └── entrypoint.sh ├── install.sh ├── install_windows.ps1 ├── key_generation.py ├── logs └── litellm.log ├── modules ├── api_module.py ├── docs_inline.py ├── shared.py └── streamlit_ip.py ├── pages ├── Chat_Interface.py ├── Docs.py ├── HF_Token_Encrypter.py ├── High_Precision_Quantization.py ├── Hugging_Face_Downloader.py ├── Medium_Precision_Quantization.py ├── Model_Info.py ├── Modelfile_Creator.py ├── Ollama_Endpoint_Url.py ├── OpenAI_LiteLLM.py ├── Public_Endpoint.py └── Upload_Converted_To_HF.py ├── requirements.txt ├── run_app.py ├── run_tunnel.py ├── start.sh └── tools ├── endpoint.py ├── notebook.py └── ollama.py /Homepage.py: -------------------------------------------------------------------------------- 1 | # main.py 2 | import streamlit as st 3 | from st_pages import Page, Section, show_pages, add_page_title, add_indentation 4 | 5 | 6 | show_pages( 7 | [ 8 | Page("Homepage.py", "Home", ":house:"), 9 | Page("pages/Docs.py", "Docs", ":books:"), 10 | Page("pages/Chat_Interface.py", "Chat Interface", ":speech_balloon:"), 11 | Page("pages/OpenAI_LiteLLM.py", "OpenAI compatibility", ":robot_face:"), 12 | Page("pages/Public_Endpoint.py", "Generate public url", ":globe_with_meridians:"), 13 | Section("Ollama Management", icon=":llama:"), 14 | Page("pages/Modelfile_Creator.py", "Create and download models", ":wrench:", in_section=True), 15 | Page("pages/Model_Info.py", "Remove or view models", ":clipboard:"), 16 | Page("pages/Ollama_Endpoint_Url.py", "Set Ollama network address", ":signal_strength:"), 17 | Section("Manually convert models", icon=":arrows_counterclockwise:"), 18 | Page("pages/Hugging_Face_Downloader.py", "Download model", ":inbox_tray:"), 19 | Page("pages/High_Precision_Quantization.py", "High Precision Quantization", ":gem:"), 20 | Page("pages/Medium_Precision_Quantization.py", "Medium Precision Quantization", ":heavy_plus_sign:" ), 21 | Page("pages/Upload_Converted_To_HF.py", "Upload model to HuggingFace", ":outbox_tray:"), 22 | Section("Extra Tools", icon=":toolbox:"), 23 | Page("pages/HF_Token_Encrypter.py", "Security", ":lock:"), 24 | ] 25 | ) 26 | 27 | add_indentation() 28 | 29 | 30 | 31 | st.markdown(""" 32 | # Welcome to Ollama-Companion. 33 | --- 34 | Thank you for installing the Ollama-Companion, to get started use the sidebar to navigate to the page you want to use, 35 | if you have any question sor want to learn how to use a certain functionality then navigate to the ***"Docs"*** page located within the sidebar.""") 36 | 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# Ollama-Companion 2 | 3 | 4 | [Ollama-Companion Banner image] 5 |
6 | 7 | 8 | 9 | 10 | ## Enhanced with Streamlit 11 | 12 | Ollama-Companion is developed to enhance the interaction and management of Ollama and other large language model (LLM) applications. It aims to support all Ollama API endpoints, facilitate model conversion, and ensure seamless connectivity, even in environments behind NAT. This tool is crafted to construct a versatile and user-friendly LLM software stack, meeting a diverse range of user requirements. 13 | 14 | Transitioning from Gradio to Streamlit necessitated the development of new tunneling methods to maintain compatibility with Jupyter Notebooks, such as Google Colab. 15 | 16 | Explore our Colab Integration to set up the Companion within minutes and obtain a public-facing URL. 17 | 18 | Interact with the Ollama API without typing commands, using an interface to manage your models. 19 | Run Ollama locally or connect to a remote instance, and use this WebUI to manage it. 20 | ### How to Run 21 | 22 | ### All-in-one installer ### 23 | 24 | 25 | 26 | Clone the repository: 27 | 28 | 29 | ``` 30 | git clone https://github.com/Luxadevi/Ollama-Companion.git 31 | ``` 32 | **Make the Linux and macOS install script executable** 33 | ``` 34 | sudo chmod +x install.sh 35 | ``` 36 | **Run the Linux installer** 37 | ``` 38 | ./install.sh 39 | ``` 40 | 41 | ## Starting Ollama-Companion 42 | To start the Companion with a public URL, for example when you want to share the web page with others or run it on a service like Colab, 43 | **use the following command:** 44 | ``` 45 | $ ./start.sh 46 | ``` 47 | To start the Companion on a local 127.0.0.1:8501 instance, run 48 | ``` 49 | streamlit run Homepage.py 50 | ``` 51 | 52 | **Note**: Windows support is currently unavailable for running Ollama, but you can run the Companion from a Windows client for local quantization and management. You can also manage a remote Ollama instance by setting the Ollama endpoint in the UI. 53 | 54 | ### Add Your Own Modules 55 | Develop your own Streamlit components and integrate them into Ollama-Companion. See examples using LangChain and other software stacks within Streamlit; a minimal page sketch follows below. 56 |
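For example, a new page is simply a Python file dropped into the `pages/` directory. The sketch below is a hypothetical illustration, not a file shipped with the repository; the page name, widgets, and model name are placeholders:

```python
# pages/My_Custom_Tool.py (hypothetical example page, not part of the repository)
import requests
import streamlit as st

st.title("My Custom Tool")

# Point this at the Ollama instance you want to talk to
api_url = st.text_input("Ollama endpoint", "http://127.0.0.1:11434")
prompt = st.text_area("Prompt")

if st.button("Generate"):
    # Plain, non-streaming call to the Ollama generate endpoint
    response = requests.post(
        f"{api_url}/api/generate",
        json={"model": "llama2", "prompt": prompt, "stream": False},
        timeout=120,
    )
    st.write(response.json().get("response", "No response"))
```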

57 | [Ollama-Companion Second Image] 58 |

59 | 60 | ## LiteLLM Proxy Management 61 | 62 | ### Overview 63 | This part allows you to manage and interact with the LiteLLM Proxy, which is used to convert over 100 LLM providers to the OpenAI API standard. 64 | 65 | Check LiteLLM out at [LiteLLM proxy ](https://litellm.ai/) 66 | 67 | 68 | ### LiteLLM Proxy Controls 69 | 70 | - **Start LiteLLM Proxy**: Click this button to start the LiteLLM Proxy. The proxy will run in the background and facilitate the conversion process. 71 | - **Read LiteLLM Log**: Use this button to read the LiteLLM Proxy log, which contains relevant information about its operation. 72 | - **Start Polling**: Click to initiate polling. Polling checks for updates to the ollama API and adds any new models to the configuration. 73 | - **Stop Polling**: Use this button to stop polling for updates. 74 | - **Kill Existing LiteLLM Processes**: If there are existing LiteLLM processes running, this button will terminate them. 75 | - **Free Up Port 8000**: Click this button to free up port 8000 if it's currently in use. 76 | 77 | *Please note that starting the LiteLLM Proxy and performing other actions may take some time, so be patient and wait for the respective success messages.* 78 | 79 | ### LiteLLM Proxy Log 80 | 81 | The "Log Output" section will display relevant information from the LiteLLM Proxy log, providing insights into its operation and status. 82 | 83 | ## How to Download Model Files from Hugging Face 84 | 85 | To download model files from Hugging Face, follow these steps: 86 | 87 | 1. **Visit the Model Page**: Go to the Hugging Face model page you wish to download. For example: [Mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2). 88 | 89 | 2. **Copy Username/RepositoryName**: On the model page, locate the icon next to the username of the model's author (usually a clipboard or copy symbol). Click to copy the Username/RepositoryName, e.g., `mistralai/Mistral-7B-Instruct-v0.2`. 90 | 91 | 3. **Paste in the Input Field**: Paste the copied Username/RepositoryName directly into the designated input field in your application. 92 | 93 | 4. **Get File List**: Click the "Get file list" button to retrieve a list of available files in this repository. 94 | 95 | 5. **Review File List**: Ensure the list contains the correct model files you wish to download. 96 | 97 | 6. **Download Model**: Click the "Download Model" button to start the download process for the selected model files. 98 | 99 | 7. **File Storage**: The model files will be saved in the `llama.cpp/models` directory on your device. 100 | 101 | By following these steps, you have successfully downloaded the model files from Hugging Face, and they are now stored in the `llama.cpp/models` directory for your use. 102 | 103 | 104 | ## How to convert Models 105 | 106 | ## Step One: Model Conversion with High Precision 107 | 108 | ### Conversion Process 109 | 110 | 1. **Select a Model Folder**: Choose a folder within `llama.cpp/models` that contains the model you wish to convert. 111 | 112 | 2. **Set Conversion Options**: Select your desired conversion options from the provided checkboxes, F32 F16 or Q8_0. 113 | 114 | 3. **Docker Container Option**: Optionally, use a Docker container for added flexibility and compatibility. 115 | 116 | 4. **Execute Conversion**: Click the "Run Commands" button to start the conversion process. 117 | 118 | 5. **Output Location**: Converted models will be saved in the `High-Precision-Quantization` subfolder within the selected model folder. 
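Under the hood, this step and the quantization step described next map onto llama.cpp's standard convert and quantize tools. The sketch below is a rough illustration using Python's `subprocess`; the exact script names, flags, and paths depend on your llama.cpp checkout, so treat it as an approximation rather than the Companion's exact implementation:

```python
# Rough sketch of the llama.cpp convert + quantize flow; paths and names are illustrative
import subprocess
from pathlib import Path

model_dir = Path("llama.cpp/models/Mistral-7B-Instruct-v0.2")  # a downloaded HF model
f16_out = model_dir / "High-Precision-Quantization" / "model-F16.gguf"
q4_out = model_dir / "Medium-Precision-Quantization" / "model-Q4_0.gguf"
f16_out.parent.mkdir(parents=True, exist_ok=True)
q4_out.parent.mkdir(parents=True, exist_ok=True)

# Step one: convert the PyTorch/safetensors checkpoint to a high-precision GGUF base file
subprocess.run(
    ["python3", "llama.cpp/convert.py", str(model_dir),
     "--outtype", "f16", "--outfile", str(f16_out)],
    check=True,
)

# Step two: quantize the base file down to a smaller format such as Q4_0
subprocess.run(["llama.cpp/quantize", str(f16_out), str(q4_out), "Q4_0"], check=True)
```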
119 | 120 | Utilize this process to efficiently convert models while maintaining high precision and compatibility with `llama.cpp`. 121 | 122 | ## Step Two: Model Quantization Q and Kquants 123 | 124 | ### Quantization Instructions 125 | 126 | 1. **Select GGUF File**: Choose the GGUF file you wish to quantize from the dropdown list. 127 | 128 | 2. **Quantization Options**: Check the boxes next to the quantization options you want to apply (Q, Kquants). 129 | 130 | 3. **Execution Environment**: Choose to use either the native `llama.cpp` or a Docker container for compatibility. 131 | 132 | 4. **Run Quantization**: Click the "Run Selected Commands" button to schedule and execute the quantization tasks. 133 | 134 | 5. **Save Location**: The quantized models will be saved in the `/modelname/Medium-Precision-Quantization` folder. 135 | 136 | Follow these steps to perform model quantization using Q and Kquants, saving the quantized models in the specified directory. 137 | Schedule multiple options in a row they will remember and run eventually. 138 | 139 | ## Model Upload Instructions 140 | 141 | Use this section to securely upload your converted models to Hugging Face. 142 | 143 | ### Steps for Uploading Models 144 | 145 | 1. **Select a Model**: Choose a model from the dropdown list. These models are located in the `llama.cpp/models` directory. 146 | 147 | 2. **Enter Repository Name**: Specify a name for the new Hugging Face repository where your model will be uploaded. 148 | 149 | 3. **Choose Files for Upload**: Select the files you wish to upload from the subfolders of the chosen model. 150 | 151 | 4. **Add README Content**: Optionally, write content for the README.md file of your new repository. 152 | 153 | #### Token Usage 154 | - For enhanced security, use an encrypted token. Encrypt your Hugging Face token on the Token Encrypt page and enter it in the "Enter Encrypted Token" field. 155 | - Alternatively, enter an unencrypted Hugging Face token directly. 156 | 157 | 5. **Upload Files**: Click the "Upload Selected Files" button to initiate the upload to Hugging Face. 158 | 159 | After completing these steps, your uploaded models will be accessible at `https://huggingface.co/your-username/your-repo-name`. 160 | 161 | ## Try Ollama-Companion 162 | Try ollama Companion deployed on google Colab, with our Colab Notebooks and deploy a instance within minutes. This is available on https://github.com/Luxadevi/Ollama-Colab-Integration 163 | 164 | 165 | 166 | 167 | ### Core Features 168 | 169 | #### Streamlit-Powered Interface 170 | - **Intuitive and Responsive UI** 171 | - **Advanced Modelfile Management** 172 | - **Dynamic UI Building Blocks** 173 | 174 | #### Model Compatibility and Conversion 175 | - **Download and Convert PyTorch Models from Huggingface** 176 | - **Multiple Format Conversion Options** 177 | 178 | #### Enhanced Connectivity and Sharing 179 | - **Easy API Connectivity via Secure Tunnels** 180 | - **Options for Sharing and Cloud Testing** 181 | - **Accessible from Any Network Setup** 182 | 183 | #### Efficient Workflow Management 184 | - **Easy Model Upload to Huggingface** 185 | - **Capability to Queue Multiple Workloads** 186 | 187 | #### Security and Configuration 188 | - **Integrated LLAVA Image Analysis** 189 | - **Configurable Security Features** 190 | - **Advanced Token Encryption** 191 | 192 | ### Future Directions and Contributions 193 | 194 | We are dedicated to the continuous enhancement of Ollama-Companion, with a focus on user experience and expanded functionality. 
195 | 196 | 197 | **Check the docs for more information** 198 | ### License 199 | 200 | Licensed under the Apache License. 201 | -------------------------------------------------------------------------------- /configs/config.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | - model_name: ollama/dummyentry 3 | litellm_params: 4 | model: ollama/dummyentry 5 | api_base: http://127.0.0.1:11434 6 | json: True -------------------------------------------------------------------------------- /container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use Alpine Linux for a lightweight base image 2 | FROM alpine:latest 3 | 4 | # Install git, g++, make, Python, pip, and additional dependencies 5 | RUN apk add --no-cache git g++ make python3 py3-pip \ 6 | python3-dev blas-dev lapack-dev gfortran cmake pkgconfig 7 | 8 | # Clone the repository 9 | WORKDIR /usr/src 10 | RUN git clone https://github.com/ggerganov/llama.cpp.git 11 | 12 | # Compile the code 13 | WORKDIR /usr/src/llama.cpp 14 | RUN make 15 | 16 | # Set up a Python virtual environment and install dependencies 17 | RUN python3 -m venv /venv 18 | RUN . /venv/bin/activate && pip install --upgrade pip && pip install -r requirements.txt 19 | 20 | # Add entrypoint script 21 | COPY entrypoint.sh /entrypoint.sh 22 | RUN chmod +x /entrypoint.sh 23 | 24 | # Set the custom script as the entrypoint 25 | ENTRYPOINT ["/entrypoint.sh"] 26 | -------------------------------------------------------------------------------- /container/companion/Dockerfile: -------------------------------------------------------------------------------- 1 | # First Stage - Building the Go application 2 | FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder 3 | 4 | ARG TARGETARCH 5 | ARG GOFLAGS="'-ldflags=-w -s'" 6 | 7 | WORKDIR /go/src/github.com/jmorganca/ollama 8 | 9 | RUN apt-get update && apt-get install -y git build-essential cmake 10 | 11 | 12 | RUN git clone https://github.com/ggerganov/llama.cpp.git && \ 13 | mkdir llama.cpp/build && \ 14 | cd llama.cpp/build && \ 15 | cmake .. && \ 16 | cmake --build . --config Release 17 | 18 | # Second Stage - Setting up Python environment and runtime 19 | FROM ubuntu:22.04 20 | 21 | # Install Python 3.11, venv, and other necessary packages 22 | RUN apt-get update && apt-get install -y curl aria2 ca-certificates python3.11 python3.11-venv git 23 | 24 | # Clone the Python application repository 25 | WORKDIR /ollama-companion 26 | RUN git clone https://github.com/luxadevi/ollama-companion.git . 
27 | RUN git clone https://github.com/ggerganov/llama.cpp.git 28 | # Set up Python virtual environment and install dependencies 29 | RUN python3.11 -m venv venv 30 | RUN /bin/bash -c "source venv/bin/activate && venv/bin/pip install -r requirements.txt" 31 | 32 | 33 | # Copy the run.sh script and make it executable 34 | COPY run.sh /run.sh 35 | RUN chmod +x /run.sh 36 | 37 | COPY --from=builder /go/src/github.com/jmorganca/ollama/llama.cpp/build/bin /ollama-companion/llama.cpp 38 | 39 | 40 | EXPOSE 8501 41 | ENV OLLAMA_HOST 0.0.0.0 42 | 43 | ## Set the entry point to run.sh 44 | ENTRYPOINT ["/run.sh"] 45 | -------------------------------------------------------------------------------- /container/companion/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Function to start the Python application 4 | start_python_app() { 5 | source venv/bin/activate 6 | python3.11 run_tunnel .py & 7 | } 8 | 9 | # Function to start Streamlit 10 | start_streamlit() { 11 | source venv/bin/activate 12 | streamlit run Homepage.py & 13 | } 14 | 15 | # Check the passed argument 16 | case "$1" in 17 | "public" | "-pub") 18 | # Start Python application for public mode 19 | start_python_app 20 | ;; 21 | "local" | "-lan") 22 | # Start Streamlit for local mode 23 | start_streamlit 24 | ;; 25 | *) 26 | echo "Invalid argument. Use 'public' or '-pub' for public mode, 'local' or '-lan' for local mode." 27 | exit 1 28 | ;; 29 | esac 30 | 31 | -------------------------------------------------------------------------------- /container/complete/Dockerfile: -------------------------------------------------------------------------------- 1 | # First Stage - Building the Go application 2 | FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder 3 | 4 | ARG TARGETARCH 5 | ARG GOFLAGS="'-ldflags=-w -s'" 6 | 7 | WORKDIR /go/src/github.com/jmorganca/ollama 8 | 9 | RUN apt-get update && apt-get install -y git build-essential cmake 10 | ADD https://dl.google.com/go/go1.21.3.linux-$TARGETARCH.tar.gz /tmp/go1.21.3.tar.gz 11 | RUN mkdir -p /usr/local && tar xz -C /usr/local /dev/null | grep -oP '(?<=Python )\d+\.\d+') 29 | PYTHON_VENV_PACKAGE="python3.10-venv" 30 | 31 | if [[ $PYTHON_VERSION < 3.10 ]]; then 32 | echo "Python 3.10 or higher is not installed. Please install it using your distribution's package manager." 33 | case $1 in 34 | "Ubuntu"|"Debian") 35 | echo "Run: sudo apt install python3.10 python3.10-venv (or higher)" 36 | ;; 37 | "Arch") 38 | echo "Run: sudo pacman -S python3.10 python3.10-venv (or higher)" Adjust if package names differ 39 | ;; 40 | "RedHat") 41 | echo "Run: sudo yum install python3.10 python3.10-venv (or higher)" # Adjust if package names differ 42 | ;; 43 | *) 44 | echo "Unsupported distribution." 45 | ;; 46 | esac 47 | else 48 | echo "Python 3.10 or higher is installed." 49 | fi 50 | } 51 | 52 | # Function to create a Python virtual environment 53 | create_python_venv() { 54 | if command -v python3.10 >/dev/null 2>&1; then 55 | python3.10 -m venv companion_venv 56 | echo "Virtual environment created with Python 3.10 in 'companion_venv' directory." 57 | elif command -v python3.11 >/dev/null 2>&1; then 58 | python3.11 -m venv companion_venv 59 | echo "Virtual environment created with Python 3.11 in 'companion_venv' directory." 60 | elif command -v python3 >/dev/null 2>&1; then 61 | python3 -m venv companion_venv 62 | echo "Virtual environment created with default Python 3 in 'companion_venv' directory." 
63 | else 64 | echo "No suitable Python 3 version found. Please install Python 3." 65 | return 1 66 | fi 67 | } 68 | 69 | 70 | # Function to activate the virtual environment 71 | activate_venv() { 72 | source companion_venv/bin/activate 73 | echo "Virtual environment activated." 74 | } 75 | 76 | # Function to install dependencies from requirements.txt 77 | pip_dependencies() { 78 | pip install -r requirements.txt 79 | echo "Dependencies installed from requirements.txt." 80 | } 81 | 82 | 83 | # Detect the OS 84 | OS="Unknown" 85 | if [ -f /etc/os-release ]; then 86 | . /etc/os-release 87 | OS=$NAME 88 | fi 89 | 90 | # Function to read the installation type from the log file 91 | read_installation_type() { 92 | if [ -f "$LOGFILE" ]; then 93 | local installed_type=$(sed -n '2p' "$LOGFILE") 94 | echo $installed_type 95 | else 96 | echo "none" 97 | fi 98 | } 99 | 100 | # Function to determine if an upgrade is needed 101 | is_upgrade_needed() { 102 | local current_installation=$(read_installation_type) 103 | local new_installation=$1 104 | 105 | case $current_installation in 106 | minimal) 107 | [[ "$new_installation" == "standard" || "$new_installation" == "large" ]] 108 | ;; 109 | standard) 110 | [[ "$new_installation" == "large" ]] 111 | ;; 112 | interactive) 113 | return 0 # Always rerun interactive 114 | ;; 115 | *) 116 | return 1 # No upgrade needed or unrecognized type 117 | ;; 118 | esac 119 | } 120 | # Function to clone ollama-companion repository 121 | clone_ollama_companion() { 122 | current_dir=$(basename "$PWD") 123 | if [ "$current_dir" != "Ollama-Companion" ]; then 124 | git clone https://github.com/luxadevi/Ollama-Companion.git 125 | cd Ollama-Companion 126 | echo "Cloned ollama-companion and changed directory to ollama-companion" 127 | else 128 | echo "Already inside ollama-companion directory, skipping clone." 129 | fi 130 | } 131 | 132 | # Function to clone llama.cpp repository and run make in its directory 133 | clone_and_make_llama_cpp() { 134 | git clone https://github.com/ggerganov/llama.cpp.git 135 | make -C llama.cpp 136 | echo "Cloned llama.cpp and ran make in the llama.cpp directory" 137 | } 138 | 139 | # Interactive options 140 | # Function to install Ollama 141 | install_ollama() { 142 | read -p "Do you want to install Ollama on this computer? (y/n) " answer 143 | case $answer in 144 | [Yy]* ) 145 | curl https://ollama.ai/install.sh | sh 146 | echo "Ollama installed on this host." 147 | ;; 148 | * ) 149 | echo "Ollama installation skipped." 150 | ;; 151 | esac 152 | } 153 | # Function to instal Ollama headless 154 | install_ollama_headless(){ 155 | curl https://ollama.ai/install.sh | sh 156 | echo "Ollama Installed" 157 | } 158 | 159 | 160 | clean_build_llama_cpp() { 161 | echo "Do you want to clean build llama.cpp? (yes/no)" 162 | read clean_build_response 163 | if [[ $clean_build_response == "yes" ]]; then 164 | git clone http://github.com/ggerganov/llama.cpp.git 165 | make -C llama.cpp 166 | echo "Clean build of llama.cpp completed." 167 | else 168 | echo "Skipping clean build of llama.cpp." 169 | fi 170 | } 171 | # Function to help you install python3.10 interactively 172 | interactive_check_python() { 173 | PYTHON_VERSION=$(python3 --version 2>/dev/null | grep -oP '(?<=Python )\d+\.\d+') 174 | if [[ $PYTHON_VERSION < 3.10 ]]; then 175 | echo "Python 3.10 or 3.11 is required. Would you like to install it? 
(yes/no)" 176 | read install_python 177 | if [[ $install_python == "yes" ]]; then 178 | case $OS in 179 | "Ubuntu"|"Debian") 180 | sudo apt install -y python3.10 python3.10-venv || sudo apt install -y python3.11 python3.11-venv 181 | ;; 182 | "Arch") 183 | sudo pacman -S python3.10 python3.10-venv || sudo pacman -S python3.11 python3.11-venv 184 | ;; 185 | "RedHat") 186 | sudo yum install -y python3.10 python3.10-venv || sudo yum install -y python3.11 python3.11-venv 187 | ;; 188 | *) 189 | echo "Unsupported distribution for automatic Python installation." 190 | ;; 191 | esac 192 | fi 193 | else 194 | echo "Python 3.10 or higher is already installed." 195 | fi 196 | } 197 | 198 | 199 | write_to_log() { 200 | local installation_type=$1 201 | echo "Writing to log file..." 202 | echo "$VERSION" > "$LOGFILE" 203 | echo "$installation_type" >> "$LOGFILE" 204 | } 205 | 206 | run_start_script(){ 207 | chmod +x start.sh 208 | ./start.sh 209 | } 210 | 211 | 212 | # END message when the installation is completed 213 | 214 | END_MESSAGE="Companion successfully installed, you can launch next time with the start.sh script. Ollama-companion will autolaunch on port 8051 and defaults to making a public-facing URL for your companion. If you only want to run Ollama-companion locally: run the start.sh script with '-local' or '-lan' arguments." 215 | 216 | 217 | ## Installation types 218 | # Minimal installation function 219 | install_minimal() { 220 | echo "Starting minimal installation..." 221 | install_packages "$OS" 222 | check_python "$OS" 223 | clone_ollama_companion 224 | create_python_venv 225 | activate_venv 226 | pip_dependencies 227 | write_to_log "minimal" 228 | echo "$END_MESSAGE" 229 | } 230 | 231 | # Medium installation function 232 | install_medium() { 233 | echo "Starting standard installation..." 234 | install_packages "$OS" 235 | check_python "$OS" 236 | clone_ollama_companion 237 | clone_and_make_llama_cpp 238 | create_python_venv 239 | activate_venv 240 | pip_dependencies 241 | write_to_log "standard" 242 | echo "$END_MESSAGE" 243 | } 244 | 245 | # Large installation function 246 | install_large() { 247 | echo "Starting complete installation..." 248 | install_packages "$OS" 249 | check_python "$OS" 250 | clone_ollama_companion 251 | clone_and_make_llama_cpp 252 | create_python_venv 253 | activate_venv 254 | pip_dependencies 255 | pip install torch 256 | install_ollama 257 | write_to_log "large" 258 | echo "$END_MESSAGE" 259 | } 260 | 261 | install_colab() { 262 | echo "Starting Colab installation..." 263 | echo "This uses pre-compiled llama.cpp binaries." 264 | echo "To freshly compile a new version, use -colab_compile." 265 | echo "Refer to the llama.cpp GitHub repository for more info." 266 | # Redirect stdout and stderr to /dev/null for all commands 267 | echo "Installing required packages..." 268 | install_packages "$OS" > /dev/null 2>&1 269 | echo "Cloning the Ollama Companion repository..." 270 | clone_ollama_companion > /dev/null 2>&1 271 | echo "Installing Python dependencies..." 272 | pip_dependencies > /dev/null 2>&1 273 | echo "Installing the HTTPX Python package..." 274 | pip install httpx > /dev/null 2>&1 275 | echo "Downloading pre-compiled llama.cpp binaries..." 276 | wget https://huggingface.co/luxadev/llama.cpp_binaries/resolve/main/llama.cpp_latest.tar.gz -O /tmp/llama.cpp_latest.tar.gz > /dev/null 2>&1 277 | echo "Extracting the downloaded binaries..." 
278 | tar -xzvf /tmp/llama.cpp_latest.tar.gz -C /content/Ollama-Companion/ > /dev/null 2>&1 279 | echo "Installing Ollama in headless mode..." 280 | install_ollama_headless > /dev/null 2>&1 281 | echo "Logging installation type..." 282 | write_to_log "colab" 283 | echo "$END_MESSAGE" 284 | } 285 | 286 | 287 | # Colab compile installation function 288 | install_colab_compile() { 289 | echo "Starting Colab compile installation..." 290 | rm -r /content/Ollama-Companion/llama.cpp 291 | install_packages "$OS" 292 | check_python "$OS" 293 | clone_ollama_companion 294 | pip install httpx 295 | clone_and_make_llama_cpp 296 | pip_dependencies 297 | install_ollama_headless 298 | write_to_log "colab_compile" 299 | echo "$END_MESSAGE" 300 | } 301 | 302 | # Interactive installation function 303 | install_interactive() { 304 | echo "Starting interactive installation..." 305 | install_ollama 306 | interactive_check_python 307 | echo "Cloning Ollama-companion directory" 308 | clone_ollama_companion 309 | echo "Do you want to use the included virtual environment and install all Python dependencies? (recommended) (yes/no)" 310 | read use_venv_response 311 | if [[ $use_venv_response == "yes" ]]; then 312 | create_python_venv 313 | activate_venv 314 | pip_dependencies 315 | pip install torch 316 | write_to_log "interactive" 317 | echo "Virtual environment set up and dependencies installed." 318 | else 319 | echo "Skipping virtual environment setup and Python dependency installation." 320 | echo "Install the needed python dependencies from the requirements.txt with pip install -r requirements.txt" 321 | echo "Recommended to install these python libraries in a virtual environment." 322 | fi 323 | 324 | # Ask the user if they want to start Ollama Companion directly 325 | read -p "Do you want to start Ollama Companion directly? (yes/no) " start_now_response 326 | if [[ $start_now_response == "yes" ]]; then 327 | run_start_script 328 | else 329 | echo "You can run start.sh from the ollama-companion directory to get started." 330 | fi 331 | echo "$END_MESSAGE" 332 | } 333 | 334 | main() { 335 | # Detect the OS 336 | OS="Unknown" 337 | if [ -f /etc/os-release ]; then 338 | . /etc/os-release 339 | OS=$NAME 340 | fi 341 | 342 | local install_ollama_flag=0 343 | local block_start_script_flag=0 344 | local requested_installation="standard" # Set default installation to standard 345 | 346 | # Parse all arguments 347 | for arg in "$@"; do 348 | case $arg in 349 | -minimal|-min) 350 | requested_installation="minimal" 351 | ;; 352 | -large|-l) 353 | requested_installation="large" 354 | ;; 355 | -interactive|-i) 356 | requested_installation="interactive" 357 | ;; 358 | -colab) 359 | requested_installation="colab" 360 | ;; 361 | -colab_compile) 362 | requested_installation="colab_compile" 363 | ;; 364 | -ollama) 365 | install_ollama_flag=1 366 | ;; 367 | -b|-block) 368 | block_start_script_flag=1 369 | ;; 370 | esac 371 | done 372 | 373 | # Check if an upgrade is needed and perform installation 374 | if is_upgrade_needed $requested_installation; then 375 | echo "Upgrade needed. Installing $requested_installation version." 376 | else 377 | echo "Proceeding with $requested_installation installation." 
378 | fi 379 | 380 | case $requested_installation in 381 | minimal) 382 | install_minimal 383 | ;; 384 | standard) 385 | install_medium 386 | ;; 387 | large) 388 | install_large 389 | ;; 390 | interactive) 391 | install_interactive 392 | ;; 393 | colab) 394 | install_colab 395 | ;; 396 | colab_compile) 397 | install_colab_compile 398 | ;; 399 | esac 400 | 401 | # Install Ollama if the flag is set 402 | if [[ $install_ollama_flag -eq 1 ]]; then 403 | echo "Installing Ollama..." 404 | install_ollama_headless 405 | fi 406 | 407 | # Run start script if the block flag is not set 408 | if [[ $block_start_script_flag -eq 0 ]]; then 409 | run_start_script 410 | fi 411 | } 412 | 413 | # Call the main function with all passed arguments 414 | main "$@" 415 | 416 | -------------------------------------------------------------------------------- /install_windows.ps1: -------------------------------------------------------------------------------- 1 | # Install Python 3.10 2 | $pythonInstalled = Get-Command python | ForEach-Object { $_.Version.Major } 3 | 4 | if ($pythonInstalled -notcontains 3.10 -and $pythonInstalled -notcontains 3.11) { 5 | winget install -e --id Python.Python.3.10 6 | } else { 7 | Write-Host "Python 3.10 or 3.11 is already installed." 8 | } 9 | 10 | 11 | # Attempt to install Python requirements 12 | # List of required libraries 13 | $requiredLibs = @('streamlit', 'requests', 'flask', 'flask-cloudflared', 'httpx', 'litellm', 'huggingface_hub', 'asyncio', 'Pyyaml', 'APScheduler', 'cryptography', 'gradio','numpy','sentencepiece','gguf','torch','transformers' ) 14 | 15 | # Array to hold libraries that are not installed 16 | $libsToInstall = @() 17 | 18 | foreach ($lib in $requiredLibs) { 19 | try { 20 | pip show $lib | Out-Null 21 | } catch { 22 | $libsToInstall += $lib 23 | } 24 | } 25 | 26 | if ($libsToInstall.Length -gt 0) { 27 | Write-Host "The following libraries are not installed and will be installed:" 28 | Write-Host ($libsToInstall -join ", ") 29 | 30 | # Confirmation prompt 31 | $confirmation = Read-Host "Do you want to proceed with the installation? (Y/N)" 32 | if ($confirmation -eq 'Y') { 33 | try { 34 | pip install $libsToInstall -ErrorAction Stop 35 | } catch { 36 | Write-Host "An error occurred during pip install. Please relog your account and try running the script again." 37 | Exit 38 | } 39 | } else { 40 | Write-Host "Installation canceled." 41 | } 42 | } else { 43 | Write-Host "All required libraries are already installed." 44 | } 45 | 46 | 47 | # Function to check if CMake is installed 48 | function Check-CMakeInstalled { 49 | try { 50 | $cmakeVersion = cmake --version | Select-Object -First 1 51 | if ($cmakeVersion -like "cmake version*") { 52 | return $true 53 | } 54 | } catch { 55 | return $false 56 | } 57 | } 58 | 59 | # Check if CMake is installed 60 | $cmakeInstalled = Check-CMakeInstalled 61 | 62 | if (-not $cmakeInstalled) { 63 | Write-Host "CMake is not installed." 64 | 65 | # Ask for confirmation to install CMake 66 | $confirmation = Read-Host "Do you want to install CMake? (Y/N)" 67 | if ($confirmation -eq 'Y') { 68 | # Download and install CMake 69 | $cmakeInstaller = "https://github.com/Kitware/CMake/releases/download/v3.28.0/cmake-3.28.0-windows-x86_64.msi" 70 | $installerPath = "$env:TEMP\cmake_installer.msi" 71 | Invoke-WebRequest -Uri $cmakeInstaller -OutFile $installerPath 72 | Start-Process msiexec.exe -Wait -ArgumentList "/i $installerPath /quiet /norestart" 73 | Write-Host "CMake has been installed." 
74 | } else { 75 | Write-Host "Installation of CMake canceled." 76 | } 77 | } else { 78 | Write-Host "CMake is already installed." 79 | } 80 | 81 | 82 | # Build llama.cpp with CMake 83 | if (Test-Path -Path ".\llama.cpp") { 84 | New-Item -Path ".\llama.cpp\build" -ItemType "directory" -Force 85 | Set-Location -Path ".\llama.cpp\build" 86 | cmake .. -DLLAMA_CUBLAS=ON 87 | cmake --build . --config Release 88 | Set-Location -Path "..\.." 89 | } 90 | 91 | # Function to check if aria2 is installed 92 | function Check-Aria2Installed { 93 | try { 94 | $aria2Version = aria2c --version | Select-Object -First 1 95 | if ($aria2Version -like "aria2 version*") { 96 | return $true 97 | } 98 | } catch { 99 | return $false 100 | } 101 | } 102 | 103 | # Check if aria2 is installed 104 | $aria2Installed = Check-Aria2Installed 105 | 106 | if (-not $aria2Installed) { 107 | Write-Host "aria2 is not installed." 108 | 109 | # Ask for confirmation to install aria2 110 | $confirmation = Read-Host "Do you want to install aria2? (Y/N)" 111 | if ($confirmation -eq 'Y') { 112 | # Install aria2 using winget 113 | winget install --id=aria2.aria2 -e 114 | Write-Host "aria2 has been installed." 115 | } else { 116 | Write-Host "Installation of aria2 canceled." 117 | } 118 | } else { 119 | Write-Host "aria2 is already installed." 120 | } 121 | 122 | #make non .exe copies of the .exe's 123 | Get-ChildItem -Path ".\llama.cpp" -Recurse -Filter *.exe | ForEach-Object { 124 | $linkName = $_.FullName -replace '\.exe$', '' 125 | New-Item -ItemType HardLink -Path $linkName -Target $_.FullName 126 | } 127 | 128 | # fix up the installs the be in the places we expect. 129 | Get-ChildItem -Path .\llama.cpp\build\bin\Release -Filter *.exe | Where-Object { $_.Name -notin @('benchmark.exe', 'test-c.exe', 'test-grad0.exe', 'test-grammar-parser.exe', 'test-llama-grammar.exe', 'test-quantize-fns.exe', 'test-quantize-perf.exe', 'test-rope.exe', 'test-sampling.exe', 'test-tokenizer-0-falcon.exe', 'test-tokenizer-0-llama.exe', 'test-tokenizer-1-bpe.exe', 'test-tokenizer-1-llama.exe') } | ForEach-Object { Copy-Item -Path $_.FullName -Destination .\llama.cpp -Verbose; if (Test-Path ($_.FullName -replace '\.exe$','')) { New-Item -ItemType HardLink -Path (Join-Path .\llama.cpp $_.BaseName) -Target $_.FullName } } 130 | 131 | 132 | # Final message 133 | Write-Host "Thanks for installing the windows version. This OS is not fully supported with Ollama but you can still use this program to interface with an Ollama endpoint or use the quantizing features." 
134 | -------------------------------------------------------------------------------- /key_generation.py: -------------------------------------------------------------------------------- 1 | ## Key_generation.py 2 | from cryptography.fernet import Fernet 3 | from pathlib import Path 4 | 5 | def generate_key(): 6 | key_dir = Path('.') / '.key' 7 | key_file_path = key_dir / 'encryption.key' 8 | 9 | if not key_file_path.exists(): 10 | key = Fernet.generate_key() 11 | if not key_dir.exists(): 12 | key_dir.mkdir(parents=True, exist_ok=True) 13 | with open(key_file_path, 'wb') as key_file: 14 | key_file.write(key) 15 | 16 | # Call the function to ensure the key is generated when the module is imported 17 | generate_key() 18 | -------------------------------------------------------------------------------- /logs/litellm.log: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /modules/api_module.py: -------------------------------------------------------------------------------- 1 | # api_module.py 2 | import streamlit as st 3 | import requests 4 | from modules.shared import shared 5 | import json 6 | 7 | api_url = shared['api_endpoint']['url'] 8 | 9 | def get_json(url): 10 | try: 11 | response = requests.get(url) 12 | response.raise_for_status() 13 | return response.json() 14 | except requests.exceptions.RequestException as e: 15 | return str(e) 16 | 17 | 18 | def fetch_models(api_url): 19 | json_data = get_json(f"{api_url}/api/tags") 20 | if isinstance(json_data, dict) and 'models' in json_data: 21 | return [model['name'] for model in json_data['models']] 22 | else: 23 | st.error("Invalid JSON structure or error in fetching data") 24 | return None 25 | 26 | def show_model_details(model_name, api_url): 27 | # Check if the model name includes ":latest" and remove it 28 | if model_name.endswith(":latest"): 29 | model_name = model_name[:-7] # Remove the last 7 characters, which is ":latest" 30 | 31 | # Construct the URL 32 | url = f"{api_url}/api/show" 33 | 34 | # Create the JSON payload 35 | payload = { 36 | "name": model_name 37 | } 38 | 39 | # Send a POST request with the JSON payload 40 | response = requests.post(url, json=payload) 41 | 42 | # Check the response 43 | if response.status_code != 200: 44 | st.error(f"Failed to fetch model details. 
Status code: {response.status_code}, Response: {response.text}") 45 | return 46 | 47 | model_data = response.json() 48 | 49 | # Display the model parameters in Markdown tables 50 | st.subheader(f"Details for model: {model_name}") 51 | 52 | parameters = model_data.get("parameters", "") 53 | parameters = parameters.split("\n") if parameters else [] 54 | 55 | markdown_tables = {"Parameters": "| Parameter | Value |\n| --- | --- |"} 56 | stop_values = [] # To collect "stop" values 57 | 58 | for param in parameters: 59 | if param.strip(): 60 | name, value = param.strip().split(None, 1) 61 | if name == "stop": 62 | stop_values.append(value) 63 | else: 64 | markdown_tables["Parameters"] += f"\n| {name} | {value} |" 65 | 66 | 67 | 68 | # Display the model file content 69 | st.text_area("Model File", model_data.get("modelfile", "No modelfile data"), height=375) 70 | # Display the Markdown tables 71 | for table_name, table_content in markdown_tables.items(): 72 | st.markdown(f"**{table_name}**") 73 | st.markdown(table_content) 74 | 75 | # Display "stop" values in individual tables 76 | for i, stop_value in enumerate(stop_values, 1): 77 | st.subheader(f"Stop Sequence Parameter {i}") 78 | st.text(stop_value) 79 | # Display the template 80 | st.text_area("Template", model_data.get("template", "No template data"), height=200) 81 | 82 | # Display the license information 83 | st.text_area("License", model_data.get("license", "No license data"), height=100) 84 | 85 | # Display the details in a more structured way 86 | st.subheader("Additional Details") 87 | details = model_data.get("details", {}) 88 | for key, value in details.items(): 89 | st.write(f"{key}: {value}") -------------------------------------------------------------------------------- /modules/docs_inline.py: -------------------------------------------------------------------------------- 1 | docs = { 2 | "General information": { 3 | "Index": """ 4 | # Welcome to Ollama-Companion 5 | 6 | Welcome within the Ollama-Companion, use this page whenever you want to learn about certain components and pages. 7 | Within the pages there will also be a expander that when clicked shows a short overview of the functionality of the current page. 8 | 9 | * Use the sidebar on the left to navigate to the desired page. 10 | 11 | 12 | """, 13 | "Chat-interface": """ 14 | # Chat interface 15 | 16 | It's possible to chat with Ollama and use LLava to evaluate contents of an image 17 | 18 | To use the chat-interface you first have to fetch the models from ollama with the *"fetch models"* button, this will load the models that are currently available. 19 | 20 | There are two types of response generations, completion and conversation. 21 | * The conversation option leverages chain of thought to keep the conversation going and being able to reference previous chats. 22 | * The completion option will not remember previous prompts or chats and will have a "clean slate" everytime you generate a response. 23 | 24 | If you want to reference images and ask questions about them then you have to use the LLAVA or BAKLLAVA models, best practice for these models is to keep the **"Temperature:** parameter very low otherwise it has the tendency to hallucinate or generates not useful responses. 25 | 26 | To start a new conversation use the **"Start new conversation"** button, this deletes the current chat session_state and clears the chat-interface. 
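For reference, the same image-evaluation flow can be reproduced against the Ollama API directly. The snippet below is a minimal sketch, assuming a local Ollama instance with a `llava` model pulled; the file name is only an example:

```python
# Minimal sketch: ask a LLaVA-class model about an image over the Ollama API
import base64
import requests

with open("photo.jpg", "rb") as f:  # example file name
    image_b64 = base64.b64encode(f.read()).decode()

response = requests.post(
    "http://127.0.0.1:11434/api/generate",
    json={
        "model": "llava",
        "prompt": "What is shown in this image?",
        "images": [image_b64],
        "stream": False,
        # Keep the temperature low, as recommended above, to reduce hallucinated answers
        "options": {"temperature": 0.1},
    },
    timeout=300,
)
print(response.json().get("response"))
```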
27 | 28 | """, 29 | "OpenAI compatibility": """ 30 | # LiteLLM OpenAI Proxy 31 | 32 | LiteLLM is a Python module that converts Ollama-API calls into OpenAI-compatible API calls. This allows the integration of Ollama with thousands of projects originally built for Chat-GPT/OpenAI. 33 | 34 | ### Configuration and Operation of LiteLLM 35 | 36 | - **Configuration File**: 37 | - The Companion stores the LiteLLM configuration file at `logs/config.YAML`. This file includes details about the available models within Ollama. 38 | 39 | - **Model Information Fetching**: 40 | - Upon launching the LiteLLM proxy, it begins to continuously fetch model information from Ollama. 41 | 42 | - **Automatic Updates**: 43 | - The configuration file is automatically updated. When the Companion detects a new model, it adds this model to the configuration file. Subsequently, LiteLLM is restarted with the updated model information. 44 | 45 | - **Handling Model Removal**: 46 | - Note that LiteLLM does not restart when a model is removed from Ollama. This ensures that LiteLLM keeps running when managing models. 47 | --- 48 | """, 49 | "Generating public url": """ 50 | # Public-Url 51 | 52 | One of the main features of the Ollama-Companion is the flexibility in generating public endpoints. 53 | The companion enables you to make the Ollama-API available without opening ports, achieved by utilizing Cloudflare's tunneling service. 54 | 55 | To make Ollama and OpenAI available from a public URL, navigate to the Public-Endpoint page and click the **"Start Endpoint"** button. Once the Cloudflare tunnel is established, the available URL will be displayed. 56 | 57 | #### **How to use public url's** 58 | 59 | **Ollama** : 60 | The Ollama API is available the same way as the local endpoint, for example use: 61 | 62 | ``` 63 | curl https://CUSTOM_GENERATED_URL/api/generate -d '{ 64 | "model": "llama2", 65 | "prompt": "Why is the sky blue?", 66 | }' 67 | ``` 68 | 69 | Replace **"CUSTOM_GENERATED_URL"** with the url provided by the Public-Endpoint page. 70 | 71 | To learn more about the Ollama-API and the available endpoints refrence the documentation at: 72 | 73 | * [Ollama API documentation](https://github.com/jmorganca/ollama/blob/main/docs/api.md) 74 | 75 | **OpenAI / LiteLLM** 76 | 77 | The OpenAI API is available at **"https://CUSTOM_GENERATED_URL/openai"**, to generate a completion use for example: 78 | 79 | ``` 80 | curl --location 'https://CUSTOM_GENERATED_URL/openai/chat/completions' \ 81 | --header 'Content-Type: application/json' \ 82 | --data ' { 83 | "model": "Ollama/llama2", 84 | "messages": [ 85 | { 86 | "role": "user", 87 | "content": "what llm are you" 88 | } 89 | ] 90 | } 91 | ' 92 | ``` 93 | 94 | To learn more about the LiteLLM proxy read the documentation at: 95 | 96 | * [LiteLLM-Documentation](https://docs.litellm.ai) 97 | 98 | If you want to learn more about the OpenAI API and how to use it refrence the documentation at: 99 | 100 | * [OpenAI API documentation](https://platform.openai.com/docs/api-reference) 101 | 102 | --- 103 | """, 104 | 105 | "FAQ": """ 106 | # FAQ 107 | 108 | * Q: How to download models? 109 | - A: Use the modelfile creator page to download and customize models, keep all parameters default and only define a name to just download default models 110 | 111 | * What is quantization and converting of models? 
112 | - A: This is the process of compressing models with certain qualities/sizes to a GGUF file format that llama.cpp can read 113 | 114 | * Q: What models can be converted or quantized 115 | - A: You can convert transformers and pytorch models, some models are not supported due to the lack of information about the "TEMPLATE" 116 | 117 | * Q: How to change where Ollama is located? 118 | - A: Define the Ollama url within the endpoint page 119 | 120 | * Q: What is an OpenAI proxy? 121 | - A: This term is used generally for translating local large language models to OpenAI compatible requests. 122 | 123 | * Q: Creating a conversation within the chat-interface doesnt work as supposed 124 | - A: Only certain models and TEMPLATES are supported to use with the chat conversations, some models like PHI2 wont work with the current implementation. 125 | 126 | * Q: Why do i want to quantize or convert models on my own, thebloke does already provide GGUF files 127 | - A: The converting and quanting of models is build upon the same workflow as TheBloke this enables you to convert models otherwise not available in the GGUF file format for example whenever a new model releases. This also enables you to convert niche models and customize more parts. 128 | 129 | * Q: The chat-interface or other parts seem like they are stuck 130 | - A: This can happen when you run Ollama inside of a container and it runs out of memory, Ollama has trouble recovering itself inside of the container. 131 | 132 | * Q: I would like to see support for X or would like X functionality 133 | - A: Open a issue on Github for feature requests or whenever you have cool ideas! 134 | """, 135 | }, 136 | "Ollama-management": { 137 | "General information": """ 138 | General information about Ollama management... 139 | """, 140 | "Downloading and creating models": """ 141 | # Modelfile Creator 142 | 143 | The Modelfile Creator page in Ollama is designed for creating or pulling models in a user-friendly manner. 144 | 145 | #### Steps to Create and Pull Models 146 | 147 | 1. **Select a Model**: Choose a model from the first dropdown menu. 148 | 2. **Choose Quality/Size**: Select the desired quality or size for the model from the second dropdown menu. 149 | 3. **Name Your Model**: Assign a name to the model. Note that the process will not commence without a specified name. 150 | 4. **Add Custom Content(optional)**: 151 | - You can input custom content into the provided textarea. This content is used as a custom modelfile. See the reference for more details. 152 | - Optionally, insert a custom system prompt in the textarea. 153 | - Add a stop sequence in the designated area. The stop sequence instructs the model to cease generating output when it encounters the specified words or phrases. Each new stop sequence added will generate an additional textarea for further stop sequences. 154 | 155 | When you are satisfied with all parameters and options click the **"Build and Deploy Model"** button to start downloading and creating the desired model. 156 | * The checkmark **"Print payload"** allows you to see the specific API request and contents of the requested model. 157 | 158 | Generally it takes a while to download and verify the model contents, this all depends on you're CPU and networking speed. 159 | 160 | * Modelfiles are saved within the saved/modelfile directory for later refrence. 161 | 162 | --- 163 | #### Parameters values and explanation 164 | 165 | Below is a overview of all the different parameters and what their functions are. 
166 | 167 | 168 | | Parameter | Description | Value Type | Example Usage | 169 | | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------- | -------------------- | 170 | | mirostat | Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) | int | mirostat 0 | 171 | | mirostat_eta | Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1) | float | mirostat_eta 0.1 | 172 | | mirostat_tau | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0) | float | mirostat_tau 5.0 | 173 | | num_ctx | Sets the size of the context window used to generate the next token. (Default: 2048) | int | num_ctx 4096 | 174 | | num_gqa | The number of GQA groups in the transformer layer. Required for some models, for example it is 8 for llama2:70b | int | num_gqa 1 | 175 | | num_gpu | The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable. | int | num_gpu 50 | 176 | | num_thread | Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). | int | num_thread 8 | 177 | | repeat_last_n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) | int | repeat_last_n 64 | 178 | | repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) | float | repeat_penalty 1.1 | 179 | | temperature | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8) | float | temperature 0.7 | 180 | | seed | Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0) | int | seed 42 | 181 | | stop | Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate `stop` parameters in a modelfile. | string | stop "AI assistant:" | 182 | | tfs_z | Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1) | float | tfs_z 1 | 183 | | num_predict | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) | int | num_predict 42 | 184 | | top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 | 185 | | top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. 
(Default: 0.9) | float | top_p 0.9 | 186 | --- 187 | 188 | #### WIP download GGUF models from HuggingFace or load locally available models 189 | 190 | You can also download models from HuggingFace or use locally converted models. 191 | 192 | Click the **"Use local model"** to read from the llama.cpp/models directory and show locally available models. 193 | 194 | Click the **"Download from HF"** to download a GGUF model from HuggingFace and store these in llama.cpp/models. 195 | 196 | In the future it will be possible to search HuggingFace from within the Companion. 197 | 198 | --- 199 | """, 200 | "Viewing model info": """ 201 | # Model information 202 | 203 | To view details or remove models currently loaded in Ollama, use the model info page to fetch the available models and view their details. 204 | This page also allows you to delete selected models. 205 | The list of details that you can view are: 206 | 207 | * Modelfile content 208 | * Parameters 209 | * Stop sequence 210 | * Template 211 | * License information 212 | """, 213 | "Ollama network address": """ 214 | # Change Ollama network address 215 | The "Ollama network" page allows you to specify the network address where your Ollama instance is hosted. By default, Ollama is set to `http://127.0.0.1:11434`. 216 | If your Ollama instance is running on a different address or port, you can update this setting to match your configuration. 217 | 218 | It is possible to daisy chain multiple instances or combine multiple companions together. 219 | 220 | --- 221 | """ 222 | }, 223 | "Converting models": { 224 | "General information": """ 225 | # Converting Models 226 | 227 | Ollama-Companion offers the capability to download and convert models from HuggingFace with just a few clicks. 228 | This uses the same general workflow as "TheBloke" converted models. 229 | 230 | ### Steps for Model Conversion 231 | 232 | To convert models, follow these steps: 233 | 234 | 1. **Download the Model**: Use the included downloader to obtain the model from HuggingFace. 235 | 2. **Convert to GGUF Format**: Utilize the High Precision Quantization page to convert the model into GGUF file format. 236 | 3. **Further Quantization**: Apply Medium Precision Quantization on the model through the designated page. 237 | 4. **Upload Back to HuggingFace**: After quantization, upload the models back to HuggingFace. 238 | 239 | ##### Pushing Models to Ollama Model Library 240 | 241 | - Additionally, it is possible to push models to the open Ollama model library. This feature aims to create an extensive community-based model library. 242 | - Note: This functionality is currently a Work In Progress. More updates and features are expected in the near future. 243 | 244 | --- 245 | ### What does it mean to quantize models? 246 | 247 | Quantizing transformers or PyTorch large language models for use with the GGUF file format involves compressing the model to make it more efficient for deployment and execution. 248 | 249 | This process reduces the size of the model by converting its parameters from higher-precision formats (like 32-bit floating points) to lower-precision formats (like 8-bit integers), thereby reducing memory usage and improving computational speed. 250 | 251 | The GGUF file format, specifically designed for quantized models, ensures that these smaller, more efficient models are stored in an optimized manner, making them more suitable for deployment in resource-constrained environments or for applications requiring high-speed processing. 
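As a rough back-of-envelope illustration (weights only, ignoring the small per-block metadata GGUF adds), the file size is approximately the parameter count times the bytes stored per weight:

```python
# Rough weight-only size estimate for a 7B-parameter model at different precisions
params = 7_000_000_000
for name, bytes_per_weight in [("F32", 4), ("F16", 2), ("Q8_0", 1), ("Q4_0", 0.5)]:
    print(f"{name}: ~{params * bytes_per_weight / 1e9:.0f} GB")
# Prints roughly: F32 ~28 GB, F16 ~14 GB, Q8_0 ~7 GB, Q4_0 ~4 GB
# (real GGUF files add a little overhead on top of these figures)
```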
252 | 253 | ##### Understanding File Formats: F32, F16, Q8_0 Quantization 254 | 255 | **F32 (32-bit Floating Point)**: 256 | - **Precision**: High, with 32 bits per number (1 for sign, 8 for exponent, 23 for fraction). 257 | - **Usecase**: Used for the base of a quantazing of models 258 | - **Performance**: Requires more memory, slower processing. 259 | 260 | **F16 (16-bit Floating Point)**: 261 | - **Precision**: Medium, using 16 bits (1 for sign, 5 for exponent, 10 for fraction). 262 | - **Performance**: Balances precision and performance, faster and more memory-efficient than F32. 263 | 264 | **Q8_0 (8-bit Fixed Point)**: 265 | - **Precision**: Low, all 8 bits for integer part, no fractional part. 266 | - **Performance**: Highly efficient in memory and speed, but significantly lower precision. 267 | 268 | """, 269 | "Download models": """ 270 | # How to Download Model Files from Hugging Face 271 | 272 | - First, visit the Hugging Face model page that you want to download. For example, if you want to download the model at this link: [https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2). 273 | 274 | - On the model page, locate the icon next to the username of the model's author. This icon typically looks like a clipboard or a copy symbol. Click on this icon to copy the Username/RepositoryName, which in this example is `mistralai/Mistral-7B-Instruct-v0.2`. 275 | 276 | - Paste the copied Username/RepositoryName `mistralai/Mistral-7B-Instruct-v0.2` directly into the input field. 277 | 278 | - Click the "Get file list" button or option to retrieve the list of files available in this repository. 279 | 280 | - Review the list of files to ensure you have the correct model files that you want to download. 281 | 282 | - Finally, click the "Download Model" button or option to initiate the download process for the selected model files. 283 | 284 | - The model files will be saved in the `llama.cpp/models` directory on your device. 285 | 286 | - Now you have successfully downloaded the model files from Hugging Face, and they are stored in the `llama.cpp/models` directory for your use. 287 | 288 | """, 289 | "Manually converting models": """# Convert models to GGUF. 290 | Ollama-Companion provides the conversion of Transformer and PyTorch models to the GGUF (Generic GPU Utility Format) file format. The first step in this process involves converting a model into a format that can be further quantized, essentially creating a base for the next step in quantization. 291 | 292 | 293 | ## Steps to convert transformers model. 294 | 295 | 1. **Model to High-Quality Base File**: Begin by converting a model to a high-quality base file, setting the stage for further quantization. 296 | 297 | 2. **Use Docker for Conversion**: 298 | - Enable the docker checkmark to employ a Docker container for the conversion, pulling the Companion-converter container. 299 | - Ensure Docker commands can run without sudo privileges. 300 | 301 | 3. **Conversion Process**: 302 | - Select a model from the dropdown in the `llama.cpp/models` directory. 303 | - Choose the Quality/Format for conversion, including "Q8_0", "F16", or "F32". 304 | - Click **"Start Conversion"** to begin the process. 305 | 306 | 4. **Storage of Converted Models**: 307 | - The Companion stores converted models in the `High-Precision-Quantization` folder within the `models` directory. 
308 | 309 | #### Understanding Different Format Options 310 | 311 | - **F32 Format**: Converting to F32 (32-bit Floating Point) is the recommended approach. This format maintains a high level of precision and information quality, essential for complex computations and detailed model analyses. F32 is ideal for models where accuracy and detailed data representation are critical, making it the preferred choice for further quantization. 312 | 313 | - **F16 Format**: F16 (16-bit Floating Point) offers a balance between performance and precision. It provides less precision than F32 but significantly reduces the model's memory requirements and increases computational speed. F16 is suited for scenarios where speed is prioritized over extreme precision. 314 | 315 | - **Q8_0 Format**: The Q8_0 (Quantized 8-bit) format is another option, but it is generally not recommended for typical use. This format considerably reduces the model size and increases speed but at the cost of a substantial loss in precision. The Q8_0 format can lead to degraded model performance and accuracy, making it less suitable for applications where these factors are important. 316 | # Quantize models 317 | After building the base file for the quantization process, proceed to the Medium Precision Quantization page. Quantizing models is necessary for their use with Ollama/llama.cpp. For optimal results, use the Q8.0 or F16 formats. 318 | For testing purposes, quantizing models to Q4_0 is advised as it generally offers the best compatibility. Jobs are automatically scheduled when multiple options are selected. 319 | 320 | 1. Select the model you would like to quantize from the dropdown menu. 321 | 2. Choose the quality with which you want to quantize (options include Q4.0, Q6K_M). 322 | 3. Press the **"Start quantizing"** button to initiate the quantization process. 323 | 4. The Companion stores quantized models in the Medium-Precision-Quantization sub-directory within the models folder. 324 | """, 325 | 326 | 327 | "Upload Models": """ 328 | # Upload Models to HuggingFace 329 | 330 | Use the HF Uploader page to upload models back to HuggingFace. You can create a free account at [HuggingFace](https://huggingface.co) and store files up to 100GB for free. HuggingFace also offers unlimited repository storage for files and models. For enhanced security, use an encrypted HuggingFace token. 331 | 332 | - **Handling of the HF API Token**: During the uploading process, the HuggingFace API token is temporarily stored in its own environment variable. Once the uploading concludes, this variable is automatically deleted for security purposes. 333 | 334 | - **Future Feature - Persistent Storage of API Token**: In future updates, there will be an option to store the HF API token indefinitely. Streamlit offers a secure vault for storing environment variables, such as API keys. However, this feature is not yet implemented in the current version of the Ollama-Companion. 335 | 336 | #### Encrypted token 337 | 338 | For a extra layer of security when dealing with API keys use the Token Encrypter page, this will encrypt your token and add some extra protection. 339 | Copy your HF API token within the textarea and encrypt your token. 340 | If you desire a new encrypton key or there is no encryption key available click the **"Generate new key"** button. 341 | 342 | #### Steps for Uploading Models 343 | 344 | 1. **Select a Model**: Choose a model from the dropdown list found in the `llama.cpp/models` directory. 345 | 2. 
**Enter Repository Name**: Provide a name for the new Hugging Face repository where your model will be uploaded. 346 | 3. **Choose Files for Upload**: Select the specific files you wish to upload from the chosen model's subfolders. 347 | 4. **Add README Content**: Optionally, compose content for the README.md file of your repository. 348 | 5. **Token Usage**: 349 | - For added security, use an encrypted token. Encrypt your Hugging Face token on the Token Encrypt page and paste it into the "Enter Encrypted Token" field. 350 | - Alternatively, input an unencrypted Hugging Face token directly. 351 | 6. **Upload Files**: Click on the "Upload Selected Files" button to initiate the upload of your files to Hugging Face. 352 | establishing the connection to HuggingFace can generally take a while, do not press the button multiple times. 353 | 7. **Accessing Uploaded Models**: Once uploaded, the models can be accessed at `https://huggingface.co/your-username/your-repo-name`. 354 | 355 | """ 356 | }, 357 | "Develop custom functions": { 358 | "Tips and tricks": 359 | """ 360 | # How to add custom functions 361 | 362 | To incorporate custom functions or pages into the Ollama-Companion, create a Python file in the **"pages"** directory. This approach simplifies building and adding functions. For instance, you could develop a language chain stack or construct a terminal emulator. The possibilities are endless. 363 | 364 | Import streamlit within the just created file like this: 365 | 366 | ``` 367 | import streamlit as st 368 | ``` 369 | 370 | Now you can start writing your custom functions within this document to learn more about how to build streamlit UI elements refrence : 371 | 372 | [Streamlit docs](http://docs.streamlit.com) 373 | 374 | Tips building for building with Streamlit: 375 | 376 | - **Use Subprocess Instead of Threading**: Opt for subprocesses over threading for improved performance and management. 377 | 378 | - **Utilize Session State or Caching**: Employ Streamlit's `session_state` or caching mechanisms to maintain data across sessions or to minimize redundant processing. 379 | 380 | - **Self-Contained Loops**: Ensure that loops are self-contained to prevent unnecessary complications. 381 | 382 | - **Clearly Defined Functions and UI Elements**: Use clear and distinct names for functions and UI elements, especially important when functions are used across different pages. 383 | 384 | - **Understanding Streamlit's Threading**: Streamlit has its own threading module and recommends using this automated threading for its elements. 385 | 386 | - **Page Changes in Streamlit**: When switching pages in Streamlit, the entire script runs again. Therefore, utilize caching or session state to optimize resource usage. 387 | 388 | - **Naming Conventions**: Choose unique and descriptive names when developing functions for multiple pages to avoid conflicts and ensure clarity. Good naming practices are crucial for importing modules and functions on different pages effectively. 
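To make these tips concrete, here is a minimal sketch of a custom page. The file name, the widgets, and the `ollama list` command it runs are purely illustrative; the only thing it assumes from the Companion itself is the `modules.shared` dictionary.

```python
# pages/My_Custom_Tool.py  (illustrative example page)
import subprocess
import streamlit as st
from modules.shared import shared  # reuse the configured Ollama endpoint

st.title("My Custom Tool")

# Streamlit reruns the whole script on every interaction,
# so keep results in session_state instead of module-level globals.
if "last_output" not in st.session_state:
    st.session_state.last_output = ""

command = st.text_input("Command to run", "ollama list")

if st.button("Run"):
    # A one-off subprocess is simpler to manage here than a thread.
    result = subprocess.run(command.split(), capture_output=True, text=True)
    st.session_state.last_output = result.stdout or result.stderr

st.text_area("Output", st.session_state.last_output, height=200)
st.caption(f"Configured Ollama endpoint: {shared['api_endpoint']['url']}")
```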
389 | 390 | --- 391 | """ 392 | } 393 | } 394 | -------------------------------------------------------------------------------- /modules/shared.py: -------------------------------------------------------------------------------- 1 | # shared.py 2 | 3 | # Shared configuration data 4 | shared = { 5 | 'checkbox_high_options': ["Q8_0", "F16", "F32"], 6 | 'checkbox_options': [ 7 | "q4_0", "q4_1", "q5_0", "q5_1", "q2_K", "q3_K", "q3_K_S", "q3_K_M", "q3_K_L", 8 | "q4_K", "q4_K_S", "q4_K_M", "q5_K", "q5_K_S", "q5_K_M", "q6_K", "q8_0", "F16", "F32" 9 | ], 10 | 'api_endpoint': {'url': 'http://127.0.0.1:11434'}, 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 'gradio': {}, 42 | 'parameters': { 43 | 'mirostat': [0, [0, 1, 2]], # Dropdown 44 | 'mirostat_eta': [0.1, (0.0, 1.0)], 45 | 'mirostat_tau': [0.1, (0.0, 1.0)], 46 | 'num_ctx': [4096, (1024, 16912)], 47 | 'num_gqa': [256, (128, 512)], 48 | 'num_gpu': [0, (1, 250)], 49 | 'num_thread': [0, (0, 64)], 50 | 'repeat_last_n': [0, (0, 32000)], 51 | 'repeat_penalty': [1.0, (0.5, 2.0)], 52 | 'temperature': [0.8, (0.1, 1.0)], 53 | 'seed': [None, (0, 10000)], # None indicates no default value 54 | 'tfs_z': [1, (1, 20)], # Slider from 1 to 20 55 | 'num_predict': [256, (128, 512)], 56 | 'top_k': [0, (0, 100)], 57 | 'top_p': [1.0, (0.1, 1.0)], 58 | } 59 | } 60 | 61 | 62 | # Separate variable for module imports 63 | modules_to_import = { 64 | "model_selector": ["show_model_selector", "get_json"], 65 | "modelfile_templater": ["show_model_dropdowns", "show_parameter_sliders", "show_model_name_input", "display_model_creator", "show_model_name_input"], 66 | # "ollama_api_configurator": ["show_ollama_api_configurator"], 67 | "litellm_proxy": ["show_litellm_proxy_page"], 68 | "public_endpoint": ["show_public_endpoint_page"], 69 | "downloading_models": ["show_downloading_models_page"], 70 | "High_Precision_Quantization": ["show_high_precision_quantization_page"], 71 | "Medium_Precision_Quantization": ["show_medium_precision_quantization_page"], 72 | "UploadtoHuggingface": ["show_model_management_page"], 73 | "token_encrypt": ["show_token_encrypt_page"], 74 | "chat_interface": ["show_chat_interface"], 75 | } -------------------------------------------------------------------------------- /modules/streamlit_ip.py: -------------------------------------------------------------------------------- 1 | """" 2 | # Old code for building a cloudflare tunnel 3 | 4 | import subprocess 5 | import psutil 6 | import re 7 | 8 | def is_tunnel_running(): 9 | for process in psutil.process_iter(['pid', 'name']): 10 | try: 11 | if 'cloudflared' in process.info['name'].lower(): 12 | return True 13 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 14 | continue 15 | return False 16 | 17 | def start_tunnel(): 18 | if not is_tunnel_running(): 19 | process = subprocess.Popen(['pycloudflared', 'tunnel', '--url', 'http://127.0.0.1:8501'], 20 | stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) 21 | 22 | print("Starting Cloudflare Tunnel...") 23 | for line in iter(process.stdout.readline, ''): 24 | if '.trycloudflare.com' in line: 25 | print(f"Tunnel URL: {line.strip()}") 26 | break 27 | 28 | if __name__ == "__main__": 29 | start_tunnel() 30 | """"" -------------------------------------------------------------------------------- /pages/Chat_Interface.py: -------------------------------------------------------------------------------- 1 | # chat_interface.py 
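# Streamlit chat page for the configured Ollama endpoint (shared['api_endpoint']['url']).
# "Generate completion" streams one-shot responses from POST /api/generate, while
# "Start a Conversation" keeps multi-turn history and streams from POST /api/chat.
# Uploaded images are base64-encoded and attached to the payload for multimodal models.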
2 | 3 | import streamlit as st 4 | import base64 5 | import requests 6 | import json 7 | import time 8 | from modules.shared import shared 9 | from modules.api_module import get_json 10 | 11 | base_url = shared['api_endpoint']['url'] 12 | 13 | # Encodes images to base64 output as list 14 | def images_to_base64(images): 15 | """Convert a list of image files to base64 encoding.""" 16 | encoded_images = [] 17 | for image_file in images: 18 | if image_file is not None: 19 | # Read the file and encode it 20 | file_bytes = image_file.getvalue() 21 | base64_encoded = base64.b64encode(file_bytes).decode("utf-8") 22 | encoded_images.append(base64_encoded) 23 | return encoded_images 24 | 25 | 26 | 27 | # Here we create the request for a chat completion at /api/generate 28 | def stream_response(prompt, base_url, model_name, encoded_images=None): 29 | url = f'{base_url}/api/generate' 30 | payload = { 31 | "model": model_name, # Using the selected model 32 | "prompt": prompt 33 | } 34 | if encoded_images: 35 | payload["images"] = encoded_images 36 | 37 | headers = {'Content-Type': 'application/json'} 38 | 39 | # Print statement to log the request details 40 | # Using separators to remove extra whitespaces in the list 41 | # Uncomment print statements below to show the request sent to Ollama 42 | print(f"Requesting URL: {url}") 43 | print(f"Headers: {headers}") 44 | print(f"Payload: {json.dumps(payload, separators=(',', ':'), indent=4)}") 45 | 46 | with requests.post(url, json=payload, headers=headers, stream=True) as response: 47 | if response.status_code == 200: 48 | for line in response.iter_lines(): 49 | if line: 50 | yield json.loads(line) 51 | else: 52 | print(f"Error: {response.status_code}") 53 | yield {"response": "Error in generating response"} 54 | 55 | 56 | # 57 | # Generate a conversation with a model with /api/chat 58 | @st.cache_resource 59 | def continuous_conversation(model_name, base_url, messages): 60 | url = f"{base_url}/api/chat" 61 | payload = { 62 | "model": model_name, 63 | "messages": messages, 64 | "stream": True 65 | } 66 | 67 | headers = {'Content-Type': 'application/json'} 68 | conversation_length = 0 69 | 70 | with requests.post(url, json=payload, headers=headers, stream=True) as response: 71 | if response.status_code == 200: 72 | for line in response.iter_lines(): 73 | if line: 74 | body = json.loads(line) 75 | if "error" in body: 76 | raise Exception(body["error"]) 77 | if body.get("done", False): 78 | break 79 | message = body.get("message", "") 80 | if message: 81 | conversation_length += 1 82 | yield message 83 | else: 84 | print(f"Error: {response.status_code}") 85 | yield {"response": "Error in generating response"} 86 | 87 | return conversation_length 88 | 89 | 90 | st.title("Chat Interface") 91 | 92 | # Sidebar dropdown for Chat Options 93 | with st.sidebar: 94 | # Fetch models for model selection 95 | if 'model_names' not in st.session_state: 96 | st.session_state['model_names'] = [] 97 | fetch_button = st.button('Fetch Models') 98 | if fetch_button: 99 | json_data = get_json(f"{base_url}/api/tags") 100 | if isinstance(json_data, dict) and 'models' in json_data: 101 | st.session_state['model_names'] = [model['name'] for model in json_data['models']] 102 | else: 103 | st.error("Invalid JSON structure or error in fetching data") 104 | 105 | 106 | # Image Uploader 107 | uploaded_images = st.file_uploader("Upload Images", type=["jpg", "jpeg", "png"], accept_multiple_files=True) 108 | encoded_images = images_to_base64(uploaded_images) 109 | if encoded_images: 110 | for 
uploaded_image in uploaded_images: 111 | st.image(uploaded_image, caption="Uploaded Image") 112 | 113 | # Chat mode selection dropdown 114 | chat_type = st.selectbox("Type of Chat", ["Generate completion", "Start a Conversation"]) 115 | chat_option = st.selectbox("Chat Speed", [ "Slow Typing Mode", "Fast Typing Mode"]) 116 | # Model selection dropdown 117 | selected_model = st.selectbox("Select a Model", st.session_state['model_names']) 118 | 119 | # 120 | # Drawing and using of ChatUI 121 | # Check if user already used the ChatInterface 122 | if "messages" not in st.session_state: 123 | st.session_state.messages = [] 124 | 125 | # Display previous messages 126 | for message in st.session_state.messages: 127 | with st.chat_message(message["role"]): 128 | st.markdown(message["content"]) 129 | 130 | # Chat input field 131 | if prompt := st.chat_input("What is up?"): 132 | st.session_state.messages.append({"role": "user", "content": prompt}) 133 | with st.chat_message("user"): 134 | st.markdown(prompt) 135 | 136 | # Check the selected chat type 137 | if chat_type == "Generate completion": 138 | # Generate a completion response 139 | with st.chat_message("assistant"): 140 | message_placeholder = st.empty() 141 | full_response = "" 142 | 143 | # Pass the list of encoded images to the stream_response function 144 | for response_chunk in stream_response(prompt, base_url, selected_model, encoded_images): 145 | if 'response' in response_chunk: 146 | assistant_response = response_chunk['response'] 147 | typing_speed = 0.03 if chat_option == "Slow Typing Mode" else 0.008 148 | for char in assistant_response: 149 | full_response += char 150 | time.sleep(typing_speed) 151 | message_placeholder.markdown(full_response + "▌", unsafe_allow_html=True) 152 | 153 | message_placeholder.markdown(full_response, unsafe_allow_html=True) 154 | st.session_state.messages.append({"role": "assistant", "content": full_response}) 155 | 156 | elif chat_type == "Start a Conversation": 157 | # Continuous conversation logic 158 | with st.chat_message("assistant"): 159 | message_placeholder = st.empty() 160 | full_response = "" 161 | 162 | # Fetch and display the continuous conversation 163 | for message in continuous_conversation(selected_model, base_url, st.session_state.messages): 164 | assistant_response = message.get("content", "") 165 | typing_speed = 0.03 if chat_option == "Slow Typing Mode" else 0.008 166 | for char in assistant_response: 167 | full_response += char 168 | time.sleep(typing_speed) 169 | message_placeholder.markdown(full_response + "▌", unsafe_allow_html=True) 170 | 171 | message_placeholder.markdown(full_response, unsafe_allow_html=True) 172 | st.session_state.messages.append({"role": "assistant", "content": full_response}) 173 | 174 | -------------------------------------------------------------------------------- /pages/Docs.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from modules.docs_inline import docs 3 | from st_pages import Page, add_indentation 4 | 5 | add_indentation() 6 | 7 | # Create Tabs for Main Subjects 8 | tab_main = st.tabs(list(docs.keys())) 9 | 10 | for i, subject in enumerate(docs.keys()): 11 | with tab_main[i]: 12 | # Create Tabs for Sub-Subjects within each Main Subject 13 | tab_sub = st.tabs(list(docs[subject].keys())) 14 | 15 | for j, sub_subject in enumerate(docs[subject].keys()): 16 | with tab_sub[j]: 17 | # Display the Documentation for each Sub-Subject 18 | st.markdown(docs[subject][sub_subject])# 
st.divider() 19 | 20 | -------------------------------------------------------------------------------- /pages/HF_Token_Encrypter.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from cryptography.fernet import Fernet 3 | from pathlib import Path 4 | import subprocess 5 | 6 | # Function to load the existing key 7 | def load_key(): 8 | key_dir = Path('.') / '.key' 9 | key_file_path = key_dir / 'encryption.key' 10 | return key_file_path.read_bytes() 11 | 12 | # Encrypt the token 13 | def encrypt_token(token): 14 | key = load_key() 15 | f = Fernet(key) 16 | encrypted_token = f.encrypt(token.encode()) 17 | return encrypted_token.decode() 18 | 19 | def generate_new_key(): 20 | key_dir = Path('.') / '.key' 21 | key_file_path = key_dir / 'encryption.key' 22 | 23 | # Check if the key file exists and delete it 24 | if key_file_path.exists(): 25 | try: 26 | key_file_path.unlink() # Deletes the file 27 | print("Existing key file deleted.") 28 | except Exception as e: 29 | return f"Error deleting existing key: {e}" 30 | 31 | # Generate new key 32 | root_dir = Path(__file__).parent.parent 33 | script_path = root_dir / 'key_generation.py' 34 | try: 35 | subprocess.run(['python3', str(script_path)], check=True) 36 | return "New private key generated successfully." 37 | except subprocess.CalledProcessError as e: 38 | return f"Error generating new key: {e}" 39 | 40 | # Oude manier van pagina aanroepen 41 | # def show_token_encrypt_page(): 42 | st.title("Token Encryption") 43 | 44 | token = st.text_input("Enter your Hugging Face Token", type="password") 45 | 46 | if st.button("Encrypt Token"): 47 | if token: 48 | encrypted_token = encrypt_token(token) 49 | st.text_area("Encrypted Token", encrypted_token, height=100) 50 | else: 51 | st.error("Please enter a token to encrypt.") 52 | 53 | if st.button("Generate New Private Key"): 54 | message = generate_new_key() 55 | st.text(message) 56 | 57 | # Uncomment this line to run this script directly for testing 58 | # show_token_encrypt_page() 59 | with st.expander("Token Encryption Guide", expanded=False): 60 | st.markdown(""" 61 | **Token Encryption Guide** 62 | 63 | This page assists you in encrypting your Hugging Face token for enhanced security. 64 | 65 | **Why Encrypt Your Token?** 66 | 67 | Encrypting your Hugging Face token adds an extra layer of security, protecting it from unauthorized access. It is particularly useful when you deploy scripts in shared environments. 68 | 69 | **How to Encrypt Your Token:** 70 | 71 | 1. **Enter Your Token:** Type your Hugging Face token into the input field. 72 | 2. **Encrypt:** Click the 'Encrypt Token' button to encrypt your token. 73 | 3. **Use Your Encrypted Token:** The encrypted token will be displayed. You can now use this encrypted token within this app for secure uploading to Hugging Face. 74 | 4. **Secure Usage:** Store your encrypted token securely. It will be your secure key for uploads in this application. 75 | 76 | Encrypting your token ensures its security and enables you to upload to Hugging Face safely within this app. 
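**How the encryption works (conceptual sketch)**

Under the hood the app uses symmetric Fernet encryption with a key kept in `.key/encryption.key`, so an encrypted token is only usable by a Companion instance that holds the same key file. Roughly, assuming the key file already exists (for example after pressing "Generate New Private Key"):

```python
from cryptography.fernet import Fernet
from pathlib import Path

key = (Path('.') / '.key' / 'encryption.key').read_bytes()
f = Fernet(key)

encrypted = f.encrypt(b"hf_your_token_here")  # what "Encrypt Token" produces
original = f.decrypt(encrypted)               # the same key recovers the original token
```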
77 | """) 78 | -------------------------------------------------------------------------------- /pages/High_Precision_Quantization.py: -------------------------------------------------------------------------------- 1 | from apscheduler.schedulers.background import BackgroundScheduler 2 | import subprocess 3 | import threading 4 | from pathlib import Path 5 | import streamlit as st 6 | from modules.shared import shared 7 | import sys 8 | import queue 9 | 10 | # Initialize queue 11 | command_queue = queue.Queue() 12 | 13 | # Existing function definitions (find_llama_models_dir, run_command, trigger_command) 14 | 15 | def process_queue(): 16 | if not command_queue.empty(): 17 | model_folder, out_type, use_docker = command_queue.get() 18 | result = run_command(model_folder, out_type, use_docker) 19 | print(result) 20 | command_queue.task_done() 21 | 22 | # Set up APScheduler 23 | scheduler = BackgroundScheduler() 24 | scheduler.add_job(process_queue, 'interval', seconds=10) # Adjust the interval as needed 25 | scheduler.start() 26 | 27 | # Initialize queue and start a background thread for processing commands 28 | 29 | @st.cache_data 30 | def find_llama_models_dir(start_path, max_up=4, max_down=3): 31 | def search_upwards(path, depth): 32 | if depth > max_up: 33 | return None 34 | if (path / "llama.cpp/models").exists(): 35 | return path / "llama.cpp/models" 36 | return search_upwards(path.parent, depth + 1) 37 | 38 | @st.cache_data 39 | def search_downwards(path, depth): 40 | if depth > max_down: 41 | return None 42 | if (path / "llama.cpp/models").exists(): 43 | return path / "llama.cpp/models" 44 | for child in [d for d in path.iterdir() if d.is_dir()]: 45 | found = search_downwards(child, depth + 1) 46 | if found: 47 | return found 48 | return None 49 | 50 | # Search upwards 51 | found_path = search_upwards(start_path, 4) 52 | if found_path: 53 | return found_path # Return the found 'llama.cpp/models' directory 54 | 55 | # Search downwards 56 | return search_downwards(start_path, 3) 57 | 58 | 59 | # Use the function to find the base directory 60 | # current_path = Path(__file__).resolve() 61 | # base_dir = find_llama_models_dir(current_path) 62 | 63 | # if not base_dir: 64 | # print("Error: llama.cpp/models/ directory not found.") 65 | # else: 66 | # print("llama.cpp/models/ found at:", base_dir) 67 | 68 | 69 | 70 | 71 | def run_command(model_folder, out_type, use_docker): 72 | base_dir = Path("llama.cpp/models") 73 | input_dir = base_dir / model_folder 74 | target_dir = input_dir / "High-Precision-Quantization" 75 | output_file = f"{model_folder}-{out_type}.GGUF" 76 | target_dir.mkdir(parents=True, exist_ok=True) 77 | 78 | # Correct path for convert.py 79 | convert_script_path = base_dir.parent / "convert.py" # Assuming convert.py i 80 | 81 | if use_docker: 82 | docker_image = "luxaplexx/convert-compaan-ollama" 83 | # Docker volume paths need to be in Linux format even on Windows 84 | if sys.platform.startswith('win'): 85 | volume_path = base_dir.resolve().drive # This will be 'D:' on Windows if base_dir is on D drive 86 | else: 87 | volume_path = base_dir.resolve().as_posix() # On Unix-like systems, the full path is used 88 | output_path = Path(f"./models/{model_folder}/High-Precision-Quantization/{output_file}").as_posix() 89 | 90 | 91 | command = [ 92 | "docker", "run", "--rm", 93 | "-v", f"{volume_path}/models", 94 | docker_image, "convert", Path("./models") / model_folder, 95 | "--outfile", output_path.as_posix(), 96 | "--outtype", out_type.lower() 97 | ] 98 | print("ran with 
docker", command) 99 | else: 100 | command = [ 101 | "python3", str(convert_script_path), 102 | str(input_dir), 103 | "--outfile", str(target_dir / output_file), 104 | "--outtype", out_type.lower() 105 | ] 106 | print("First statement", target_dir) 107 | try: 108 | subprocess.run(command, check=True) 109 | return "Command completed successfully." 110 | except subprocess.CalledProcessError as e: 111 | return f"Error in command execution: {e}" 112 | 113 | 114 | def trigger_command(model_folder, options, use_docker): 115 | if not any(options.values()): 116 | return "Error: No quantization type selected." 117 | for option in options: 118 | if options[option]: 119 | command_queue.put((model_folder, option.lower(), use_docker)) 120 | return "Commands queued. They will run sequentially." 121 | 122 | 123 | 124 | # Old UI code 125 | # def show_high_precision_quantization_page(): 126 | 127 | st.title("High Precision Quantization") 128 | 129 | models_dir = Path("llama.cpp/models/") 130 | model_folders = [f.name for f in models_dir.iterdir() if f.is_dir()] if models_dir.exists() else ["Directory not found"] 131 | 132 | model_folder = st.selectbox("Select a Model Folder", model_folders) 133 | options = {option: st.checkbox(label=option) for option in shared['checkbox_high_options']} 134 | use_docker = st.checkbox("Use Docker Container") 135 | 136 | if st.button("Run Commands"): 137 | if not any(options.values()): 138 | st.error("Please select at least one quantization type before running commands.") 139 | elif use_docker and not any(options.values()): 140 | st.error("Please select at least one quantization type along with the Docker option.") 141 | else: 142 | status = trigger_command(model_folder, options, use_docker) 143 | st.text(status) 144 | 145 | 146 | with st.expander("Step One: Model Conversion with High Precision", expanded=False): 147 | st.markdown(""" 148 | **Step One: Model Conversion with High Precision** 149 | 150 | 151 | **Conversion Process:** 152 | 153 | 1. **Select a Model Folder:** Choose a folder containing the model you wish to convert, found within `llama.cpp/models`. 154 | 2. **Set Conversion Options:** Select the desired conversion options from the provided checkboxes (e.g., Q, Kquants). 155 | 3. **Docker Container Option:** Opt to use a Docker container for added flexibility and compatibility. 156 | 4. **Execute Conversion:** Click the "Run Commands" button to start the conversion process. 157 | 5. **Output Location:** Converted models will be saved in the `High-Precision-Quantization` subfolder within the chosen model folder. 158 | 159 | Utilize this process to efficiently convert models while maintaining high precision and compatibility with `llama.cpp`. 
160 | """) 161 | # Start the thread to process commands 162 | threading.Thread(target=process_queue, daemon=True).start() 163 | -------------------------------------------------------------------------------- /pages/Hugging_Face_Downloader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import streamlit as st 4 | import requests 5 | from apscheduler.schedulers.background import BackgroundScheduler 6 | from pathlib import Path 7 | 8 | # Initialize APScheduler 9 | scheduler = BackgroundScheduler() 10 | scheduler.start() 11 | 12 | # Global variables to keep track of download tasks and downloaded files 13 | scheduled_jobs = [] 14 | downloaded_files = [] 15 | 16 | def download_file_task(file_url, download_path, filename): 17 | global downloaded_files 18 | file_path = download_path / filename 19 | command = [ 20 | "aria2c", file_url, 21 | "--max-connection-per-server=16", "--split=8", "--min-split-size=25M", "--allow-overwrite=true", 22 | "-d", str(download_path), "-o", filename, 23 | "--continue=true" 24 | ] 25 | try: 26 | subprocess.run(command, check=True) 27 | downloaded_files.append(str(file_path)) 28 | except subprocess.CalledProcessError as e: 29 | print(f"Error downloading {filename}: {str(e)}") 30 | 31 | def queue_download(file_links_dict, model_name): 32 | global scheduled_jobs 33 | folder_name = model_name.split("/")[-1] 34 | current_dir = Path(__file__).parent 35 | download_path = current_dir.parent / f"llama.cpp/models/{folder_name}" 36 | download_path.mkdir(parents=True, exist_ok=True) 37 | 38 | for file_name, file_url in file_links_dict.items(): 39 | filename = Path(file_name).name 40 | job = scheduler.add_job(download_file_task, args=[file_url, download_path, filename]) 41 | scheduled_jobs.append(job) 42 | 43 | return "Download tasks have been queued." 44 | 45 | def cancel_downloads(): 46 | global scheduled_jobs, downloaded_files 47 | for job in scheduled_jobs: 48 | job.remove() 49 | scheduled_jobs.clear() 50 | 51 | for file_path in downloaded_files: 52 | if os.path.exists(file_path): 53 | os.remove(file_path) 54 | downloaded_files.clear() 55 | 56 | return "All queued downloads have been cancelled and files removed." 
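# Note on the Hugging Face endpoints used below:
# - https://huggingface.co/api/models/<user>/<repo>/tree/main returns the file listing
#   (paths and sizes) that get_files_from_repo parses.
# - https://huggingface.co/<user>/<repo>/resolve/main/<path> is the direct download URL
#   handed to aria2c for each selected file.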
57 | 58 | def construct_hf_repo_url(model_name): 59 | base_url = "https://huggingface.co/api/models/" 60 | return f"{base_url}{model_name}/tree/main" 61 | 62 | def get_files_from_repo(url, repo_name): 63 | try: 64 | response = requests.get(url) 65 | if response.status_code == 200: 66 | files_info = response.json() 67 | file_info_dict = {} 68 | file_links_dict = {} 69 | 70 | base_url = f"https://huggingface.co/{repo_name}/resolve/main/" 71 | for file in files_info: 72 | name = file.get('path', 'Unknown') 73 | size = file.get('size', 0) 74 | human_readable_size = f"{size / 1024 / 1024:.2f} MB" 75 | file_info_dict[name] = human_readable_size 76 | file_links_dict[name] = base_url + name 77 | 78 | return file_info_dict, file_links_dict 79 | else: 80 | return {}, {} 81 | except Exception as e: 82 | return {}, {} 83 | 84 | st.title("Model Downloader") 85 | 86 | model_name = st.text_input("Download PyTorch models from Huggingface", "Use the HuggingfaceUsername/Modelname") 87 | if st.button("Get File List"): 88 | _, file_links = get_files_from_repo(construct_hf_repo_url(model_name), model_name) 89 | if file_links: 90 | st.session_state['file_links_dict'] = file_links 91 | files_info = "\n".join(f"{name}, Size: {size}" for name, size in file_links.items()) 92 | st.text_area("Files Information", files_info, height=300) 93 | else: 94 | st.error("Unable to retrieve file links.") 95 | if 'file_links_dict' in st.session_state: 96 | del st.session_state['file_links_dict'] 97 | 98 | if st.button("Download Files"): 99 | if 'file_links_dict' in st.session_state and st.session_state['file_links_dict']: 100 | queue_message = queue_download(st.session_state['file_links_dict'], model_name) 101 | st.text(queue_message) 102 | else: 103 | st.error("No files to download. Please get the file list first.") 104 | 105 | if st.button("Stop Downloads"): 106 | cancel_message = cancel_downloads() 107 | st.text(cancel_message) 108 | 109 | with st.expander("How to Download Model Files from Hugging Face", expanded=False): 110 | st.markdown(""" 111 | **How to Download Model Files from Hugging Face** 112 | 113 | - First, visit the Hugging Face model page that you want to download. For example, if you want to download the model at this link: [https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2). 114 | 115 | - On the model page, locate the icon next to the username of the model's author. This icon typically looks like a clipboard or a copy symbol. Click on this icon to copy the Username/RepositoryName, which in this example is `mistralai/Mistral-7B-Instruct-v0.2`. 116 | 117 | - Paste the copied Username/RepositoryName `mistralai/Mistral-7B-Instruct-v0.2` directly into the input field. 118 | 119 | - Click the "Get file list" button or option to retrieve the list of files available in this repository. 120 | 121 | - Review the list of files to ensure you have the correct model files that you want to download. 122 | 123 | - Finally, click the "Download Model" button or option to initiate the download process for the selected model files. 124 | 125 | - The model files will be saved in the `llama.cpp/models` directory on your device. 126 | 127 | - Now you have successfully downloaded the model files from Hugging Face, and they are stored in the `llama.cpp/models` directory for your use. 
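Under the hood, each queued file is fetched with a segmented `aria2c` download roughly like this (the repository and file name are only an example):

```
aria2c https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/model-00001-of-00003.safetensors \
    --max-connection-per-server=16 --split=8 --min-split-size=25M \
    --allow-overwrite=true --continue=true \
    -d llama.cpp/models/Mistral-7B-Instruct-v0.2 -o model-00001-of-00003.safetensors
```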
128 | """) 129 | -------------------------------------------------------------------------------- /pages/Medium_Precision_Quantization.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import streamlit as st 4 | from modules.shared import shared 5 | from apscheduler.schedulers.background import BackgroundScheduler 6 | from pathlib import Path 7 | import sys 8 | 9 | # Initialize the scheduler 10 | scheduler = BackgroundScheduler() 11 | scheduler.start() 12 | 13 | def find_llama_cpp_dir(): 14 | # Search for llama.cpp directory two levels up 15 | current_dir = Path(__file__).resolve().parent 16 | for _ in range(2): 17 | current_dir = current_dir.parent 18 | llama_cpp_dir = current_dir / 'llama.cpp' 19 | if llama_cpp_dir.is_dir(): 20 | return llama_cpp_dir 21 | 22 | # If not found, search two levels down 23 | current_dir = Path(__file__).resolve().parent 24 | for _ in range(2): 25 | current_dir = current_dir / 'llama.cpp' 26 | if current_dir.is_dir(): 27 | return current_dir 28 | 29 | return None 30 | 31 | def list_gguf_files(models_dir): 32 | gguf_files = [] 33 | if os.path.exists(models_dir): 34 | for model_folder in os.listdir(models_dir): 35 | hpq_folder = os.path.join(models_dir, model_folder, 'High-Precision-Quantization') 36 | if os.path.exists(hpq_folder) and os.path.isdir(hpq_folder): 37 | for file in os.listdir(hpq_folder): 38 | if file.lower().endswith('.gguf'): 39 | gguf_files.append(os.path.join(model_folder, 'High-Precision-Quantization', file)) 40 | return gguf_files 41 | 42 | def schedule_quantize_task(command): 43 | try: 44 | subprocess.run(command, check=True) 45 | return f"Task completed: {' '.join(command)}" 46 | except subprocess.CalledProcessError as e: 47 | return f"Error in task execution: {e}" 48 | 49 | def trigger_command(modelpath, options, use_docker): 50 | if not any(options.values()): 51 | return "Error: Please select at least one quantization option." 52 | 53 | debug_output = "" 54 | llama_cpp_dir = find_llama_cpp_dir() 55 | if llama_cpp_dir: 56 | base_dir = llama_cpp_dir / 'models' 57 | gguf_files = list_gguf_files(base_dir) 58 | else: 59 | base_dir = 'llama.cpp/models' 60 | models_dir = os.path.join(base_dir) 61 | gguf_files = list_gguf_files(models_dir) 62 | 63 | if not gguf_files: 64 | st.warning("No GGUF files found using the new logic. 
Falling back to the old logic.") 65 | base_dir = 'llama.cpp/models' 66 | models_dir = os.path.join(base_dir) 67 | gguf_files = list_gguf_files(models_dir) 68 | 69 | modelpath_path = Path(modelpath) 70 | model_name_only, model_file = modelpath_path.parts[-3], modelpath_path.name 71 | medium_precision_dir = base_dir / model_name_only / 'Medium-Precision-Quantization' 72 | medium_precision_dir.mkdir(parents=True, exist_ok=True) 73 | 74 | for option, selected in options.items(): 75 | if selected: 76 | volume_path = base_dir.resolve().drive # This will be 'D:' on Windows if base_dir is on D drive 77 | source_path = base_dir / model_name_only / 'High-Precision-Quantization' / model_file 78 | modified_model_file = model_file.lower().replace('f16.gguf', '').replace('q8_0.gguf', '').replace('f32.gguf', '') 79 | output_path = medium_precision_dir / f"{modified_model_file}-{option.upper()}.GGUF" 80 | absolute_path = os.getcwd().replace('\\', '/') 81 | 82 | 83 | if use_docker: 84 | 85 | docker_image = "luxaplexx/convert-compaan-ollama" 86 | 87 | output_path_in_docker = f"/models/{model_name_only}/Medium-Precision-Quantization/{modified_model_file}-{option.upper()}.GGUF" 88 | command = [ 89 | "docker", "run", "--rm", 90 | "-v", f"{absolute_path}/{base_dir}:/models", 91 | docker_image, "quantize", f"/models/{model_name_only}/High-Precision-Quantization/{model_file}", 92 | str(output_path_in_docker), option 93 | ] 94 | if sys.platform == "linux": 95 | command = [str(base_dir.parent / 'quantize'), str(source_path), str(output_path), option] 96 | else: 97 | command = [str(base_dir / 'quantize'), str(source_path), str(output_path), option] 98 | 99 | print(command) 100 | 101 | scheduler.add_job(schedule_quantize_task, args=[command]) 102 | debug_command_str = ' '.join(command) 103 | debug_output += f"Scheduled: {debug_command_str}\n" 104 | 105 | return debug_output if debug_output else "No options selected." 106 | 107 | # Old UI drawing 108 | # def show_medium_precision_quantization_page(): 109 | 110 | 111 | 112 | st.title("Medium Precision Quantization") 113 | 114 | models_dir = os.path.join("llama.cpp", "models") 115 | gguf_files = list_gguf_files(models_dir) 116 | 117 | selected_gguf_file = st.selectbox("Select a GGUF File", gguf_files) 118 | options = {option: st.checkbox(label=option) for option in shared['checkbox_options']} 119 | use_docker = st.checkbox("Use Docker Container") 120 | 121 | run_commands = st.button("Run Selected Commands") 122 | 123 | if run_commands: 124 | # Check if no quantization type options are selected 125 | if not any(options.values()): 126 | st.error("Please select at least one quantization type before running commands.") 127 | # Proceed only if at least one quantization type is selected or if Docker is selected with a type 128 | elif any(options.values()) or (use_docker and any(options.values())): 129 | status = trigger_command(selected_gguf_file, options, use_docker) 130 | st.text_area("Debug Output", status, height=300) 131 | else: 132 | # This should not happen, but we include it for robustness 133 | st.error("Unexpected condition: No options selected.") 134 | 135 | with st.expander("Step Two: Model Quantization Q and Kquants", expanded=False): 136 | st.markdown(""" 137 | **Step Two: Model Quantization Q and Kquants** 138 | 139 | In this step, you will perform model quantization using Q and Kquants. The files found in the `llama.cpp/models/modelname/High-Precision-Quantization` folder will be displayed here. 140 | 141 | **Instructions:** 142 | 143 | 1. 
Select the GGUF file you want to quantize from the dropdown list. 144 | 2. Check the boxes next to the quantization options you want to apply (Q, Kquants). 145 | 3. Choose whether to use the native `llama.cpp` or a Docker container for compatibility. 146 | 4. Click the "Run Selected Commands" button to schedule and execute the quantization tasks. 147 | 5. The quantized models will be saved in the `/modelname/Medium-Precision-Quantization` folder. 148 | """) 149 | 150 | -------------------------------------------------------------------------------- /pages/Model_Info.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import requests # Import the requests library 3 | from modules.shared import shared 4 | from modules.api_module import get_json, show_model_details, fetch_models # Importing show_model_details 5 | 6 | api_url = shared['api_endpoint']['url'] 7 | 8 | def delete_model(model_name, api_url): 9 | try: 10 | # Define the URL for deleting a model 11 | delete_url = f"{api_url}/api/delete" 12 | 13 | # Create a JSON payload with the model name 14 | payload = {'name': model_name} 15 | 16 | # Send a DELETE request to delete the model 17 | response = requests.delete(delete_url, json=payload) 18 | 19 | # Check the response status code 20 | if response.status_code == 200: 21 | return True 22 | else: 23 | st.error(f"Error deleting model: {response.status_code}") 24 | return False 25 | 26 | except Exception as e: 27 | st.error(f"Error deleting model: {str(e)}") 28 | return False 29 | 30 | 31 | 32 | st.title('Ollama Model Manager') 33 | st.text("Start by downloading your list of available models") 34 | 35 | if 'model_names' not in st.session_state: 36 | st.session_state['model_names'] = [] 37 | 38 | fetch_button = st.button('Fetch Models') 39 | if fetch_button: 40 | model_names = fetch_models(api_url) 41 | if model_names is not None: 42 | st.session_state['model_names'] = model_names 43 | 44 | if st.session_state['model_names']: 45 | st.text("Show detailed model overview, or delete the selected model") 46 | selected_model = st.selectbox("Select a Model", st.session_state['model_names']) 47 | 48 | if st.button('Show Model Details'): 49 | show_model_details(selected_model, api_url) # Ensure selected_model is correctly used 50 | 51 | if st.button('Delete Model'): 52 | if delete_model(selected_model, api_url): 53 | st.success(f"Model '{selected_model}' has been deleted.") 54 | # Update the model list 55 | st.session_state['model_names'].remove(selected_model) 56 | else: 57 | st.error("Failed to delete the model.") 58 | -------------------------------------------------------------------------------- /pages/Modelfile_Creator.py: -------------------------------------------------------------------------------- 1 | # modelfile_templater.py 2 | import streamlit as st 3 | import requests 4 | from modules.shared import shared 5 | def load_model_data(): 6 | url = "https://raw.githubusercontent.com/Luxadevi/Ollama-Colab-Integration/main/models.json" 7 | response = requests.get(url) 8 | return response.json() 9 | 10 | 11 | def show_model_dropdowns(): 12 | json_data = load_model_data() 13 | if not json_data: 14 | st.error("Failed to load model data.") 15 | return None, None 16 | 17 | model_providers = list(json_data.keys()) 18 | selected_provider = st.selectbox("Select Model Provider", model_providers, key="model_provider") 19 | models = json_data.get(selected_provider, []) 20 | selected_model = st.selectbox("Select Model", models, 
key="model_selection") 21 | return selected_provider, selected_model 22 | 23 | def show_parameter_sliders(): 24 | params = {} 25 | for param, (default, range_) in shared['parameters'].items(): 26 | key = f"param_{param}" 27 | if param == 'mirostat': 28 | params[param] = st.selectbox(param, range_, key=key) # Use the options from shared['parameters'] 29 | else: 30 | params[param] = st.slider(param, min_value=range_[0], max_value=range_[1], value=default, key=key) 31 | return params 32 | 33 | def show_model_name_input(key): 34 | return st.text_input("Model Name", key=key) 35 | 36 | 37 | def manage_stop_sequences(): 38 | stop_sequences = [] 39 | for i in range(10): # Allows up to 10 stop sequences 40 | seq = st.text_input(f"Stop Sequence {i+1}", key=f"stop_sequence_{i}") 41 | if seq: 42 | stop_sequences.append(seq) 43 | else: 44 | break # Stop adding more inputs once an empty one is found 45 | return stop_sequences 46 | 47 | def construct_modelfile_content(provider, model, additional_content, system_prompt, stop_seqs, params): 48 | modelfile_content = f"FROM {provider}:{model}\n{additional_content}" 49 | for seq in stop_seqs: 50 | modelfile_content += f"\nPARAMETER stop {seq}" 51 | if system_prompt: 52 | modelfile_content += f"\nSYSTEM {system_prompt}" 53 | for param, value in params.items(): 54 | default_value = shared['parameters'][param][0] 55 | if value != default_value: 56 | modelfile_content += f"\nPARAMETER {param} {value}" 57 | return modelfile_content 58 | 59 | 60 | def create_model(name, modelfile_content, print_payload): 61 | try: 62 | data = {"name": name, "modelfile": modelfile_content} 63 | if print_payload: 64 | st.write("Request Data:", data) 65 | 66 | output_content = "" 67 | text_area_placeholder = st.empty() 68 | with requests.post(f"{shared['api_endpoint']['url']}/api/create", json=data, stream=True) as response: 69 | if response.status_code == 200: 70 | for line in response.iter_lines(): 71 | if line: 72 | decoded_line = line.decode('utf-8') 73 | output_content += decoded_line + "\n" 74 | text_area_placeholder.text_area("Model Build Output", output_content, height=300) 75 | return "Model created successfully." 
76 | else: 77 | return f"Request failed: {response.text}" 78 | except Exception as e: 79 | return f"Error creating model: {str(e)}" 80 | 81 | 82 | 83 | st.title("Model File Creator") 84 | selected_provider, selected_model = show_model_dropdowns() 85 | model_name = show_model_name_input("model_name_creator") 86 | additional_modelfile_content = st.text_area("Additional Modelfile Content", key="modelfile_content_creator") 87 | option_system_prompt = st.text_area("Optional System Prompt", key="option_system_prompt_creator") 88 | 89 | stop_sequences = manage_stop_sequences() 90 | parameters = show_parameter_sliders() 91 | print_payload_details = st.checkbox("Print Payload Details on Webpage") 92 | submit_button = st.button("Build and Deploy Model") 93 | 94 | if submit_button and model_name: 95 | modelfile_content = construct_modelfile_content(selected_provider, selected_model, additional_modelfile_content, option_system_prompt, stop_sequences, parameters) 96 | result = create_model(model_name, modelfile_content, print_payload_details) 97 | st.write(result) 98 | elif not model_name: # Check if model_name is not provided 99 | st.write("Pick a modelname to continue") 100 | 101 | 102 | -------------------------------------------------------------------------------- /pages/Ollama_Endpoint_Url.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from modules.shared import shared # Importing the shared dictionary 3 | from pathlib import Path 4 | from modules.shared import shared 5 | from pathlib import Path 6 | 7 | def update_shared_file(new_url): 8 | try: 9 | # Navigate up one directory level and then enter the "modules" directory 10 | modules_dir = Path('./modules') 11 | 12 | # Specify the path to the shared.py file inside the "modules" directory 13 | shared_file = modules_dir / 'shared.py' 14 | 15 | # Read the current contents of the file 16 | lines = shared_file.read_text().splitlines() 17 | 18 | # Find and modify the api_endpoint line 19 | for i, line in enumerate(lines): 20 | if line.strip().startswith("'api_endpoint':"): 21 | start = line.find('{') 22 | end = line.rfind('}') + 1 23 | dict_str = line[start:end] 24 | shared_dict = eval(dict_str) # Using eval to convert string to dict 25 | shared_dict['url'] = new_url # Modify the url 26 | new_line = f" 'api_endpoint': {shared_dict},\n" 27 | lines[i] = new_line 28 | break 29 | 30 | # Write the modified contents back to the file 31 | shared_file.write_text('\n'.join(lines)) 32 | 33 | return "API Endpoint URL updated successfully!" 34 | except Exception as e: 35 | return f"Error: {e}" 36 | 37 | def is_valid_url(url): 38 | return url.startswith("http://") or url.startswith("https://") 39 | 40 | 41 | 42 | 43 | # Old Ui drawing is not needed to call a function 44 | # def show_ollama_api_configurator(): 45 | 46 | 47 | # Building and using UI 48 | 49 | # Set the title 50 | st.title("Ollama API Configuration") 51 | 52 | # Instructions for setting the API URL 53 | st.info("Set the IP or URL where Ollama is running. 
For local instances, typically use `http://127.0.0.1:11434`.") 54 | 55 | # Display and allow editing of the API endpoint URL 56 | if 'current_url' not in st.session_state: 57 | st.session_state.current_url = shared['api_endpoint']['url'] 58 | 59 | st.write("API Endpoint URL:") 60 | st.session_state.current_url = st.text_input("Edit the API Endpoint URL:", st.session_state.current_url) 61 | 62 | if st.button("Update"): 63 | if is_valid_url(st.session_state.current_url): 64 | message = update_shared_file(st.session_state.current_url) 65 | st.success(message) 66 | st.write("Updated API Endpoint URL:", st.session_state.current_url) 67 | else: 68 | st.error("Invalid URL. Please ensure it starts with http:// or https://") 69 | 70 | # Uncomment this line to run this script directly for testing 71 | # show_ollama_api_configurator() -------------------------------------------------------------------------------- /pages/OpenAI_LiteLLM.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import multiprocessing 3 | import streamlit as st 4 | import requests 5 | import yaml 6 | from modules.shared import shared # Importing the shared dictionary 7 | from apscheduler.schedulers.background import BackgroundScheduler 8 | import socket 9 | import time 10 | from pathlib import Path 11 | 12 | 13 | # Sets folders for the neccesairy files 14 | def initialize_directories(): 15 | current_dir = Path(__file__).parent 16 | root_dir = current_dir.parent # Set the root directory one level up 17 | log_dir = root_dir / 'logs' 18 | config_dir = root_dir / 'configs' 19 | return log_dir, config_dir 20 | 21 | def is_process_running(process_name): 22 | try: 23 | process = subprocess.run(["pgrep", "-f", process_name], capture_output=True, text=True) 24 | return process.stdout != "" 25 | except subprocess.CalledProcessError: 26 | return False 27 | 28 | def kill_process(process_name): 29 | try: 30 | subprocess.run(["pkill", "-f", process_name]) 31 | except Exception as e: 32 | print(f"Error killing process {process_name}: {e}") 33 | 34 | def is_port_in_use(port): 35 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: 36 | return s.connect_ex(('localhost', port)) == 0 37 | 38 | def kill_process_on_port(port): 39 | try: 40 | subprocess.run(["fuser", "-k", f"{port}/tcp"]) 41 | except Exception as e: 42 | print(f"Error killing process on port {port}: {e}") 43 | def start_litellm_proxy(log_file_path, config_file_path, append_to_log=False): 44 | def run_process(): 45 | with open(log_file_path, "a" if append_to_log else "w") as log_file: 46 | process = subprocess.Popen( 47 | ["litellm", "--config", str(config_file_path), "--debug", "--add_function_to_prompt"], 48 | stdout=log_file, 49 | stderr=subprocess.STDOUT 50 | ) 51 | process.wait() 52 | 53 | litellm_process = multiprocessing.Process(target=run_process) 54 | litellm_process.start() 55 | 56 | 57 | def restart_litellm_proxy(log_file_path, config_file_path): 58 | # Start the proxy for the first time (creates new log file) 59 | start_litellm_proxy(log_file_path, config_file_path, append_to_log=False) 60 | # Wait for 4 seconds 61 | time.sleep(4) 62 | # Kill all LiteLLM instances 63 | kill_process("litellm") 64 | # Wait for 2 more seconds 65 | time.sleep(2) 66 | # Restart the proxy (appends to the existing log file) 67 | start_litellm_proxy(log_file_path, config_file_path, append_to_log=True) 68 | # Curls the OpenAI proxy and updates the logfile 69 | # 70 | def test_litellm_proxy(): 71 | try: 72 | result = 
subprocess.run( 73 | ["curl", "--location", "http://127.0.0.1:8000/chat/completions", 74 | "--header", "Content-Type: application/json", 75 | "--data", '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "what llm are you"}]}'], 76 | capture_output=True, text=True 77 | ) 78 | return result.stdout if result.returncode == 0 else f"Error: {result.stderr}" 79 | except Exception as e: 80 | return f"Error executing curl command: {str(e)}" 81 | 82 | 83 | def read_litellm_log(log_file_path): 84 | try: 85 | # Run the test_litellm_proxy function 86 | test_response = test_litellm_proxy() 87 | # print("Test LiteLLM Proxy Response:", test_response) 88 | 89 | time.sleep(2) 90 | with open(log_file_path, "r") as log_file: 91 | lines = log_file.readlines() 92 | 93 | for line in lines: 94 | if "LiteLLM: Proxy initialized with Config, Set models:" in line: 95 | model_lines = [line.strip() for line in lines[lines.index(line) + 1:] if line.strip()] 96 | return "\n".join([line.strip()] + model_lines) 97 | 98 | return "Relevant log information not found." 99 | except Exception as e: 100 | return f"Error: {str(e)}" 101 | 102 | 103 | # Pulls Ollama tags api to get newest models and update Config.Yaml accordingly 104 | # 105 | def poll_api(config_file_path, log_file_path): 106 | api_url = shared['api_endpoint']['url'] 107 | response = requests.get(f"{api_url}/api/tags") 108 | if response.status_code == 200: 109 | json_data = response.json() 110 | model_names = [model['name'] for model in json_data.get('models', [])] 111 | if update_config_file(model_names, config_file_path): 112 | # Config file updated, restart LiteLLM Proxy 113 | restart_litellm_proxy(log_file_path, config_file_path) 114 | 115 | # Start checking for every 15 seconds for updates will restart for new models and give a 116 | # Update about the Config file in the terminal. Only restarts when new models are found. 
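# Note: the scheduler is kept in st.session_state so the polling job survives Streamlit
# reruns; poll_api rewrites configs/config.yaml from /api/tags and only triggers
# restart_litellm_proxy when new models show up.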
117 | def start_polling(config_file_path, log_file_path): 118 | if 'scheduler' not in st.session_state or st.session_state.scheduler is None: 119 | st.session_state.scheduler = BackgroundScheduler() 120 | st.session_state.scheduler.add_job(lambda: poll_api(config_file_path, log_file_path), 'interval', seconds=15) 121 | st.session_state.scheduler.start() 122 | st.success("Polling started") 123 | else: 124 | st.error("Polling is already running.") 125 | 126 | # Stop scheduled checking of api/tags 127 | def stop_polling(): 128 | if 'scheduler' in st.session_state and st.session_state.scheduler: 129 | st.session_state.scheduler.shutdown() 130 | st.session_state.scheduler = None 131 | st.success("Polling stopped") 132 | else: 133 | st.error("Polling not started or already stopped") 134 | 135 | # Logic to update Config.Yaml with the right data 136 | # 137 | def update_config_file(model_names, config_file_path): 138 | if not config_file_path.exists(): 139 | print(f"Config file not found at {config_file_path}") 140 | return False 141 | 142 | with open(config_file_path, "r") as file: 143 | try: 144 | config = yaml.safe_load(file) or {} 145 | except yaml.YAMLError as e: 146 | print(f"Error reading config file: {e}") 147 | return False 148 | 149 | if 'model_list' not in config: 150 | config['model_list'] = [] 151 | 152 | updated_models = set(f"ollama/{name}" for name in model_names) 153 | existing_models = set(model['model_name'] for model in config['model_list']) 154 | needs_update = False 155 | 156 | # Add new models 157 | for model_name in updated_models: 158 | if model_name not in existing_models: 159 | entry = { 160 | 'model_name': model_name, 161 | 'litellm_params': { 162 | 'model': model_name, 163 | 'api_base': shared['api_endpoint']['url'], 164 | 'json': True, 165 | 'drop_params': True 166 | } 167 | } 168 | config['model_list'].append(entry) 169 | print(f"Added new model: {model_name}") 170 | needs_update = True 171 | 172 | # Remove models that are no longer present 173 | original_model_count = len(config['model_list']) 174 | config['model_list'] = [model for model in config['model_list'] if model['model_name'] in updated_models] 175 | if len(config['model_list']) < original_model_count: 176 | removed_models = existing_models - updated_models 177 | print(f"Removed models from config file: {', '.join(removed_models)}") 178 | 179 | if needs_update or len(config['model_list']) < original_model_count: 180 | with open(config_file_path, "w") as file: 181 | yaml.dump(config, file, default_flow_style=False) 182 | print("Config file updated successfully.") 183 | return needs_update 184 | 185 | 186 | ### Interface creator 187 | 188 | # def show_litellm_proxy_page(): 189 | # global scheduler 190 | 191 | log_dir, config_dir = initialize_directories() 192 | log_file_path = log_dir / 'litellmlog' 193 | config_file_path = config_dir / 'config.yaml' 194 | 195 | st.title('OPENAI API Proxy') 196 | st.text("Start Litellm With the button below to convert Ollama traffic to Openai Traffic") 197 | # Define the scheduler variable outside the if statement 198 | 199 | # Button to start and restart the LiteLLM Proxy 200 | if st.button('Start LiteLLM'): 201 | litellm_process = multiprocessing.Process(target=lambda: restart_litellm_proxy(log_file_path, config_file_path)) 202 | litellm_process.start() 203 | st.success("LiteLLM Proxy start and restart sequence initiated") 204 | 205 | if st.button('Read LiteLLM Log'): 206 | log_output = read_litellm_log(log_file_path) 207 | st.text_area("Log Output", log_output, height=500) 
208 | 209 | 210 | if st.write("Start creating new config files for LiteLLM. Whenever there is a new model detected, it will be added to the Config.yaml, and the proxy will be restarted."): 211 | pass 212 | if st.button('Start Polling'): 213 | start_polling(config_file_path, log_file_path) 214 | 215 | if st.button('Stop Polling'): 216 | stop_polling() 217 | 218 | if st.button('Kill Existing LiteLLM Processes'): 219 | litellm_process_name = "litellm" 220 | if is_process_running(litellm_process_name): 221 | kill_process(litellm_process_name) 222 | st.success(f"Killed existing {litellm_process_name} processes") 223 | else: 224 | st.info("No LiteLLM processes found") 225 | 226 | # Button to free up port 8000 if it's in use 227 | if st.button('Free Up Port 8000'): 228 | litellm_port = 8000 229 | if is_port_in_use(litellm_port): 230 | kill_process_on_port(litellm_port) 231 | st.success(f"Freed up port {litellm_port}") 232 | else: 233 | st.info(f"Port {litellm_port} is not in use") 234 | if st.button('Test LiteLLM Proxy'): 235 | test_response = test_litellm_proxy() 236 | st.text_area("Test LiteLLM Proxy Response", test_response, height=150) 237 | with st.expander("LiteLLM Proxy Management"): 238 | st.markdown(""" 239 | **LiteLLM Proxy Management** 240 | 241 | This section allows you to manage and interact with the LiteLLM Proxy, which is used to convert OpenAI GPT models to the OpenAI API standard. 242 | 243 | **LiteLLM Proxy Controls** 244 | 245 | - **Start LiteLLM Proxy:** Click this button to start the LiteLLM Proxy. The proxy will run in the background and facilitate the conversion process. 246 | - **Read LiteLLM Log:** Use this button to read the LiteLLM Proxy log, which contains relevant information about its operation. 247 | - **Start Polling:** Click to initiate polling. Polling checks for updates to the ollama API and adds any new models to the configuration. 248 | - **Stop Polling:** Use this button to stop polling for updates. 249 | - **Kill Existing LiteLLM Processes:** If there are existing LiteLLM processes running, this button will terminate them. 250 | - **Free Up Port 8000:** Click this button to free up port 8000 if it's currently in use. 251 | 252 | Please note that starting the LiteLLM Proxy and performing other actions may take some time, so be patient and wait for the respective success messages. 253 | 254 | **LiteLLM Proxy Log** 255 | 256 | The "Log Output" section will display relevant information from the LiteLLM Proxy log, providing insights into its operation and status. 
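**What the generated config looks like**

When polling finds models on your Ollama instance, entries of roughly this shape are written to `configs/config.yaml` (the model name is only an example of what `/api/tags` might report):

```yaml
model_list:
- litellm_params:
    api_base: http://127.0.0.1:11434
    drop_params: true
    json: true
    model: ollama/mistral:latest
  model_name: ollama/mistral:latest
```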
257 | """) 258 | -------------------------------------------------------------------------------- /pages/Public_Endpoint.py: -------------------------------------------------------------------------------- 1 | import os 2 | import threading 3 | import time 4 | import subprocess 5 | import streamlit as st 6 | from pathlib import Path 7 | 8 | # Get the directory of the current module 9 | module_dir = Path(__file__).parent 10 | 11 | # Define the root directory (one level up from the current module directory) 12 | root_dir = module_dir.parent 13 | 14 | # Define the log directory and log file paths in the root directory 15 | log_dir = root_dir / 'logs' 16 | endpoint_log_path = log_dir / 'endpoint.log' 17 | 18 | # Define the tools directory path in the root directory 19 | tools_dir = root_dir / 'tools' 20 | 21 | flask_thread = None 22 | 23 | def flask_endpoint(): 24 | # Set the path to your Flask endpoint script 25 | endpoint_path = tools_dir / 'endpoint.py' 26 | command = f"PYTHONUNBUFFERED=1 python3 {endpoint_path} > {endpoint_log_path} 2>&1" 27 | os.system(command) 28 | 29 | def start_endpoint_and_get_last_2_lines(): 30 | global flask_thread 31 | try: 32 | flask_thread = threading.Thread(target=flask_endpoint, daemon=True) 33 | flask_thread.start() 34 | 35 | time.sleep(15) 36 | 37 | cloudflare_url = None 38 | with open(endpoint_log_path, "r") as log_file: 39 | for line in log_file: 40 | if ".trycloudflare.com" in line: 41 | cloudflare_url = line.split()[3] 42 | break 43 | 44 | result = "Tunnel proxy setup successful\n\n" 45 | if cloudflare_url: 46 | ollama_endpoint = f"{cloudflare_url}" 47 | openai_endpoint = f"{cloudflare_url}/openai" 48 | result += f"Ollama endpoint is available at: [{ollama_endpoint}]({ollama_endpoint})\n\n" 49 | result += f"OpenAI API endpoint is available at: [{openai_endpoint}]({openai_endpoint})" 50 | 51 | return result 52 | except Exception as e: 53 | return f"Error: {str(e)}" 54 | 55 | def kill_endpoint(): 56 | try: 57 | # Find processes using port 5000 and kill them 58 | pids = subprocess.check_output(["lsof", "-t", "-i:5000"]).decode().splitlines() 59 | for pid in pids: 60 | subprocess.run(["kill", "-9", pid]) 61 | 62 | return "Endpoint killed successfully." 63 | except Exception as e: 64 | return f"Error: {str(e)}" 65 | 66 | 67 | # Old UI drawing 68 | # def show_public_endpoint_page(): 69 | st.title("Public Endpoint Management") 70 | 71 | if st.button("Start Endpoint"): 72 | result = start_endpoint_and_get_last_2_lines() 73 | 74 | # Check if result contains URLs and convert them to Markdown links 75 | if "Running on" in result: 76 | lines = result.split('\n') 77 | for i, line in enumerate(lines): 78 | if line.startswith("* Running on"): 79 | url = line.split()[3] 80 | lines[i] = f"* Running on [link]({url})" 81 | elif line.startswith("* Traffic stats available on"): 82 | url = line.split()[4] 83 | lines[i] = f"* Traffic stats available on [link]({url})" 84 | result = '\n'.join(lines) 85 | 86 | # Display the result as Markdown 87 | st.markdown(result) 88 | 89 | if st.button("Kill Endpoint"): 90 | result = kill_endpoint() 91 | st.text(result) 92 | 93 | with st.expander("Public Endpoint Information"): 94 | st.markdown(""" 95 | **Public Endpoint Management** 96 | 97 | This section is dedicated to managing and accessing the public endpoint for OpenAI and Ollama APIs. 98 | 99 | **Public Endpoint Controls** 100 | 101 | - **Start Public Endpoint:** Use this button to start the public endpoint. The endpoint will be accessible for interfacing with the OpenAI or Ollama API. 
102 | - **Read Public Endpoint Log:** This section will display the last few lines of the log, providing insights into the endpoint's operation. 103 | - **Access Public Endpoint:** Once the endpoint is running, it will be accessible at specific URLs provided in the log output. 104 | - **Kill Public Endpoint:** If the endpoint is running and needs to be stopped, use this button to terminate it. 105 | 106 | Please be patient when starting or stopping the public endpoint as these actions may take some time to complete. 107 | """) 108 | -------------------------------------------------------------------------------- /pages/Upload_Converted_To_HF.py: -------------------------------------------------------------------------------- 1 | import os 2 | import streamlit as st 3 | from huggingface_hub import HfApi 4 | from requests.exceptions import HTTPError 5 | from cryptography.fernet import Fernet 6 | from pages.HF_Token_Encrypter import load_key 7 | from pathlib import Path # Import pathlib 8 | 9 | # Initialize session state for selected_model 10 | if 'selected_model' not in st.session_state: 11 | st.session_state['selected_model'] = '' 12 | if 'selected_files' not in st.session_state: 13 | st.session_state['selected_files'] = [] 14 | 15 | # Define a function to search for the llama.cpp directory 16 | def find_llama_cpp_dir(): 17 | # Search for llama.cpp directory three levels up 18 | current_dir = Path(__file__).resolve().parent 19 | for _ in range(3): 20 | current_dir = current_dir.parent 21 | llama_cpp_dir = current_dir / 'llama.cpp' 22 | models_dir = llama_cpp_dir / "models" 23 | if llama_cpp_dir.is_dir(): 24 | return llama_cpp_dir, models_dir 25 | 26 | # If not found, search two levels down 27 | current_dir = Path(__file__).resolve().parent 28 | for _ in range(2): 29 | current_dir = current_dir / 'llama.cpp' 30 | models_dir = current_dir / "models" 31 | if current_dir.is_dir(): 32 | return current_dir, models_dir 33 | 34 | return None, None 35 | 36 | # Search for the llama.cpp directory 37 | llama_cpp_dir, models_dir = find_llama_cpp_dir() 38 | if not llama_cpp_dir: 39 | st.error("llama.cpp directory not found. 
Please check the file structure.") 40 | 41 | 42 | 43 | 44 | ## Uses username from HF Token 45 | def get_username_from_token(token): 46 | api = HfApi() 47 | user_info = api.whoami(token=token) 48 | return user_info['name'] 49 | 50 | # Decrypt the token 51 | def decrypt_token(encrypted_token): 52 | key = load_key() 53 | f = Fernet(key) 54 | return f.decrypt(encrypted_token.encode()).decode() 55 | 56 | 57 | # Gathers files and uploads to HuggingFace 58 | def upload_files_to_repo(token, models_dir, repo_name, files_to_upload, readme_content, high_precision_files, medium_precision_files, selected_model): 59 | try: 60 | api = HfApi() 61 | username = get_username_from_token(token) 62 | repo_id = f"{username}/{repo_name}" 63 | 64 | # Check if the repository exists, if not create it 65 | try: 66 | api.repo_info(repo_id=repo_id, token=token) 67 | except HTTPError as e: 68 | if e.response.status_code == 404: 69 | api.create_repo(repo_id=repo_id, token=token, repo_type="model") 70 | else: 71 | raise 72 | 73 | # Upload README.md if content is provided 74 | if readme_content: 75 | readme_path = Path(models_dir) / 'README.md' 76 | with open(str(readme_path), 'w') as readme_file: 77 | readme_file.write(readme_content) 78 | api.upload_file(path_or_fileobj=str(readme_path), path_in_repo='README.md', repo_id=repo_id, token=token) 79 | os.remove(str(readme_path)) 80 | 81 | # Upload selected files 82 | for file_name in files_to_upload: 83 | if file_name in high_precision_files.get(selected_model, []): 84 | folder_path = Path(models_dir) / selected_model / "High-Precision-Quantization" 85 | elif file_name in medium_precision_files.get(selected_model, []): 86 | folder_path = Path(models_dir) / selected_model / "Medium-Precision-Quantization" 87 | else: 88 | continue 89 | 90 | file_path = folder_path / file_name 91 | if file_path.is_file(): 92 | api.upload_file(path_or_fileobj=str(file_path), path_in_repo=file_name, repo_id=repo_id, token=token) 93 | 94 | return f"Files uploaded successfully. 
View at: https://huggingface.co/{repo_id}" 95 | except Exception as e: 96 | return f"An error occurred: {str(e)}" 97 | 98 | 99 | 100 | # Cache the function to improve performance 101 | @st.cache_data 102 | def list_model_files(models_dir, subfolder): 103 | model_files = {} 104 | models_dir_path = Path(models_dir) 105 | if models_dir_path.exists() and models_dir_path.is_dir(): 106 | for model_folder in models_dir_path.iterdir(): 107 | specific_folder = model_folder / subfolder 108 | if specific_folder.exists() and specific_folder.is_dir(): 109 | model_files[model_folder.name] = [file.name for file in specific_folder.iterdir() if file.is_file()] 110 | return model_files 111 | 112 | # List files in High-Precision and Medium-Precision folders 113 | high_precision_files = list_model_files(models_dir, "High-Precision-Quantization") 114 | medium_precision_files = list_model_files(models_dir, "Medium-Precision-Quantization") 115 | 116 | # After calling list_model_files, check the contents 117 | # print("High Precision Files:", high_precision_files) 118 | # print("Medium Precision Files:", medium_precision_files) 119 | 120 | # Old UI drawing 121 | # def show_model_management_page(): 122 | # Search for the llama.cpp directory 123 | # llama_cpp_dir = find_llama_cpp_dir() 124 | # models_dir = "llama.cpp/models/" 125 | # high_precision_files = list_model_files(models_dir, "High-Precision-Quantization") 126 | # medium_precision_files = list_model_files(models_dir, "Medium-Precision-Quantization") 127 | 128 | 129 | def get_combined_files(model): 130 | # Combine files from both precision types 131 | return high_precision_files.get(model, []) + medium_precision_files.get(model, []) 132 | 133 | # Main UI 134 | st.title("Upload Converted Models to HuggingFace") 135 | 136 | # Select a model 137 | all_models = list(set(high_precision_files.keys()) | set(medium_precision_files.keys())) 138 | selected_model = st.selectbox("Select a Model", all_models, index=0) 139 | 140 | # File selection multiselect 141 | combined_files = get_combined_files(selected_model) 142 | selected_files = st.multiselect("Select Files to Upload", combined_files, key="file_selector") 143 | 144 | 145 | # Repository details and README content 146 | repo_name = st.text_input("Repository Name", value=f"{selected_model}-GGUF") 147 | readme_content = st.text_area("README.md Content", "Enter content for README.md") 148 | 149 | # Token input 150 | use_unencrypted_token = st.checkbox("Unencrypted Token") 151 | if use_unencrypted_token: 152 | hf_token = st.text_input("Hugging Face Token", type="password") 153 | else: 154 | encrypted_token = st.text_input("Enter Encrypted Token", type="password") 155 | if encrypted_token: 156 | hf_token = decrypt_token(encrypted_token) 157 | 158 | # Upload button 159 | if st.button("Upload Selected Files") and hf_token: 160 | upload_message = upload_files_to_repo( 161 | token=hf_token, 162 | models_dir=models_dir, 163 | repo_name=repo_name, 164 | files_to_upload=selected_files, 165 | readme_content=readme_content, 166 | high_precision_files=high_precision_files, 167 | medium_precision_files=medium_precision_files, 168 | selected_model=selected_model 169 | ) 170 | st.info(upload_message) 171 | 172 | if 'HUGGINGFACE_TOKEN' in os.environ: 173 | del os.environ['HUGGINGFACE_TOKEN'] 174 | 175 | with st.expander("Model Upload Instructions", expanded=False): 176 | st.markdown(""" 177 | **Model Upload Instructions** 178 | 179 | Use this section to upload your converted models to Hugging Face with enhanced security. 
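
 **Token Encryption at a Glance**

 A minimal sketch of what the encrypted-token flow amounts to (the Token Encrypt page handles the first step; `load_key()` is assumed to return the same Fernet key that this page uses to decrypt):

 ```python
 from cryptography.fernet import Fernet
 from pages.HF_Token_Encrypter import load_key

 key = load_key()
 encrypted = Fernet(key).encrypt("hf_xxx_your_token".encode()).decode()
 # Paste `encrypted` into "Enter Encrypted Token"; this page reverses it with
 # Fernet(key).decrypt(encrypted.encode()).decode() before contacting Hugging Face.
 ```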
180 | 181 | **Steps for Uploading Models:** 182 | 183 | 1. **Select a Model:** Choose a model from the dropdown list. These models are found in the `llama.cpp/models` directory. 184 | 2. **Enter Repository Name:** Specify a name for the new Hugging Face repository where your model will be uploaded. 185 | 3. **Choose Files for Upload:** Select the files you want to upload from the chosen model's subfolders. 186 | 4. **Add README Content:** Optionally, write content for the README.md file of your repository. 187 | 5. **Token Usage:** 188 | - For added security, use an encrypted token. Encrypt your Hugging Face token on the **Token Encrypt** page and paste it into the "Enter Encrypted Token" field. 189 | - Alternatively, you can directly enter an unencrypted Hugging Face token. 190 | 6. **Upload Files:** Click the "Upload Selected Files" button to start uploading your files to Hugging Face. 191 | 192 | The uploaded models will be viewable at `https://huggingface.co/your-username/your-repo-name`. 193 | """) 194 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | requests 3 | flask 4 | flask-cloudflared 5 | litellm 6 | huggingface_hub 7 | asyncio 8 | Pyyaml 9 | httpx 10 | APScheduler 11 | cryptography 12 | pycloudflared 13 | sentencepiece 14 | transformers 15 | litellm[proxy] 16 | st-pages==0.4.1 17 | -------------------------------------------------------------------------------- /run_app.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import re 3 | 4 | def start_tunnel(): 5 | print("Starting Cloudflare Tunnel...") 6 | # Start the Cloudflare Tunnel and capture its output 7 | process = subprocess.Popen(['pycloudflared', 'tunnel', '--url', 'http://127.0.0.1:8501'], 8 | stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) 9 | 10 | # Read the output line by line and search for the URL 11 | for line in iter(process.stdout.readline, ''): 12 | if '.trycloudflare.com' in line: 13 | url = re.search(r'https://[a-zA-Z0-9-]+\.trycloudflare\.com', line) 14 | if url: 15 | print(f"Tunnel URL: {url.group()}") 16 | break 17 | 18 | 19 | 20 | def run_streamlit(): 21 | print("Starting Streamlit App...") 22 | try: 23 | subprocess.check_call(['streamlit', 'run', './Homepage.py']) 24 | except subprocess.CalledProcessError as e: 25 | print(f"Error running Streamlit: {e}") 26 | except FileNotFoundError: 27 | print("Streamlit file not found, checking current directory") 28 | try: 29 | subprocess.check_call(['streamlit', 'run', './Homepage.py']) 30 | except subprocess.CalledProcessError as e: 31 | print(f"Error running Streamlit from current directory: {e}") 32 | 33 | def main(): 34 | try: 35 | start_tunnel() 36 | except Exception as e: 37 | print(f"Error starting tunnel: {e}") 38 | 39 | try: 40 | run_streamlit() 41 | except Exception as e: 42 | print(f"Error running Streamlit: {e}") 43 | 44 | if __name__ == "__main__": 45 | main() 46 | -------------------------------------------------------------------------------- /run_tunnel.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import re 3 | import threading 4 | import time 5 | 6 | def start_tunnel(): 7 | print("Starting Cloudflare Tunnel...") 8 | # Start the Cloudflare Tunnel and capture its output 9 | process = subprocess.Popen(['pycloudflared', 'tunnel', '--url', 'http://127.0.0.1:8501'], 10 
| stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) 11 | 12 | # Read the output line by line and search for the URL for 10 seconds 13 | start_time = time.time() 14 | while time.time() - start_time < 10: # Run for 10 seconds 15 | line = process.stdout.readline() 16 | if '.trycloudflare.com' in line: 17 | url = re.search(r'https://[a-zA-Z0-9-]+\.trycloudflare\.com', line) 18 | if url: 19 | print(f"Tunnel URL: {url.group()}") 20 | break 21 | 22 | # After 10 seconds, continue running without printing 23 | while True: 24 | process.stdout.readline() 25 | if process.poll() is not None: 26 | break # Exit the loop if the process ends 27 | 28 | def main(): 29 | try: 30 | # Start the tunnel in a separate thread 31 | tunnel_thread = threading.Thread(target=start_tunnel) 32 | tunnel_thread.start() 33 | 34 | # You can perform other tasks here or just wait for the thread 35 | tunnel_thread.join() # Optionally, wait indefinitely for the thread 36 | except Exception as e: 37 | print(f"Error starting tunnel: {e}") 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Run this script with the arguments -lan or -local to start the companion without 3 | # generating a public URL. 4 | 5 | # Get the directory of this script 6 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 7 | 8 | # Change to the script's directory 9 | cd "$SCRIPT_DIR" 10 | 11 | # Function to kill any existing Python process running `ollama.py` 12 | kill_existing_ollama() { 13 | pgrep -f 'python3.*ollama\.py' | xargs -r kill -9 14 | echo "Terminated existing Ollama processes" 15 | } 16 | 17 | # Launch virtual environment 18 | 19 | start_locally() { 20 | kill_existing_ollama 21 | echo "Starting Ollama-Companion locally on port 8501" 22 | streamlit run Homepage.py 23 | } 24 | start_colab() { 25 | kill_existing_ollama 26 | pgrep -f '.*tunnel.*127\.0\.0\.1:8501.*' | xargs -r kill -9 27 | echo "Starting Ollama-Companion with a public URL" 28 | python3 run_tunnel.py & 29 | sleep 8 30 | python3 "$SCRIPT_DIR/tools/ollama.py" > /dev/null 2>&1 & 31 | echo "Starting Ollama" 32 | streamlit run Homepage.py 33 | } 34 | 35 | start_public() { 36 | kill_existing_ollama 37 | pgrep -f '.*tunnel.*127\.0\.0\.1:8501.*' | xargs -r kill -9 38 | echo "Starting Ollama-Companion with a public URL" 39 | python3 run_tunnel.py & 40 | sleep 8 41 | streamlit run Homepage.py 42 | } 43 | 44 | # Default function to start_public 45 | function_to_run=start_public 46 | 47 | # Check if the script is running from `/content/Ollama-Companion` and set `start_colab` 48 | if [[ "$SCRIPT_DIR" == "/content/Ollama-Companion" ]]; then 49 | function_to_run=start_colab 50 | else 51 | # Parse arguments to override the default function 52 | for arg in "$@"; do 53 | case $arg in 54 | -local|-lan) 55 | function_to_run=start_locally 56 | break 57 | ;; 58 | *) 59 | ;; 60 | esac 61 | done 62 | fi 63 | 64 | # Run the selected function 65 | $function_to_run 66 | -------------------------------------------------------------------------------- /tools/endpoint.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | from flask import Flask, request, Response 3 | import requests 4 | from flask_cloudflared import run_with_cloudflared 5 | from threading import Thread 6 | import sys 7 | import os 8 | import logging 9 | from flask import 
stream_with_context 10 | 11 | # Add the parent directory (project) to the sys.path 12 | sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') 13 | 14 | # Import shared module from the "modules" directory 15 | from modules.shared import shared 16 | 17 | api_url = shared['api_endpoint']['url'] 18 | app = Flask(__name__) 19 | 20 | # Responses are streamed back line by line instead of being buffered 21 | 22 | 23 | @app.route('/', defaults={'path': ''}) 24 | @app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH']) 25 | def proxy(path): 26 | url = f'{api_url}/{path}' 27 | resp = requests.request( 28 | method=request.method, 29 | url=url, 30 | headers={key: value for (key, value) in request.headers if key != 'Host'}, 31 | data=request.get_data(), 32 | cookies=request.cookies, 33 | allow_redirects=False, 34 | stream=True) # Enable streaming 35 | 36 | def generate(): 37 | for line in resp.iter_lines(): 38 | if line: 39 | yield line + b'\n' 40 | 41 | excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection'] 42 | headers = [(name, value) for (name, value) in resp.raw.headers.items() 43 | if name.lower() not in excluded_headers] 44 | return Response(generate(), resp.status_code, headers) 45 | 46 | @app.route('/openai', defaults={'path': ''}) 47 | @app.route('/openai/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH']) 48 | def openai_proxy(path): 49 | 50 | new_url = f'http://127.0.0.1:8000/{path}' 51 | logging.info(f"Proxying to URL: {new_url}") 52 | 53 | resp = requests.request( 54 | method=request.method, 55 | url=new_url, 56 | headers={key: value for (key, value) in request.headers if key != 'Host'}, 57 | data=request.get_data(), 58 | cookies=request.cookies, 59 | allow_redirects=False, 60 | stream=True) # Enable streaming 61 | 62 | def generate(): 63 | for line in resp.iter_lines(): 64 | if line: 65 | yield line + b'\n' 66 | 67 | excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection'] 68 | headers = [(name, value) for (name, value) in resp.raw.headers.items() 69 | if name.lower() not in excluded_headers] 70 | return Response(generate(), resp.status_code, headers) 71 | run_with_cloudflared(app) 72 | 73 | if __name__ == '__main__': 74 | app.run(host='0.0.0.0') 75 | -------------------------------------------------------------------------------- /tools/notebook.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import threading 3 | import re 4 | 5 | def launch_start_script(): 6 | script_path = '/content/Ollama-Companion/start.sh' 7 | try: 8 | subprocess.run([script_path], check=True, cwd='/content/Ollama-Companion', shell=True) 9 | except subprocess.CalledProcessError as e: 10 | print(f"Error: {e}") 11 | else: 12 | print(f"Script {script_path} has been successfully launched.") 13 | 14 | def run_ollama(): 15 | print("Starting Ollama...") 16 | subprocess.Popen(['python3', '/content/ollama.py']) 17 | 18 | def main(): 19 | ollama_thread = threading.Thread(target=run_ollama) 20 | ollama_thread.start() 21 | 22 | # Now the main thread can continue doing other things. 23 | # Note: Since we are not waiting for ollama_thread to finish, 24 | # we won't use ollama_thread.join() here. 
25 | 26 | launch_start_script() 27 | 28 | if __name__ == "__main__": 29 | main() 30 | -------------------------------------------------------------------------------- /tools/ollama.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import threading 3 | import time 4 | import logging.handlers 5 | import httpx 6 | import sys 7 | import os 8 | 9 | def create_logger(name, filename, level, formatter): 10 | logger = logging.getLogger(name) 11 | handler = logging.handlers.RotatingFileHandler(filename, maxBytes=5*1024*1024, backupCount=5) 12 | handler.setFormatter(formatter) 13 | logger.addHandler(handler) 14 | logger.setLevel(level) 15 | return logger 16 | 17 | status_formatter = logging.Formatter('[%(asctime)s] [%(levelname)s] [%(name)s] - %(message)s') 18 | error_formatter = logging.Formatter('[%(asctime)s] [%(levelname)s] [%(name)s] - %(message)s') 19 | 20 | loggers = { 21 | "Status": create_logger("Status", "status.log", logging.INFO, status_formatter), 22 | "OllamaStatus": create_logger("OllamaStatus", "ollama.log", logging.INFO, status_formatter), 23 | "Error": create_logger("Error", "error.log", logging.ERROR, error_formatter), 24 | "OllamaError": create_logger("OllamaError", "ollama_error.log", logging.ERROR, error_formatter) 25 | } 26 | 27 | class ProcessMonitor: 28 | def __init__(self): 29 | self.processes = {} 30 | self.is_monitoring = True 31 | 32 | def handle_output(self, process_name): 33 | process = self.processes[process_name] 34 | logger_status = loggers[f"{process_name.capitalize()}Status"] 35 | for line in iter(process.stdout.readline, b''): 36 | logger_status.info(line.decode().strip()) 37 | 38 | def run_ollama(self): 39 | os.environ["OLLAMA_HOST"] = "0.0.0.0:11434" 40 | os.environ["OLLAMA_ORIGINS"] = "http://0.0.0.0:*" 41 | 42 | cmd = "ollama serve" 43 | # Redirect subprocess output to /dev/null 44 | with open(os.devnull, 'wb') as devnull: 45 | self.processes['ollama'] = subprocess.Popen(cmd, shell=True, stdout=devnull, stderr=devnull) 46 | loggers["OllamaStatus"].info(f"Started ollama with command: {cmd}") 47 | 48 | def monitor_process(self, process_name): 49 | while self.is_monitoring: 50 | if self.processes[process_name].poll() is not None: 51 | loggers["Status"].warning(f"{process_name} process has stopped. Restarting...") 52 | self.run_ollama() 53 | time.sleep(5) 54 | 55 | def start(self): 56 | self.run_ollama() 57 | threading.Thread(target=self.monitor_process, args=('ollama',)).start() 58 | 59 | def stop(self): 60 | self.is_monitoring = False 61 | for p in self.processes.values(): 62 | p.terminate() 63 | 64 | if __name__ == '__main__': 65 | monitor = ProcessMonitor() 66 | monitor.start() 67 | --------------------------------------------------------------------------------
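ProcessMonitor launches `ollama serve` bound to 0.0.0.0:11434 and restarts it whenever the process exits. A minimal sketch of how a caller could confirm the server is reachable before relying on it, using the same `/api/tags` route that the LiteLLM polling checks (the host and timeout values here are assumptions):

```python
import httpx

def ollama_is_up(base_url: str = "http://127.0.0.1:11434", timeout: float = 3.0) -> bool:
    """Return True if the Ollama server launched by ProcessMonitor answers /api/tags."""
    try:
        return httpx.get(f"{base_url}/api/tags", timeout=timeout).status_code == 200
    except httpx.HTTPError:
        return False

if __name__ == "__main__":
    print("Ollama reachable:", ollama_is_up())
```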