├── .dockerignore ├── .env ├── .gitattributes ├── .github └── workflows │ └── python-package-publish.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── benchmarks ├── llm_gpu_benchmark.py ├── llm_gpu_benchmark_text-generation-inference.html ├── llm_gpu_benchmark_transformers.html ├── llm_gpu_benchmarks.json ├── perf.json ├── perf.md └── rag_benchmark.md ├── blog └── README.md ├── ci └── jenkinsfile ├── cloud └── packer │ ├── Jenkinsfile │ ├── README.md │ ├── h2oai-h2ogpt-4096-llama2-13b-chat.sh │ ├── h2ogpt-azure.json │ ├── h2ogpt-gcp.json │ ├── install_h2ogpt.sh │ ├── setup_environment.sh │ └── startup-scripts │ ├── h2ogpt.service │ ├── h2ogpt_nginx.service │ ├── run_h2ogpt.sh │ ├── run_nginx.sh │ ├── run_vllm.sh │ ├── temp.conf │ └── vllm.service ├── data ├── NGSL_1.2_stats.csv.zip ├── README-template.md ├── Sample-Invoice-printable.png ├── censor_words.txt ├── config.json ├── count_1w.txt.zip ├── create_data_cards.py ├── dai_docs.train.json ├── dai_docs.train_cleaned.json ├── dai_docs.valid.json ├── dai_faq.json ├── demo.png ├── example.xlsx ├── h2ogpt-personality.json ├── merged.json └── pexels-evg-kowalievska-1170986_small.jpg ├── dev_installers └── mac │ ├── README.md │ ├── build_mac_installer.sh │ ├── h2ogpt-osx-m1-cpu.spec │ ├── h2ogpt-osx-m1-gpu.spec │ └── mac_run_app.py ├── docker-compose-cpu.yml ├── docker-compose-vllm.yml ├── docker-compose.yml ├── docker_build_script_ubuntu.sh ├── docs ├── Dockerfile.delta2 ├── Dockerfile.internvl ├── FAQ.md ├── FINETUNE.md ├── INSTALL.md ├── LINKS.md ├── README_Agents.md ├── README_CLI.md ├── README_CLIENT.md ├── README_CPU.md ├── README_DOCKER.md ├── README_GPU.md ├── README_InferenceServers.md ├── README_LINUX.md ├── README_LangChain.md ├── README_MACOS.md ├── README_SerpAPI.md ├── README_WHEEL.md ├── README_WINDOWS.md ├── README_offline.md ├── README_quickstart.md ├── README_ui.md ├── TRITON.md ├── autogen.patch ├── autogen2.patch ├── aws_sagemaker_endpoint_setup.pdf ├── build_windows_gpu.sh ├── chat_headerless.png ├── chat_tabless.png ├── chat_view.png ├── google.patch ├── h2o-qr.png ├── h2oGPT_CPU.ipynb ├── h2oGPT_GPU.ipynb ├── img-small.png ├── img.png ├── langchain.png ├── linux_install.sh ├── linux_install_full.sh ├── llamasmall.jpg ├── minicondashellsmall.png ├── minigw32small.png ├── model_add.png ├── models_compare.png ├── models_lock.png ├── ollama_max_seq_len.png ├── ollama_setup.png ├── ollama_use.png ├── onedocselected.png ├── only_all_sources.png ├── openai.patch ├── openwebui1.png ├── openwebui2.png ├── pytubefix.patch ├── run_patches.sh ├── saved_chats.png ├── setup_docker_linux.sh ├── tos.md ├── trans.patch ├── trans2.patch ├── ui_1.png ├── ui_10.png ├── ui_2.png ├── ui_3.png ├── ui_4.png ├── ui_5.png ├── ui_6.png ├── ui_7.png ├── ui_8.png ├── ui_9.png ├── ui_9b.png ├── ui_9c.png ├── ui_talk_to_images.png ├── voice_clone.png ├── vs2022small.png ├── windows_freezelist.txt ├── windows_install.bat └── xtt.patch ├── finetune.py ├── generate.py ├── gradio_utils ├── __init__.py ├── css.py ├── google_auth.py ├── grclient.py ├── prompt_form.py └── yield_utils.py ├── h2o-logo.svg ├── h2ogpt ├── __init__.py ├── generate.py ├── gradio_utils ├── iterators ├── metrics ├── models ├── openai_server ├── spkemb └── src ├── helm └── h2ogpt-chart │ ├── .helmignore │ ├── Chart.yaml │ ├── templates │ ├── _helpers.tpl │ ├── config-map.yaml │ ├── deployment.yaml │ └── service.yaml │ └── values.yaml ├── iterators ├── __init__.py ├── iterator_pipe.py └── timeout_iterator.py ├── metrics ├── __init__.py └── quip.py ├── 
models ├── README-template.md ├── __init__.py ├── anthropic.jpeg ├── anthropic.png ├── create_model_cards.py ├── female.wav ├── google.png ├── gpu_mem_track.py ├── h2oai.png ├── hf-logo.png ├── human.jpg ├── human.png ├── lama.jpeg ├── lama2.jpeg ├── llava.png ├── longalpaca.png ├── makevllm.sh ├── male.wav ├── meta.png ├── mistralai.png ├── openai.png ├── openchat.png ├── pirate_by_coqui.wav ├── predict_aquila.py ├── test_scrape1.py ├── vicuna.jpeg ├── wizard.jpg └── yi.svg ├── notebooks └── h2oGPT_api_examples.ipynb ├── openai_server ├── __init__.py ├── agent_prompting.py ├── agent_tools │ ├── aider_code_generation.py │ ├── ask_question_about_documents.py │ ├── ask_question_about_image.py │ ├── audio_transcription.py │ ├── bing_search.py │ ├── common │ │ └── utils.py │ ├── convert_document_to_text.py │ ├── download_web_video.py │ ├── driverless_ai_data_science.py │ ├── google_search.py │ ├── image_generation.py │ ├── mermaid_renderer.py │ ├── news_query.py │ ├── query_to_web_image.py │ ├── scholar_papers_query.py │ └── wolfram_alpha_math_science_query.py ├── agent_utils.py ├── autogen_2agent_backend.py ├── autogen_agents.py ├── autogen_multi_agent_backend.py ├── autogen_streaming.py ├── autogen_utils.py ├── backend.py ├── backend_utils.py ├── chat_history_render.py ├── cogvlm2_server │ ├── cogvlm2.py │ └── requirements.txt ├── function_server.py ├── log.py ├── openai_client.py ├── server.py ├── server_start.py ├── test_autogen_utils.py ├── test_backend_utils.py ├── test_conversion.py ├── test_openai_server.py └── test_prompt_caching.py ├── papers └── technical-report │ ├── compile.sh │ ├── conf.sty │ ├── h2oGPT-TR.pdf │ ├── h2oGPT-TR.tex │ └── images │ ├── chatbot.png │ ├── h2oGPT-light.pdf │ ├── h2oGPT.pdf │ ├── langchain.png │ ├── llm-studio-logo.pdf │ ├── llmstudio1.png │ └── llmstudio2.png ├── reqs_optional ├── reqs_constraints.txt ├── requirements_optional_agents.txt ├── requirements_optional_audio.txt ├── requirements_optional_cpu_only.txt ├── requirements_optional_doctr.txt ├── requirements_optional_gpu_only.txt ├── requirements_optional_image.txt ├── requirements_optional_langchain.gpllike.txt ├── requirements_optional_langchain.metrics.txt ├── requirements_optional_langchain.txt ├── requirements_optional_langchain.urls.txt ├── requirements_optional_llamacpp_gpt4all.txt ├── requirements_optional_training.txt └── requirements_optional_wikiprocessing.txt ├── requirements.txt ├── setup.py ├── spaces ├── chatbot │ └── repo_to_spaces.sh └── demo │ ├── app.py │ ├── app_client_test.py │ ├── h2oai_pipeline.py │ └── requirements.txt ├── spkemb ├── cmu_us_awb_arctic-wav-arctic_a0002.npy ├── cmu_us_bdl_arctic-wav-arctic_a0009.npy ├── cmu_us_clb_arctic-wav-arctic_a0144.npy ├── cmu_us_ksp_arctic-wav-arctic_b0087.npy ├── cmu_us_rms_arctic-wav-arctic_b0353.npy └── cmu_us_slt_arctic-wav-arctic_a0508.npy ├── src ├── __init__.py ├── audio_langchain.py ├── basic_nltk.py ├── cli.py ├── client_test.py ├── create_data.py ├── db_utils.py ├── enums.py ├── eval.py ├── evaluate_params.py ├── export_hf_checkpoint.py ├── function_client.py ├── function_server.py ├── gen.py ├── gpt4all_llm.py ├── gpt_langchain.py ├── gradio_funcs.py ├── gradio_runner.py ├── gradio_themes.py ├── h2o-logo.ico ├── h2o_serpapi.py ├── h2oai_pipeline.py ├── image_captions.py ├── image_doctr.py ├── image_pix2struct.py ├── image_utils.py ├── langchain_mistralai │ └── chat_models.py ├── langchain_openai_local.py ├── llama_flash_attn_monkey_patch.py ├── llm_exllama.py ├── loaders.py ├── make_db.py ├── model_utils.py ├── 
output_parser.py ├── pandas_agent_langchain.py ├── pre-commit ├── prepare_offline.py ├── prompter.py ├── prompter_utils.py ├── read_wiki_full.py ├── sagemaker.py ├── stopping.py ├── stt.py ├── tts.py ├── tts_coqui.py ├── tts_sentence_parsing.py ├── tts_utils.py ├── utils.py ├── utils_langchain.py ├── utils_procs.py ├── utils_sys.py ├── version.py └── vision │ ├── __init__.py │ ├── extract_movie.py │ ├── flux.py │ ├── playv2.py │ ├── sdxl_turbo.py │ ├── stable_diffusion_xl.py │ └── utils_vision.py ├── tests ├── 1paul_graham.txt ├── 2403.09629.pdf ├── CityofTshwaneWater.pdf ├── __init__.py ├── conftest.py ├── dental.png ├── driverslicense.jpeg.zip ├── dummy.pdf ├── example.xlsx ├── fastfood.jpg ├── gridnumbers.gif ├── image-based-pdf-sample.pdf ├── jon.png ├── memory_hog_script.py ├── next.txt ├── ocr1.png ├── ocr2.png ├── ocr3.png ├── pdf-sample.pdf ├── porsche.mp3.zip ├── receipt.jpg ├── revenue.png ├── rotate-ex2.png ├── sample.eml ├── sample.pdf ├── screenshot.png ├── table_as_image.docx ├── test4gpus.sh ├── test_animated_gif.gif ├── test_async_iterator_pipe.py ├── test_async_timeout_iterator.py ├── test_cli.py ├── test_client_calls.py ├── test_client_readme.py ├── test_eval.py ├── test_eval_models.py ├── test_fine_tune_export_tgi.sh ├── test_imports.py ├── test_inference_servers.py ├── test_iterator_pipe.py ├── test_langchain_simple.py ├── test_langchain_units.py ├── test_long_context.py ├── test_manual_test.py ├── test_metrics.py ├── test_openai_server.py ├── test_perf_benchmarks.py ├── test_pipeline.py ├── test_prompter.py ├── test_requirements.py ├── test_sentence_parsing.py ├── test_speech.wav ├── test_timeout_iterator.py ├── test_tokenizer.py ├── test_tts.py ├── test_ui.py ├── test_utils.py ├── test_vision.py ├── utils.py └── videotest.mp4 ├── version.txt ├── win_run_app.py └── windows_installer.cfg /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .npm 3 | .dockerignore 4 | .pytest_cache 5 | .cache 6 | .local 7 | .github 8 | .nv 9 | .benchmarks 10 | .bash_history 11 | .gitignore 12 | h2ogpt.egg-info 13 | venv 14 | build 15 | dist 16 | prebuilt_deps 17 | Dockerfile -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | 2 | # H2OGPT 3 | 4 | H2OGPT_PORT=7860 5 | H2OGPT_BASE_MODEL=h2oai/h2ogpt-4096-llama2-7b-chat 6 | H2OGPT_ARGS="/workspace/generate.py --base_model=${H2OGPT_BASE_MODEL} --use_safetensors=True --prompt_type=llama2 --save_dir=/workspace/save/ --use_gpu_id=False --score_model=None --max_max_new_tokens=2048 --max_new_tokens=1024" 7 | 8 | # VLLM 9 | 10 | VLLM_TOKENIZER=hf-internal-testing/llama-tokenizer 11 | H2OGPT_VLLM_ARGS="--model=${H2OGPT_BASE_MODEL} --tokenizer=${VLLM_TOKENIZER} --tensor-parallel-size=2 --seed=1234 --trust-remote-code --download-dir=/workspace/.cache/huggingface/hub" 12 | 13 | # CPU models 14 | 15 | MODEL_PATH_LLAMA=https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf 16 | H2OGPT_CPU_ARGS="/workspace/generate.py --base_model=llama --model_path_llama=${MODEL_PATH_LLAMA} --max_seq_len=4096" 17 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/.gitattributes 
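The `.env` values above are consumed by the Docker Compose files later in this repository (`docker-compose.yml`, `docker-compose-cpu.yml`, and `docker-compose-vllm.yml`), which reference them as `${H2OGPT_PORT}`, `${H2OGPT_ARGS}`, and so on. A minimal sketch of using them, assuming Docker Compose v2 and the NVIDIA container runtime are installed (the exact invocation below is illustrative, not taken from the repo):

```bash
# Compose automatically reads .env from the working directory.
docker compose build

# GPU service driven by H2OGPT_ARGS (transformers backend, all GPUs):
docker compose up -d

# Or the vLLM-backed stack, which also uses H2OGPT_BASE_MODEL and H2OGPT_VLLM_ARGS:
docker compose -f docker-compose-vllm.yml up -d

# Follow logs until the Gradio UI is reachable on H2OGPT_PORT (default 7860):
docker compose logs -f h2ogpt
```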
-------------------------------------------------------------------------------- /.github/workflows/python-package-publish.yml: -------------------------------------------------------------------------------- 1 | name: Build & Publish h2oGPT Python wheel to PYPI 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | pypi-index: 7 | type: choice 8 | description: PyPI index that needed to be published 9 | required: true 10 | default: Test-PyPI 11 | options: 12 | - PyPI 13 | - Test-PyPI 14 | version: 15 | description: | 16 | Override the current version for the python package for dev purposes when uploading to Test-PyPI 17 | type: string 18 | 19 | jobs: 20 | build_and_upload: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v3.5.3 24 | 25 | - uses: actions/setup-python@v4 26 | with: 27 | python-version: '3.10' 28 | 29 | - name: Install Dependencies 30 | run: | 31 | python3.10 -m pip install --upgrade pip 32 | python3.10 -m pip install setuptools wheel twine --upgrade 33 | 34 | - name: Modify Version 35 | if: ${{ inputs.version != ''}} 36 | run: | 37 | echo ${{ inputs.version}} > version.txt 38 | echo "h2ogpt-wheel-version = $(cat version.txt)" 39 | 40 | - name: Build Wheel 41 | run: make clean dist 42 | 43 | - name: Publish to Test-PyPI 44 | if: ${{ inputs.pypi-index == 'Test-PyPI' }} 45 | run: | 46 | twine upload -r testpypi dist/* 47 | env: 48 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 49 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 50 | 51 | - name: Publish to PyPI 52 | if: ${{ inputs.pypi-index == 'PyPI' }} 53 | run: | 54 | twine upload dist/* 55 | env: 56 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 57 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | out/ 2 | 7B/ 3 | 13B/ 4 | __pycache__/ 5 | checkpoint** 6 | minimal-llama** 7 | upload.py 8 | lora-** 9 | *ckpt 10 | wandb 11 | evaluate.py 12 | test_data.json 13 | todo.txt 14 | .neptune/ 15 | *.bin 16 | db_dir_UserData 17 | temp_path_do_doc1 18 | offline_folder 19 | flagged_data_points 20 | .pytest_cache 21 | user_path 22 | user_path_test 23 | build 24 | h2ogpt.egg-info 25 | dist 26 | .idea 27 | .cache 28 | .local 29 | .bash_history 30 | .benchmarks 31 | Dockerfile-runner.dockerfile 32 | build_info.txt 33 | prebuilt_deps 34 | Dockerfile_deps 35 | 36 | # IDEs 37 | .idea/ 38 | 39 | # virtual envs 40 | venv 41 | 42 | # Mac one click installer 43 | Tesseract-OCR/ 44 | poppler/ 45 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # devel needed for bitsandbytes requirement of libcudart.so, otherwise runtime sufficient 2 | FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04 3 | 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | 6 | ENV PATH="/h2ogpt_conda/envs/h2ogpt/bin:${PATH}" 7 | ARG PATH="/h2ogpt_conda/envs/h2ogpt/bin:${PATH}" 8 | 9 | ENV HOME=/workspace 10 | ENV CUDA_HOME=/usr/local/cuda-12.1 11 | ENV VLLM_CACHE=/workspace/.vllm_cache 12 | ENV TIKTOKEN_CACHE_DIR=/workspace/tiktoken_cache 13 | ENV HF_HUB_ENABLE_HF_TRANSFER=1 14 | 15 | WORKDIR /workspace 16 | 17 | COPY . 
 /workspace/ 18 | 19 | COPY build_info.txt /workspace/ 20 | 21 | RUN cd /workspace && ./docker_build_script_ubuntu.sh 22 | 23 | RUN chmod -R a+rwx /workspace 24 | 25 | ARG user=h2ogpt 26 | ARG group=h2ogpt 27 | ARG uid=1000 28 | ARG gid=1000 29 | 30 | RUN groupadd -g ${gid} ${group} && useradd -u ${uid} -g ${group} -s /bin/bash ${user} 31 | # already exists in base image 32 | # RUN groupadd -g ${gid} docker && useradd -u ${uid} -g ${group} -m ${user} 33 | 34 | # Add the user to the docker group 35 | RUN usermod -aG docker ${user} 36 | 37 | # Switch to the new user 38 | USER ${user} 39 | 40 | EXPOSE 8888 41 | EXPOSE 7860 42 | EXPOSE 5000 43 | EXPOSE 5002 44 | EXPOSE 5004 45 | 46 | ENTRYPOINT ["python3.10"] 47 | -------------------------------------------------------------------------------- /cloud/packer/README.md: -------------------------------------------------------------------------------- 1 | # h2oGPT Packer Templates 2 | 3 | These scripts help create images in public clouds that can then be submitted to the Azure/GCP Marketplace for commercial use. 4 | 5 | ### Packer Scripts 6 | - Azure - `h2ogpt-azure.json` 7 | - GCP - `h2ogpt-gcp.json` 8 | 9 | ### Provisioning Scripts 10 | - `setup_environment.sh` 11 | - Responsible for setting up CUDA, GCC, Nginx, Python 12 | - `install_h2ogpt.sh` 13 | - Responsible for setting up h2oGPT with its dependencies 14 | - `h2oai-h2ogpt-4096-llama2-13b-chat.sh` 15 | - Responsible for setting up the default model h2oai-h2ogpt-4096-llama2-13b-chat with vLLM on port 80 via Nginx 16 | - vLLM, h2oGPT, and Nginx are run as services 17 | - The model is downloaded at runtime 18 | 19 | __Jenkins Pipeline__: http://jenkins.h2o.local:8080/job/build-h2ogpt-cloud-images/ 20 | 21 | ### Notes: 22 | - Since the model is downloaded at runtime after the VM is provisioned, it takes around 5-10 minutes for h2oGPT to start correctly 23 | -------------------------------------------------------------------------------- /cloud/packer/h2oai-h2ogpt-4096-llama2-13b-chat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | sudo systemctl daemon-reload 4 | sudo systemctl enable h2ogpt_nginx.service 5 | sudo systemctl enable vllm.service 6 | sudo systemctl enable h2ogpt.service 7 | 8 | cd "$HOME" 9 | # sudo rm -rf "$HOME"/.cache/huggingface/hub/ 10 | sudo DEBIAN_FRONTEND=noninteractive apt-get -y autoremove 11 | sudo DEBIAN_FRONTEND=noninteractive apt-get -y clean 12 | -------------------------------------------------------------------------------- /cloud/packer/h2ogpt-gcp.json: -------------------------------------------------------------------------------- 1 | { 2 | "variables": { 3 | "project_id": "eng-llm", 4 | "account_file": "", 5 | "h2ogpt_version": "010", 6 | "branch_tag": "main", 7 | "base_model": "h2oai-h2ogpt-4096-llama2-13b-chat" 8 | }, 9 | "builders": [ 10 | { 11 | "type": "googlecompute", 12 | "project_id": "{{user `project_id`}}", 13 | "account_file": "{{user `account_file`}}", 14 | "machine_type": "n1-standard-8", 15 | "on_host_maintenance": "TERMINATE", 16 | "accelerator_type": "projects/{{user `project_id`}}/zones/us-west1-b/acceleratorTypes/nvidia-tesla-t4", 17 | "accelerator_count": "4", 18 | "source_image_family": "ubuntu-2004-lts", 19 | "zone": "us-west1-b", 20 | "image_description": "h2ogpt using Packer", 21 | "image_name": "h2ogpt-{{user `h2ogpt_version`}}", 22 | "disk_size": 512, 23 | "disk_type": "pd-ssd", 24 | "ssh_username": "ubuntu", 25 | "tags": ["h2ogpt"] 26 | } 27 | ], 28 | "post-processors": [ 29 | { 30 | 
"type": "manifest", 31 | "output": "gcp-image-info.json", 32 | "strip_path": true, 33 | "custom_data": { 34 | "base_image": "GCP Ubuntu 20.04", 35 | "h2ogpt_version": "{{user `h2ogpt_version`}}" 36 | } 37 | } 38 | ], 39 | "provisioners": [ 40 | { 41 | "type": "shell", 42 | "script": "setup_environment.sh", 43 | "pause_before": "10s", 44 | "pause_after": "10s" 45 | }, 46 | { 47 | "type": "shell", 48 | "inline": ["sudo reboot now"], 49 | "pause_after": "10s", 50 | "expect_disconnect": true 51 | }, 52 | { 53 | "type": "shell", 54 | "environment_vars": ["BRANCH_TAG={{user `branch_tag`}}"], 55 | "script": "install_h2ogpt.sh", 56 | "pause_after": "10s" 57 | }, 58 | { 59 | "type": "shell", 60 | "inline": [ 61 | "sudo chown -R ubuntu:ubuntu /etc/nginx/conf.d", 62 | "sudo chown -R ubuntu:ubuntu /etc/systemd/system/" 63 | ], 64 | "pause_before": "10s" 65 | }, 66 | { 67 | "type": "file", 68 | "source": "./startup-scripts/run_nginx.sh", 69 | "destination": "/workspace/run_nginx.sh" 70 | }, 71 | { 72 | "type": "file", 73 | "source": "./startup-scripts/run_vllm.sh", 74 | "destination": "/workspace/run_vllm.sh" 75 | }, 76 | { 77 | "type": "file", 78 | "source": "./startup-scripts/run_h2ogpt.sh", 79 | "destination": "/workspace/run_h2ogpt.sh" 80 | }, 81 | { 82 | "type": "file", 83 | "source": "./startup-scripts/h2ogpt_nginx.service", 84 | "destination": "/etc/systemd/system/h2ogpt_nginx.service" 85 | }, 86 | { 87 | "type": "file", 88 | "source": "./startup-scripts/vllm.service", 89 | "destination": "/etc/systemd/system/vllm.service" 90 | }, 91 | { 92 | "type": "file", 93 | "source": "./startup-scripts/h2ogpt.service", 94 | "destination": "/etc/systemd/system/h2ogpt.service" 95 | }, 96 | { 97 | "type": "file", 98 | "source": "./startup-scripts/temp.conf", 99 | "destination": "/workspace/temp.conf" 100 | }, 101 | { 102 | "type": "shell", 103 | "script": "{{user `base_model`}}.sh", 104 | "pause_after": "10s" 105 | } 106 | ] 107 | } 108 | -------------------------------------------------------------------------------- /cloud/packer/install_h2ogpt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | export PATH=$PATH:/home/ubuntu/.local/bin 4 | sudo mkdir -p /workspace && cd /workspace 5 | sudo chmod a+rwx . 6 | 7 | git config --global --add safe.directory /workspace 8 | git config --global advice.detachedHead false 9 | git clone https://github.com/h2oai/h2ogpt.git . 10 | 11 | if [ -z "$BRANCH_TAG" ]; then 12 | echo "BRANCH_TAG environment variable is not set." 
 13 | exit 1 14 | fi 15 | 16 | git checkout $BRANCH_TAG 17 | 18 | ls -la 19 | sudo ./docker_build_script_ubuntu.sh 20 | -------------------------------------------------------------------------------- /cloud/packer/setup_environment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | sudo DEBIAN_FRONTEND=noninteractive apt-get -y update 4 | sudo DEBIAN_FRONTEND=noninteractive apt-get -y --no-install-recommends install \ 5 | git \ 6 | software-properties-common \ 7 | pandoc \ 8 | curl \ 9 | apt-utils \ 10 | make \ 11 | build-essential \ 12 | wget \ 13 | gnupg2 \ 14 | ca-certificates \ 15 | lsb-release \ 16 | ubuntu-keyring 17 | 18 | curl https://nginx.org/keys/nginx_signing.key | gpg --dearmor | sudo tee /usr/share/keyrings/nginx-archive-keyring.gpg >/dev/null 19 | gpg --dry-run --quiet --no-keyring --import --import-options import-show /usr/share/keyrings/nginx-archive-keyring.gpg 20 | echo "deb [signed-by=/usr/share/keyrings/nginx-archive-keyring.gpg] http://nginx.org/packages/ubuntu `lsb_release -cs` nginx" | sudo tee /etc/apt/sources.list.d/nginx.list 21 | echo -e "Package: *\nPin: origin nginx.org\nPin: release o=nginx\nPin-Priority: 900\n" | sudo tee /etc/apt/preferences.d/99nginx 22 | 23 | sudo DEBIAN_FRONTEND=noninteractive apt -y update 24 | sudo DEBIAN_FRONTEND=noninteractive apt -y install nginx 25 | 26 | MAX_GCC_VERSION=11 27 | sudo DEBIAN_FRONTEND=noninteractive add-apt-repository -y ppa:ubuntu-toolchain-r/test 28 | sudo DEBIAN_FRONTEND=noninteractive apt-get -y install gcc-$MAX_GCC_VERSION g++-$MAX_GCC_VERSION 29 | 30 | sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-$MAX_GCC_VERSION 100 31 | sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-$MAX_GCC_VERSION 100 32 | sudo update-alternatives --set gcc /usr/bin/gcc-$MAX_GCC_VERSION 33 | sudo update-alternatives --set g++ /usr/bin/g++-$MAX_GCC_VERSION 34 | 35 | wget --quiet https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin 36 | sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 37 | wget --quiet https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda-repo-ubuntu2004-11-8-local_11.8.0-520.61.05-1_amd64.deb 38 | sudo dpkg -i cuda-repo-ubuntu2004-11-8-local_11.8.0-520.61.05-1_amd64.deb 39 | sudo cp /var/cuda-repo-ubuntu2004-11-8-local/cuda-*-keyring.gpg /usr/share/keyrings/ 40 | sudo DEBIAN_FRONTEND=noninteractive apt-get -y update 41 | sudo DEBIAN_FRONTEND=noninteractive apt-get -y install cuda 42 | sudo rm -rf ./*.deb 43 | 44 | sudo echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.8/lib64/" >> ~/.bashrc 45 | sudo echo "export CUDA_HOME=/usr/local/cuda-11.8" >> ~/.bashrc 46 | sudo echo "export PATH=$PATH:/h2ogpt_conda/bin:/usr/local/cuda-11.8/bin/" >> ~/.bashrc 47 | -------------------------------------------------------------------------------- /cloud/packer/startup-scripts/h2ogpt.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=h2oGPT Server 3 | After=network.target 4 | 5 | [Service] 6 | Type=simple 7 | User=ubuntu 8 | WorkingDirectory=/workspace 9 | ExecStart=/usr/bin/bash /workspace/run_h2ogpt.sh 10 | 11 | [Install] 12 | WantedBy=multi-user.target 13 | -------------------------------------------------------------------------------- /cloud/packer/startup-scripts/h2ogpt_nginx.service: -------------------------------------------------------------------------------- 1 | 
[Unit] 2 | Description=h2oGPT Nginx Server 3 | After=network.target 4 | 5 | [Service] 6 | Type=simple 7 | User=ubuntu 8 | WorkingDirectory=/workspace 9 | ExecStart=/usr/bin/bash /workspace/run_nginx.sh 10 | 11 | [Install] 12 | WantedBy=multi-user.target 13 | -------------------------------------------------------------------------------- /cloud/packer/startup-scripts/run_h2ogpt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | while true; do 4 | http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:5000/v1/completions \ 5 | -H "Content-Type: application/json" \ 6 | -d '{ 7 | "model": "h2oai/h2ogpt-4096-llama2-13b-chat", 8 | "prompt": "San Francisco is a", 9 | "max_tokens": 7, 10 | "temperature": 0 11 | }') 12 | 13 | if [ "$http_code" -eq 200 ]; then 14 | echo "Received HTTP 200 status code. Starting h2ogpt service" 15 | CUDA_VISIBLE_DEVICES=$(seq -s, $(($(nvidia-smi -L | wc -l) / 2)) $(($(nvidia-smi -L | wc -l) - 1))) /h2ogpt_conda/bin/python3.10 \ 16 | /workspace/generate.py \ 17 | --inference_server="vllm:0.0.0.0:5000" \ 18 | --base_model=h2oai/h2ogpt-4096-llama2-13b-chat \ 19 | --langchain_mode=UserData 20 | break 21 | else 22 | echo "Received HTTP $http_code status code. Retrying in 5 seconds..." 23 | sleep 5 24 | fi 25 | done 26 | 27 | -------------------------------------------------------------------------------- /cloud/packer/startup-scripts/run_nginx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | while true; do 4 | http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:5000/v1/completions \ 5 | -H "Content-Type: application/json" \ 6 | -d '{ 7 | "model": "h2oai/h2ogpt-4096-llama2-13b-chat", 8 | "prompt": "San Francisco is a", 9 | "max_tokens": 7, 10 | "temperature": 0 11 | }') 12 | 13 | if [ "$http_code" -eq 200 ]; then 14 | echo "Received HTTP 200 status code. Restarting Nginx for h2oGPT" 15 | ip=$(dig +short myip.opendns.com @resolver1.opendns.com) 16 | sed "s/<|_SUBST_PUBLIC_IP|>;/$ip;/g" /workspace/temp.conf > /etc/nginx/conf.d/h2ogpt.conf 17 | sudo systemctl restart nginx.service 18 | break 19 | else 20 | echo "Received HTTP $http_code status code. Retrying in 5 seconds..." 21 | sleep 5 22 | fi 23 | done 24 | -------------------------------------------------------------------------------- /cloud/packer/startup-scripts/run_vllm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | tps=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | wc -l | awk '{if ($1 > 1) print int($1/2); else print 1}') 4 | NCCL_IGNORE_DISABLED_P2P=1 CUDA_VISIBLE_DEVICES=$(seq -s, 0 $(($(nvidia-smi -L | wc -l) > 1 ? 
$(nvidia-smi -L | wc -l) / 2 - 1 : 0))) \ 5 | /h2ogpt_conda/vllm_env/bin/python3.10 -m vllm.entrypoints.openai.api_server \ 6 | --port=5000 \ 7 | --host=0.0.0.0 \ 8 | --model h2oai/h2ogpt-4096-llama2-13b-chat \ 9 | --tokenizer=hf-internal-testing/llama-tokenizer \ 10 | --tensor-parallel-size=$tps --seed 1234 11 | -------------------------------------------------------------------------------- /cloud/packer/startup-scripts/temp.conf: -------------------------------------------------------------------------------- 1 | server { 2 | listen 80; 3 | listen [::]:80; 4 | server_name <|_SUBST_PUBLIC_IP|>; # Change this to your domain name 5 | 6 | location / { # Change this if you'd like to server your Gradio app on a different path 7 | proxy_pass http://0.0.0.0:7860/; # Change this if your Gradio app will be running on a different port 8 | proxy_redirect off; 9 | proxy_http_version 1.1; 10 | proxy_set_header Upgrade $http_upgrade; 11 | proxy_set_header Connection "upgrade"; 12 | proxy_set_header Host $host; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /cloud/packer/startup-scripts/vllm.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=vLLM Server 3 | After=network.target 4 | 5 | [Service] 6 | Type=simple 7 | User=ubuntu 8 | WorkingDirectory=/workspace 9 | ExecStart=/usr/bin/bash /workspace/run_vllm.sh 10 | 11 | [Install] 12 | WantedBy=multi-user.target 13 | -------------------------------------------------------------------------------- /data/NGSL_1.2_stats.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/data/NGSL_1.2_stats.csv.zip -------------------------------------------------------------------------------- /data/README-template.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: apache-2.0 3 | language: 4 | - en 5 | thumbnail: https://h2o.ai/etc.clientlibs/h2o/clientlibs/clientlib-site/resources/images/favicon.ico 6 | tags: 7 | - gpt 8 | - llm 9 | - large language model 10 | - open-source 11 | --- 12 | # h2oGPT Data Card 13 | ## Summary 14 | 15 | H2O.ai's `<>` is an open-source instruct-type dataset for fine-tuning of large language models, licensed for commercial use. 
 16 | 17 | - Number of rows: `<>` 18 | - Number of columns: `<>` 19 | - Column names: `<>` 20 | 21 | ## Source 22 | 23 | <> 24 | -------------------------------------------------------------------------------- /data/Sample-Invoice-printable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/data/Sample-Invoice-printable.png -------------------------------------------------------------------------------- /data/censor_words.txt: -------------------------------------------------------------------------------- 1 | fuck-buddy 2 | fuck-buddys 3 | clusterfuck 4 | fuckup 5 | fuckups 6 | dumbfuck 7 | dumbfucks 8 | mindfuck 9 | *fucking 10 | fuckin' 11 | -------------------------------------------------------------------------------- /data/count_1w.txt.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/data/count_1w.txt.zip -------------------------------------------------------------------------------- /data/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/data/demo.png -------------------------------------------------------------------------------- /data/example.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/data/example.xlsx -------------------------------------------------------------------------------- /data/pexels-evg-kowalievska-1170986_small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/data/pexels-evg-kowalievska-1170986_small.jpg -------------------------------------------------------------------------------- /dev_installers/mac/README.md: -------------------------------------------------------------------------------- 1 | # One-Click Installers for macOS 2 | 3 | This document provides the details to build one-click installers for macOS. To manually build h2ogpt on macOS, follow the steps at [README_MACOS.md](../../docs/README_MACOS.md). 4 | 5 | **Note**: Experimental and still under development. 6 | 7 | ## Prerequisites 8 | 9 | - Conda must be installed in order to run the build script. 10 | - We use `PyInstaller` to build the one-click installer; it doesn't support cross-platform builds, so the installers can 11 | only be built on Mac machines. 12 | - Install tesseract & poppler on your Mac machine 13 | 14 | ## Build 15 | 16 | ### Debug Mode (for one-click installer developers) 17 | 18 | - Clone `h2ogpt` from https://github.com/h2oai/h2ogpt.git 19 | - Create the conda environment and install all required dependencies; consult [build_mac_installer.sh](build_mac_installer.sh) for more details. 
20 | - Run below commands to build the spec file for installer, replace the `--name` appropriately depending on whether building for CPU only or with MPS (GPU) support 21 | ```shell 22 | cd h2ogpt 23 | pyi-makespec mac_run_app.py -F --name=h2ogpt-osx-m1-cpu \ 24 | --hidden-import=h2ogpt \ 25 | --collect-all=h2ogpt \ 26 | --recursive-copy-metadata=transformers \ 27 | --collect-data=langchain \ 28 | --collect-data=gradio_client \ 29 | --collect-all=gradio \ 30 | --collect-all=sentencepiece \ 31 | --collect-all=gradio_pdf \ 32 | --collect-all=llama_cpp \ 33 | --collect-all=tiktoken_ext \ 34 | --add-data=../../Tesseract-OCR:Tesseract-OCR \ 35 | --add-data=../../poppler:poppler 36 | ``` 37 | - Edit the `h2ogpt-osx-m1-cpu.spec` and/or `h2ogpt-osx-m1-gpu.spec` and add below code block to `Analysis()`, to explicitly tell PyInstaller to collect all `.py` modules from listed dependencies. 38 | ``` 39 | module_collection_mode={ 40 | 'gradio' : 'py', 41 | 'gradio_pdf' : 'py', 42 | }, 43 | ``` 44 | - Run `pyinstaller h2ogpt-osx-m1-cpu.spec` to build the installer. 45 | ### Deployment Mode 46 | 47 | - Clone `h2ogpt` from https://github.com/h2oai/h2ogpt.git 48 | - For CPU only installer, run below commands to build the installer 49 | ```shell 50 | cd h2ogpt 51 | . ./dev_installers/mac/build_mac_installer.sh 52 | ``` 53 | - For MPS (GPU) supported installer, run below commands to build the installer 54 | ```shell 55 | cd h2ogpt 56 | BUILD_MPS=1 . ./dev_installers/mac/build_mac_installer.sh 57 | ``` 58 | 59 | ## Run 60 | 61 | From MacOS finder, go to `h2ogpt/dist/` and double-click on the installer (i.e `h2ogpt-osx-m1-cpu`). -------------------------------------------------------------------------------- /dev_installers/mac/build_mac_installer.sh: -------------------------------------------------------------------------------- 1 | # This script should be run from project root 2 | 3 | # Create conda environment to build installer 4 | if ! command -v conda &> /dev/null 5 | then 6 | echo "conda could not be found, need conda to continue!" 7 | exit 1 8 | fi 9 | 10 | # Remove old Tesseract and poppler deps 11 | rm -rf ./Tesseract-OCR poppler 12 | 13 | conda env remove -n h2ogpt-mac 14 | conda create -n h2ogpt-mac python=3.10 rust -y 15 | conda activate h2ogpt-mac 16 | 17 | pip install --upgrade pip 18 | python -m pip install --upgrade setuptools 19 | 20 | # Install required dependencies into conda environment 21 | pip install -r requirements.txt --extra-index https://download.pytorch.org/whl/cpu -c reqs_optional/reqs_constraints.txt 22 | # Required for Doc Q/A: LangChain: 23 | pip install -r reqs_optional/requirements_optional_langchain.txt -c reqs_optional/reqs_constraints.txt 24 | # Optional: PyMuPDF/ArXiv: 25 | pip install -r reqs_optional/requirements_optional_langchain.gpllike.txt -c reqs_optional/reqs_constraints.txt 26 | # Optional: Selenium/PlayWright: 27 | pip install -r reqs_optional/requirements_optional_langchain.urls.txt -c reqs_optional/reqs_constraints.txt 28 | # Optional: DocTR OCR: 29 | conda install weasyprint pygobject -c conda-forge -y 30 | pip install -r reqs_optional/requirements_optional_doctr.txt -c reqs_optional/reqs_constraints.txt 31 | # Optional: for supporting unstructured package 32 | python -m nltk.downloader all 33 | 34 | # Required for CPU: LLaMa/GPT4All: 35 | # For MPS support 36 | if [ -z "$BUILD_MPS" ] 37 | then 38 | echo "BUILD_MPS is not set, skipping MPS specific configs..." 
39 | pip uninstall llama-cpp-python -y 40 | CMAKE_ARGS="-DLLAMA_METAL=off" FORCE_CMAKE=1 pip install -r reqs_optional/requirements_optional_llamacpp_gpt4all.txt -c reqs_optional/reqs_constraints.txt --no-cache-dir 41 | else 42 | if [ "$BUILD_MPS" = "1" ] 43 | then 44 | echo "BUILD_MPS is set to 1, running MPS specific configs..." 45 | pip uninstall llama-cpp-python -y 46 | CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install -r reqs_optional/requirements_optional_llamacpp_gpt4all.txt -c reqs_optional/reqs_constraints.txt --no-cache-dir 47 | fi 48 | fi 49 | pip install librosa -c reqs_optional/reqs_constraints.txt 50 | 51 | # Install PyInstaller 52 | pip install PyInstaller 53 | 54 | # Install and copy tesseract & poppler 55 | #brew install poppler 56 | #brew install tesseract 57 | cp -R /opt/homebrew/Cellar/poppler/24.02.0/ ./poppler 58 | cp -R /opt/homebrew/Cellar/tesseract/5.3.4_1/ ./Tesseract-OCR 59 | 60 | # Build and install h2ogpt 61 | make clean dist 62 | pip install ./dist/h2ogpt*.whl 63 | 64 | # Build Mac Installer 65 | # below command is used to build current .spec file from project root, replace it whenever use new configs 66 | #pyi-makespec mac_run_app.py -F --name=h2ogpt-osx-m1-cpu \ 67 | # --hidden-import=h2ogpt \ 68 | # --collect-all=h2ogpt \ 69 | # --recursive-copy-metadata=transformers \ 70 | # --collect-data=langchain \ 71 | # --collect-data=gradio_client \ 72 | # --collect-all=gradio \ 73 | # --collect-all=sentencepiece \ 74 | # --collect-all=gradio_pdf \ 75 | # --collect-all=llama_cpp \ 76 | # --collect-all=tiktoken_ext \ 77 | # --add-data=../../Tesseract-OCR:Tesseract-OCR \ 78 | # --add-data=../../poppler:poppler 79 | 80 | # add below argument to Analysis() call in h2ogpt-osx-m1-cpu.spec file 81 | #module_collection_mode={ 82 | # 'gradio' : 'py', 83 | # 'gradio_pdf' : 'py', 84 | #} 85 | if [ "$BUILD_MPS" = "1" ] 86 | then 87 | echo "BUILD_MPS is set to 1, building one click installer for MPS..." 88 | pyinstaller ./dev_installers/mac/h2ogpt-osx-m1-gpu.spec 89 | else 90 | echo "BUILD_MPS is set to 0 or not set, building one click installer for CPU..." 
91 | pyinstaller ./dev_installers/mac/h2ogpt-osx-m1-cpu.spec 92 | fi 93 | -------------------------------------------------------------------------------- /dev_installers/mac/h2ogpt-osx-m1-cpu.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | from PyInstaller.utils.hooks import collect_data_files 3 | from PyInstaller.utils.hooks import collect_all 4 | from PyInstaller.utils.hooks import copy_metadata 5 | 6 | datas = [('../../Tesseract-OCR', 'Tesseract-OCR'), ('../../poppler', 'poppler')] 7 | binaries = [] 8 | hiddenimports = ['h2ogpt'] 9 | datas += collect_data_files('langchain') 10 | datas += collect_data_files('gradio_client') 11 | datas += copy_metadata('transformers', recursive=True) 12 | tmp_ret = collect_all('h2ogpt') 13 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 14 | tmp_ret = collect_all('gradio') 15 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 16 | tmp_ret = collect_all('sentencepiece') 17 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 18 | tmp_ret = collect_all('gradio_pdf') 19 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 20 | tmp_ret = collect_all('llama_cpp') 21 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 22 | tmp_ret = collect_all('tiktoken_ext') 23 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 24 | 25 | 26 | a = Analysis( 27 | ['mac_run_app.py'], 28 | pathex=[], 29 | binaries=binaries, 30 | datas=datas, 31 | hiddenimports=hiddenimports, 32 | hookspath=[], 33 | hooksconfig={}, 34 | runtime_hooks=[], 35 | excludes=[], 36 | noarchive=False, 37 | module_collection_mode={ 38 | 'gradio' : 'py', 39 | 'gradio_pdf' : 'py', 40 | }, 41 | ) 42 | pyz = PYZ(a.pure) 43 | 44 | exe = EXE( 45 | pyz, 46 | a.scripts, 47 | a.binaries, 48 | a.datas, 49 | [], 50 | name='h2ogpt-osx-m1-cpu', 51 | debug=False, 52 | bootloader_ignore_signals=False, 53 | strip=False, 54 | upx=True, 55 | upx_exclude=[], 56 | runtime_tmpdir=None, 57 | console=True, 58 | disable_windowed_traceback=False, 59 | argv_emulation=False, 60 | target_arch=None, 61 | codesign_identity=None, 62 | entitlements_file=None, 63 | ) 64 | -------------------------------------------------------------------------------- /dev_installers/mac/h2ogpt-osx-m1-gpu.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | from PyInstaller.utils.hooks import collect_data_files 3 | from PyInstaller.utils.hooks import collect_all 4 | from PyInstaller.utils.hooks import copy_metadata 5 | 6 | datas = [('../../Tesseract-OCR', 'Tesseract-OCR'), ('../../poppler', 'poppler')] 7 | binaries = [] 8 | hiddenimports = ['h2ogpt'] 9 | datas += collect_data_files('langchain') 10 | datas += collect_data_files('gradio_client') 11 | datas += copy_metadata('transformers', recursive=True) 12 | tmp_ret = collect_all('h2ogpt') 13 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 14 | tmp_ret = collect_all('gradio') 15 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 16 | tmp_ret = collect_all('sentencepiece') 17 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 18 | tmp_ret = collect_all('gradio_pdf') 19 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 20 | tmp_ret = collect_all('llama_cpp') 21 | datas += tmp_ret[0]; binaries += 
tmp_ret[1]; hiddenimports += tmp_ret[2] 22 | tmp_ret = collect_all('tiktoken_ext') 23 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 24 | 25 | 26 | a = Analysis( 27 | ['mac_run_app.py'], 28 | pathex=[], 29 | binaries=binaries, 30 | datas=datas, 31 | hiddenimports=hiddenimports, 32 | hookspath=[], 33 | hooksconfig={}, 34 | runtime_hooks=[], 35 | excludes=[], 36 | noarchive=False, 37 | module_collection_mode={ 38 | 'gradio' : 'py', 39 | 'gradio_pdf' : 'py', 40 | }, 41 | ) 42 | pyz = PYZ(a.pure) 43 | 44 | exe = EXE( 45 | pyz, 46 | a.scripts, 47 | a.binaries, 48 | a.datas, 49 | [], 50 | name='h2ogpt-osx-m1-gpu', 51 | debug=False, 52 | bootloader_ignore_signals=False, 53 | strip=False, 54 | upx=True, 55 | upx_exclude=[], 56 | runtime_tmpdir=None, 57 | console=True, 58 | disable_windowed_traceback=False, 59 | argv_emulation=False, 60 | target_arch=None, 61 | codesign_identity=None, 62 | entitlements_file=None, 63 | ) 64 | -------------------------------------------------------------------------------- /dev_installers/mac/mac_run_app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import webbrowser 5 | 6 | print('__file__: %s' % __file__) 7 | path1 = os.path.dirname(os.path.abspath(__file__)) 8 | sys.path.append(path1) 9 | base_path = os.path.dirname(path1) 10 | sys.path.append(base_path) 11 | os.environ['PYTHONPATH'] = path1 12 | print('PYTHONPATH: ', os.getenv('PYTHONPATH'), end='\n', flush=True) 13 | print('Path_1: ', path1, end='\n', flush=True) 14 | 15 | os.environ['NLTK_DATA'] = os.path.join(path1, 'nltk_data') 16 | os.environ['PATH'] = os.environ['PATH'] + ':' + \ 17 | os.path.join(path1, 'poppler/bin/') + ':' + \ 18 | os.path.join(path1, 'poppler/lib/') + ':' + \ 19 | os.path.join(path1, 'Tesseract-OCR') 20 | 21 | print('NLTK_DATA: ', os.getenv('NLTK_DATA'), end='\n', flush=True) 22 | print('PATH: ', os.environ['PATH'], end='\n', flush=True) 23 | 24 | for sub in ['src', 'iterators', 'gradio_utils', 'metrics', 'models', '.']: 25 | path2 = os.path.join(path1, 'h2ogpt', sub) 26 | sys.path.append(path2) 27 | print('Path_3: ', path2, end='\n', flush=True) 28 | 29 | 30 | def main(): 31 | from generate import entrypoint_main as main_h2ogpt 32 | os.environ['h2ogpt_block_gradio_exit'] = 'False' 33 | os.environ['h2ogpt_score_model'] = '' 34 | main_h2ogpt() 35 | 36 | server_name = os.getenv('h2ogpt_server_name', os.getenv('H2OGPT_SERVER_NAME', 'localhost')) 37 | server_port = os.getenv('GRADIO_SERVER_PORT', str(7860)) 38 | 39 | url = "http://%s:%s" % (server_name, server_port) 40 | webbrowser.open(url) 41 | 42 | while True: 43 | time.sleep(10000) 44 | 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /docker-compose-cpu.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | h2ogpt: 5 | build: 6 | context: . 7 | dockerfile: Dockerfile 8 | restart: always 9 | shm_size: '2gb' 10 | ports: 11 | - '7860:7860' 12 | volumes: 13 | - cache:/workspace/.cache 14 | - save:/workspace/save 15 | command: ${H2OGPT_CPU_ARGS} 16 | 17 | volumes: 18 | cache: 19 | save: 20 | -------------------------------------------------------------------------------- /docker-compose-vllm.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | h2ogpt: 5 | build: 6 | context: . 
7 | dockerfile: Dockerfile 8 | restart: always 9 | shm_size: '2gb' 10 | depends_on: 11 | vllm: 12 | condition: service_healthy 13 | ports: 14 | - '${H2OGPT_PORT}:7860' 15 | volumes: 16 | - cache:/workspace/.cache 17 | - save:/workspace/save 18 | networks: 19 | - h2ogpt 20 | command: 21 | - /workspace/generate.py 22 | - --inference_server="vllm:vllm:5000" 23 | - --base_model=${H2OGPT_BASE_MODEL} 24 | - --langchain_mode=UserData 25 | deploy: 26 | resources: 27 | reservations: 28 | devices: 29 | - driver: nvidia 30 | device_ids: ['2', '3'] 31 | capabilities: [gpu] 32 | 33 | vllm: 34 | image: vllm/vllm-openai:latest 35 | restart: always 36 | shm_size: '64gb' 37 | expose: 38 | - 5000 39 | volumes: 40 | - cache:/workspace/.cache 41 | networks: 42 | - h2ogpt 43 | entrypoint: python3 44 | command: -m vllm.entrypoints.openai.api_server --port=5000 --host=0.0.0.0 ${H2OGPT_VLLM_ARGS} 45 | environment: 46 | - NCCL_IGNORE_DISABLED_P2P=1 47 | healthcheck: 48 | test: [ "CMD", "curl", "-f", "http://0.0.0.0:5000/v1/models" ] 49 | interval: 30s 50 | timeout: 5s 51 | retries: 20 52 | deploy: 53 | resources: 54 | reservations: 55 | devices: 56 | - driver: nvidia 57 | device_ids: ['0', '1'] 58 | capabilities: [gpu] 59 | 60 | volumes: 61 | cache: 62 | save: 63 | networks: 64 | h2ogpt: 65 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | h2ogpt: 5 | build: 6 | context: . 7 | dockerfile: Dockerfile 8 | restart: always 9 | shm_size: '2gb' 10 | ports: 11 | - '${H2OGPT_PORT}:7860' 12 | volumes: 13 | - cache:/workspace/.cache 14 | - save:/workspace/save 15 | command: ${H2OGPT_ARGS} 16 | deploy: 17 | resources: 18 | reservations: 19 | devices: 20 | - driver: nvidia 21 | count: all 22 | capabilities: [gpu] 23 | 24 | volumes: 25 | cache: 26 | save: 27 | -------------------------------------------------------------------------------- /docs/Dockerfile.delta2: -------------------------------------------------------------------------------- 1 | FROM gcr.io/vorvan/h2oai/h2oai-h2ogpt-runtime:0.2.1-1290 2 | USER root 3 | # 4 | COPY src/enums.py /workspace/src/enums.py 5 | COPY src/gen.py /workspace/src/gen.py 6 | COPY src/gpt_langchain.py /workspace/src/gpt_langchain.py 7 | COPY src/model_utils.py /workspace/src/model_utils.py 8 | COPY src/prompter.py /workspace/src/prompter.py 9 | COPY src/utils.py /workspace/src/utils.py 10 | COPY src/version.py /workspace/src/version.py 11 | 12 | COPY openai_server/agent_prompting.py /workspace/openai_server/agent_prompting.py 13 | COPY openai_server/agent_utils.py /workspace/openai_server/agent_utils.py 14 | COPY openai_server/autogen_2agent_backend.py /workspace/openai_server/autogen_2agent_backend.py 15 | COPY openai_server/autogen_agents.py /workspace/openai_server/autogen_agents.py 16 | COPY openai_server/autogen_multi_agent_backend.py /workspace/openai_server/autogen_multi_agent_backend.py 17 | COPY openai_server/autogen_utils.py /workspace/openai_server/autogen_utils.py 18 | COPY openai_server/server.py /workspace/openai_server/server.py 19 | COPY openai_server/agent_tools/download_web_video.py /workspace/openai_server/agent_tools/download_web_video.py 20 | COPY openai_server/agent_tools/convert_document_to_text.py /workspace/openai_server/agent_tools/convert_document_to_text.py 21 | COPY openai_server/agent_tools/ask_question_about_documents.py /workspace/openai_server/agent_tools/ask_question_about_documents.py 
22 | 23 | COPY openai_server/agent_prompting.py /workspace/openai_server/agent_prompting.py 24 | COPY openai_server/agent_tools/bing_search.py /workspace/openai_server/agent_tools/bing_search.py 25 | COPY openai_server/agent_tools/convert_document_to_text.py /workspace/openai_server/agent_tools/convert_document_to_text.py 26 | COPY openai_server/agent_tools/download_youtube_video.py /workspace/openai_server/agent_tools/download_youtube_video.py 27 | COPY openai_server/agent_tools/google_search.py /workspace/openai_server/agent_tools/google_search.py 28 | COPY openai_server/autogen_utils.py /workspace/openai_server/autogen_utils.py 29 | 30 | RUN chmod a+rwx /workspace/src/*.py 31 | RUN chmod a+rwx /workspace/openai_server/*.py 32 | RUN chmod a+rwx /workspace/openai_server/agent_tools/*.py 33 | 34 | RUN chmod a+rwx /workspace/.cache 35 | 36 | USER h2ogpt 37 | 38 | # docker build -f docs/Dockerfile.delta2 -t gcr.io/vorvan/h2oai/h2oai-h2ogpt-runtime:0.2.1-1290-patch1 . 39 | # docker push gcr.io/vorvan/h2oai/h2oai-h2ogpt-runtime:0.2.1-1290-patch1 -------------------------------------------------------------------------------- /docs/Dockerfile.internvl: -------------------------------------------------------------------------------- 1 | # if from main: 2 | # (base) ubuntu@compute-permanent-node-406:~/lmdeploy$ docker build . -f docker/Dockerfile -t internvlmain --no-cache 3 | # then change below "FROM openmmlab/lmdeploy:latest" to "FROM internvlmain" 4 | 5 | # docker build - < Dockerfile.internvl -t internvl 6 | FROM openmmlab/lmdeploy:latest 7 | 8 | RUN apt-get update && apt-get install -y python3 python3-pip git 9 | 10 | WORKDIR /app 11 | 12 | RUN pip3 uninstall pkg_resources -y 13 | RUN pip3 install --upgrade pip 14 | RUN pip3 install --upgrade setuptools==66.1.1 15 | RUN pip3 uninstall -y ninja && pip3 install ninja 16 | RUN CUDA_HOME=/usr/local/cuda-11.8/ PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu118 pip3 install timm xformers triton==2.1.0 transformers 17 | RUN MAX_JOBS=4 CUDA_HOME=/usr/local/cuda-11.8/ PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu118 FLASH_ATTENTION_FORCE_BUILD=TRUE pip3 install flash-attn==2.5.2 --no-build-isolation 18 | RUN pip3 install git+https://github.com/haotian-liu/LLaVA.git --no-deps 19 | 20 | COPY . . 21 | 22 | CMD ["lmdeploy", "serve", "api_server", "OpenGVLab/InternVL-Chat-V1-5"] 23 | -------------------------------------------------------------------------------- /docs/README_Agents.md: -------------------------------------------------------------------------------- 1 | ## h2oGPT integration with LangChain Agents 2 | 3 | Various agents from LangChain are included: 4 | * Search -- Works sometimes with non-OpenAI models after improvements beyond LangChain 5 | * Collection -- Pre-alpha tested 6 | * Python -- Pre-alpha tested, only currently allowed with OpenAI 7 | * CSV -- Works well with OpenAI due to use of Function Tools 8 | * Pandas -- Disabled until load csv/json with pandas. 
 9 | * JSON -- Alpha tested, only currently allowed with OpenAI 10 | * AutoGPT -- Alpha tested 11 | * Tools: 12 | * Search 13 | * Wikipedia 14 | * Shell 15 | * File 16 | * Python 17 | * Requests 18 | * Wolfram Alpha 19 | * Memory 20 | -------------------------------------------------------------------------------- /docs/README_CLI.md: -------------------------------------------------------------------------------- 1 | ### CLI chat 2 | 3 | The CLI can be used instead of the Gradio UI by running `generate.py` with some base model, e.g.: 4 | ```bash 5 | python generate.py --base_model=gptj --cli=True --answer_with_sources=False 6 | ``` 7 | and for LangChain run: 8 | ```bash 9 | python src/make_db.py --user_path=user_path --collection_name=UserData 10 | python generate.py --base_model=gptj --cli=True --langchain_mode=UserData --answer_with_sources=False 11 | ``` 12 | with documents in the `user_path` folder, or directly run: 13 | ```bash 14 | python generate.py --base_model=gptj --cli=True --langchain_mode=UserData --user_path=user_path --answer_with_sources=False 15 | ``` 16 | which will build the database the first time. One can also use any other model, like: 17 | ```bash 18 | python generate.py --base_model=h2oai/h2ogpt-oig-oasst1-512-6_9b --cli=True --langchain_mode=UserData --user_path=user_path --answer_with_sources=False 19 | ``` 20 | or for LLaMa2: 21 | ```bash 22 | python generate.py --base_model='llama' --prompt_type=llama2 --cli=True --langchain_mode=UserData --user_path=user_path --answer_with_sources=False 23 | ``` 24 | 25 | ### Evaluation 26 | 27 | To evaluate custom JSON data by having the LLM generate responses and/or reward scores, with parquet output, run: 28 | ```bash 29 | python generate.py --base_model=MYMODEL --eval_filename=MYFILE.json --eval_prompts_only_num=NPROMPTS 30 | ``` 31 | where NPROMPTS is the number of prompts in the JSON file to evaluate (which can be less than the total). See `tests/test_eval.py::test_eval_json` for a test code example. 32 | -------------------------------------------------------------------------------- /docs/README_CPU.md: -------------------------------------------------------------------------------- 1 | ## CPU Details 2 | 3 | Details that do not depend on whether you are running on CPU under Linux, Windows, or macOS. 
 4 | 5 | ### LLaMa.cpp 6 | 7 | The default llama.cpp model is a LLaMa2 GGUF model from TheBloke: 8 | 9 | * Run the LLaMa.cpp LLaMa2 model: 10 | 11 | With documents in the `user_path` folder, run: 12 | ```bash 13 | # if you don't have wget, download to the repo folder using the link below 14 | wget https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf 15 | python generate.py --base_model='llama' --prompt_type=llama2 --score_model=None --langchain_mode='UserData' --user_path=user_path 16 | ``` 17 | 18 | For another llama.cpp model: 19 | 20 | * Choose from [TheBloke](https://huggingface.co/TheBloke), then with documents in the `user_path` folder, run: 21 | ```bash 22 | python generate.py --base_model=llama --model_path_llama=https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf --score_model=None --langchain_mode='UserData' --user_path=user_path 23 | ``` 24 | For `llama.cpp`-based models on CPU, on computers with low system RAM or slow CPUs, we recommend running: 25 | ```bash 26 | python generate.py --base_model=llama --model_path_llama=https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf --llamacpp_dict="{'use_mlock':False,'n_batch':256}" --max_seq_len=512 --score_model=None --langchain_mode='UserData' --user_path=user_path 27 | ``` 28 | 29 | ### GPT4ALL 30 | 31 | * Choose a model from the GPT4All Model Explorer ([GPT4All-J compatible model](https://gpt4all.io/index.html)). One does not need to download it manually; the GPT4ALL package will download it at runtime and put it into `.cache` like Hugging Face would. 32 | 33 | * With documents in the `user_path` folder, run: 34 | ```bash 35 | python generate.py --base_model=gptj --model_path_gptj=ggml-gpt4all-j-v1.3-groovy.bin --score_model=None --langchain_mode='UserData' --user_path=user_path 36 | ``` 37 | or 38 | ```bash 39 | python generate.py --base_model=gpt4all_llama --model_name_gpt4all_llama=ggml-wizardLM-7B.q4_2.bin --score_model=None --langchain_mode='UserData' --user_path=user_path 40 | ``` 41 | However, the `gptj` model often gives [no output](FAQ.md#gpt4all-not-producing-output), even outside h2oGPT. See [GPT4All](https://github.com/nomic-ai/gpt4all) for installation details if you encounter any issues. 42 | 43 | ### Low-memory 44 | 45 | For more information about low-memory recommendations, see [Low Memory](FAQ.md#low-memory-mode). 46 | 47 | -------------------------------------------------------------------------------- /docs/README_SerpAPI.md: -------------------------------------------------------------------------------- 1 | ## h2oGPT integration with LangChain and SerpAPI 2 | 3 | Web search augments the LLM context with additional information obtained from DuckDuckGo (can be changed in code) search results. 4 | 5 | * Install the search package 6 | ```bash 7 | pip install -r reqs_optional/requirements_optional_agents.txt 8 | ``` 9 | 10 | * Set up an account at https://serpapi.com/ (free accounts include a limited number of free searches) 11 | 12 | * Set the environment variable `SERPAPI_API_KEY` 13 | 14 | * Start h2oGPT as normal 15 | 16 | * You should see web search available in `Resources` 17 | 18 | * Additionally, the SEARCH agent will appear in `Resources` under `Agents`. These agents are highly experimental and work best with OpenAI at the moment. 
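Putting the SerpAPI steps above together, a minimal sketch assuming a bash shell and an already-created serpapi.com key (the base model and prompt type below are just examples; any supported model works):

```bash
# 1. Install the optional agents/search dependencies
pip install -r reqs_optional/requirements_optional_agents.txt

# 2. Expose the SerpAPI key to h2oGPT (put this in your shell profile to persist it)
export SERPAPI_API_KEY="your-serpapi-key"

# 3. Start h2oGPT as usual; web search then shows up under Resources in the UI
python generate.py --base_model=h2oai/h2ogpt-4096-llama2-7b-chat --prompt_type=llama2
```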
 19 | -------------------------------------------------------------------------------- /docs/README_quickstart.md: -------------------------------------------------------------------------------- 1 | # Quick Start 2 | 3 | ## Install 4 | 5 | To quickly try out h2oGPT with limited document Q/A capability, create a fresh Python 3.10 environment and run: 6 | * CPU or Mac (M1/M2): 7 | ```bash 8 | # for windows/mac use "set" or relevant environment setting mechanism 9 | export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" 10 | ``` 11 | * Linux/Windows CPU/CUDA/ROC: 12 | ```bash 13 | # for windows/mac use "set" or relevant environment setting mechanism 14 | export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu121 https://huggingface.github.io/autogptq-index/whl/cu121" 15 | # for cu118 use export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu118 https://huggingface.github.io/autogptq-index/whl/cu118" 16 | ``` 17 | Then choose your llama_cpp_python options by setting `CMAKE_ARGS` for whichever system you have, according to the [llama_cpp_python backend documentation](https://github.com/abetlen/llama-cpp-python?tab=readme-ov-file#supported-backends). 18 | E.g. CUDA on Linux: 19 | ```bash 20 | export GGML_CUDA=1 21 | export CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all" 22 | export FORCE_CMAKE=1 23 | ``` 24 | Note that for some reason things will fail with llama_cpp_python if you don't add all CUDA arches, and building with all those arches does take some time. 25 | Windows CUDA: 26 | ```cmdline 27 | set CMAKE_ARGS=-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all 28 | set GGML_CUDA=1 29 | set FORCE_CMAKE=1 30 | ``` 31 | Note that for some reason things will fail with llama_cpp_python if you don't add all CUDA arches, and building with all those arches does take some time. 32 | Metal M1/M2: 33 | ```bash 34 | export CMAKE_ARGS="-DLLAMA_METAL=on" 35 | export FORCE_CMAKE=1 36 | ``` 37 | Run the PyPI install: 38 | ```bash 39 | pip install h2ogpt 40 | ``` 41 | or manually install: 42 | ```bash 43 | 44 | git clone https://github.com/h2oai/h2ogpt.git 45 | cd h2ogpt 46 | pip install -r requirements.txt 47 | pip install -r reqs_optional/requirements_optional_langchain.txt 48 | 49 | pip uninstall llama_cpp_python llama_cpp_python_cuda -y 50 | pip install -r reqs_optional/requirements_optional_llamacpp_gpt4all.txt --no-cache-dir 51 | 52 | pip install -r reqs_optional/requirements_optional_langchain.urls.txt 53 | # GPL, only run next line if that is ok: 54 | pip install -r reqs_optional/requirements_optional_langchain.gpllike.txt 55 | ``` 56 | 57 | ## Chat with h2oGPT 58 | 59 | ```bash 60 | # choose up to 32768 if you have enough GPU memory: 61 | python generate.py --base_model=TheBloke/Mistral-7B-Instruct-v0.2-GGUF --prompt_type=mistral --max_seq_len=4096 62 | ``` 63 | Next, go to your browser by visiting [http://127.0.0.1:7860](http://127.0.0.1:7860) or [http://localhost:7860](http://localhost:7860). Choose 13B for a better model than 7B. 64 | 65 | #### Chat template based GGUF models 66 | 67 | For newer chat template models, a `--prompt_type` is not required on the CLI, but for GGUF files one should pass the HF tokenizer so it knows the chat template, e.g. 
for LLaMa-3: 68 | ```bash 69 | python generate.py --base_model=llama --model_path_llama=https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_M.gguf?download=true --tokenizer_base_model=meta-llama/Meta-Llama-3-8B-Instruct --max_seq_len=8192 70 | ``` 71 | Or for Phi: 72 | ```bash 73 | python generate.py --tokenizer_base_model=microsoft/Phi-3-mini-4k-instruct --base_model=llama --llama_cpp_model=https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf --max_seq_len=4096 74 | ``` 75 | the `--llama_cpp_path` could be a local path as well if you already downloaded it, or we will also check the `llamacpp_path` for the file. 76 | 77 | See [Offline](docs/README_offline.md#tldr) for how to run h2oGPT offline. 78 | 79 | --- 80 | 81 | Note that for all platforms, some packages such as DocTR, Unstructured, Florence-2, Stable Diffusion, etc. download models at runtime that appear to delay operations in the UI. The progress appears in the console logs. 82 | -------------------------------------------------------------------------------- /docs/autogen.patch: -------------------------------------------------------------------------------- 1 | --- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/autogen/token_count_utils.py 2024-07-29 21:31:51.630851528 -0700 2 | +++ /home/jon/token_count_utils.py 2024-07-30 19:13:10.160760647 -0700 3 | @@ -116,19 +116,9 @@ 4 | elif "gpt-4" in model: 5 | logger.info("gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.") 6 | return _num_token_from_messages(messages, model="gpt-4-0613") 7 | - elif "gemini" in model: 8 | - logger.info("Gemini is not supported in tiktoken. Returning num tokens assuming gpt-4-0613.") 9 | - return _num_token_from_messages(messages, model="gpt-4-0613") 10 | - elif "claude" in model: 11 | - logger.info("Claude is not supported in tiktoken. Returning num tokens assuming gpt-4-0613.") 12 | - return _num_token_from_messages(messages, model="gpt-4-0613") 13 | - elif "mistral-" in model or "mixtral-" in model: 14 | - logger.info("Mistral.AI models are not supported in tiktoken. Returning num tokens assuming gpt-4-0613.") 15 | - return _num_token_from_messages(messages, model="gpt-4-0613") 16 | else: 17 | - raise NotImplementedError( 18 | - f"""_num_token_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""" 19 | - ) 20 | + logger.info("%s model is not supported in tiktoken. Returning num tokens assuming gpt-4-0613." 
% model) 21 | + return _num_token_from_messages(messages, model="gpt-4-0613") 22 | num_tokens = 0 23 | for message in messages: 24 | num_tokens += tokens_per_message 25 | -------------------------------------------------------------------------------- /docs/autogen2.patch: -------------------------------------------------------------------------------- 1 | --- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/autogen/agentchat/conversable_agent.py 2024-08-30 22:59:32.130641859 -0700 2 | +++ /home/jon/conversable_agent.py 2024-08-30 22:59:52.451296579 -0700 3 | @@ -1453,7 +1453,7 @@ 4 | if num_code_blocks == 1: 5 | iostream.print( 6 | colored( 7 | - f"\n>>>>>>>> EXECUTING CODE BLOCK (inferred language is {code_blocks[0].language})...", 8 | + f"\n\n**EXECUTING CODE BLOCK (inferred language is {code_blocks[0].language})**\n\n", 9 | "red", 10 | ), 11 | flush=True, 12 | @@ -1461,7 +1461,7 @@ 13 | else: 14 | iostream.print( 15 | colored( 16 | - f"\n>>>>>>>> EXECUTING {num_code_blocks} CODE BLOCKS (inferred languages are [{', '.join([x.language for x in code_blocks])}])...", 17 | + f"\n\n**EXECUTING {num_code_blocks} CODE BLOCKS (inferred languages are [{', '.join([x.language for x in code_blocks])}])**\n\n", 18 | "red", 19 | ), 20 | flush=True, 21 | @@ -1757,7 +1757,7 @@ 22 | 23 | # print the no_human_input_msg 24 | if no_human_input_msg: 25 | - iostream.print(colored(f"\n>>>>>>>> {no_human_input_msg}", "red"), flush=True) 26 | + iostream.print(colored(f"\n\n**{no_human_input_msg}", "red"), flush=True) 27 | 28 | # stop the conversation 29 | if reply == "exit": 30 | @@ -1797,7 +1797,7 @@ 31 | # increment the consecutive_auto_reply_counter 32 | self._consecutive_auto_reply_counter[sender] += 1 33 | if self.human_input_mode != "NEVER": 34 | - iostream.print(colored("\n>>>>>>>> USING AUTO REPLY...", "red"), flush=True) 35 | + iostream.print(colored("\n\n**USING AUTO REPLY**\n\n", "red"), flush=True) 36 | 37 | return False, None 38 | 39 | @@ -1870,7 +1870,7 @@ 40 | 41 | # print the no_human_input_msg 42 | if no_human_input_msg: 43 | - iostream.print(colored(f"\n>>>>>>>> {no_human_input_msg}", "red"), flush=True) 44 | + iostream.print(colored(f"\n\n**{no_human_input_msg}", "red"), flush=True) 45 | 46 | # stop the conversation 47 | if reply == "exit": 48 | @@ -1910,7 +1910,7 @@ 49 | # increment the consecutive_auto_reply_counter 50 | self._consecutive_auto_reply_counter[sender] += 1 51 | if self.human_input_mode != "NEVER": 52 | - iostream.print(colored("\n>>>>>>>> USING AUTO REPLY...", "red"), flush=True) 53 | + iostream.print(colored("\n\n**USING AUTO REPLY**\n\n", "red"), flush=True) 54 | 55 | return False, None 56 | 57 | @@ -2142,7 +2142,7 @@ 58 | lang = infer_lang(code) 59 | iostream.print( 60 | colored( 61 | - f"\n>>>>>>>> EXECUTING CODE BLOCK {i} (inferred language is {lang})...", 62 | + f"\n\n**EXECUTING CODE BLOCK {i} (inferred language is {lang})**\n\n", 63 | "red", 64 | ), 65 | flush=True, 66 | @@ -2239,7 +2239,7 @@ 67 | # Try to execute the function 68 | if arguments is not None: 69 | iostream.print( 70 | - colored(f"\n>>>>>>>> EXECUTING FUNCTION {func_name}...", "magenta"), 71 | + colored(f"\n\n**EXECUTING FUNCTION {func_name}**\n\n", "magenta"), 72 | flush=True, 73 | ) 74 | try: 75 | @@ -2296,7 +2296,7 @@ 76 | # Try to execute the function 77 | if arguments is not None: 78 | iostream.print( 79 | - colored(f"\n>>>>>>>> EXECUTING ASYNC FUNCTION {func_name}...", "magenta"), 80 | + colored(f"\n\n**EXECUTING ASYNC FUNCTION {func_name}**\n\n", "magenta"), 81 | flush=True, 82 | ) 83 
| try: 84 | -------------------------------------------------------------------------------- /docs/aws_sagemaker_endpoint_setup.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/aws_sagemaker_endpoint_setup.pdf -------------------------------------------------------------------------------- /docs/build_windows_gpu.sh: -------------------------------------------------------------------------------- 1 | # https://pypi.org/project/pynsist/ 2 | # https://stackoverflow.com/questions/69352179/package-streamlit-app-and-run-executable-on-windows/69621578#69621578 3 | # see also https://stackoverflow.com/questions/17428199/python-windows-installer-with-all-dependencies 4 | # see also https://cyrille.rossant.net/create-a-standalone-windows-installer-for-your-python-application/ 5 | # see also https://pyinstaller.org/en/stable/operating-mode.html 6 | 7 | # install NSIS: 8 | # http://nsis.sourceforge.net/Download 9 | 10 | # pip install pynsist 11 | 12 | # 1) clear old build 13 | 14 | del build 15 | del wheels 16 | 17 | # 2) Follow through README_WINDOWS.md installation, then do: 18 | 19 | mkdir wheels 20 | cd wheels 21 | pip freeze > ..\docs\windows_freezelist.txt 22 | # file needs some edits for download 23 | pip download -r ..\docs\windows_freezelist.txt 24 | 25 | # extra things from tar.gz need to be wheel not just download: 26 | for /r %i in (*.tar.gz) do pip wheel %i 27 | for /r %i in (*.zip) do pip wheel %i 28 | 29 | # GPU (so package name not confusing to installer) 30 | ren exllama-0.0.18+cu118-cp310-cp310-win_amd64.whl exllama-0.0.18-cp310-cp310-win_amd64.whl 31 | ren torchvision-0.16.2+cu118-cp310-cp310-win_amd64.whl torchvision-0.16.2-cp310-cp310-win_amd64.whl 32 | del hnswlib-0.7.0-cp310-cp310-win_amd64.whl 33 | # others: 34 | pip wheel tabula==1.0.5 35 | 36 | # FIXME: 37 | # pip install --global-option build_ext --global-option --compiler=mingw32 pygobject 38 | 39 | cd .. 
40 | # Download: https://github.com/oschwartz10612/poppler-windows/releases/download/v23.08.0-0/Release-23.08.0-0.zip 41 | 42 | unzip Release-23.08.0-0.zip 43 | move poppler-23.08.0 poppler 44 | 45 | # Install: https://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-w64-setup-5.3.1.20230401.exe 46 | # copy from install path to local path 47 | mkdir Tesseract-OCR 48 | xcopy C:\Users\pseud\AppData\Local\Programs\Tesseract-OCR Tesseract-OCR /s /e /h # say specifies Directory 49 | 50 | python src/basic_nltk.py 51 | 52 | del C:\Users\pseud\AppData\Local\ms-playwright ms-playwright 53 | playwright install 54 | xcopy C:\Users\pseud\AppData\Local\ms-playwright ms-playwright /s /e /h # say specifies Directory 55 | 56 | # build 57 | python -m nsist windows_installer.cfg 58 | 59 | # test 60 | python run_app.py 61 | 62 | 63 | # these changes required for GPU build: 64 | #diff --git a/windows_installer.cfg b/windows_installer.cfg 65 | #index 120d284..ea71ea0 100644 66 | #--- a/windows_installer.cfg 67 | #+++ b/windows_installer.cfg 68 | #@@ -34,7 +34,7 @@ pypi_wheels = absl-py==1.4.0 69 | # Authlib==1.2.1 70 | # # GPU 71 | #- # auto_gptq==0.4.2 72 | #+ auto_gptq==0.4.2 73 | # backoff==2.2.1 74 | # beautifulsoup4==4.12.2 75 | # bioc==2.0 76 | #@@ -73,7 +73,7 @@ pypi_wheels = absl-py==1.4.0 77 | # exceptiongroup==1.1.2 78 | # execnet==2.0.2 79 | # # GPU: 80 | #- # exllama==0.0.13 81 | #+ exllama==0.0.13 82 | # fastapi==0.100.0 83 | # feedparser==6.0.10 84 | # ffmpy==0.3.1 85 | #@@ -123,9 +123,9 @@ pypi_wheels = absl-py==1.4.0 86 | # layoutparser==0.3.4 87 | # linkify-it-py==2.0.2 88 | # # CPU 89 | #- llama_cpp_python==0.1.73 90 | #+ # llama_cpp_python==0.1.73 91 | # # GPU 92 | #- # llama-cpp-python-cuda==0.1.73 93 | #+ llama-cpp-python-cuda==0.1.73 94 | # lm-dataformat==0.0.20 95 | # loralib==0.1.1 96 | # lxml==4.9.3 -------------------------------------------------------------------------------- /docs/chat_headerless.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/chat_headerless.png -------------------------------------------------------------------------------- /docs/chat_tabless.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/chat_tabless.png -------------------------------------------------------------------------------- /docs/chat_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/chat_view.png -------------------------------------------------------------------------------- /docs/google.patch: -------------------------------------------------------------------------------- 1 | --- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/langchain_google_genai/chat_models.py 2024-07-25 17:02:46.040222538 -0700 2 | +++ /home/jon/chat_models.py 2024-07-25 17:01:48.722952945 -0700 3 | @@ -550,7 +550,10 @@ 4 | for candidate in response.candidates: 5 | generation_info = {} 6 | if candidate.finish_reason: 7 | - generation_info["finish_reason"] = candidate.finish_reason.name 8 | + if hasattr(candidate.finish_reason, 'name'): 9 | + generation_info["finish_reason"] = candidate.finish_reason.name 10 | + else: 11 | + generation_info["finish_reason"] = 'unknown' 12 | generation_info["safety_ratings"] = [ 13 | 
proto.Message.to_dict(safety_rating, use_integers_for_enums=False) 14 | for safety_rating in candidate.safety_ratings 15 | -------------------------------------------------------------------------------- /docs/h2o-qr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/h2o-qr.png -------------------------------------------------------------------------------- /docs/img-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/img-small.png -------------------------------------------------------------------------------- /docs/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/img.png -------------------------------------------------------------------------------- /docs/langchain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/langchain.png -------------------------------------------------------------------------------- /docs/linux_install_full.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o pipefail 3 | set -ex 4 | 5 | echo -e "\n\n\n\t\tSTART\n\n\n"; 6 | 7 | # ensure not in h2ogpt repo folder 8 | cd $HOME 9 | 10 | # Check if the h2ogpt directory already exists 11 | if [ -d "h2ogpt" ]; then 12 | echo "h2ogpt directory exists. Updating the repository." 13 | cd h2ogpt 14 | git stash 2>&1 15 | git pull 2>&1 16 | else 17 | echo "h2ogpt directory does not exist. Cloning the repository." 18 | git clone https://github.com/h2oai/h2ogpt.git 19 | cd h2ogpt 20 | fi 21 | 22 | if ! command -v conda &> /dev/null; then 23 | echo "Conda not found, installing Miniconda." 24 | wget https://repo.anaconda.com/miniconda/Miniconda3-py310_23.1.0-1-Linux-x86_64.sh 25 | bash ./Miniconda3-py310_23.1.0-1-Linux-x86_64.sh -b -u 26 | source ~/miniconda3/bin/activate 27 | conda init bash 28 | conda deactivate 29 | else 30 | echo "Conda is already installed." 31 | source ~/miniconda3/bin/activate 32 | conda init bash 33 | conda deactivate 34 | fi 35 | 36 | if [ "$CONDA_DEFAULT_ENV" = "h2ogpt" ]; then 37 | echo "Deactivating the h2ogpt Conda environment." 38 | conda deactivate 39 | else 40 | echo "The h2ogpt Conda environment is not currently activated." 41 | fi 42 | 43 | echo "Installing fresh h2oGPT env." 44 | if conda env list | grep -q 'h2ogpt'; then 45 | conda remove -n h2ogpt --all -y 46 | else 47 | echo "h2ogpt environment does not exist." 
48 | fi 49 | conda update conda -y 50 | conda create -n h2ogpt -y 51 | conda activate h2ogpt 52 | conda install python=3.10 -c conda-forge -y 53 | 54 | export CUDA_HOME=/usr/local/cuda-12.1 55 | export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu121" 56 | export GGML_CUDA=1 57 | export CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all" 58 | export FORCE_CMAKE=1 59 | 60 | # get patches 61 | curl -O https://h2o-release.s3.amazonaws.com/h2ogpt/run_patches.sh 62 | curl -O https://h2o-release.s3.amazonaws.com/h2ogpt/trans.patch 63 | curl -O https://h2o-release.s3.amazonaws.com/h2ogpt/xtt.patch 64 | curl -O https://h2o-release.s3.amazonaws.com/h2ogpt/trans2.patch 65 | curl -O https://h2o-release.s3.amazonaws.com/h2ogpt/google.patch 66 | mkdir -p docs 67 | alias cp='cp' 68 | cp run_patches.sh trans.patch xtt.patch trans2.patch google.patch docs/ 69 | 70 | echo "Installing fresh h2oGPT" 71 | set +x 72 | export GPLOK=1 73 | curl -fsSL https://h2o-release.s3.amazonaws.com/h2ogpt/linux_install.sh | bash 74 | 75 | 76 | echo -e "\n\n\n\t\t h2oGPT installation FINISHED\n\n\n"; 77 | -------------------------------------------------------------------------------- /docs/llamasmall.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/llamasmall.jpg -------------------------------------------------------------------------------- /docs/minicondashellsmall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/minicondashellsmall.png -------------------------------------------------------------------------------- /docs/minigw32small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/minigw32small.png -------------------------------------------------------------------------------- /docs/model_add.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/model_add.png -------------------------------------------------------------------------------- /docs/models_compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/models_compare.png -------------------------------------------------------------------------------- /docs/models_lock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/models_lock.png -------------------------------------------------------------------------------- /docs/ollama_max_seq_len.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ollama_max_seq_len.png -------------------------------------------------------------------------------- /docs/ollama_setup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ollama_setup.png 
-------------------------------------------------------------------------------- /docs/ollama_use.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ollama_use.png -------------------------------------------------------------------------------- /docs/onedocselected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/onedocselected.png -------------------------------------------------------------------------------- /docs/only_all_sources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/only_all_sources.png -------------------------------------------------------------------------------- /docs/openai.patch: -------------------------------------------------------------------------------- 1 | --- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/openai/_streaming.py 2024-07-30 23:11:13.902075163 -0700 2 | +++ /home/jon/_streaming.py 2024-07-30 23:08:59.651464011 -0700 3 | @@ -58,6 +58,8 @@ 4 | for sse in iterator: 5 | if sse.data.startswith("[DONE]"): 6 | break 7 | + if sse.event == 'ping': 8 | + continue 9 | 10 | if sse.event is None: 11 | data = sse.json() 12 | -------------------------------------------------------------------------------- /docs/openwebui1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/openwebui1.png -------------------------------------------------------------------------------- /docs/openwebui2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/openwebui2.png -------------------------------------------------------------------------------- /docs/pytubefix.patch: -------------------------------------------------------------------------------- 1 | --- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/pytubefix/extract.py 2024-08-17 02:11:12.847159003 -0700 2 | +++ /home/jon/extract.py 2024-08-17 02:10:57.622790026 -0700 3 | @@ -103,6 +103,7 @@ 4 | :returns: 5 | Playability status and reason of the video. 6 | """ 7 | + return None, [] 8 | status_dict = player_response.get('playabilityStatus', {}) 9 | if 'liveStreamability' in status_dict: 10 | return 'LIVE_STREAM', 'Video is a live stream.' 
11 | -------------------------------------------------------------------------------- /docs/run_patches.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o pipefail 3 | set -ex 4 | 5 | # 6 | #* Deal with not-thread-safe things in LangChain: 7 | # 8 | sp=`python3.10 -c 'import site; print(site.getsitepackages()[0])'` 9 | sed -i 's/with HiddenPrints():/if True:/g' $sp/langchain_community/utilities/serpapi.py 10 | #sed -i 's/"progress": Status.PROGRESS,/"progress": Status.PROGRESS,\n "heartbeat": Status.PROGRESS,/g' gradio_client/utils.py 11 | #sed -i 's/async for line in response.aiter_text():/async for line in response.aiter_lines():\n if len(line) == 0:\n continue\n if line == """{"detail":"Not Found"}""":\n continue/g' gradio_client/utils.py 12 | 13 | # aggressively remove thread-unsafe reassignment of stderr stdout 14 | # WIP 15 | # find "$sp" -type f -name "*.py" -exec sed -i -E 's/(sys\.stdout\s*=\s*.*)/pass # \1/; s/(sys\.stderr\s*=\s*.*)/pass # \1/' {} + 16 | 17 | # use pytubefix instead, pytube too old and various issues 18 | #sed -i 's/Pytube/PytubeFix/g' $sp/fiftyone/utils/youtube.py 19 | #sed -i 's/pytube>=15/pytube>=6/g' $sp/fiftyone/utils/youtube.py 20 | #sed -i 's/pytube/pytubefix/g' $sp/fiftyone/utils/youtube.py 21 | 22 | # diff -Naru /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/pytubefix/extract.py ~/extract.py > docs/pytubefix.patch 23 | #patch $sp/pytubefix/extract.py docs/pytubefix.patch 24 | 25 | # fix asyncio same way websockets was fixed, else keep hitting errors in async calls 26 | # https://github.com/python-websockets/websockets/commit/f9fd2cebcd42633ed917cd64e805bea17879c2d7 27 | sed -i "s/except OSError:/except (OSError, RuntimeError):/g" $sp/anyio/_backends/_asyncio.py 28 | 29 | # https://github.com/gradio-app/gradio/issues/7086 30 | sed -i 's/while True:/while True:\n time.sleep(0.001)\n/g' $sp/gradio_client/client.py 31 | 32 | # diff -Naru $sp/transformers/modeling_utils.py modeling_utils.py > docs/trans.patch 33 | patch $sp/transformers/modeling_utils.py docs/trans.patch 34 | 35 | # diff -Naru /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/TTS/tts/layers/xtts/stream_generator.py new.py > docs/xtt.patch 36 | patch $sp/TTS/tts/layers/xtts/stream_generator.py docs/xtt.patch 37 | 38 | # diff -Naru /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/transformers/generation/utils.py ~/utils.py > docs/trans2.patch 39 | patch $sp/transformers/generation/utils.py docs/trans2.patch 40 | 41 | # diff -Naru /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/langchain_google_genai/chat_models.py ~/chat_models.py > docs/google.patch 42 | patch $sp/langchain_google_genai/chat_models.py docs/google.patch 43 | 44 | # diff -Naru /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/autogen/token_count_utils.py ~/token_count_utils.py > docs/autogen.patch 45 | patch $sp/autogen/token_count_utils.py docs/autogen.patch 46 | 47 | # diff -Naru /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/autogen/agentchat/conversable_agent.py ~/conversable_agent.py > docs/autogen2.patch 48 | patch $sp/autogen/agentchat/conversable_agent.py docs/autogen2.patch 49 | 50 | # diff -Naru /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/openai/_streaming.py ~/_streaming.py > docs/openai.patch 51 | patch $sp/openai/_streaming.py docs/openai.patch 52 | 53 | find $sp/flaml/ -type f -name '*.py' -exec sed -i 's/^except ImportError:/except (ModuleNotFoundError, 
ImportError):/g' {} + 54 | -------------------------------------------------------------------------------- /docs/saved_chats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/saved_chats.png -------------------------------------------------------------------------------- /docs/setup_docker_linux.sh: -------------------------------------------------------------------------------- 1 | # BUILD 2 | sudo apt-get update 3 | sudo apt install software-properties-common 4 | sudo apt-get install build-essential 5 | 6 | # DRIVER + toolkit 7 | sudo apt-get update 8 | sudo apt-get -y install nvidia-headless-535-server nvidia-fabricmanager-535 nvidia-utils-535-server 9 | 10 | # wget wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run 11 | # sudo sh cuda_11.8.0_520.61.05_linux.run 12 | 13 | wget https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run 14 | sudo sh cuda_12.1.0_530.30.02_linux.run 15 | 16 | echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/usr/local/cuda/lib64/" >> ~/.bashrc 17 | echo "export CUDA_HOME=/usr/local/cuda" >> ~/.bashrc 18 | echo "export PATH=\$PATH:/usr/local/cuda/bin/" >> ~/.bashrc 19 | echo "sudo nvidia-smi -pm 1" >> ~/.bashrc 20 | 21 | # reboot after driver installed if installed driver, else no need if just cuda toolkit added, then just logout and log back in or do: source ~/.bashrc 22 | 23 | # DOCKER 24 | sudo apt update 25 | sudo apt install -y apt-transport-https ca-certificates curl software-properties-common 26 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - 27 | sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu focal stable" 28 | apt-cache policy docker-ce 29 | sudo apt install -y docker-ce 30 | sudo systemctl status docker 31 | 32 | sudo usermod -aG docker $USER 33 | 34 | distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID) \ 35 | && curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \ 36 | && curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.list | \ 37 | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ 38 | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list 39 | sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit-base 40 | sudo apt install -y nvidia-container-runtime 41 | sudo nvidia-ctk runtime configure --runtime=docker 42 | sudo systemctl restart docker 43 | 44 | sudo docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi 45 | docker pull gcr.io/vorvan/h2oai/h2ogpt-runtime:0.2.1 46 | 47 | # no need to reboot 48 | 49 | # if / is too small, can move docker to new location, change /etc/docker/daemon.json to have: 50 | # 51 | #{ 52 | # "runtimes": { 53 | # "nvidia": { 54 | # "args": [], 55 | # "path": "nvidia-container-runtime" 56 | # } 57 | # }, 58 | # "data-root": "/ephemeral/docker-data" 59 | #} 60 | # then run: 61 | # docker image prune -a 62 | # sudo systemctl restart docker 63 | -------------------------------------------------------------------------------- /docs/trans.patch: -------------------------------------------------------------------------------- 1 | --- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/transformers/modeling_utils.py 2024-06-17 10:32:30.807673675 -0700 2 | +++ modeling_utils.py 2024-06-17 10:32:12.827315293 -0700 3 | @@ -3412,7 +3412,12 @@ 4 | "_commit_hash": commit_hash, 5 | **has_file_kwargs, 6 | } 7 | - if not has_file(pretrained_model_name_or_path, safe_weights_name, **has_file_kwargs): 8 | + import requests 9 | + try: 10 | + has_file_res = has_file(pretrained_model_name_or_path, safe_weights_name, **has_file_kwargs) 11 | + except requests.exceptions.ConnectionError: 12 | + has_file_res = False 13 | + if not has_file_res: 14 | Thread( 15 | target=auto_conversion, 16 | args=(pretrained_model_name_or_path,), 17 | -------------------------------------------------------------------------------- /docs/trans2.patch: -------------------------------------------------------------------------------- 1 | --- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/transformers/generation/utils.py 2024-07-25 14:52:00.809023484 -0700 2 | +++ /home/jon/utils.py 2024-07-25 14:51:31.280385967 -0700 3 | @@ -695,9 +695,9 @@ 4 | dim=-1, 5 | ) 6 | 7 | - if model_kwargs.get("use_cache", True): 8 | + if model_kwargs.get("use_cache", True) and "cache_position" in model_kwargs: 9 | model_kwargs["cache_position"] = model_kwargs["cache_position"][-1:] + num_new_tokens 10 | - else: 11 | + elif "cache_position" in model_kwargs: 12 | past_positions = model_kwargs.pop("cache_position") 13 | new_positions = torch.arange( 14 | past_positions[-1] + 1, past_positions[-1] + num_new_tokens + 1, dtype=past_positions.dtype 15 | @@ -868,8 +868,8 @@ 16 | ) 17 | if ( 18 | generation_config.min_length is not None 19 | - and generation_config._eos_token_tensor is not None 20 | and generation_config.min_length > 0 21 | + and generation_config._eos_token_tensor is not None 22 | ): 23 | processors.append( 24 | MinLengthLogitsProcessor( 25 | @@ -880,8 +880,8 @@ 26 | ) 27 | if ( 28 | generation_config.min_new_tokens is not None 29 | - and generation_config._eos_token_tensor is not None 30 | and generation_config.min_new_tokens > 0 31 | + 
and generation_config._eos_token_tensor is not None 32 | ): 33 | processors.append( 34 | MinNewTokensLengthLogitsProcessor( 35 | @@ -997,7 +997,7 @@ 36 | "stop strings, you must pass the model's tokenizer to the `tokenizer` argument of `generate`." 37 | ) 38 | criteria.append(StopStringCriteria(stop_strings=generation_config.stop_strings, tokenizer=tokenizer)) 39 | - if generation_config._eos_token_tensor is not None: 40 | + if hasattr(generation_config, '_eos_token_tensor') and generation_config._eos_token_tensor is not None: 41 | criteria.append(EosTokenCriteria(eos_token_id=generation_config._eos_token_tensor)) 42 | criteria = self._merge_criteria_processor_list(criteria, stopping_criteria) 43 | return criteria 44 | -------------------------------------------------------------------------------- /docs/ui_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ui_1.png -------------------------------------------------------------------------------- /docs/ui_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ui_10.png -------------------------------------------------------------------------------- /docs/ui_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ui_2.png -------------------------------------------------------------------------------- /docs/ui_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ui_3.png -------------------------------------------------------------------------------- /docs/ui_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ui_4.png -------------------------------------------------------------------------------- /docs/ui_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ui_5.png -------------------------------------------------------------------------------- /docs/ui_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ui_6.png -------------------------------------------------------------------------------- /docs/ui_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ui_7.png -------------------------------------------------------------------------------- /docs/ui_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ui_8.png -------------------------------------------------------------------------------- /docs/ui_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ui_9.png 
-------------------------------------------------------------------------------- /docs/ui_9b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ui_9b.png -------------------------------------------------------------------------------- /docs/ui_9c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ui_9c.png -------------------------------------------------------------------------------- /docs/ui_talk_to_images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/ui_talk_to_images.png -------------------------------------------------------------------------------- /docs/voice_clone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/voice_clone.png -------------------------------------------------------------------------------- /docs/vs2022small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/docs/vs2022small.png -------------------------------------------------------------------------------- /docs/xtt.patch: -------------------------------------------------------------------------------- 1 | --- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/TTS/tts/layers/xtts/stream_generator.py 2024-07-14 17:49:58.051220434 -0700 2 | +++ new.py 2024-07-14 17:49:44.570938022 -0700 3 | @@ -183,10 +183,12 @@ 4 | requires_attention_mask = "encoder_outputs" not in model_kwargs 5 | 6 | if model_kwargs.get("attention_mask", None) is None and requires_attention_mask and accepts_attention_mask: 7 | + pad_token_tensor = torch.tensor([generation_config.pad_token_id], device=inputs_tensor.device) if generation_config.pad_token_id is not None else None 8 | + eos_token_tensor = torch.tensor([generation_config.eos_token_id], device=inputs_tensor.device) if generation_config.eos_token_id is not None else None 9 | model_kwargs["attention_mask"] = self._prepare_attention_mask_for_generation( 10 | inputs_tensor, 11 | - generation_config.pad_token_id, 12 | - generation_config.eos_token_id, 13 | + pad_token_tensor, 14 | + eos_token_tensor, 15 | ) 16 | 17 | # decoder-only models should use left-padding for generation 18 | @@ -409,7 +411,7 @@ 19 | ) 20 | elif is_sample_gen_stream_mode: 21 | # 11. prepare logits warper 22 | - logits_warper = self._get_logits_warper(generation_config) 23 | + logits_warper = self._get_logits_warper(generation_config, device=inputs_tensor.device) 24 | 25 | # 12. 
expand input_ids with `num_return_sequences` additional sequences per batch 26 | input_ids, model_kwargs = self._expand_inputs_for_generation( 27 | -------------------------------------------------------------------------------- /generate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | if os.path.dirname(os.path.abspath(__file__)) not in sys.path: 5 | sys.path.append(os.path.dirname(os.path.abspath(__file__))) 6 | 7 | from src.utils_sys import protect_stdout_stderr 8 | 9 | protect_stdout_stderr() 10 | 11 | from src.gen import main 12 | from src.utils import H2O_Fire 13 | 14 | 15 | def entrypoint_main(): 16 | H2O_Fire(main) 17 | 18 | 19 | if __name__ == "__main__": 20 | entrypoint_main() 21 | -------------------------------------------------------------------------------- /gradio_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/gradio_utils/__init__.py -------------------------------------------------------------------------------- /gradio_utils/yield_utils.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class ReturnType(BaseModel): 5 | reply: str | list[str] | None 6 | reply_final: str | list[str] | None = None 7 | prompt_raw: str | None = None 8 | actual_llm: str | None = None 9 | text_context_list: list[str] | None = [] 10 | input_tokens: int = 0 11 | output_tokens: int = 0 12 | tokens_per_second: float = 0.0 13 | time_to_first_token: float = 0.0 14 | trial: int = 0 15 | vision_visible_model: str | None = None 16 | vision_batch_input_tokens: int = 0 17 | vision_batch_output_tokens: int = 0 18 | vision_batch_tokens_per_second: float = 0.0 19 | files: list[str] | list[dict[str, str]] | None = [] 20 | files_pdf: list[str] | list[dict[str, str]] | None = [] 21 | chat_history: list[dict[str, str]] | None = [] 22 | chat_history_md: str | None = "" 23 | -------------------------------------------------------------------------------- /h2o-logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /h2ogpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/h2ogpt/__init__.py -------------------------------------------------------------------------------- /h2ogpt/generate.py: -------------------------------------------------------------------------------- 1 | ../generate.py -------------------------------------------------------------------------------- /h2ogpt/gradio_utils: -------------------------------------------------------------------------------- 1 | ../gradio_utils -------------------------------------------------------------------------------- /h2ogpt/iterators: -------------------------------------------------------------------------------- 1 | ../iterators -------------------------------------------------------------------------------- /h2ogpt/metrics: -------------------------------------------------------------------------------- 1 | ../metrics -------------------------------------------------------------------------------- /h2ogpt/models: -------------------------------------------------------------------------------- 1 | ../models 
-------------------------------------------------------------------------------- /h2ogpt/openai_server: -------------------------------------------------------------------------------- 1 | ../openai_server -------------------------------------------------------------------------------- /h2ogpt/spkemb: -------------------------------------------------------------------------------- 1 | ../spkemb -------------------------------------------------------------------------------- /h2ogpt/src: -------------------------------------------------------------------------------- 1 | ../src -------------------------------------------------------------------------------- /helm/h2ogpt-chart/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /helm/h2ogpt-chart/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: h2ogpt 3 | description: A Helm chart for h2ogpt 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 18 | version: 0.1.0-288 19 | 20 | # This is the version number of the application being deployed. This version number should be 21 | # incremented each time you make changes to the application. Versions are not expected to 22 | # follow Semantic Versioning. They should reflect the version the application is using. 23 | # It is recommended to use it with quotes. 24 | appVersion: 0.1.0-288 25 | -------------------------------------------------------------------------------- /helm/h2ogpt-chart/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 3 | */}} 4 | {{- define "h2ogpt.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 
12 | */}} 13 | {{- define "h2ogpt.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Allow the release namespace to be overridden. 28 | */}} 29 | {{- define "h2ogpt.namespace" -}} 30 | {{- default .Release.Namespace .Values.namespaceOverride | trunc 63 | trimSuffix "-" -}} 31 | {{- end -}} 32 | 33 | 34 | {{/* 35 | Create chart name and version as used by the chart label. 36 | */}} 37 | {{- define "h2ogpt.chart" -}} 38 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 39 | {{- end }} 40 | 41 | {{/* 42 | Common labels 43 | */}} 44 | {{- define "h2ogpt.labels" -}} 45 | helm.sh/chart: {{ include "h2ogpt.chart" . }} 46 | {{ include "h2ogpt.selectorLabels" . }} 47 | {{- if .Chart.AppVersion }} 48 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 49 | {{- end }} 50 | app.kubernetes.io/managed-by: {{ .Release.Service }} 51 | {{- end }} 52 | 53 | {{/* 54 | Selector labels 55 | */}} 56 | {{- define "h2ogpt.selectorLabels" -}} 57 | app.kubernetes.io/name: {{ include "h2ogpt.name" . }} 58 | app.kubernetes.io/instance: {{ .Release.Name }} 59 | {{- end }} 60 | 61 | {{/* 62 | Create the name of the service account to use 63 | */}} 64 | {{- define "h2ogpt.serviceAccountName" -}} 65 | {{- if .Values.serviceAccount.create }} 66 | {{- default (include "h2ogpt.fullname" .) .Values.serviceAccount.name }} 67 | {{- else }} 68 | {{- default "default" .Values.serviceAccount.name }} 69 | {{- end }} 70 | {{- end }} 71 | -------------------------------------------------------------------------------- /helm/h2ogpt-chart/templates/config-map.yaml: -------------------------------------------------------------------------------- 1 | 2 | {{- if .Values.h2ogpt.enabled }} 3 | apiVersion: v1 4 | kind: ConfigMap 5 | metadata: 6 | name: {{ include "h2ogpt.fullname" . }}-config 7 | namespace: {{ include "h2ogpt.namespace" . | quote }} 8 | labels: 9 | {{- include "h2ogpt.labels" . | nindent 4 }} 10 | data: 11 | {{- range $key, $value := .Values.h2ogpt.overrideConfig }} 12 | {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} 13 | {{- end }} 14 | {{- end }} 15 | --- 16 | {{- if .Values.tgi.enabled }} 17 | apiVersion: v1 18 | kind: ConfigMap 19 | metadata: 20 | name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config 21 | namespace: {{ include "h2ogpt.namespace" . | quote }} 22 | labels: 23 | {{- include "h2ogpt.labels" . | nindent 4 }} 24 | data: 25 | {{- range $key, $value := .Values.tgi.overrideConfig }} 26 | {{ printf "%s" $key | upper }}: {{ $value | quote }} 27 | {{- end }} 28 | {{- end }} 29 | --- 30 | {{- if .Values.vllm.enabled }} 31 | apiVersion: v1 32 | kind: ConfigMap 33 | metadata: 34 | name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config 35 | namespace: {{ include "h2ogpt.namespace" . | quote }} 36 | labels: 37 | {{- include "h2ogpt.labels" . 
| nindent 4 }} 38 | data: 39 | {{- range $key, $value := .Values.vllm.overrideConfig }} 40 | {{ printf "%s" $key | upper }}: {{ $value | quote }} 41 | {{- end }} 42 | {{- end }} 43 | --- 44 | {{- if .Values.lmdeploy.enabled }} 45 | apiVersion: v1 46 | kind: ConfigMap 47 | metadata: 48 | name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config 49 | namespace: {{ include "h2ogpt.namespace" . | quote }} 50 | labels: 51 | {{- include "h2ogpt.labels" . | nindent 4 }} 52 | data: 53 | {{- range $key, $value := .Values.lmdeploy.overrideConfig }} 54 | {{ printf "%s" $key | upper }}: {{ $value | quote }} 55 | {{- end }} 56 | {{- end }} 57 | --- 58 | {{- if .Values.caCertificates}} 59 | apiVersion: v1 60 | kind: ConfigMap 61 | metadata: 62 | name: {{ include "h2ogpt.fullname" . }}-ca-certificates 63 | namespace: {{ include "h2ogpt.namespace" . | quote }} 64 | labels: 65 | {{- include "h2ogpt.labels" . | nindent 4 }} 66 | data: 67 | root-ca-bundle.crt: | 68 | {{ .Values.caCertificates | nindent 4 | trim }} 69 | {{- end }} 70 | -------------------------------------------------------------------------------- /helm/h2ogpt-chart/templates/service.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.h2ogpt.enabled }} 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ include "h2ogpt.fullname" . }}-web 6 | namespace: {{ include "h2ogpt.namespace" . | quote }} 7 | 8 | {{- with .Values.h2ogpt.service.webServiceAnnotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | spec: 13 | selector: 14 | app: {{ include "h2ogpt.fullname" . }} 15 | ports: 16 | - name: http 17 | protocol: TCP 18 | port: {{ .Values.h2ogpt.service.webPort }} 19 | targetPort: 7860 20 | - name: openai 21 | protocol: TCP 22 | port: {{ .Values.h2ogpt.service.openaiPort }} 23 | targetPort: 5000 24 | - name: function 25 | protocol: TCP 26 | port: {{ .Values.h2ogpt.service.functionPort }} 27 | targetPort: 5002 28 | - name: agent 29 | protocol: TCP 30 | port: {{ .Values.h2ogpt.service.agentsPort }} 31 | targetPort: 5004 32 | type: {{ .Values.h2ogpt.service.type }} 33 | {{- end }} 34 | --- 35 | {{- if .Values.h2ogpt.enabled }} 36 | apiVersion: v1 37 | kind: Service 38 | metadata: 39 | name: {{ include "h2ogpt.fullname" . }} 40 | namespace: {{ include "h2ogpt.namespace" . | quote }} 41 | spec: 42 | selector: 43 | app: {{ include "h2ogpt.fullname" . }} 44 | ports: 45 | - protocol: TCP 46 | port: {{ .Values.h2ogpt.service.gptPort }} 47 | targetPort: 8888 48 | type: {{ .Values.h2ogpt.service.type }} 49 | {{- end }} 50 | --- 51 | {{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} 52 | apiVersion: v1 53 | kind: Service 54 | metadata: 55 | name: {{ include "h2ogpt.fullname" . }}-tgi-inference 56 | namespace: {{ include "h2ogpt.namespace" . | quote }} 57 | spec: 58 | selector: 59 | app: {{ include "h2ogpt.fullname" . }}-tgi-inference 60 | ports: 61 | - protocol: TCP 62 | port: {{ .Values.tgi.service.port }} 63 | targetPort: 80 64 | type: {{ .Values.tgi.service.type }} 65 | {{- end }} 66 | --- 67 | {{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }} 68 | apiVersion: v1 69 | kind: Service 70 | metadata: 71 | name: {{ include "h2ogpt.fullname" . }}-vllm-inference 72 | namespace: {{ include "h2ogpt.namespace" . | quote }} 73 | spec: 74 | selector: 75 | app: {{ include "h2ogpt.fullname" . 
}}-vllm-inference 76 | ports: 77 | - protocol: TCP 78 | port: {{ .Values.vllm.service.port }} 79 | targetPort: 5000 80 | type: {{ .Values.vllm.service.type }} 81 | {{- end }} 82 | --- 83 | {{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled ) }} 84 | apiVersion: v1 85 | kind: Service 86 | metadata: 87 | name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference 88 | namespace: {{ include "h2ogpt.namespace" . | quote }} 89 | spec: 90 | selector: 91 | app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference 92 | ports: 93 | - protocol: TCP 94 | port: {{ .Values.lmdeploy.service.port }} 95 | targetPort: 23333 96 | type: {{ .Values.lmdeploy.service.type }} 97 | {{- end }} 98 | -------------------------------------------------------------------------------- /iterators/__init__.py: -------------------------------------------------------------------------------- 1 | from .timeout_iterator import TimeoutIterator, AsyncTimeoutIterator 2 | from .iterator_pipe import IteratorPipe, AsyncIteratorPipe 3 | 4 | __all__ = ["TimeoutIterator", "AsyncTimeoutIterator", "IteratorPipe", "AsyncIteratorPipe"] -------------------------------------------------------------------------------- /iterators/iterator_pipe.py: -------------------------------------------------------------------------------- 1 | import queue 2 | import asyncio 3 | 4 | 5 | class IteratorPipe: 6 | """ 7 | Iterator Pipe creates an iterator that can be fed in data from another block of code or thread of execution 8 | """ 9 | 10 | def __init__(self, sentinel=object()): 11 | self._q = queue.Queue() 12 | self._sentinel = sentinel 13 | self._sentinel_pushed = False 14 | self._closed = False 15 | 16 | def __iter__(self): 17 | return self 18 | 19 | def __next__(self): 20 | if self._closed: 21 | raise StopIteration 22 | 23 | data = self._q.get(block=True) 24 | if data is self._sentinel: 25 | self._closed = True 26 | raise StopIteration 27 | 28 | return data 29 | 30 | def put(self, data) -> bool: 31 | """ 32 | Pushes next item to Iterator and returns True 33 | If iterator has been closed via close(), doesn't push anything and returns False 34 | """ 35 | if self._sentinel_pushed: 36 | return False 37 | 38 | self._q.put(data) 39 | return True 40 | 41 | def close(self): 42 | """ 43 | Close is idempotent. Calling close multiple times is safe 44 | Iterator will raise StopIteration only after all elements pushed before close have been iterated 45 | """ 46 | # make close idempotent 47 | if not self._sentinel_pushed: 48 | self._sentinel_pushed = True 49 | self._q.put(self._sentinel) 50 | 51 | 52 | class AsyncIteratorPipe: 53 | 54 | def __init__(self, sentinel=object()): 55 | self._q = asyncio.Queue() 56 | self._sentinel = sentinel 57 | self._sentinel_pushed = False 58 | self._closed = False 59 | 60 | def __aiter__(self): 61 | return self 62 | 63 | async def __anext__(self): 64 | if self._closed: 65 | raise StopAsyncIteration 66 | 67 | data = await self._q.get() 68 | if data is self._sentinel: 69 | self._closed = True 70 | raise StopAsyncIteration 71 | 72 | return data 73 | 74 | async def put(self, data) -> bool: 75 | """ 76 | Pushes next item to Iterator and returns True 77 | If iterator has been closed via close(), doesn't push anything and returns False 78 | """ 79 | if self._sentinel_pushed: 80 | return False 81 | 82 | await self._q.put(data) 83 | return True 84 | 85 | async def close(self): 86 | """ 87 | Close is idempotent. 
Calling close multiple times is safe 88 | Iterator will raise StopIteration only after all elements pushed before close have been iterated 89 | """ 90 | # make close idempotent 91 | if not self._sentinel_pushed: 92 | self._sentinel_pushed = True 93 | await self._q.put(self._sentinel) 94 | -------------------------------------------------------------------------------- /metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/metrics/__init__.py -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/__init__.py -------------------------------------------------------------------------------- /models/anthropic.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/anthropic.jpeg -------------------------------------------------------------------------------- /models/anthropic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/anthropic.png -------------------------------------------------------------------------------- /models/female.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/female.wav -------------------------------------------------------------------------------- /models/google.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/google.png -------------------------------------------------------------------------------- /models/h2oai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/h2oai.png -------------------------------------------------------------------------------- /models/hf-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/hf-logo.png -------------------------------------------------------------------------------- /models/human.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/human.jpg -------------------------------------------------------------------------------- /models/human.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/human.png -------------------------------------------------------------------------------- /models/lama.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/lama.jpeg -------------------------------------------------------------------------------- 
/models/lama2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/lama2.jpeg -------------------------------------------------------------------------------- /models/llava.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/llava.png -------------------------------------------------------------------------------- /models/longalpaca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/longalpaca.png -------------------------------------------------------------------------------- /models/makevllm.sh: -------------------------------------------------------------------------------- 1 | pip download openai==1.3.7 --no-deps 2 | mkdir -p openai_wheel 3 | mv openai-1.3.7-py3-none-any.whl openai_wheel 4 | cd openai_wheel 5 | unzip openai-1.3.7-py3-none-any.whl 6 | rm -rf openai-1.3.7-py3-none-any.whl 7 | 8 | mv openai-1.3.7.dist-info openvllm-1.3.7.dist-info 9 | mv openai openvllm 10 | 11 | find . -name '*.py' | xargs sed -i 's/from openai /from openvllm /g' 12 | find . -name '*.py' | xargs sed -i 's/openai\./openvllm./g' 13 | find . -name '*.py' | xargs sed -i 's/from openai\./from openvllm./g' 14 | find . -name '*.py' | xargs sed -i 's/import openai/import openvllm/g' 15 | find . -name '*.py' | xargs sed -i 's/OpenAI/vLLM/g' 16 | find . -type f | xargs sed -i 's/ openai/ openvllm/g' 17 | find . -type f | xargs sed -i 's/openai /openvllm /g' 18 | find . -type f | xargs sed -i 's/OpenAI/vLLM/g' 19 | find . -type f | xargs sed -i 's/\/openai/\/vllm/g' 20 | find . -type f | xargs sed -i 's/openai\./openvllm\./g' 21 | find . -type f | xargs sed -i 's/OPENAI/OPENVLLM/g' 22 | find . -type f | xargs sed -i 's/openai\//openvllm\//g' 23 | find . -type f | xargs sed -i 's/"openai"/"openvllm"/g' 24 | find . -type f | xargs sed -i 's/_has_openai_credentials/_has_openvllm_credentials/g' 25 | find . -type f | xargs sed -i 's/openai-/openvllm-/g' 26 | find . -type f | xargs sed -i 's/:openai:/:openavllm:/g' 27 | 28 | # add stop_token_ids everywhere frequency_penalty exists. 
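# Hedged aside (not part of the original script): once the wheel is rebuilt
# below, the renamed package can be smoke-tested with something like:
#   pip install ./openvllm-1.3.7-py3-none-any.whl
#   python -c "import openvllm"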
29 | 30 | rm -rf openvllm-1.3.7-py3-none-any.whl 31 | zip -r openvllm-1.3.7-py3-none-any.whl openvllm-1.3.7.dist-info openvllm 32 | -------------------------------------------------------------------------------- /models/male.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/male.wav -------------------------------------------------------------------------------- /models/meta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/meta.png -------------------------------------------------------------------------------- /models/mistralai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/mistralai.png -------------------------------------------------------------------------------- /models/openai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/openai.png -------------------------------------------------------------------------------- /models/openchat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/openchat.png -------------------------------------------------------------------------------- /models/pirate_by_coqui.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/pirate_by_coqui.wav -------------------------------------------------------------------------------- /models/test_scrape1.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ["COQUI_TOS_AGREED"] = "1" 3 | 4 | 5 | import pytest 6 | from tests.utils import wrap_test_forked 7 | 8 | from TTS.api import TTS 9 | 10 | @pytest.mark.parametrize( 11 | "model_name", 12 | TTS().list_models() 13 | ) 14 | @wrap_test_forked 15 | def test_get_models(model_name): 16 | import torch 17 | from TTS.api import TTS 18 | 19 | # Get device 20 | device = "cuda" if torch.cuda.is_available() else "cpu" 21 | # Init TTS 22 | tts = TTS(model_name).to(device) 23 | 24 | # Run TTS 25 | # ❗ Since this model is multi-lingual voice cloning model, we must set the target speaker_wav and language 26 | # Text to speech list of amplitude values as output 27 | try: 28 | wav = tts.tts(text="Hello world!", speaker_wav="./models/male.wav", language="en") 29 | # Text to speech to a file 30 | tts.tts_to_file(text="Hello world!", speaker_wav="./models/male.wav", language="en", file_path="output.wav") 31 | except ValueError: 32 | wav = tts.tts(text="Hello world!", speaker_wav="./models/male.wav") 33 | # Text to speech to a file 34 | tts.tts_to_file(text="Hello world!", speaker_wav="./models/male.wav", file_path="output.wav") 35 | 36 | # files are located in e.g. /home/jon/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v1.1 37 | # downloaded from e.g. 
https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--glow-tts.zip 38 | # all stored in https://h2o-release.s3.amazonaws.com/h2ogpt/tts_in_.local_share_tts.tgz -------------------------------------------------------------------------------- /models/vicuna.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/vicuna.jpeg -------------------------------------------------------------------------------- /models/wizard.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/models/wizard.jpg -------------------------------------------------------------------------------- /models/yi.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /openai_server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/openai_server/__init__.py -------------------------------------------------------------------------------- /openai_server/agent_tools/aider_code_generation.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import subprocess 4 | import sys 5 | 6 | try: 7 | from importlib.metadata import distribution, PackageNotFoundError 8 | assert distribution('aider-chat') is not None 9 | have_aider = True 10 | except (PackageNotFoundError, AssertionError): 11 | have_aider = False 12 | 13 | 14 | def install_aider(): 15 | if not have_aider: 16 | subprocess.check_call([sys.executable, "-m", "pip", "install", "aider-chat>=0.59.0"]) 17 | print("Successfully installed aider-chat.") 18 | 19 | 20 | def main(): 21 | # Install aider-chat if not already installed 22 | try: 23 | import aider 24 | except ImportError: 25 | print("aider-chat not found. 
Installing...") 26 | install_aider() 27 | 28 | # Now we can safely import from aider 29 | from aider.coders import Coder 30 | from aider.models import Model 31 | from aider.io import InputOutput 32 | 33 | default_max_time = int(os.getenv('H2OGPT_AGENT_OPENAI_TIMEOUT', "120")) 34 | 35 | parser = argparse.ArgumentParser(description="Aider Coding Tool") 36 | parser.add_argument("--model", type=str, help="Model to use for coding assistance") 37 | parser.add_argument("--files", nargs="+", required=False, help="Files to work on") 38 | parser.add_argument("--output_dir", type=str, default="aider_output", help="Directory for output files") 39 | parser.add_argument("--prompt", "--query", type=str, required=True, help="Prompt or query for the coding task") 40 | parser.add_argument("--max_time", type=int, default=default_max_time, help="Maximum time in seconds for API calls") 41 | parser.add_argument("--verbose", action="store_true", help="Show verbose output") 42 | args = parser.parse_args() 43 | 44 | # Ensure output directory exists 45 | os.makedirs(args.output_dir, exist_ok=True) 46 | 47 | # Set up OpenAI-like client 48 | base_url = os.getenv('H2OGPT_OPENAI_BASE_URL') 49 | assert base_url is not None, "H2OGPT_OPENAI_BASE_URL environment variable is not set" 50 | server_api_key = os.getenv('H2OGPT_OPENAI_API_KEY', 'EMPTY') 51 | from openai import OpenAI 52 | client = OpenAI(base_url=base_url, api_key=server_api_key, timeout=args.max_time) 53 | 54 | # Set environment variables for Aider 55 | os.environ['OPENAI_API_KEY'] = server_api_key 56 | os.environ['OPENAI_API_BASE'] = base_url 57 | 58 | # Set up InputOutput with streaming enabled 59 | io = InputOutput( 60 | yes=True, 61 | chat_history_file=os.path.join(args.output_dir, "chat_history.txt"), 62 | pretty=True, 63 | ) 64 | 65 | # Determine which model to use 66 | if args.model: 67 | selected_model = args.model 68 | elif os.getenv('H2OGPT_AGENT_OPENAI_MODEL'): 69 | selected_model = os.getenv('H2OGPT_AGENT_OPENAI_MODEL') 70 | else: 71 | # Only fetch the model list if we need to use the default 72 | model_list = client.models.list() 73 | selected_model = model_list.data[0].id 74 | 75 | print(f"Using model: {selected_model}") 76 | 77 | # Set up Model 78 | main_model = Model(selected_model) 79 | 80 | # Set up Coder with streaming enabled 81 | coder = Coder.create( 82 | main_model=main_model, 83 | fnames=args.files if args.files else [], 84 | io=io, 85 | stream=True, 86 | use_git=False, 87 | edit_format="diff" 88 | #edit_format="whole" # required for weaker models 89 | ) 90 | 91 | # Run the prompt 92 | output = coder.run(args.prompt) 93 | 94 | # Save the output 95 | output_file = os.path.join(args.output_dir, "aider_output.txt") 96 | with open(output_file, "w") as f: 97 | f.write(output) 98 | 99 | if args.verbose: 100 | print(f"Task completed. 
Output saved to {output_file}") 101 | 102 | 103 | if __name__ == "__main__": 104 | main() 105 | -------------------------------------------------------------------------------- /openai_server/agent_tools/audio_transcription.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import uuid 4 | 5 | 6 | def check_valid_extension(file): 7 | """ 8 | OpenAI only allows certain file types 9 | :param file: 10 | :return: 11 | """ 12 | valid_extensions = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm'] 13 | 14 | # Get the file extension (convert to lowercase for case-insensitive comparison) 15 | _, file_extension = os.path.splitext(file) 16 | file_extension = file_extension.lower().lstrip('.') 17 | 18 | if file_extension not in valid_extensions: 19 | raise ValueError( 20 | f"Invalid file extension. Expected one of {', '.join(valid_extensions)}, but got '{file_extension}'") 21 | 22 | return True 23 | 24 | 25 | def main(): 26 | parser = argparse.ArgumentParser(description="Get transcription of an audio (or audio in video) file") 27 | parser.add_argument("--input", type=str, required=True, help="Path to the input audio-video file") 28 | # Model 29 | parser.add_argument("--model", type=str, required=False, 30 | help="Model name (For Azure deployment name must match actual model name, e.g. whisper-1)") 31 | # File name 32 | parser.add_argument("--output", "--file", type=str, default='', required=False, 33 | help="Path (ensure unique) to output text file") 34 | args = parser.parse_args() 35 | ## 36 | if not args.model: 37 | args.model = os.getenv('STT_OPENAI_MODEL', 'whisper-1') 38 | 39 | stt_url = os.getenv("STT_OPENAI_BASE_URL", None) 40 | assert stt_url is not None, "STT_OPENAI_BASE_URL environment variable is not set" 41 | 42 | stt_api_key = os.getenv('STT_OPENAI_API_KEY') 43 | if stt_url == "https://api.openai.com/v1" or 'openai.azure.com' in stt_url: 44 | assert stt_api_key, "STT_OPENAI_API_KEY environment variable is not set and is required if using OpenAI or Azure endpoints" 45 | 46 | if 'openai.azure.com' in stt_url: 47 | # https://learn.microsoft.com/en-us/azure/ai-services/openai/whisper-quickstart?tabs=command-line%2Cpython-new%2Cjavascript&pivots=programming-language-python 48 | from openai import AzureOpenAI 49 | client = AzureOpenAI( 50 | api_version="2024-02-01", 51 | api_key=stt_api_key, 52 | # like base_url, but Azure endpoint like https://PROJECT.openai.azure.com/ 53 | azure_endpoint=stt_url, 54 | azure_deployment=args.model, 55 | ) 56 | else: 57 | from openai import OpenAI 58 | client = OpenAI(base_url=stt_url, api_key=stt_api_key) 59 | 60 | check_valid_extension(args.input) 61 | else: 62 | from openai import OpenAI 63 | stt_api_key = os.getenv('STT_OPENAI_API_KEY', 'EMPTY') 64 | client = OpenAI(base_url=stt_url, api_key=stt_api_key) 65 | 66 | # Read the audio file 67 | with open(args.input, "rb") as f: 68 | transcription = client.audio.transcriptions.create( 69 | model=args.model, 70 | file=f, 71 | response_format="text", 72 | ) 73 | if hasattr(transcription, 'text'): 74 | trans = transcription.text 75 | else: 76 | trans = transcription 77 | # Save the image to a file 78 | if not args.output: 79 | args.output = f"transcription_{str(uuid.uuid4())[:6]}.txt" 80 | # Write the transcription to a file 81 | with open(args.output, "wt") as f: 82 | f.write(trans) 83 | 84 | full_path = os.path.abspath(args.output) 85 | print(f"Transcription successfully saved to the file: {full_path}") 86 | # generally too much, have agent 
read if too long for context of LLM 87 | if len(trans) < 1024: 88 | print(f"Audio file successfully transcribed as follows:\n\n{trans}") 89 | 90 | print("""\n\nRemember, use ask_question_about_documents.py to ask questions about the transcription. This is usually preferred over trying to extract information blindly using python regexp etc.""") 91 | 92 | 93 | if __name__ == "__main__": 94 | main() 95 | -------------------------------------------------------------------------------- /openai_server/agent_tools/common/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import uuid 4 | from urllib.parse import urlparse 5 | 6 | import requests 7 | 8 | 9 | def is_url_valid_and_alive(url, timeout=5): 10 | try: 11 | # Check if the URL is valid 12 | result = urlparse(url) 13 | if all([result.scheme, result.netloc]): 14 | # Try to send a GET request to the URL 15 | response = requests.get(url, timeout=timeout) 16 | # If the status code is less than 400, consider it alive 17 | return response.status_code < 400 18 | else: 19 | return False 20 | except requests.exceptions.RequestException: 21 | return False 22 | 23 | 24 | def filename_is_url(filename): 25 | if filename and (filename.startswith('http://') or filename.startswith('https://') or filename.startswith('www.')): 26 | if is_url_valid_and_alive(filename): 27 | return True 28 | return False 29 | 30 | 31 | def download_simple(url, dest=None, overwrite=False, verbose=False): 32 | if dest is None: 33 | dest = os.path.basename(url) 34 | base_path = os.path.dirname(dest) 35 | if base_path: # else local path 36 | os.makedirs(base_path, exist_ok=True) 37 | dest = os.path.join(base_path, os.path.basename(dest)) 38 | 39 | if os.path.isfile(dest): 40 | if not overwrite: 41 | if verbose: 42 | print("Already have %s from url %s, delete file if invalid" % (dest, str(url)), flush=True) 43 | return dest 44 | else: 45 | os.remove(dest) 46 | 47 | if verbose: 48 | print("BEGIN get url %s" % str(url), flush=True) 49 | if url.startswith("file://"): 50 | from requests_file import FileAdapter 51 | s = requests.Session() 52 | s.mount('file://', FileAdapter()) 53 | url_data = s.get(url, stream=True) 54 | else: 55 | url_data = requests.get(url, stream=True) 56 | if verbose: 57 | print("GOT url %s" % str(url), flush=True) 58 | 59 | if url_data.status_code != requests.codes.ok: 60 | msg = "Cannot get url %s, code: %s, reason: %s" % ( 61 | str(url), 62 | str(url_data.status_code), 63 | str(url_data.reason), 64 | ) 65 | raise requests.exceptions.RequestException(msg) 66 | url_data.raw.decode_content = True 67 | 68 | uuid_tmp = str(uuid.uuid4())[:6] 69 | dest_tmp = dest + "_dl_" + uuid_tmp + ".tmp" 70 | 71 | # Sizes in bytes. 
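# Descriptive note: the response is streamed into a uniquely named temporary
# file in fixed-size chunks so large downloads never need to fit in memory,
# then moved onto the final destination path below.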
72 | block_size = 1024 73 | with open(dest_tmp, "wb") as file: 74 | for data in url_data.iter_content(block_size): 75 | file.write(data) 76 | 77 | try: 78 | shutil.move(dest_tmp, dest) 79 | except (shutil.Error, FileExistsError): 80 | pass 81 | 82 | if verbose: 83 | print("DONE url %s" % str(url), flush=True) 84 | return dest 85 | -------------------------------------------------------------------------------- /openai_server/agent_tools/query_to_web_image.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | 3 | matplotlib.use('Agg') # Set the backend to non-interactive 4 | import matplotlib.pyplot as plt 5 | 6 | plt.ioff() 7 | import os 8 | 9 | os.environ['TERM'] = 'dumb' 10 | import requests 11 | from serpapi import GoogleSearch 12 | from PIL import Image 13 | from io import BytesIO 14 | import os 15 | import argparse 16 | 17 | 18 | def download_image(text, file, save_dir='.'): 19 | # Ensure the save directory exists 20 | os.makedirs(save_dir, exist_ok=True) 21 | 22 | # Set up the search parameters 23 | params = { 24 | "engine": "google_images", 25 | "q": text, 26 | "api_key": os.getenv("SERPAPI_API_KEY") 27 | } 28 | 29 | # Perform the search 30 | search = GoogleSearch(params) 31 | results = search.get_dict() 32 | 33 | # Check if we have image results 34 | if "images_results" in results and len(results["images_results"]) > 0: 35 | # Get the first image result 36 | image_url = results["images_results"][0]["original"] 37 | 38 | # Download the image 39 | response = requests.get(image_url) 40 | if response.status_code == 200: 41 | # Open the image and convert to RGB (in case it's RGBA) 42 | img = Image.open(BytesIO(response.content)).convert("RGB") 43 | 44 | # Generate a filename based on the query 45 | filepath = os.path.join(save_dir, file) 46 | 47 | # Save the image 48 | img.save(filepath) 49 | print(f"Image downloaded and saved as {filepath}") 50 | return filepath 51 | else: 52 | print(f"Failed to download image for text: {text}") 53 | return None 54 | else: 55 | print(f"No image results found for text: {text}") 56 | return None 57 | 58 | 59 | def main(): 60 | # check with assert if os.getenv("SERPAPI_API_KEY") is defined, if not, print a message 61 | assert os.getenv("SERPAPI_API_KEY"), "Please set the SERPAPI_API_KEY environment variable" 62 | 63 | parser = argparse.ArgumentParser(description="Download one image from the web based on a search text") 64 | parser.add_argument("--text", "--prompt", "--query", type=str, required=True, help="The text to search for") 65 | parser.add_argument("--output", "--file", type=str, help="The file name to save the image to") 66 | args = parser.parse_args() 67 | download_image(text=args.text, file=args.output) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /openai_server/cogvlm2_server/requirements.txt: -------------------------------------------------------------------------------- 1 | # https://raw.githubusercontent.com/THUDM/CogVLM2/main/basic_demo/requirements.txt 2 | xformers 3 | torch>=2.0.0 4 | torchvision 5 | transformers>=4.40 6 | huggingface-hub>=0.23.0 7 | pillow 8 | chainlit>=1.0 9 | pydantic>=2.7.1 10 | timm>=0.9.16 11 | openai>=1.30.1 12 | loguru>=0.7.2 13 | pydantic>=2.7.1 14 | einops 15 | sse-starlette>=2.1.0 16 | bitsandbytes>=0.43.1 # for int4 quantization 17 | -------------------------------------------------------------------------------- /openai_server/function_server.py: 
-------------------------------------------------------------------------------- 1 | ../src/function_server.py -------------------------------------------------------------------------------- /openai_server/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | # create logger 4 | logger = logging.getLogger('__name__') 5 | level = logging.INFO 6 | logger.setLevel(level) 7 | 8 | # ----> console info messages require these lines <---- 9 | # create console handler and set level to debug 10 | ch = logging.StreamHandler() 11 | ch.setLevel(level) 12 | 13 | # add ch to logger 14 | logger.addHandler(ch) 15 | -------------------------------------------------------------------------------- /openai_server/test_backend_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | def test_extract_xml_tags(): 5 | xml_input = """ 6 | 7 | Zulu is hot..pdf 8 | 1 9 | 10 | Zulu is hot. 11 | 12 | 13 | """ 14 | 15 | from openai_server.backend_utils import extract_xml_tags 16 | name_page_dict = extract_xml_tags(xml_input) 17 | assert name_page_dict == {'name': 'Zulu is hot..pdf', 'page': '1'} 18 | 19 | from openai_server.backend_utils import generate_unique_filename 20 | filename, clean_name, page = generate_unique_filename(name_page_dict) 21 | assert (filename, clean_name, page) == ('Zulu_is_hot__page_1.txt', 'Zulu_is_hot_', '1') 22 | 23 | 24 | def test_deduplicate_filenames(): 25 | original_filenames = [ 26 | "Zulu_is_hot__page_1.txt", 27 | "Zulu_is_hot__page_1.txt", 28 | "Zulu_is_hot__page_2.txt", 29 | "Another_document_page_1.txt", 30 | "Zulu_is_hot__page_1.txt" 31 | ] 32 | 33 | expected = [ 34 | "Zulu_is_hot__page_1_chunk_0.txt", 35 | "Zulu_is_hot__page_1_chunk_1.txt", 36 | "Zulu_is_hot__page_2.txt", 37 | "Another_document_page_1.txt", 38 | "Zulu_is_hot__page_1_chunk_2.txt" 39 | ] 40 | 41 | from openai_server.backend_utils import deduplicate_filenames 42 | result = deduplicate_filenames(original_filenames) 43 | assert result == expected, f"Expected: {expected}, but got: {result}" 44 | 45 | 46 | def test_generate_unique_filename_multiple_returns(): 47 | meta_datas = [ 48 | "Zulu is hot..pdf\n1", 49 | "Missing page.pdf", 50 | "5", 51 | "No XML tags here", 52 | "" 53 | ] 54 | 55 | from openai_server.backend_utils import generate_unique_filename 56 | from openai_server.backend_utils import extract_xml_tags 57 | results = [generate_unique_filename(extract_xml_tags(x)) for x in meta_datas] 58 | file_names, cleaned_names, pages = zip(*results) 59 | 60 | print("File names:", file_names) 61 | print("Cleaned names:", cleaned_names) 62 | print("Pages:", pages) 63 | 64 | # Assertions to verify the results 65 | assert len(file_names) == len(meta_datas) 66 | assert len(cleaned_names) == len(meta_datas) 67 | assert len(pages) == len(meta_datas) 68 | 69 | assert file_names[0] == "Zulu_is_hot__page_1.txt" 70 | assert cleaned_names[0] == "Zulu_is_hot_" 71 | assert pages[0] == "1" 72 | 73 | assert file_names[1].endswith("_page_0.txt") 74 | assert cleaned_names[1] == "Missing_page" 75 | assert pages[1] == "0" 76 | 77 | assert pages[2] == "5" 78 | assert file_names[3] == 'unknown_page_0.txt' 79 | assert file_names[4] == 'unknown_page_0.txt' 80 | 81 | 82 | def test_exif(): 83 | import pyexiv2 84 | img_file_one = 'tests/image_exif.jpg' 85 | with pyexiv2.Image(img_file_one) as img: 86 | metadata = img.read_exif() 87 | assert metadata is not None and metadata != {} 88 | print(metadata, file=sys.stderr) 89 | 
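# Hedged usage sketch (not part of the tests above): how the helpers verified
# here chain together when naming document chunks. The input dict mirrors the
# expected output of extract_xml_tags from the first test.
from openai_server.backend_utils import generate_unique_filename, deduplicate_filenames

name_page = {'name': 'Zulu is hot..pdf', 'page': '1'}
filename, clean_name, page = generate_unique_filename(name_page)  # 'Zulu_is_hot__page_1.txt'
unique = deduplicate_filenames([filename, filename])
# repeated names gain _chunk_<i> suffixes:
# ['Zulu_is_hot__page_1_chunk_0.txt', 'Zulu_is_hot__page_1_chunk_1.txt']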
-------------------------------------------------------------------------------- /papers/technical-report/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | latexmk -pdf h2oGPT-TR.tex 3 | -------------------------------------------------------------------------------- /papers/technical-report/h2oGPT-TR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/papers/technical-report/h2oGPT-TR.pdf -------------------------------------------------------------------------------- /papers/technical-report/images/chatbot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/papers/technical-report/images/chatbot.png -------------------------------------------------------------------------------- /papers/technical-report/images/h2oGPT-light.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/papers/technical-report/images/h2oGPT-light.pdf -------------------------------------------------------------------------------- /papers/technical-report/images/h2oGPT.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/papers/technical-report/images/h2oGPT.pdf -------------------------------------------------------------------------------- /papers/technical-report/images/langchain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/papers/technical-report/images/langchain.png -------------------------------------------------------------------------------- /papers/technical-report/images/llm-studio-logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/papers/technical-report/images/llm-studio-logo.pdf -------------------------------------------------------------------------------- /papers/technical-report/images/llmstudio1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/papers/technical-report/images/llmstudio1.png -------------------------------------------------------------------------------- /papers/technical-report/images/llmstudio2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/papers/technical-report/images/llmstudio2.png -------------------------------------------------------------------------------- /reqs_optional/reqs_constraints.txt: -------------------------------------------------------------------------------- 1 | # ensure doesn't drift, e.g. 
Issue #1348 2 | torch==2.2.1; sys_platform != "darwin" and platform_machine != "arm64" 3 | torch==2.3.1; sys_platform == "darwin" and platform_machine == "arm64" 4 | gradio==4.44.0 5 | gradio_client==1.3.0 6 | transformers>=4.45.1 7 | # https://github.com/langchain-ai/langchain/issues/22972 8 | tenacity==8.3.0 9 | pydantic==2.7.0 10 | # rust failure with 3.10.7 11 | orjson==3.10.6 12 | huggingface-hub==0.25.2 13 | -------------------------------------------------------------------------------- /reqs_optional/requirements_optional_agents.txt: -------------------------------------------------------------------------------- 1 | google-search-results>=2.4.2 2 | 3 | # for AutoGPT: 4 | duckduckgo-search>=4.1.1 5 | gradio_tools>=0.0.9 6 | wikipedia>=1.4.0 7 | wolframalpha>=5.0.0 8 | semanticscholar>=0.7.0 9 | sympy>=1.12 10 | 11 | 12 | # for AutoGen 13 | pyautogen==0.2.33 14 | # 2.3.0 breaks older autogen with xgboost import 15 | flaml==2.2.0 16 | pyautogen[redis] 17 | #pyautogen[ipython] 18 | pyautogen[retrievechat] 19 | pyautogen[lmm] 20 | #pyautogen[mathchat]<0.2 21 | pyautogen[graph] 22 | pyautogen[long-context] 23 | 24 | # helpers for AutoGen (most are already installed) 25 | sympy 26 | seaborn 27 | scikit-learn 28 | statsmodels 29 | plotly 30 | numpy 31 | lightgbm 32 | nltk 33 | spacy 34 | opencv-python 35 | opencv-python-headless 36 | textblob 37 | imageio 38 | bokeh 39 | altair 40 | # part of already-installed complex thing: 41 | #pysqlite3 42 | bs4 43 | requests 44 | lxml 45 | httpx 46 | # bit heavy and not normally installed: 47 | scrapy 48 | # selenium 49 | wolframalpha 50 | semanticscholar 51 | googlesearch-python 52 | google-search-results 53 | reportlab 54 | yfinance 55 | # too different deps like pandas 56 | # yahooquery 57 | 58 | # svg support 59 | svglib 60 | cairosvg 61 | 62 | # requires poppler from conda or apt-get 63 | pdf2image 64 | # for graphviz support 65 | pydot 66 | 67 | # old but light requirements 68 | PyPDF2 69 | 70 | # just to be sure stays around 71 | tzlocal 72 | 73 | # for plots 74 | seaborn 75 | 76 | # Aider tool 77 | # installs old tokenizers 0.19.1 due to litellm even if don't care 78 | # So only install in steps in linux_install.sh for now 79 | # aider-chat>=0.59.0 80 | 81 | # bing api 82 | # https://github.com/microsoft/bing-search-sdk-for-python/tree/main 83 | msrest 84 | azure-core 85 | azure-common 86 | msrestazure 87 | microsoft-bing-websearch 88 | microsoft-bing-visualsearch 89 | microsoft-bing-videosearch 90 | microsoft-bing-imagesearch 91 | microsoft-bing-newssearch 92 | #microsoft-bing-spellcheck 93 | #microsoft-bing-entitysearch 94 | #microsoft-bing-autosuggest 95 | microsoft-bing-customimagesearch 96 | microsoft-bing-customwebsearch 97 | 98 | # DAI: 99 | h2o_engine_manager 100 | h2o_authn -------------------------------------------------------------------------------- /reqs_optional/requirements_optional_audio.txt: -------------------------------------------------------------------------------- 1 | pydub>=0.25.1 2 | librosa>=0.10.1 3 | ffmpeg>=1.4 4 | yt_dlp>=2024.10.22 5 | wavio>=0.0.8 6 | # Audio speed-up and slowdown (best quality), if not installed can only speed-up with lower quality 7 | # pyrubberband>=0.3.0 8 | # stackoverflow.com/questions/75813603/python-working-with-sound-librosa-and-pyrubberband-conflict 9 | # pip uninstall -y pysoundfile soundfile 10 | soundfile==0.12.1 11 | # Optional: Only for testing for now 12 | # playsound==1.3.0 13 | # STT from microphone (may not be required if ffmpeg installed above) 14 | # for any TTS: 15 | 
torchaudio 16 | soundfile>=0.12.1 17 | # GPU Only: for Coqui XTTS (ensure CUDA_HOME set and consistent with added postfix for extra-index): 18 | # relaxed versions to avoid conflicts 19 | # TTS 20 | #deepspeed 21 | noisereduce 22 | emoji 23 | ffmpeg-python 24 | trainer 25 | pysbd 26 | coqpit 27 | # for Coqui XTTS language helpers (specific versions probably not required) 28 | cutlet>=0.3.0 29 | langid>=1.1.6 30 | g2pkk>=0.1.2 31 | jamo>=0.4.1 32 | gruut[de,es,fr]>=2.2.3 33 | jieba>=0.42.1 34 | # librosa==0.10.1 35 | # For faster whisper: 36 | # git+https://github.com/SYSTRAN/faster-whisper.git 37 | -------------------------------------------------------------------------------- /reqs_optional/requirements_optional_cpu_only.txt: -------------------------------------------------------------------------------- 1 | faiss-cpu>=1.7.4 2 | # for unstructured 3 | onnxruntime==1.15.0 -------------------------------------------------------------------------------- /reqs_optional/requirements_optional_doctr.txt: -------------------------------------------------------------------------------- 1 | python-doctr @ git+https://github.com/h2oai/doctr.git@aee9b1c369e37af9e18265660935bce2c4447d65 2 | weasyprint>=60.1 3 | imutils>=0.5.4 4 | opencv-python-headless>=4.8.1.78 5 | -------------------------------------------------------------------------------- /reqs_optional/requirements_optional_gpu_only.txt: -------------------------------------------------------------------------------- 1 | faiss-gpu>=1.7.2 2 | # for unstructured 3 | onnxruntime-gpu==1.15.0 4 | auto-gptq>=0.7.1 5 | #optimum>=1.17.1 6 | # autoawq for cuda 12.1, else build from source: https://github.com/casper-hansen/AutoAWQ?tab=readme-ov-file#build-from-source 7 | autoawq 8 | autoawq-kernels 9 | exllama @ https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-linux_x86_64.whl 10 | # See: Dao-AILab/flash-attention/issues/453 11 | # flash-attn==2.4.2 12 | -------------------------------------------------------------------------------- /reqs_optional/requirements_optional_image.txt: -------------------------------------------------------------------------------- 1 | # Vision/Image packages 2 | fiftyone>=0.24.1 3 | pytube 4 | diffusers>=0.29.0 5 | yt-dlp>=2024.8.6 6 | # if want to use gif_to_mp4() 7 | # moviepy>=0.5.1 8 | 9 | # for fiftyone with patches 10 | pytubefix==8.1.1 11 | -------------------------------------------------------------------------------- /reqs_optional/requirements_optional_langchain.gpllike.txt: -------------------------------------------------------------------------------- 1 | pymupdf>=1.23.8 # AGPL license 2 | pymupdf4llm>=0.0.12 # AGPL license 3 | # extract-msg==0.41.1 # GPL3 4 | -------------------------------------------------------------------------------- /reqs_optional/requirements_optional_langchain.metrics.txt: -------------------------------------------------------------------------------- 1 | bert_score>=0.3.13 2 | evaluate @ git+https://github.com/huggingface/evaluate@7d7d81dd3ffec0812e2edb09f86b3b1e31d61118 3 | sacremoses>=0.0.53 4 | absl-py 5 | nltk 6 | rouge_score>=0.1.2 7 | # below install tensorflow and downgrades numpy, so heavy dependency 8 | git+https://github.com/google-research/bleurt.git 9 | -------------------------------------------------------------------------------- /reqs_optional/requirements_optional_langchain.txt: -------------------------------------------------------------------------------- 1 | # ensure constrained to requirements.txt version: 2 | 
torch==2.2.1; sys_platform != "darwin" and platform_machine != "arm64" 3 | torch==2.3.1; sys_platform == "darwin" and platform_machine == "arm64" 4 | 5 | # optional for chat with PDF 6 | langchain==0.2.6 7 | langchain_experimental==0.0.62 8 | langchain-community==0.2.6 9 | langsmith==0.1.82 10 | langchain-core==0.2.23 11 | langchain-text-splitters==0.2.2 12 | #langchain_huggingface==0.0.3 13 | 14 | pypdf>=3.17.1 15 | # avoid textract, requires old six 16 | #textract==1.6.5 17 | pypdfium2>=4.24.0 18 | 19 | # for HF embeddings 20 | sentence_transformers>=3.0.1 21 | # https://github.com/h2oai/instructor-embedding/tree/h2ogpt 22 | # pip wheel . 23 | InstructorEmbedding @ https://h2o-release.s3.amazonaws.com/h2ogpt/InstructorEmbedding-1.0.1-py3-none-any.whl 24 | # https://github.com/h2oai/sentence-transformers/tree/h2ogpt 25 | # pip wheel . 26 | sentence_transformers_old @ https://h2o-release.s3.amazonaws.com/h2ogpt/sentence_transformers_old-2.2.2-py3-none-any.whl 27 | 28 | # optional: for OpenAI endpoint or embeddings (requires key) 29 | replicate>=0.26.0 30 | anthropic>=0.34.2 31 | langchain-anthropic>=0.1.20 32 | together>=1.1.5 33 | langchain_together==0.1.3 34 | langchain-openai>=0.1.8 35 | langchain-google-genai>=1.0.8 36 | google-generativeai>=0.7.2 37 | google-ai-generativelanguage>=0.6.6 38 | # pydantic version conflict 39 | #mistral_common==1.3.3 40 | 41 | llava @ https://h2o-release.s3.amazonaws.com/h2ogpt/llava-1.7.0.dev0-py3-none-any.whl 42 | 43 | #langchain_mistralai==0.1.2 # tokenizers<0.16.0, but transformers requires >=0.19 44 | httpx>=0.25.2 45 | httpx-sse>=0.3.1 46 | mistralai>=0.4.0 47 | # pydantic issue, don't need yet 48 | #mistral-common==1.0.2 49 | 50 | groq>=0.5.0 51 | langchain-groq>=0.1.5 52 | 53 | # local vector db 54 | chromadb==0.4.23 55 | 56 | pydantic-settings>=2.1.0 57 | 58 | # server vector db 59 | #pymilvus==2.2.8 60 | 61 | # weak url support, if can't install opencv etc. If comment-in this one, then comment-out unstructured[local-inference]==0.6.6 62 | # unstructured==0.8.1 63 | 64 | # strong support for images 65 | # Requires on Ubuntu: sudo apt-get install libmagic-dev poppler-utils tesseract-ocr libtesseract-dev libreoffice 66 | unstructured[local-inference]==0.12.5 67 | unstructured[all-docs]==0.12.5 68 | docx2txt==0.8 69 | python-docx==1.1.0 70 | #pdf2image==1.16.3 71 | #pytesseract==0.3.10 72 | pillow>=10.2.0 73 | posthog 74 | 75 | pdfminer.six==20231228 76 | urllib3 77 | requests_file 78 | 79 | #pdf2image==1.16.3 80 | #pytesseract==0.3.10 81 | tabulate>=0.9.0 82 | # FYI pandoc already part of requirements.txt 83 | 84 | # JSONLoader, but makes some trouble for some users 85 | # TRY: apt-get install autoconf libtool 86 | # unclear what happens on windows/mac for now 87 | jq>=1.4.1; platform_machine == "x86_64" 88 | 89 | # to check licenses 90 | # Run: pip-licenses|grep -v 'BSD\|Apache\|MIT' 91 | pip-licenses>=4.3.0 92 | 93 | # weaviate vector db 94 | # required for httpx for mistralai 95 | weaviate-client==3.26.2 96 | 97 | # vllm==0.2.2 98 | 99 | # only gradio>=4 100 | gradio_pdf>=0.0.7 101 | 102 | gradio_tools>=0.0.9 103 | 104 | # Qdrant - https://qdrant.tech vector database 105 | qdrant-client>=1.8.0 106 | 107 | # MIT: 108 | arxiv>=2.1.3 -------------------------------------------------------------------------------- /reqs_optional/requirements_optional_langchain.urls.txt: -------------------------------------------------------------------------------- 1 | # sometimes unstructured fails, these work in those cases. 
See Issue #320 2 | playwright>=1.37.0 3 | # requires Chrome binary to be in path 4 | selenium>=4.11.2 5 | html2text>=2020.1.16 6 | bs4>=0.0.1 -------------------------------------------------------------------------------- /reqs_optional/requirements_optional_llamacpp_gpt4all.txt: -------------------------------------------------------------------------------- 1 | gpt4all==1.0.5 2 | 3 | # requires env to be set for specific systems 4 | llama-cpp-python==0.2.87 5 | 6 | -------------------------------------------------------------------------------- /reqs_optional/requirements_optional_training.txt: -------------------------------------------------------------------------------- 1 | #xformers==0.0.20 2 | # optional for finetune 3 | tensorboard>=2.13.0 4 | neptune>=1.2.0 5 | -------------------------------------------------------------------------------- /reqs_optional/requirements_optional_wikiprocessing.txt: -------------------------------------------------------------------------------- 1 | # Only for converting full wiki into db, not required to use db for wiki_full 2 | mwxml>=0.3.3 3 | mwparserfromhell>=0.6.4 4 | 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # no websockets, more cloud friendly 2 | # able to make gradio clean-up states 3 | 4 | # gradio @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio-4.25.0-py3-none-any.whl 5 | # gradio_client @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio_client-0.15.0-py3-none-any.whl 6 | #gradio @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio-4.20.1-py3-none-any.whl 7 | #gradio_client==0.11.0 8 | # gradio @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio-4.26.0-py3-none-any.whl 9 | # gradio_client @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio_client-0.15.1-py3-none-any.whl 10 | 11 | gradio==4.44.0 12 | gradio_client==1.3.0 13 | 14 | uvicorn[standard] 15 | gunicorn 16 | fastapi-utils 17 | sse_starlette>=1.8.2 18 | # consrained by tokenizers etc.: 19 | huggingface_hub==0.25.2 20 | appdirs>=1.4.4 21 | fire>=0.5.0 22 | docutils>=0.20.1 23 | torch==2.2.1; sys_platform != "darwin" and platform_machine != "arm64" 24 | torch==2.3.1; sys_platform == "darwin" and platform_machine == "arm64" 25 | evaluate>=0.4.0 26 | rouge_score>=0.1.2 27 | sacrebleu>=2.3.1 28 | scikit-learn>=1.2.2 29 | # optional (need to uncomment code in gradio_runner.py for import of better_profanity) 30 | # alt-profanity-check==1.2.2 31 | # better-profanity==0.7.0 32 | numpy>=1.23.4,<2.0 33 | pandas>=2.0.2 34 | matplotlib>=3.7.1 35 | 36 | # transformers 37 | loralib>=0.1.2 38 | bitsandbytes>=0.43.1; sys_platform != "darwin" and platform_machine != "arm64" 39 | #bitsandbytes downgraded because of Mac M1/M2 support issue. 
See https://github.com/axolotl-ai-cloud/axolotl/issues/1436 40 | bitsandbytes==0.42.0; sys_platform == "darwin" and platform_machine == "arm64" 41 | accelerate>=0.30.1 42 | peft>=0.7.0 43 | transformers>=4.45.1 44 | jinja2>=3.1.0 45 | tokenizers>=0.19.0 46 | hf_transfer>=0.1.6 47 | #optimum>=1.17.1 48 | datasets>=2.18.0 49 | sentencepiece>=0.2.0 50 | 51 | APScheduler>=3.10.1 52 | 53 | # optional for generate 54 | pynvml>=11.5.0 55 | psutil>=5.9.5 56 | boto3>=1.26.101 57 | botocore>=1.29.101 58 | beautifulsoup4>=4.12.2 59 | markdown>=3.4.3 60 | 61 | # data and testing 62 | pytest>=7.2.2 63 | pytest-xdist>=3.2.1 64 | nltk>=3.8.1 65 | textstat>=0.7.3 66 | # pandoc==2.3 67 | pypandoc>=1.11; sys_platform == "darwin" and platform_machine == "arm64" 68 | pypandoc_binary>=1.11; platform_machine == "x86_64" 69 | pypandoc_binary>=1.11; platform_system == "Windows" 70 | python-magic-bin>=0.4.14; platform_system == "Windows" 71 | openpyxl>=3.1.2 72 | lm_dataformat>=0.0.20 73 | bioc>=2.0 74 | 75 | # for HF embeddings 76 | sentence_transformers>=3.0.1 77 | InstructorEmbedding @ https://h2o-release.s3.amazonaws.com/h2ogpt/InstructorEmbedding-1.0.1-py3-none-any.whl 78 | sentence_transformers_old @ https://h2o-release.s3.amazonaws.com/h2ogpt/sentence_transformers_old-2.2.2-py3-none-any.whl 79 | 80 | # falcon 81 | einops>=0.6.1 82 | 83 | # for gpt4all .env file, but avoid worrying about imports 84 | python-dotenv>=1.0.0 85 | 86 | json_repair>=0.21.0 87 | 88 | text-generation>=0.7.0 89 | 90 | # for tokenization when don't have HF tokenizer 91 | tiktoken>=0.5.2 92 | 93 | # optional: for OpenAI endpoint 94 | openai>=1.40.1 95 | slowapi>=0.1.9 96 | 97 | # for image metadata 98 | pyexiv2 99 | 100 | requests>=2.31.0 101 | httpx>=0.24.1 102 | urllib3>=1.26.16 103 | filelock>=3.12.2 104 | joblib>=1.3.1 105 | tqdm>=4.65.0 106 | tabulate>=0.9.0 107 | packaging>=23.1 108 | 109 | jsonschema>=4.23.0 110 | spacy==3.7.5 -------------------------------------------------------------------------------- /spaces/chatbot/repo_to_spaces.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # NOTE: start in h2ogpt repo base directory 4 | # i.e. can run below to update both spaces (assumes repos already existed, else will have to login HF for each) 5 | # (h2ollm) jon@pseudotensor:~/h2ogpt$ ./spaces/chatbot/repo_to_spaces.sh h2ogpt-chatbot ; ./spaces/chatbot/repo_to_spaces.sh h2ogpt-chatbot2 6 | 7 | spacename=${1:-h2ogpt-chatbot} 8 | echo "Space name: $spacename" 9 | 10 | # NOTE: start in h2ogpt repo base directory 11 | 12 | h2ogpt_hash="$(git rev-parse HEAD)" 13 | 14 | ln -sr generate.py spaces/chatbot/ 15 | mkdir -p spaces/chatbot/src/ 16 | ln -sr src/gen.py src/evaluate_params.py src/gradio_runner.py src/gradio_themes.py h2o-logo.svg LICENSE src/stopping.py src/prompter.py src/enums.py src/utils.py src/utils_langchain.py src/client_test.py src/gpt_langchain.py src/create_data.py src/h2oai_pipeline.py src/gpt4all_llm.py src/loaders.py requirements.txt iterators reqs_optional gradio_utils src/serpapi.py src/db_utils.py spaces/chatbot/src 17 | cd .. 
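# Descriptive note: at this point the entry points and source files have been
# symlinked into spaces/chatbot/; the steps below re-clone the target HF Space,
# copy the dereferenced files into it, and commit the update.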
18 | 19 | rm -rf "${spacename}" 20 | git clone https://huggingface.co/spaces/h2oai/"${spacename}" 21 | cd "${spacename}" 22 | git reset --hard origin/main 23 | git pull --rebase 24 | rm -rf app.py generate.py src 25 | cd ../h2ogpt/spaces/chatbot/ 26 | cp -rL generate.py ../../../"${spacename}"/ 27 | mkdir -p ../../../"${spacename}"/src/ 28 | cp -rL src/* ../../../"${spacename}"/src/ 29 | cd ../../../"${spacename}"/ 30 | 31 | ln -s generate.py app.py 32 | 33 | # for langchain support and gpt4all support 34 | mv requirements.txt requirements.txt.001 35 | # avoid gpt4all, hit ERROR: Could not build wheels for llama-cpp-python, which is required to install pyproject.toml-based projects 36 | #cat requirements.txt.001 requirements_optional_langchain.txt requirements_optional_llamacpp_gpt4all.txt >> requirements.txt 37 | cat requirements.txt.001 reqs_optional/requirements_optional_langchain.txt reqs_optional/requirements_optional_langchain.txt reqs_optional/requirements_optional_gpu_only.txt reqs_optional/requirements_optional_langchain.gpllike.txt >> requirements.txt 38 | rm -rf requirements.txt.001 39 | 40 | git add app.py generate.py src/* 41 | git commit -m "Update with h2oGPT hash ${h2ogpt_hash}" 42 | # ensure write token used and login with git control: huggingface-cli login --token --add-to-git-credential 43 | git push 44 | 45 | echo "WARNING: Also change sdk_version: x.xx.xx in README.md in space" 46 | -------------------------------------------------------------------------------- /spaces/demo/app.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | import torch 3 | import os 4 | from transformers import AutoTokenizer, AutoModelForCausalLM 5 | 6 | theme = gr.themes.Monochrome( 7 | primary_hue="indigo", 8 | secondary_hue="blue", 9 | neutral_hue="slate", 10 | radius_size=gr.themes.sizes.radius_sm, 11 | font=[gr.themes.GoogleFont("Open Sans"), "ui-sans-serif", "system-ui", "sans-serif"], 12 | ) 13 | 14 | auth_token = os.environ.get("SECRET_TOKEN") or True 15 | 16 | from h2oai_pipeline import H2OTextGenerationPipeline 17 | 18 | model_name = "h2oai/h2ogpt-oig-oasst1-512-6_9b" 19 | tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", trust_remote_code=True, use_auth_token=auth_token) 20 | model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True, use_auth_token=auth_token) 21 | 22 | generate_text = H2OTextGenerationPipeline(model=model, tokenizer=tokenizer) 23 | 24 | 25 | def generate(query): 26 | return generate_text(query, max_new_tokens=150)[0]['generated_text'] 27 | 28 | 29 | examples = [ 30 | "Why is drinking water so healthy?", 31 | "Is there such a thing as Shallow Learning?", 32 | "Tell me a funny joke in German", 33 | "What does the 402 error mean?", 34 | "Can penguins fly?", 35 | "What's the secret to a happy life?", 36 | "Is it easy to train large language models?" 37 | ] 38 | 39 | 40 | def process_example(args): 41 | for x in generate(args): 42 | pass 43 | return x 44 | 45 | css = ".generating {visibility: hidden}" 46 | 47 | with gr.Blocks(theme=theme) as demo: 48 | gr.Markdown( 49 | """
h2oGPT
50 | """ 51 | ) 52 | with gr.Row(): 53 | with gr.Column(): 54 | with gr.Row(): 55 | instruction = gr.Textbox(placeholder="Enter your question here", label="Question", elem_id="q-input") 56 | with gr.Row(): 57 | with gr.Row(): 58 | submit = gr.Button("Generate Answer") 59 | with gr.Row(): 60 | with gr.Column(): 61 | with gr.Box(): 62 | gr.Markdown("**h2oGPT**") 63 | output = gr.Markdown() 64 | with gr.Row(): 65 | gr.Examples( 66 | examples=examples, 67 | inputs=[instruction], 68 | cache_examples=False, 69 | fn=process_example, 70 | outputs=[output], 71 | ) 72 | submit.click(generate, inputs=[instruction], outputs=[output], api_name='submit') 73 | instruction.submit(generate, inputs=[instruction], outputs=[output]) 74 | 75 | demo.queue(concurrency_count=16).launch(debug=True) 76 | -------------------------------------------------------------------------------- /spaces/demo/app_client_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Client test. 3 | 4 | Run server: 5 | 6 | python app.py 7 | 8 | Then run this client: 9 | 10 | python app_client_test.py 11 | 12 | NOTE: To access a private app on gradio, do: 13 | 14 | HUGGINGFACE_TOKEN= GRADIO_HOST="https://huggingface.co/spaces/h2oai/h2ogpt-oasst1-512-6_9b-hosted" python app_client_test.py 15 | """ 16 | 17 | import os 18 | from gradio_client import Client 19 | import markdown # pip install markdown 20 | from bs4 import BeautifulSoup # pip install beautifulsoup4 21 | 22 | 23 | hf_token = os.environ.get('HUGGINGFACE_TOKEN') 24 | host = os.environ.get("GRADIO_HOST", "http://localhost:7860") 25 | client = Client(host, hf_token=hf_token) 26 | 27 | 28 | def test_app_client_basic(): 29 | instruction = "Who are you?" 30 | args = [instruction] 31 | 32 | api_name = '/submit' 33 | res = client.predict( 34 | *tuple(args), 35 | api_name=api_name, 36 | ) 37 | print(md_to_text(res)) 38 | 39 | 40 | def md_to_text(md): 41 | html = markdown.markdown(md) 42 | soup = BeautifulSoup(html, features='html.parser') 43 | return soup.get_text() 44 | 45 | 46 | if __name__ == '__main__': 47 | test_app_client_basic() 48 | -------------------------------------------------------------------------------- /spaces/demo/h2oai_pipeline.py: -------------------------------------------------------------------------------- 1 | ../../src/h2oai_pipeline.py -------------------------------------------------------------------------------- /spaces/demo/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.28.1 2 | torch==2.0.0 3 | accelerate==0.18.0 4 | -------------------------------------------------------------------------------- /spkemb/cmu_us_awb_arctic-wav-arctic_a0002.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/spkemb/cmu_us_awb_arctic-wav-arctic_a0002.npy -------------------------------------------------------------------------------- /spkemb/cmu_us_bdl_arctic-wav-arctic_a0009.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/spkemb/cmu_us_bdl_arctic-wav-arctic_a0009.npy -------------------------------------------------------------------------------- /spkemb/cmu_us_clb_arctic-wav-arctic_a0144.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/spkemb/cmu_us_clb_arctic-wav-arctic_a0144.npy -------------------------------------------------------------------------------- /spkemb/cmu_us_ksp_arctic-wav-arctic_b0087.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/spkemb/cmu_us_ksp_arctic-wav-arctic_b0087.npy -------------------------------------------------------------------------------- /spkemb/cmu_us_rms_arctic-wav-arctic_b0353.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/spkemb/cmu_us_rms_arctic-wav-arctic_b0353.npy -------------------------------------------------------------------------------- /spkemb/cmu_us_slt_arctic-wav-arctic_a0508.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/spkemb/cmu_us_slt_arctic-wav-arctic_a0508.npy -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/src/__init__.py -------------------------------------------------------------------------------- /src/basic_nltk.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ['NLTK_DATA'] = './nltk_data' 4 | 5 | from nltk.downloader import download 6 | 7 | # download('all') 8 | download('tokenizers', download_dir=os.environ['NLTK_DATA']) 9 | download('taggers', download_dir=os.environ['NLTK_DATA']) 10 | download('punkt', download_dir=os.environ['NLTK_DATA']) 11 | download('averaged_perceptron_tagger', download_dir=os.environ['NLTK_DATA']) 12 | download('maxent_treebank_pos_tagger', download_dir=os.environ['NLTK_DATA']) 13 | download('spanish_grammars', download_dir=os.environ['NLTK_DATA']) 14 | -------------------------------------------------------------------------------- /src/function_client.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | import requests 5 | import json 6 | 7 | 8 | def execute_function_on_server(host: str, port: int, function_name: str, args: tuple, kwargs: dict, use_disk: bool, 9 | use_pickle: bool, function_api_key: str): 10 | url = f"http://{host}:{port}/execute_function/" 11 | payload = { 12 | "function_name": function_name, 13 | "args": args, 14 | "kwargs": kwargs, 15 | "use_disk": use_disk, 16 | "use_pickle": use_pickle, 17 | } 18 | headers = { 19 | "Authorization": f"Bearer {function_api_key}" 20 | } 21 | response = requests.post(url, json=payload, headers=headers) 22 | if response.status_code == 200: 23 | return response.json() 24 | else: 25 | return {"error": response.json()["detail"]} 26 | 27 | 28 | def read_result_from_disk(file_path: str, use_pickle: bool, verbose=False): 29 | if verbose: 30 | print(f"Size of {file_path} is {os.path.getsize(file_path)}") 31 | try: 32 | if use_pickle: 33 | with open(file_path, "rb") as f: 34 | result = pickle.load(f) 35 | else: 36 | with open(file_path, "r") as f: 37 | result = json.load(f) 38 | except Exception as e: 39 | raise IOError(f"Error reading file {file_path}: {e}") 40 | finally: 
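# Descriptive note: the on-disk result file is always removed after the read
# attempt, even when deserialization fails, so the function server's scratch
# files do not accumulate.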
41 | try: 42 | os.remove(file_path) 43 | except OSError as e: 44 | print(f"Error deleting file {file_path}: {e}") 45 | return result 46 | 47 | 48 | def call_function_server(host, port, function_name, args, kwargs, use_disk=False, use_pickle=False, 49 | function_api_key='EMPTY', verbose=False): 50 | execute_result = execute_function_on_server(host, port, function_name, args, kwargs, use_disk, use_pickle, 51 | function_api_key) 52 | if "error" in execute_result: 53 | raise RuntimeError(execute_result['error']) 54 | else: 55 | if use_disk or use_pickle: 56 | file_path = execute_result["file_path"] 57 | result_from_disk = read_result_from_disk(file_path, use_pickle, verbose=verbose) 58 | return result_from_disk 59 | else: 60 | return execute_result["result"] 61 | 62 | 63 | def get_data_h2ogpt(file_path, verbose=False, is_url=False, **kwargs): 64 | """ 65 | Simple function for Open Web UI 66 | """ 67 | function_server_host = os.getenv('H2OGPT_FUNCTION_SERVER_HOST', '0.0.0.0') 68 | function_server_port = int(os.getenv('H2OGPT_FUNCTION_SERVER_PORT', '5002')) 69 | function_api_key = os.getenv('H2OGPT_FUNCTION_SERVER_API_KEY', 'EMPTY') 70 | 71 | # could set other things: 72 | # https://github.com/h2oai/h2ogpt/blob/d2fa3d7ce507e8fb141c78ff92a83a8e27cf8b31/src/gpt_langchain.py#L9498 73 | simple_kwargs = kwargs 74 | if is_url: 75 | simple_kwargs.update(dict(filei=None, url=file_path, text=None)) 76 | file_path = None 77 | function_name = 'path_to_docs' 78 | use_disk = False 79 | use_pickle = True 80 | sources = call_function_server(function_server_host, 81 | function_server_port, 82 | function_name, 83 | (file_path,), 84 | simple_kwargs, 85 | use_disk=use_disk, use_pickle=use_pickle, 86 | function_api_key=function_api_key, 87 | verbose=verbose) 88 | known_type = len(sources) > 0 89 | return sources, known_type 90 | -------------------------------------------------------------------------------- /src/h2o-logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/src/h2o-logo.ico -------------------------------------------------------------------------------- /src/image_pix2struct.py: -------------------------------------------------------------------------------- 1 | """ 2 | Based upon ImageCaptionLoader in LangChain version: langchain/document_loaders/image_captions.py 3 | But accepts preloaded model to avoid slowness in use and CUDA forking issues 4 | 5 | Loader that uses Pix2Struct models to image caption 6 | 7 | """ 8 | from typing import List, Union, Any, Tuple 9 | 10 | from langchain.docstore.document import Document 11 | from langchain_community.document_loaders import ImageCaptionLoader 12 | from utils import get_device, clear_torch_cache 13 | from PIL import Image 14 | 15 | 16 | class H2OPix2StructLoader(ImageCaptionLoader): 17 | """Loader that extracts text from images""" 18 | 19 | def __init__(self, path_images: Union[str, List[str]] = None, model_type="google/pix2struct-textcaps-base", 20 | max_new_tokens=50): 21 | super().__init__(path_images) 22 | self._pix2struct_model = None 23 | self._model_type = model_type 24 | self._max_new_tokens = max_new_tokens 25 | 26 | def set_context(self): 27 | if get_device() == 'cuda': 28 | import torch 29 | n_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0 30 | if n_gpus > 0: 31 | self.context_class = torch.device 32 | self.device = 'cuda' 33 | else: 34 | self.device = 'cpu' 35 | else: 36 | self.device = 'cpu' 37 | 38 
| def load_model(self): 39 | try: 40 | from transformers import AutoProcessor, Pix2StructForConditionalGeneration 41 | except ImportError: 42 | raise ValueError( 43 | "`transformers` package not found, please install with " 44 | "`pip install transformers`." 45 | ) 46 | if self._pix2struct_model: 47 | self._pix2struct_model = self._pix2struct_model.to(self.device) 48 | return self 49 | self.set_context() 50 | self._pix2struct_processor = AutoProcessor.from_pretrained(self._model_type) 51 | self._pix2struct_model = Pix2StructForConditionalGeneration.from_pretrained(self._model_type).to(self.device) 52 | return self 53 | 54 | def unload_model(self): 55 | if hasattr(self._pix2struct_model, 'cpu'): 56 | self._pix2struct_model.cpu() 57 | clear_torch_cache() 58 | 59 | def set_image_paths(self, path_images: Union[str, List[str]]): 60 | """ 61 | Load from a list of image files 62 | """ 63 | if isinstance(path_images, str): 64 | self.image_paths = [path_images] 65 | else: 66 | self.image_paths = path_images 67 | 68 | def load(self, prompt=None) -> List[Document]: 69 | if self._pix2struct_model is None: 70 | self.load_model() 71 | results = [] 72 | for path_image in self.image_paths: 73 | caption, metadata = self._get_captions_and_metadata( 74 | processor=self._pix2struct_processor, model=self._pix2struct_model, path_image=path_image 75 | ) 76 | doc = Document(page_content=caption, metadata=metadata) 77 | results.append(doc) 78 | 79 | return results 80 | 81 | def _get_captions_and_metadata( 82 | self, processor: Any, model: Any, path_image: str) -> Tuple[str, dict]: 83 | """ 84 | Helper function for getting the captions and metadata of an image 85 | """ 86 | try: 87 | image = Image.open(path_image) 88 | except Exception: 89 | raise ValueError(f"Could not get image data for {path_image}") 90 | inputs = self._pix2struct_processor(images=image, return_tensors="pt") 91 | inputs = inputs.to(self.device) 92 | generated_ids = self._pix2struct_model.generate(**inputs, max_new_tokens=self._max_new_tokens) 93 | generated_text = self._pix2struct_processor.batch_decode(generated_ids, skip_special_tokens=True)[0] 94 | metadata: dict = {"image_path": path_image} 95 | return generated_text, metadata 96 | -------------------------------------------------------------------------------- /src/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # The path to the version.py file relative to the root of the repository 4 | FILE_PATH="src/version.py" 5 | 6 | # Get the current git commit hash 7 | GITHASH=$(git rev-parse HEAD) 8 | 9 | # Update the __version__ variable in version.py 10 | # This uses a Perl one-liner to find the __version__ line and replace it with the current GITHASH 11 | perl -pi -e "s/__version__ = \".*\"/__version__ = \"$GITHASH\"/" $FILE_PATH 12 | 13 | # Add the modified version.py file to the commit 14 | git add $FILE_PATH 15 | 16 | # End of script 17 | -------------------------------------------------------------------------------- /src/prepare_offline.py: -------------------------------------------------------------------------------- 1 | def noop_load(*args, **kwargs): 2 | return None 3 | 4 | 5 | def go_prepare_offline(*args, **kwargs): 6 | kwargs0 = kwargs['kwargs'] 7 | # gen.py steps should have already obtained: 8 | # model+tokenizers from base_model or model_lock if required 9 | # tokenizers, including tokenizers for model_lock if using inference servers even if no LLM locally 10 | # score_model or reward model 11 | # 12 | # Additional steps
are related to document Q/A: 13 | # For simplicity use gradio functions, 14 | # but not API calls that would require actual gradio app up and API usage that might have issues 15 | 16 | kwargs['max_quality'] = True 17 | embed = True 18 | h2ogpt_key = '' 19 | file_list = ['tests/driverslicense.jpeg', 'tests/CityofTshwaneWater.pdf', 'tests/example.xlsx'] 20 | 21 | inputs2 = [kwargs['my_db_state0'], 22 | kwargs['selection_docs_state0'], 23 | kwargs['requests_state0'], 24 | kwargs0['langchain_mode'], 25 | kwargs0['chunk'], 26 | kwargs0['chunk_size'], 27 | embed, 28 | kwargs['image_audio_loaders_options'], 29 | kwargs['pdf_loaders_options'], 30 | kwargs['url_loaders_options'], 31 | kwargs['jq_schema0'], 32 | kwargs['extract_frames'], 33 | kwargs['llava_prompt'], 34 | h2ogpt_key, 35 | ] 36 | 37 | for fileup_output in file_list: 38 | # ensure normal blip (not 2) obtained 39 | blip2 = 'CaptionLarge' 40 | if blip2 in kwargs['image_audio_loaders_options']: 41 | kwargs['image_audio_loaders_options'].remove(blip2) 42 | 43 | # ensure normal asr (not asrlarge) obtained 44 | asrlarge = 'ASRLarge' 45 | if asrlarge in kwargs['image_audio_loaders_options']: 46 | kwargs['image_audio_loaders_options'].remove(asrlarge) 47 | 48 | inputs1 = [fileup_output] 49 | add_file_kwargs = dict(fn=kwargs['update_db_func'], 50 | inputs=inputs1 + inputs2) 51 | add_file_kwargs['fn'](*tuple(add_file_kwargs['inputs'])) 52 | 53 | inputs2[8] = kwargs['image_audio_loaders_options'] 54 | add_file_kwargs = dict(fn=kwargs['update_db_func'], 55 | inputs=inputs1 + inputs2) 56 | add_file_kwargs['fn'](*tuple(add_file_kwargs['inputs'])) 57 | 58 | # FakeTokenizer etc. needs tiktoken for general tasks 59 | import tiktoken 60 | encoding = tiktoken.get_encoding("cl100k_base") 61 | assert encoding 62 | encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") 63 | assert encoding 64 | 65 | # sometimes summarization needs gpt2 still 66 | from transformers import AutoTokenizer 67 | model_name = 'gpt2' 68 | tokenizer = AutoTokenizer.from_pretrained(model_name) 69 | assert tokenizer 70 | 71 | # then run h2ogpt as: 72 | # HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 python generate.py --gradio_offline_level=2 --share=False ... 
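The comments in go_prepare_offline above amount to a warm-up pass: ingest the sample documents through the gradio update_db function and pre-download the tokenizer assets so that a later run with HF_DATASETS_OFFLINE=1 and TRANSFORMERS_OFFLINE=1 finds everything in the local cache. As a minimal standalone sketch of just the tokenizer warm-up step (illustrative only, not part of prepare_offline.py; it assumes tiktoken and transformers are installed and network access is still available):

```python
# Illustrative warm-up: run once while online so later offline runs hit the cache.
import tiktoken
from transformers import AutoTokenizer

# tiktoken encodings used for general token counting (FakeTokenizer etc.)
assert tiktoken.get_encoding("cl100k_base")
assert tiktoken.encoding_for_model("gpt-3.5-turbo")

# gpt2 tokenizer that summarization sometimes still needs
assert AutoTokenizer.from_pretrained("gpt2")
```

After this warm-up completes, h2ogpt can be launched with the offline environment variables shown in the comment above.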
73 | -------------------------------------------------------------------------------- /src/prompter_utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | 3 | from enums import unknown_prompt_type, template_prompt_type 4 | 5 | 6 | def get_use_chat_template(tokenizer, prompt_type=None): 7 | if tokenizer is None: 8 | return False 9 | use_chat_template = prompt_type in [None, '', unknown_prompt_type, template_prompt_type] and \ 10 | has_chat_template(tokenizer) 11 | return use_chat_template 12 | 13 | 14 | def has_chat_template(tokenizer): 15 | return (hasattr(tokenizer, 'chat_template') and 16 | tokenizer.chat_template not in [None, ''] or 17 | hasattr(tokenizer, 'default_chat_template') and 18 | tokenizer.default_chat_template not in [None, ''] 19 | ) 20 | 21 | 22 | def get_chat_template(tokenizer): 23 | if tokenizer is None: 24 | return None 25 | if hasattr(tokenizer, 'chat_template') and tokenizer.chat_template not in [None, '']: 26 | return tokenizer.chat_template 27 | if hasattr(tokenizer, 'default_chat_template') and tokenizer.default_chat_template not in [None, '']: 28 | return tokenizer.default_chat_template 29 | return None 30 | 31 | 32 | def base64_encode_jinja_template(template_str): 33 | encoded_bytes = base64.b64encode(template_str.encode('utf-8')) 34 | encoded_str = encoded_bytes.decode('utf-8') 35 | return encoded_str 36 | 37 | 38 | def base64_decode_jinja_template(encoded_str): 39 | if is_base64(encoded_str): 40 | decoded_bytes = base64.b64decode(encoded_str.encode('utf-8')) 41 | decoded_str = decoded_bytes.decode('utf-8') 42 | return decoded_str 43 | else: 44 | # just normal string, pass along 45 | return encoded_str 46 | 47 | 48 | def is_base64(s): 49 | # Check if the length is a multiple of 4 50 | if len(s) % 4 != 0: 51 | return False 52 | 53 | # Check if the string contains only valid base64 characters 54 | try: 55 | # Try to decode the base64 string 56 | decoded = base64.b64decode(s, validate=True) 57 | # Check if the decoded bytes can be converted to a UTF-8 string 58 | decoded.decode('utf-8') 59 | except Exception: 60 | return False 61 | 62 | return True 63 | -------------------------------------------------------------------------------- /src/sagemaker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import typing 3 | import json 4 | from langchain_community.llms import SagemakerEndpoint 5 | from langchain.llms.sagemaker_endpoint import LLMContentHandler 6 | from pydantic.v1 import root_validator 7 | 8 | from utils import FakeTokenizer 9 | 10 | 11 | class ChatContentHandler(LLMContentHandler): 12 | content_type = "application/json" 13 | accepts = "application/json" 14 | 15 | def transform_input(self, prompt: str, model_kwargs: typing.Dict) -> bytes: 16 | messages0 = [] 17 | openai_system_prompt = "You are a helpful assistant." 
18 | if openai_system_prompt: 19 | messages0.append({"role": "system", "content": openai_system_prompt}) 20 | messages0.append({'role': 'user', 'content': prompt}) 21 | input_dict = {'inputs': [messages0], "parameters": model_kwargs} 22 | return json.dumps(input_dict).encode("utf-8") 23 | 24 | def transform_output(self, output: bytes) -> str: 25 | response_json = json.loads(output.read().decode("utf-8")) 26 | return response_json[0]["generation"]['content'] 27 | 28 | 29 | class BaseContentHandler(LLMContentHandler): 30 | content_type = "application/json" 31 | accepts = "application/json" 32 | 33 | def transform_input(self, prompt: str, model_kwargs: typing.Dict) -> bytes: 34 | input_dict = {'inputs': prompt, "parameters": model_kwargs} 35 | return json.dumps(input_dict).encode("utf-8") 36 | 37 | def transform_output(self, output: bytes) -> str: 38 | response_json = json.loads(output.read().decode("utf-8")) 39 | return response_json[0]["generation"] 40 | 41 | 42 | class H2OSagemakerEndpoint(SagemakerEndpoint): 43 | aws_access_key_id: str = "" 44 | aws_secret_access_key: str = "" 45 | tokenizer: typing.Any = None 46 | 47 | @root_validator() 48 | def validate_environment(cls, values: typing.Dict) -> typing.Dict: 49 | """Validate that AWS credentials and the boto3 python package exist in the environment.""" 50 | try: 51 | import boto3 52 | 53 | try: 54 | if values["credentials_profile_name"] is not None: 55 | session = boto3.Session( 56 | profile_name=values["credentials_profile_name"] 57 | ) 58 | else: 59 | # use default credentials 60 | session = boto3.Session() 61 | 62 | values["client"] = session.client( 63 | "sagemaker-runtime", 64 | region_name=values['region_name'], 65 | aws_access_key_id=values['aws_access_key_id'], 66 | aws_secret_access_key=values['aws_secret_access_key'], 67 | ) 68 | 69 | except Exception as e: 70 | raise ValueError( 71 | "Could not load credentials to authenticate with AWS client. " 72 | "Please check that credentials in the specified " 73 | "profile name are valid." 74 | ) from e 75 | 76 | except ImportError: 77 | raise ImportError( 78 | "Could not import boto3 python package. " 79 | "Please install it with `pip install boto3`." 80 | ) 81 | return values 82 | 83 | def get_token_ids(self, text: str) -> typing.List[int]: 84 | tokenizer = self.tokenizer 85 | if tokenizer is not None: 86 | return tokenizer.encode(text) 87 | else: 88 | return FakeTokenizer().encode(text)['input_ids'] 89 | 90 | -------------------------------------------------------------------------------- /src/utils_sys.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import traceback 4 | 5 | 6 | class StreamProxy: 7 | def __init__(self, original_stream): 8 | self.__original_stream = original_stream 9 | 10 | def write(self, *args, **kwargs): 11 | try: 12 | return self.__original_stream.write(*args, **kwargs) 13 | except ValueError as e: 14 | if str(e) == "I/O operation on closed file": 15 | self.handle_closed_file_error("write") 16 | else: 17 | raise 18 | 19 | def flush(self, *args, **kwargs): 20 | try: 21 | return self.__original_stream.flush(*args, **kwargs) 22 | except ValueError as e: 23 | if str(e) == "I/O operation on closed file": 24 | self.handle_closed_file_error("flush") 25 | else: 26 | raise 27 | 28 | def handle_closed_file_error(self, operation): 29 | message = f"Warning: Attempt to {operation} to a closed stream has been ignored."
30 | if os.getenv("HARD_ASSERTS"): 31 | raise ValueError("I/O operation on closed file.") 32 | else: 33 | # Use sys.__stderr__ to ensure the message is seen even if stderr is closed/redirected. 34 | print(message, file=sys.__stderr__) 35 | 36 | def close(self): 37 | # Print the stack trace to the original stream 38 | traceback.print_stack(file=self.__original_stream) 39 | message = "Warning: Attempt to close stream has been ignored." 40 | 41 | if os.getenv("HARD_ASSERTS"): 42 | # Raise an exception if HARD_ASSERTS is set 43 | raise Exception("Attempt to close stream intercepted.") 44 | else: 45 | print(message, file=self.__original_stream) 46 | 47 | def __getattr__(self, name): 48 | return getattr(self.__original_stream, name) 49 | 50 | def __setattr__(self, name, value): 51 | is_hard_asserts = os.getenv("HARD_ASSERTS") 52 | if name in {"_StreamProxy__original_stream"}: 53 | super().__setattr__(name, value) 54 | else: 55 | traceback.print_stack(file=self.__original_stream) 56 | message = "Modification attempt of protected stream attribute has been logged." 57 | if is_hard_asserts: 58 | raise AttributeError(f"{message} Modification of '{name}' is not allowed on StreamProxy instances.") 59 | else: 60 | print(message, file=self.__original_stream) 61 | 62 | 63 | class FinalizeStream: 64 | def __init__(self, proxy): 65 | self.__proxy = proxy 66 | 67 | def __setattr__(self, key, value): 68 | is_hard_asserts = os.getenv("HARD_ASSERTS") 69 | if key in {"_FinalizeStream__proxy"}: 70 | super().__setattr__(key, value) 71 | else: 72 | # Use sys.__stdout__ to ensure output if sys.stderr/stdout is protected 73 | traceback.print_stack(file=sys.__stdout__) 74 | message = "Stream protection violation has been logged." 75 | if is_hard_asserts: 76 | raise AttributeError(f"{message} Modification of '{key}' is prohibited.") 77 | else: 78 | print(message, file=sys.__stdout__) 79 | 80 | def __getattr__(self, item): 81 | return getattr(self.__proxy, item) 82 | 83 | 84 | def protect_stream(stream_name): 85 | if stream_name == "stdout": 86 | sys.stdout = FinalizeStream(StreamProxy(sys.stdout)) 87 | elif stream_name == "stderr": 88 | sys.stderr = FinalizeStream(StreamProxy(sys.stderr)) 89 | else: 90 | raise ValueError("Unsupported stream name. 
Choose 'stdout' or 'stderr'.") 91 | 92 | 93 | def protect_stdout_stderr(): 94 | # Protect both stdout and stderr at the start of your application 95 | protect_stream("stdout") 96 | protect_stream("stderr") 97 | -------------------------------------------------------------------------------- /src/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "8e3a4df7edc2ff6d7f764ba5341f4fd54dc1cf60" 2 | -------------------------------------------------------------------------------- /src/vision/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | root_path = os.path.dirname((__file__)) 5 | root_path = os.path.abspath(os.path.abspath(os.path.join(root_path, ".."))) 6 | if root_path not in sys.path: 7 | sys.path.append(root_path) 8 | -------------------------------------------------------------------------------- /src/vision/extract_movie.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import uuid 4 | 5 | from src.utils import makedirs, sanitize_filename, get_gradio_tmp 6 | 7 | 8 | def extract_unique_frames(urls=None, file=None, download_dir=None, export_dir=None, extract_frames=10): 9 | temp_workaround = False 10 | if temp_workaround: 11 | download_dir = './' 12 | else: 13 | download_dir = download_dir or os.getenv('VID_DOWNLOADS', "viddownloads") 14 | download_dir = os.path.join(download_dir, str(uuid.uuid4())) 15 | makedirs(download_dir, exist_ok=True) 16 | # os.environ['FIFTYONE_DISABLE_SERVICES'] = 'True' 17 | if urls: 18 | if 'openai_server' not in sys.path: 19 | sys.path.append('openai_server') 20 | from openai_server.agent_tools.download_web_video import download_web_video 21 | for url in urls: 22 | download_web_video(video_url=url, base_url="https://www.youtube.com", output_dir=download_dir) 23 | #import fiftyone.utils.youtube as fouy 24 | #fouy.download_youtube_videos(urls, download_dir=download_dir) 25 | 26 | # Create a FiftyOne Dataset 27 | import fiftyone as fo 28 | if file: 29 | dataset = fo.Dataset.from_videos([file]) 30 | else: 31 | dataset = fo.Dataset.from_videos_dir(download_dir) 32 | 33 | # Convert videos to images, sample 1 frame per second 34 | frame_view = dataset.to_frames(sample_frames=True, fps=1) 35 | 36 | import fiftyone.brain as fob 37 | 38 | # Index images by similarity 39 | results = fob.compute_similarity(frame_view, brain_key="frame_sim") 40 | 41 | # Find maximally unique frames 42 | num_unique = min(extract_frames, frame_view.count()) # Scale this to whatever you want 43 | results.find_unique(num_unique) 44 | unique_view = frame_view.select(results.unique_ids) 45 | 46 | # Visualize in the App 47 | # session = fo.launch_app(frame_view) 48 | # session = fo.launch_app(unique_view) 49 | 50 | san_file = sanitize_filename(os.path.basename(file)) if file else None 51 | 52 | gradio_tmp = get_gradio_tmp() 53 | if san_file: 54 | export_dir = export_dir or os.path.join(gradio_tmp, "extraction_%s" % san_file) 55 | if os.path.isdir(export_dir): 56 | export_dir += "_%s" % str(uuid.uuid4()) 57 | else: 58 | export_dir = export_dir or os.path.join(gradio_tmp, "extraction_%s" % str(uuid.uuid4())) 59 | makedirs(export_dir, exist_ok=True) 60 | unique_view.export(export_dir, dataset_type=fo.types.VideoDirectory) 61 | return export_dir 62 | -------------------------------------------------------------------------------- /src/vision/flux.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | 3 | import filelock 4 | from diffusers import FluxPipeline 5 | import torch 6 | 7 | from src.utils import makedirs 8 | from src.vision.sdxl_turbo import get_device 9 | 10 | 11 | def get_pipe_make_image(gpu_id): 12 | device = get_device(gpu_id) 13 | 14 | pipe = FluxPipeline.from_pretrained( 15 | "black-forest-labs/FLUX.1-dev", 16 | torch_dtype=torch.bfloat16, 17 | ).to(device) 18 | 19 | return pipe 20 | 21 | 22 | def get_pipe_make_image_2(gpu_id): 23 | device = get_device(gpu_id) 24 | 25 | pipe = FluxPipeline.from_pretrained( 26 | "black-forest-labs/FLUX.1-schnell", 27 | torch_dtype=torch.bfloat16, 28 | ).to(device) 29 | 30 | return pipe 31 | 32 | 33 | def make_image(prompt, filename=None, gpu_id='auto', pipe=None, 34 | image_guidance_scale=3.0, 35 | image_size="1024x1024", 36 | image_quality='standard', 37 | image_num_inference_steps=50, 38 | max_sequence_length=512): 39 | if pipe is None: 40 | pipe = get_pipe_make_image(gpu_id=gpu_id) 41 | 42 | if image_quality == 'manual': 43 | # listen to guidance_scale and num_inference_steps passed in 44 | pass 45 | else: 46 | if image_quality == 'quick': 47 | image_num_inference_steps = 10 48 | image_size = "512x512" 49 | elif image_quality == 'standard': 50 | image_num_inference_steps = 20 51 | elif image_quality == 'hd': 52 | image_num_inference_steps = 50 53 | 54 | lock_type = 'image' 55 | base_path = os.path.join('locks', 'image_locks') 56 | base_path = makedirs(base_path, exist_ok=True, tmp_ok=True, use_base=True) 57 | lock_file = os.path.join(base_path, "%s.lock" % lock_type) 58 | makedirs(os.path.dirname(lock_file)) # ensure made 59 | with filelock.FileLock(lock_file): 60 | image = pipe(prompt=prompt, 61 | height=int(image_size.lower().split('x')[0]), 62 | width=int(image_size.lower().split('x')[1]), 63 | num_inference_steps=image_num_inference_steps, 64 | max_sequence_length=max_sequence_length, 65 | guidance_scale=image_guidance_scale).images[0] 66 | if filename: 67 | image.save(filename) 68 | return filename 69 | return image 70 | -------------------------------------------------------------------------------- /src/vision/playv2.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import filelock 4 | from diffusers import DiffusionPipeline 5 | import torch 6 | 7 | from src.utils import makedirs 8 | from src.vision.sdxl_turbo import get_device 9 | 10 | 11 | def get_pipe_make_image(gpu_id): 12 | device = get_device(gpu_id) 13 | 14 | pipe = DiffusionPipeline.from_pretrained( 15 | # "playgroundai/playground-v2-1024px-aesthetic", 16 | "playgroundai/playground-v2.5-1024px-aesthetic", 17 | torch_dtype=torch.float16, 18 | use_safetensors=True, 19 | add_watermarker=False, 20 | variant="fp16" 21 | ).to(device) 22 | 23 | return pipe 24 | 25 | 26 | def make_image(prompt, filename=None, gpu_id='auto', pipe=None, 27 | image_guidance_scale=5.0, # 5 is optimal for playv2.5 28 | image_size="1024x1024", 29 | image_quality='standard', 30 | image_num_inference_steps=50, 31 | max_sequence_length=512): 32 | if pipe is None: 33 | pipe = get_pipe_make_image(gpu_id=gpu_id) 34 | 35 | if image_quality == 'manual': 36 | # listen to guidance_scale and num_inference_steps passed in 37 | pass 38 | else: 39 | if image_quality == 'quick': 40 | image_num_inference_steps = 10 41 | image_size = "512x512" 42 | elif image_quality == 'standard': 43 | image_num_inference_steps = 20 44 | elif image_quality == 'hd': 45 | 
image_num_inference_steps = 50 46 | 47 | lock_type = 'image' 48 | base_path = os.path.join('locks', 'image_locks') 49 | base_path = makedirs(base_path, exist_ok=True, tmp_ok=True, use_base=True) 50 | lock_file = os.path.join(base_path, "%s.lock" % lock_type) 51 | makedirs(os.path.dirname(lock_file)) # ensure made 52 | with filelock.FileLock(lock_file): 53 | image = pipe(prompt=prompt, 54 | height=int(image_size.lower().split('x')[0]), 55 | width=int(image_size.lower().split('x')[1]), 56 | num_inference_steps=image_num_inference_steps, 57 | max_sequence_length=max_sequence_length, 58 | guidance_scale=image_guidance_scale, 59 | ).images[0] 60 | if filename: 61 | image.save(filename) 62 | return filename 63 | return image 64 | -------------------------------------------------------------------------------- /src/vision/sdxl_turbo.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import filelock 4 | import torch 5 | from diffusers import AutoPipelineForImage2Image, AutoPipelineForText2Image 6 | from diffusers.utils import load_image 7 | 8 | from src.utils import cuda_vis_check, makedirs 9 | 10 | n_gpus1 = torch.cuda.device_count() if torch.cuda.is_available() else 0 11 | n_gpus1, gpu_ids = cuda_vis_check(n_gpus1) 12 | 13 | 14 | def get_device(gpu_id): 15 | if gpu_id == 'auto': 16 | device = 'cpu' if n_gpus1 == 0 else 'cuda:0' 17 | else: 18 | device = 'cpu' if n_gpus1 == 0 else 'cuda:%s' % gpu_id 19 | return device 20 | 21 | 22 | def get_pipe_make_image(gpu_id='auto'): 23 | # https://huggingface.co/stabilityai/sdxl-turbo 24 | device = get_device(gpu_id) 25 | 26 | pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16").to(device) 27 | return pipe 28 | 29 | 30 | def make_image(prompt, filename=None, gpu_id='auto', pipe=None, 31 | image_size="1024x1024", image_quality='standard', 32 | image_num_inference_steps=1, image_guidance_scale=0.0): 33 | if pipe is None: 34 | pipe = get_pipe_make_image(gpu_id=gpu_id) 35 | 36 | if image_quality == 'manual': 37 | # listen to guidance_scale and num_inference_steps passed in 38 | pass 39 | else: 40 | if image_quality == 'quick': 41 | image_num_inference_steps = 1 42 | image_size = "512x512" 43 | elif image_quality == 'standard': 44 | image_num_inference_steps = 2 45 | elif image_quality == 'hd': 46 | image_num_inference_steps = 3 47 | 48 | lock_type = 'image' 49 | base_path = os.path.join('locks', 'image_locks') 50 | base_path = makedirs(base_path, exist_ok=True, tmp_ok=True, use_base=True) 51 | lock_file = os.path.join(base_path, "%s.lock" % lock_type) 52 | makedirs(os.path.dirname(lock_file)) # ensure made 53 | with filelock.FileLock(lock_file): 54 | image = pipe(prompt=prompt, 55 | height=int(image_size.lower().split('x')[0]), 56 | width=int(image_size.lower().split('x')[1]), 57 | num_inference_steps=image_num_inference_steps, # more than 1 not really helpful 58 | guidance_scale=0.0, # disabled: https://huggingface.co/stabilityai/sdxl-turbo#diffusers 59 | ).images[0] 60 | if filename: 61 | image.save(filename) 62 | return filename 63 | return image 64 | 65 | 66 | def get_pipe_change_image(gpu_id='auto'): 67 | device = get_device(gpu_id) 68 | 69 | pipe = AutoPipelineForImage2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16").to(device) 70 | return pipe 71 | 72 | 73 | def change_image(prompt, init_image=None, init_file=None, filename=None, gpu_id='auto', pipe=None): 74 | if pipe is None: 75 | pipe = 
get_pipe_change_image(gpu_id) 76 | 77 | if init_file: 78 | init_image = load_image(init_file).resize((512, 512)) 79 | 80 | image = pipe(prompt, image=init_image, num_inference_steps=2, strength=0.5, guidance_scale=0.0).images[0] 81 | if filename: 82 | image.save(filename) 83 | return filename 84 | else: 85 | return image 86 | 87 | 88 | -------------------------------------------------------------------------------- /tests/2403.09629.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/2403.09629.pdf -------------------------------------------------------------------------------- /tests/CityofTshwaneWater.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/CityofTshwaneWater.pdf -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import importlib.util 4 | 5 | 6 | def pytest_itemcollected(item): 7 | item._nodeid = item.nodeid + os.getenv("PYTEST_TEST_NAME", "") 8 | 9 | 10 | def pytest_sessionstart(session): 11 | if not os.getenv("USE_WHEEL", None): 12 | return 13 | try: 14 | for location in importlib.util.find_spec("h2ogpt").submodule_search_locations: 15 | sys.path.append(location) 16 | except AttributeError: 17 | pass 18 | -------------------------------------------------------------------------------- /tests/dental.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/dental.png -------------------------------------------------------------------------------- /tests/driverslicense.jpeg.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/driverslicense.jpeg.zip -------------------------------------------------------------------------------- /tests/dummy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/dummy.pdf -------------------------------------------------------------------------------- /tests/example.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/example.xlsx -------------------------------------------------------------------------------- /tests/fastfood.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/fastfood.jpg -------------------------------------------------------------------------------- /tests/gridnumbers.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/gridnumbers.gif -------------------------------------------------------------------------------- /tests/image-based-pdf-sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/image-based-pdf-sample.pdf -------------------------------------------------------------------------------- /tests/jon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/jon.png -------------------------------------------------------------------------------- /tests/memory_hog_script.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | def use_memory(): 5 | # This list will keep growing, consuming more and more memory 6 | memory_hog = [] 7 | 8 | print("Starting memory allocation...") 9 | 10 | # Continuously append large arrays to the list 11 | while True: 12 | # Create a large list (about 10 million integers) 13 | large_list = [i for i in range(10**7)] 14 | 15 | # Append the large list to memory_hog 16 | memory_hog.append(large_list) 17 | 18 | # Print the current size of the memory_hog list 19 | print(f"Appended a large list. Current memory_hog length: {len(memory_hog)}") 20 | 21 | # Sleep for 1 second between allocations 22 | time.sleep(1) 23 | 24 | 25 | if __name__ == "__main__": 26 | use_memory() 27 | -------------------------------------------------------------------------------- /tests/next.txt: -------------------------------------------------------------------------------- 1 | 4/4/23, 3:26 PM 2 | 3 | Next Fundamental Physics Discovery 4 | 5 | Next Fundamental Physics Discovery 6 | done by applying techniques from quantum field theory, which is a framework for describing the behavior of 7 | particles and fields in terms of quantum mechanical principles. Another approach is to try to reformulate 8 | quantum mechanics itself in a way that is compatible with general relativity. This can be done by applying 9 | ideas from loop quantum gravity, which is a theory that tries to quantize spacetime itself. 10 | There are also other approaches to combining quantum mechanics and general relativity, such as string theory 11 | and non-commutative geometry, that are based on different underlying principles. While it is still not clear 12 | which of these approaches, if any, will ultimately lead to a successful theory of quantum gravity, it is an active 13 | area of research and there is ongoing progress being made. 14 | 15 | How would you reformulate quantum mechanics itself in a way that is compatible with general relativity? 16 | 17 | One approach to reformulating quantum mechanics in a way that is compatible with general relativity is 18 | known as loop quantum gravity. This is a theory that tries to quantize spacetime itself, rather than just the 19 | particles and fields that exist within spacetime. 20 | In loop quantum gravity, spacetime is represented as a network of interconnected loops, rather than as a 21 | continuous geometric structure. These loops can be thought of as quantized versions of the gravitational field, 22 | and their behavior is described using the principles of quantum mechanics. 
By working with these loops, 23 | rather than with continuous spacetime, it is possible to incorporate quantum mechanical principles into the 24 | theory of gravity in a way that is consistent with general relativity. 25 | One of the key ideas in loop quantum gravity is that spacetime is made up of discrete "atoms" of space, rather 26 | than being a continuous structure. This idea is motivated by the fact that the Heisenberg uncertainty 27 | principle, a fundamental principle of quantum mechanics, implies that it is not possible to measure the 28 | position and momentum of a particle with arbitrary precision. In the context of spacetime, this means that it 29 | is not possible to measure the shape of spacetime with arbitrary precision, and so it is reasonable to assume 30 | that spacetime is made up of discrete units. 31 | -------------------------------------------------------------------------------- /tests/ocr1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/ocr1.png -------------------------------------------------------------------------------- /tests/ocr2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/ocr2.png -------------------------------------------------------------------------------- /tests/ocr3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/ocr3.png -------------------------------------------------------------------------------- /tests/pdf-sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/pdf-sample.pdf -------------------------------------------------------------------------------- /tests/porsche.mp3.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/porsche.mp3.zip -------------------------------------------------------------------------------- /tests/receipt.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/receipt.jpg -------------------------------------------------------------------------------- /tests/revenue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/revenue.png -------------------------------------------------------------------------------- /tests/rotate-ex2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/rotate-ex2.png -------------------------------------------------------------------------------- /tests/sample.eml: -------------------------------------------------------------------------------- 1 | FCC: imap://piro-test@mail.clear-code.com/Sent 2 | X-Identity-Key: id1 3 | X-Account-Key: account1 4 | From: "piro-test@clear-code.com" 5 | Subject: test confirmation 6 | To: piro.outsider.reflex+1@gmail.com, 
piro.outsider.reflex+2@gmail.com, 7 | mailmaster@example.com, mailmaster@example.org, webmaster@example.com, 8 | webmaster@example.org, webmaster@example.jp, mailmaster@example.jp 9 | Message-ID: <05c18622-f2ad-cb77-2ce9-a0bbfc7d7ad0@clear-code.com> 10 | Date: Thu, 15 Aug 2019 14:54:37 +0900 11 | X-Mozilla-Draft-Info: internal/draft; vcard=0; receipt=0; DSN=0; uuencode=0; 12 | attachmentreminder=0; deliveryformat=4 13 | User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 14 | Thunderbird/69.0 15 | MIME-Version: 1.0 16 | Content-Type: multipart/mixed; 17 | boundary="------------26A45336F6C6196BD8BBA2A2" 18 | Content-Language: en-US 19 | 20 | This is a multi-part message in MIME format. 21 | --------------26A45336F6C6196BD8BBA2A2 22 | Content-Type: text/plain; charset=utf-8; format=flowed 23 | Content-Transfer-Encoding: 7bit 24 | 25 | testtest 26 | testtest 27 | testtest 28 | testtest 29 | testtest 30 | testtest 31 | 32 | 33 | 34 | --------------26A45336F6C6196BD8BBA2A2 35 | Content-Type: text/plain; charset=UTF-8; 36 | name="sha1hash.txt" 37 | Content-Transfer-Encoding: base64 38 | Content-Disposition: attachment; 39 | filename="sha1hash.txt" 40 | 41 | NzRjOGYwOWRmYTMwZWFjY2ZiMzkyYjEzMjMxNGZjNmI5NzhmMzI1YSAqZmxleC1jb25maXJt 42 | LW1haWwuMS4xMC4wLnhwaQpjY2VlNGI0YWE0N2Y1MTNhYmNlMzQyY2UxZTJlYzJmZDk2MDBl 43 | MzFiICpmbGV4LWNvbmZpcm0tbWFpbC4xLjExLjAueHBpCjA3MWU5ZTM3OGFkMDE3OWJmYWRi 44 | MWJkYzY1MGE0OTQ1NGQyMDRhODMgKmZsZXgtY29uZmlybS1tYWlsLjEuMTIuMC54cGkKOWQ3 45 | YWExNTM0MThlYThmYmM4YmU3YmE2ZjU0Y2U4YTFjYjdlZTQ2OCAqZmxleC1jb25maXJtLW1h 46 | aWwuMS45LjkueHBpCjgxNjg1NjNjYjI3NmVhNGY5YTJiNjMwYjlhMjA3ZDkwZmIxMTg1NmUg 47 | KmZsZXgtY29uZmlybS1tYWlsLnhwaQo= 48 | --------------26A45336F6C6196BD8BBA2A2 49 | Content-Type: application/json; 50 | name="manifest.json" 51 | Content-Transfer-Encoding: base64 52 | Content-Disposition: attachment; 53 | filename="manifest.json" 54 | 55 | ewogICJtYW5pZmVzdF92ZXJzaW9uIjogMiwKICAiYXBwbGljYXRpb25zIjogewogICAgImdl 56 | Y2tvIjogewogICAgICAiaWQiOiAiZmxleGlibGUtY29uZmlybS1tYWlsQGNsZWFyLWNvZGUu 57 | Y29tIiwKICAgICAgInN0cmljdF9taW5fdmVyc2lvbiI6ICI2OC4wIgogICAgfQogIH0sCiAg 58 | Im5hbWUiOiAiRmxleCBDb25maXJtIE1haWwiLAogICJkZXNjcmlwdGlvbiI6ICJDb25maXJt 59 | IG1haWxhZGRyZXNzIGFuZCBhdHRhY2htZW50cyBiYXNlZCBvbiBmbGV4aWJsZSBydWxlcy4i 60 | LAogICJ2ZXJzaW9uIjogIjIuMCIsCgogICJsZWdhY3kiOiB7CiAgICAidHlwZSI6ICJ4dWwi 61 | LAogICAgIm9wdGlvbnMiOiB7CiAgICAgICJwYWdlIjogImNocm9tZTovL2NvbmZpcm0tbWFp 62 | bC9jb250ZW50L3NldHRpbmcueHVsIiwKICAgICAgIm9wZW5faW5fdGFiIjogdHJ1ZQogICAg 63 | fQogIH0KfQ== 64 | --------------26A45336F6C6196BD8BBA2A2-- -------------------------------------------------------------------------------- /tests/sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/sample.pdf -------------------------------------------------------------------------------- /tests/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/screenshot.png -------------------------------------------------------------------------------- /tests/table_as_image.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/table_as_image.docx 
-------------------------------------------------------------------------------- /tests/test4gpus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # CHOOSE: 3 | ngpus=4 4 | # below has to match the number of GPUs for A6000s due to long context tests 5 | export TESTMODULOTOTAL=4 6 | 7 | pip install pytest-instafail || true 8 | docker ps | grep text-generation-inference | awk '{print $1}' | xargs docker stop 9 | killall -s SIGINT pytest 10 | killall -s SIGTERM pytest 11 | killall -s 9 pytest 12 | pkill --signal 9 -f weaviate-embedded/weaviate 13 | 14 | NPHYSICAL=`lscpu -p | egrep -v '^\#' | sort -u -t, -k 2,4 | wc -l` 15 | NPROCS=`lscpu -p | egrep -v '^\#' | wc -l` 16 | # 17 | n_jobs=$(($NPROCS / $TESTMODULOTOTAL)) 18 | echo "CORES: $NPHYSICAL $NPROCS $n_jobs" 19 | 20 | # GENERAL: 21 | lowergpuid=0 22 | low=0 23 | high=$(($TESTMODULOTOTAL-1)) 24 | pids="" 25 | for mod in $(seq $low $high) 26 | do 27 | # in some cases launch gradio server, TGI server, or gradio server as inference server with +1 and +2 off base port 28 | # ports always increment by 3 29 | export GRADIO_SERVER_PORT=$((7860+$(($mod*3)))) 30 | export TESTMODULO=$mod 31 | 32 | # CVD loops over number of GPUs 33 | export CUDA_VISIBLE_DEVICES=$(($lowergpuid+$(($mod % $ngpus)))) 34 | export n_jobs=$n_jobs 35 | export OMP_NUM_THREADS=$n_jobs 36 | export NUMEXPR_MAX_THREADS=$n_jobs 37 | export OPENBLAS_NUM_THREADS=$n_jobs 38 | # By default, OpenBLAS will restrict the Cpus_allowed to be 0x1. 39 | export OPENBLAS_MAIN_FREE=$n_jobs 40 | export MKL_NUM_THREADS=$n_jobs 41 | export H2OGPT_BASE_PATH="./base_$mod" 42 | 43 | # huggyllama test uses a lot of memory, requires TESTMODULOTOTAL=ngpus even for A6000s 44 | # pytest --instafail -s -v -n 1 tests -k "not test_huggyllama_transformers_pr" &> testsparallel"${mod}".log & 45 | pytest --instafail -s -v -n 1 tests &> testsparallel"${mod}".log & 46 | pid=$!
47 | echo "MODS: $mod $GRADIO_SERVER_PORT $CUDA_VISIBLE_DEVICES $H2OGPT_BASE_PATH" 48 | pids="$pids $pid" 49 | done 50 | trap "kill $pids; exit 1" INT 51 | 52 | echo "to check on results while running, do:" 53 | echo "grep -a PASSED testsparallel*.log | sed 's/.*PASSED//g' | sort | uniq |wc -l" 54 | echo "grep -a FAILED testsparallel*.log | sed 's/.*FAILED//g' | sort | uniq |wc -l" 55 | 56 | echo "to interrupt but still get some results, do:" 57 | #echo "ps -auxwf | grep -v "[g]rep" | grep pytest | awk '{print $2}' |xargs kill -s SIGINT" 58 | echo "kill -s SIGINT $pids" 59 | wait 60 | -------------------------------------------------------------------------------- /tests/test_animated_gif.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/test_animated_gif.gif -------------------------------------------------------------------------------- /tests/test_async_iterator_pipe.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import asyncio 3 | from iterators import AsyncIteratorPipe 4 | 5 | 6 | class TestTimeoutIterator(unittest.TestCase): 7 | 8 | def test_normal_iteration(self): 9 | 10 | async def _(self): 11 | it = AsyncIteratorPipe() 12 | 13 | await it.put(1) 14 | await it.put(2) 15 | await it.put(3) 16 | await it.close() # stop iteration 17 | 18 | self.assertEqual(await it.__anext__(), 1) 19 | self.assertEqual(await it.__anext__(), 2) 20 | self.assertEqual(await it.__anext__(), 3) 21 | 22 | with self.assertRaises(StopAsyncIteration): 23 | await it.__anext__() 24 | 25 | asyncio.get_event_loop().run_until_complete(_(self)) 26 | 27 | def test_multiple_next_after_exception(self): 28 | 29 | async def _(self): 30 | it = AsyncIteratorPipe() 31 | 32 | await it.put(1) 33 | await it.put(2) 34 | await it.put(3) 35 | await it.close() # stop iteration 36 | 37 | self.assertEqual(await it.__anext__(), 1) 38 | self.assertEqual(await it.__anext__(), 2) 39 | self.assertEqual(await it.__anext__(), 3) 40 | 41 | with self.assertRaises(StopAsyncIteration): 42 | await it.__anext__() 43 | 44 | with self.assertRaises(StopAsyncIteration): 45 | await it.__anext__() 46 | 47 | asyncio.get_event_loop().run_until_complete(_(self)) 48 | 49 | def test_multiple_close(self): 50 | 51 | async def _(self): 52 | it = AsyncIteratorPipe() 53 | 54 | await it.put(1) 55 | await it.put(2) 56 | await it.put(3) 57 | await it.close() # stop iteration 58 | await it.close() # stop iteration 59 | await it.close() # stop iteration 60 | 61 | self.assertEqual(await it.__anext__(), 1) 62 | self.assertEqual(await it.__anext__(), 2) 63 | self.assertEqual(await it.__anext__(), 3) 64 | 65 | with self.assertRaises(StopAsyncIteration): 66 | await it.__anext__() 67 | 68 | asyncio.get_event_loop().run_until_complete(_(self)) 69 | 70 | def test_put_after_close(self): 71 | 72 | async def _(self): 73 | it = AsyncIteratorPipe() 74 | 75 | self.assertTrue(await it.put(1)) 76 | await it.close() # stop iteration 77 | 78 | self.assertFalse(await it.put(2)) 79 | await it.close() # stop iteration 80 | 81 | self.assertFalse(await it.put(3)) 82 | await it.close() # stop iteration 83 | 84 | self.assertEqual(await it.__anext__(), 1) 85 | 86 | with self.assertRaises(StopAsyncIteration): 87 | await it.__anext__() 88 | 89 | asyncio.get_event_loop().run_until_complete(_(self)) 90 | 91 | def test_normal_iteration_via_for_loop(self): 92 | 93 | async def _(self): 94 | it = 
AsyncIteratorPipe() 95 | await it.put(1) 96 | await it.put(2) 97 | await it.put(3) 98 | await it.close() 99 | 100 | iter_results = [] 101 | async for x in it: 102 | iter_results.append(x) 103 | self.assertEqual(iter_results, [1, 2, 3]) 104 | 105 | iter_results = [] 106 | async for x in it: 107 | iter_results.append(x) 108 | self.assertEqual(iter_results, []) 109 | 110 | asyncio.get_event_loop().run_until_complete(_(self)) 111 | -------------------------------------------------------------------------------- /tests/test_client_readme.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from tests.utils import wrap_test_forked 4 | 5 | 6 | @pytest.mark.parametrize("local_server", [False, True]) 7 | @pytest.mark.parametrize("persist", [True, False]) 8 | @wrap_test_forked 9 | def test_readme_example(local_server, persist): 10 | if local_server: 11 | from src.gen import main 12 | main(base_model='llama', chat=True, gradio=True, num_beams=1, block_gradio_exit=False, verbose=True) 13 | 14 | # self-contained example used for readme, to be copied to README_CLIENT.md if changed, setting local_server = True at first 15 | import os 16 | # The grclient.py file can be copied from h2ogpt repo and used with local gradio_client for example use 17 | from gradio_utils.grclient import GradioClient 18 | 19 | h2ogpt_key = os.getenv('H2OGPT_KEY') or os.getenv('H2OGPT_H2OGPT_KEY') 20 | 21 | if local_server: 22 | host = "http://0.0.0.0:7860" 23 | auth = None 24 | else: 25 | host = "https://gpt.h2o.ai" 26 | auth = ('guest', 'guest') 27 | 28 | client = GradioClient(host, h2ogpt_key=h2ogpt_key, persist=persist, auth=auth) 29 | 30 | models = client.list_models() 31 | print(models) 32 | 33 | print(client.question("Who are you?", model=models[0])) 34 | print(client.question("What did I just ask?", model=models[0])) 35 | if persist: 36 | assert len(client.chat_conversation) == 2 37 | assert client.chat_conversation[-1][1] == "You just asked: Who are you?" or \ 38 | client.chat_conversation[-1][1] == "You just asked: \"Who are you?\"" or \ 39 | client.chat_conversation[-1][1] == "You asked, \"Who are you?\"" 40 | 41 | # LLM 42 | print(client.question("Who are you?", model=models[0])) 43 | 44 | url = "https://cdn.openai.com/papers/whisper.pdf" 45 | 46 | # Q/A 47 | print(client.query("What is whisper?", url=url, model=models[0])) 48 | # summarization (map_reduce over all pages if top_k_docs=-1) 49 | print(client.summarize(url=url, top_k_docs=3, model=models[0])) 50 | # extraction (map per page) 51 | print(client.extract(url=url, top_k_docs=3, model=models[0])) 52 | 53 | # summarization (map_reduce over all pages if top_k_docs=-1) 54 | print(client.summarize(query="List all names", url=url, top_k_docs=3, model=models[0])) 55 | # extraction (map per page) 56 | print(client.extract(query="Give valid JSON for any names.", url=url, top_k_docs=3, model=models[0])) 57 | 58 | if persist: 59 | assert len(client.chat_conversation) == 8 60 | -------------------------------------------------------------------------------- /tests/test_fine_tune_export_tgi.sh: -------------------------------------------------------------------------------- 1 | export DATA=h2oai/openassistant_oasst1_h2ogpt 2 | 3 | export BASE_MODEL=tiiuae/falcon-7b # confirmed working with 0.9.2 4 | # export BASE_MODEL=openlm-research/open_llama_3b # fails with OOM on 48GB card?? 
5 | # export BASE_MODEL=Salesforce/xgen-7b-8k-base # fails since tokenizer not yet supported (have to hack to force LLaMa tokenizer) 6 | 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | export MODEL=model-test 10 | export MODEL_NAME=`echo $MODEL | sed 's@/@_@g'` 11 | export HF_PORT=1000 12 | #export TGI_VERSION=latest # works 13 | #export TGI_VERSION=0.9.1 # fails 14 | export TGI_VERSION=0.9.3 # works 15 | 16 | 17 | # Train LoRA 18 | rm -rf $MODEL.lora 19 | python finetune.py --data_path=$DATA --base_model=$BASE_MODEL --num_epochs=0.01 --output_dir=$MODEL.lora 20 | 21 | # Merge LoRA, export model to $MODEL dir (via env var) 22 | rm -rf $MODEL 23 | python src/export_hf_checkpoint.py 24 | 25 | # Load model with TGI 26 | docker run --gpus all --shm-size 1g -e CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES -p $HF_PORT:80 -v $HOME/.cache/huggingface/hub/:/data -v $PWD/$MODEL:/$MODEL ghcr.io/huggingface/text-generation-inference:$TGI_VERSION --model-id /$MODEL --max-input-length 2048 --max-total-tokens 4096 --max-stop-sequences 6 --sharded false --disable-custom-kernels --trust-remote-code 27 | -------------------------------------------------------------------------------- /tests/test_imports.py: -------------------------------------------------------------------------------- 1 | from tests.utils import wrap_test_forked 2 | 3 | 4 | @wrap_test_forked 5 | def test_transformers(): 6 | import transformers 7 | assert transformers is not None 8 | -------------------------------------------------------------------------------- /tests/test_iterator_pipe.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from iterators import IteratorPipe 3 | 4 | 5 | class TestQueueToIterator(unittest.TestCase): 6 | 7 | def test_normal_iteration(self): 8 | it = IteratorPipe() 9 | 10 | it.put(1) 11 | it.put(2) 12 | it.put(3) 13 | it.close() # stop iteration 14 | 15 | self.assertEqual(next(it), 1) 16 | self.assertEqual(next(it), 2) 17 | self.assertEqual(next(it), 3) 18 | self.assertRaises(StopIteration, next, it) 19 | 20 | def test_normal_custom_sentinel(self): 21 | sentinel = object() 22 | it = IteratorPipe(sentinel=sentinel) 23 | 24 | it.put(1) 25 | it.put(2) 26 | it.put(3) 27 | it.put(sentinel) # stop iteration 28 | 29 | self.assertEqual(next(it), 1) 30 | self.assertEqual(next(it), 2) 31 | self.assertEqual(next(it), 3) 32 | self.assertRaises(StopIteration, next, it) 33 | 34 | def test_multiple_close(self): 35 | sentinel = object() 36 | it = IteratorPipe(sentinel=sentinel) 37 | 38 | it.put(1) 39 | it.put(2) 40 | it.put(3) 41 | it.close() # stop iteration 42 | it.close() # stop iteration 43 | it.close() # stop iteration 44 | 45 | self.assertEqual(next(it), 1) 46 | self.assertEqual(next(it), 2) 47 | self.assertEqual(next(it), 3) 48 | self.assertRaises(StopIteration, next, it) 49 | 50 | def test_multiple_next_after_close(self): 51 | sentinel = object() 52 | it = IteratorPipe(sentinel=sentinel) 53 | 54 | it.put(1) 55 | it.put(2) 56 | it.put(3) 57 | it.close() # stop iteration 58 | 59 | self.assertEqual(next(it), 1) 60 | self.assertEqual(next(it), 2) 61 | self.assertEqual(next(it), 3) 62 | self.assertRaises(StopIteration, next, it) 63 | self.assertRaises(StopIteration, next, it) 64 | self.assertRaises(StopIteration, next, it) 65 | 66 | def test_put_after_close(self): 67 | sentinel = object() 68 | it = IteratorPipe(sentinel=sentinel) 69 | 70 | self.assertTrue(it.put(1)) 71 | it.close() # stop iteration 72 | 73 | self.assertFalse(it.put(2)) 74 | it.close() # stop iteration 75 | 76 
| self.assertFalse(it.put(3)) 77 | it.close() # stop iteration 78 | 79 | self.assertEqual(next(it), 1) 80 | self.assertRaises(StopIteration, next, it) 81 | self.assertRaises(StopIteration, next, it) 82 | -------------------------------------------------------------------------------- /tests/test_openai_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/test_openai_server.py -------------------------------------------------------------------------------- /tests/test_speech.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/test_speech.wav -------------------------------------------------------------------------------- /tests/test_ui.py: -------------------------------------------------------------------------------- 1 | from tests.utils import wrap_test_forked 2 | 3 | 4 | @wrap_test_forked 5 | def test_newline_replace(): 6 | text0 = """You can use the `sorted()` function to merge two sorted lists in Python. The `sorted()` function takes a list as an argument and returns a new sorted list. Here’s an example of how you can use it to merge two sorted lists: 7 | 8 | ```python 9 | list1 = [1, 2, 3, 4, 5] 10 | list2 = [6, 7, 8, 9, 10] 11 | merged_list = sorted(list1 + list2)
print(merged_list) 12 | ``` 13 | 14 | The output of this code is: 15 | ``` 16 | [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 17 | ``` 18 | 19 | As you can see, the `sorted()` function has merged the two sorted lists into a single sorted list.""" 20 | 21 | from src.gradio_runner import fix_text_for_gradio 22 | fixed = fix_text_for_gradio(text0, fix_new_lines=True, fix_angle_brackets=False) 23 | 24 | expected = """You can use the `sorted()` function to merge two sorted lists in Python. The `sorted()` function takes a list as an argument and returns a new sorted list. Here’s an example of how you can use it to merge two sorted lists:

```python 25 | list1 = [1, 2, 3, 4, 5] 26 | list2 = [6, 7, 8, 9, 10] 27 | merged_list = sorted(list1 + list2)
print(merged_list) 28 | ```

The output of this code is:
``` 29 | [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 30 | ```

As you can see, the `sorted()` function has merged the two sorted lists into a single sorted list.""" 31 | assert fixed == expected 32 | -------------------------------------------------------------------------------- /tests/test_vision.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | 4 | from tests.utils import wrap_test_forked 5 | 6 | 7 | @pytest.mark.skip # no longer use 8 | @wrap_test_forked 9 | def test_llava_client2(): 10 | file = "models/wizard.jpg" 11 | llava_model = os.getenv('H2OGPT_LLAVA_MODEL', 'http://192.168.1.46:7861') 12 | from src.vision.utils_vision import get_llava_response 13 | res, llava_prompt = get_llava_response(file, llava_model, allow_prompt_auto=True) 14 | print(res) 15 | assert 'pumpkins' in res or 'glowing' in res 16 | 17 | 18 | @pytest.mark.skip # no longer use 19 | @wrap_test_forked 20 | def test_llava_client_stream(): 21 | file = "models/wizard.jpg" 22 | llava_model = os.getenv('H2OGPT_LLAVA_MODEL', 'http://192.168.1.46:7861') 23 | from src.vision.utils_vision import get_llava_stream 24 | text = '' 25 | for res in get_llava_stream(file, llava_model, allow_prompt_auto=True): 26 | text = res 27 | print(text) 28 | 29 | assert 'The image features' in text or 'The image is an illustration' in text 30 | 31 | 32 | @wrap_test_forked 33 | def test_make_image(): 34 | from src.vision.sdxl_turbo import make_image 35 | prompt = "A cinematic shot of a baby racoon wearing an intricate italian priest robe." 36 | make_image(prompt, filename="output_p2i.png") 37 | 38 | 39 | @wrap_test_forked 40 | def test_change_image(): 41 | from src.vision.sdxl_turbo import change_image 42 | init_file = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png" 43 | change_image(init_file=init_file, 44 | prompt="cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k", 45 | filename="output_i2i.png") 46 | 47 | 48 | @wrap_test_forked 49 | def test_video_extraction(): 50 | urls = ["https://www.youtube.com/shorts/fRkZCriQQNU"] 51 | from src.vision.extract_movie import extract_unique_frames 52 | export_dir = extract_unique_frames(urls=urls, download_dir=None) 53 | image_files = [f for f in os.listdir(export_dir) if os.path.isfile(os.path.join(export_dir, f))] 54 | assert len(image_files) > 9 55 | assert image_files[0].endswith('.jpg') 56 | print(export_dir) 57 | # feh -rF -D 1000 export_dir 58 | 59 | 60 | @wrap_test_forked 61 | def test_make_image_playv2(): 62 | from src.vision.playv2 import make_image 63 | prompt = "A cinematic shot of a baby racoon wearing an intricate italian priest robe." 
64 | make_image(prompt, filename="output_p2i_v2.png") 65 | 66 | 67 | @wrap_test_forked 68 | def test_fastfood(): 69 | from src.image_utils import align_image 70 | assert os.path.isfile(align_image("tests/fastfood.jpg")) 71 | # can't find box for receipt 72 | assert align_image("tests/receipt.jpg") == "tests/receipt.jpg" 73 | assert os.path.isfile(align_image("tests/rotate-ex2.png")) 74 | 75 | from src.image_utils import correct_rotation 76 | assert os.path.isfile(correct_rotation("tests/fastfood.jpg")) 77 | assert os.path.isfile(correct_rotation("tests/receipt.jpg")) 78 | assert os.path.isfile(correct_rotation("tests/rotate-ex2.png")) 79 | 80 | # new 81 | assert align_image("tests/revenue.png") == "tests/revenue.png" 82 | assert align_image("tests/dental.png") == "tests/dental.png" 83 | assert align_image("tests/jon.png") == "tests/jon.png" 84 | 85 | assert os.path.isfile(correct_rotation("tests/revenue.png")) 86 | assert os.path.isfile(correct_rotation("tests/dental.png")) 87 | assert os.path.isfile(correct_rotation("tests/jon.png")) 88 | -------------------------------------------------------------------------------- /tests/videotest.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/h2ogpt/a0ad1920a847cf12a4e9c3453f6cdb736fe97ba0/tests/videotest.mp4 -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 0.2.1 2 | --------------------------------------------------------------------------------