├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── 1_bug-report.yml │ ├── 2_feature-request.yml │ ├── 3_documentation.yml │ └── config.yml └── workflows │ ├── basic_check.yml │ ├── build_docker_image.yml │ └── sync_files.yml ├── .gitignore ├── .pre-commit-config.yaml ├── Cookbook ├── CHANGELOG.md ├── CONTRIBUTING.md ├── CONTRIBUTING_cn.md ├── README.md ├── README_cn.md ├── assets │ ├── LM_Studio-0.png │ ├── LM_Studio-1.png │ ├── LM_Studio-2.png │ ├── llama-cpp-0.jpg │ ├── mind_cn.png │ ├── mind_en.png │ ├── ollama-0.png │ ├── ollama-1.png │ ├── ollama-2.png │ ├── ollama-3.png │ ├── ollama-4.png │ ├── ollama-5.png │ ├── ollama-6.png │ ├── ollama-7.png │ └── ollama-8.png ├── cn │ ├── api │ │ ├── function_calling │ │ │ └── function_calling_llamaindex.ipynb │ │ └── rag │ │ │ ├── yi_rag_langchain.ipynb │ │ │ └── yi_rag_llamaindex.ipynb │ ├── ecosystem │ │ ├── assets │ │ │ ├── 2 │ │ │ │ ├── img(4-1).png │ │ │ │ ├── img(4-2).png │ │ │ │ ├── img(4-3).png │ │ │ │ ├── img(4-4).png │ │ │ │ ├── img(4-5).png │ │ │ │ ├── img(4-6).png │ │ │ │ └── img(4-7).png │ │ │ ├── 3 │ │ │ │ ├── img(3-1).png │ │ │ │ ├── img(3-2).png │ │ │ │ ├── img(3-3).png │ │ │ │ └── img(3-4).png │ │ │ ├── 4 │ │ │ │ ├── img(2-1).png │ │ │ │ ├── img.png │ │ │ │ ├── img2.png │ │ │ │ ├── img3.png │ │ │ │ ├── img4.png │ │ │ │ ├── img_1.png │ │ │ │ └── train_memory(GiB).png │ │ │ └── 5 │ │ │ │ ├── img(5-1).png │ │ │ │ ├── img(5-2).jpg │ │ │ │ ├── img(5-3).png │ │ │ │ └── img(5-4).png │ │ ├── yi-vl最佳实践.md │ │ ├── 使用Yi大模型玩转街霸三.md │ │ ├── 基于LlamaIndex和Yi-large构建智能问答系统.md │ │ ├── 基于yi-large构建思维导图生成器.md │ │ └── 强化Yi-1.5-6B-Chat的数学和逻辑能力.md │ └── opensource │ │ ├── Inference │ │ ├── Inference_using_lmdeploy.ipynb │ │ ├── Inference_using_swift.ipynb │ │ ├── Inference_using_transformers.ipynb │ │ └── vLLM_Inference_tutorial.ipynb │ │ ├── fine_tune │ │ ├── finetune-yi-with-llamafactory.md │ │ └── finetune-yi-with-swift.md │ │ ├── function_calling │ │ ├── function_calling.ipynb │ │ └── function_calling_llamaindex.ipynb │ │ ├── local │ │ ├── local-llama.cpp.md │ │ ├── local-lm-studio.md │ │ ├── local-mlx.md │ │ └── local-ollama.md │ │ ├── quantization │ │ ├── autoawq-yi-quantization.md │ │ ├── autogptq-yi-quantization.md │ │ └── swift-yi-quantization.md │ │ └── rag │ │ ├── yi_rag_langchain.ipynb │ │ └── yi_rag_llamaindex.ipynb └── en │ ├── api │ ├── function_calling │ │ └── function_calling_llamaindex.ipynb │ └── rag │ │ ├── yi_rag_langchain.ipynb │ │ └── yi_rag_llamaindex.ipynb │ ├── ecosystem │ ├── Building_a_Mind_Map_Generator_Powered_by_Yi-Large.md │ ├── Building_an_Intelligent_Q&A_System_Based_on_LlamaIndex_and_Yi-large.md │ ├── Enhancing_the_Mathematical_and_Logical_Reasoning_Abilities_of_Yi-1.5-6B-Chat.md │ ├── Mastering_Street_Fighter_III_with_the_Yi_Language_Model.md │ ├── assets │ │ ├── 2 │ │ │ ├── img(4-1).png │ │ │ ├── img(4-2).png │ │ │ ├── img(4-3).png │ │ │ ├── img(4-4).png │ │ │ ├── img(4-5).png │ │ │ ├── img(4-6).png │ │ │ └── img(4-7).png │ │ ├── 3 │ │ │ ├── img(3-1).png │ │ │ ├── img(3-2).png │ │ │ ├── img(3-3).png │ │ │ └── img(3-4).png │ │ ├── 4 │ │ │ ├── img(2-1).png │ │ │ ├── img.png │ │ │ ├── img2.png │ │ │ ├── img3.png │ │ │ ├── img4.png │ │ │ ├── img_1.png │ │ │ └── train_memory(GiB).png │ │ └── 5 │ │ │ ├── img(5-1).png │ │ │ ├── img(5-2).jpg │ │ │ ├── img(5-3).png │ │ │ └── img(5-4).png │ └── yi-vl-best-practice.md │ └── opensource │ ├── Inference │ ├── Inference_using_lmdeploy.ipynb │ ├── Inference_using_swift.ipynb │ ├── Inference_using_transformers.ipynb │ └── vLLM_Inference_tutorial.ipynb │ ├── fine_tune │ ├── 
finetune-yi-with-llamafactory.md │ └── finetune-yi-with-swift.md │ ├── function_calling │ ├── function_calling.ipynb │ └── function_calling_llamaindex.ipynb │ ├── local │ ├── local-llama.cpp.md │ ├── local-lm-studio.md │ ├── local-mlx.md │ └── local-ollama.md │ ├── quantization │ ├── autoawq-yi-quantization.md │ ├── autogptq-yi-quantization.md │ └── swift-yi-quantization.md │ └── rag │ ├── yi_rag_langchain.ipynb │ └── yi_rag_llamaindex.ipynb ├── Dockerfile ├── Events └── readme.md ├── HUGGINGFACE_README.md ├── LICENSE ├── MODEL_LICENSE_AGREEMENT.txt ├── NOTICE ├── README.md ├── README ├── huggingface_header.md ├── modelscope_header.md └── wisemodel_header.md ├── README_CN.md ├── VL ├── README.md ├── cli.py ├── images │ ├── cats.jpg │ └── extreme_ironing.jpg ├── llava │ ├── __init__.py │ ├── conversation.py │ ├── mm_utils.py │ └── model │ │ ├── __init__.py │ │ ├── clip_encoder │ │ ├── builder.py │ │ └── clip_encoder.py │ │ ├── constants.py │ │ ├── llava_arch.py │ │ ├── llava_llama.py │ │ └── multimodal_projector │ │ └── builder.py ├── openai_api.py ├── requirements.txt ├── single_inference.py └── web_demo.py ├── assets └── img │ ├── 1.png │ ├── 2.png │ ├── 9b.png │ ├── Apply_04.png │ ├── Yi-9B_benchmark_code.png │ ├── Yi-9B_benchmark_details.png │ ├── Yi-9B_benchmark_math.png │ ├── Yi-9B_benchmark_overall.png │ ├── Yi-9B_benchmark_text.png │ ├── Yi_logo_icon_dark.svg │ ├── Yi_logo_icon_light.svg │ ├── benchmark_base.png │ ├── benchmark_chat.png │ ├── coder.gif │ ├── coder │ ├── bench.webp │ ├── bench1.webp │ ├── bench2.webp │ ├── bench3.webp │ ├── bench31.webp │ ├── bench4.webp │ ├── bench5.webp │ ├── bench6.webp │ ├── bench7.webp │ ├── demo1.gif │ ├── demo2.gif │ ├── test │ └── yi-coder-calculator-demo.gif │ ├── events │ ├── 1 │ ├── a.jpeg │ ├── b.jpeg │ ├── c.jpeg │ ├── down.gif │ └── pic.mp4 │ ├── fireworksai.png │ ├── gh.png │ ├── quick_start_path.png │ ├── quick_start_path_CN.png │ ├── yi_34b_chat_web_demo.gif │ ├── yi_llama_cpp1.png │ └── yi_llama_cpp2.png ├── conda-lock.yml ├── demo ├── README.md ├── text_generation.py ├── text_generation_tp.py └── web_demo.py ├── docs └── README_llama.cpp.md ├── finetune ├── README.md ├── README_CN.md ├── constant.py ├── scripts │ ├── run_eval.sh │ ├── run_sft_Yi_34b.sh │ ├── run_sft_Yi_6b.sh │ └── run_sft_lora_Yi_6b.sh ├── sft │ ├── main.py │ └── prompt_eval.py ├── utils │ ├── data │ │ ├── data_utils.py │ │ └── raw_datasets.py │ ├── ds_utils.py │ ├── model │ │ └── model_utils.py │ ├── module │ │ └── lora.py │ ├── perf.py │ └── utils.py └── yi_example_dataset │ └── data │ ├── eval.jsonl │ └── train.jsonl ├── pyproject.toml ├── quantization ├── awq │ ├── README.md │ ├── eval_quantized_model.py │ └── quant_autoawq.py └── gptq │ ├── README.md │ ├── eval_quantized_model.py │ └── quant_autogptq.py └── requirements.txt /.dockerignore: -------------------------------------------------------------------------------- 1 | .git/ 2 | .venv/ 3 | venv/ 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/1_bug-report.yml: -------------------------------------------------------------------------------- 1 | name: 🐞 Bug report 2 | description: "Problems encountered while utilizing Yi model" 3 | title: "Title " 4 | labels: ["type/bug-report"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thank you very much for your contribution! 
10 | - type: checkboxes 11 | attributes: 12 | label: Reminder 13 | description: > 14 | Please search [Github Discussion](https://github.com/01-ai/Yi/discussions) and [issues](https://github.com/01-ai/Yi/issues) carefully. 15 | options: 16 | - label: > 17 | I have searched the Github Discussion and issues and have not found anything similar to this. 18 | required: true 19 | - type: textarea 20 | attributes: 21 | label: Environment 22 | description: | 23 | examples: 24 | - **OS**: Ubuntu 20.04 25 | - **Python**: 3.10 26 | - **PyTorch**: 2.0.1+cu118 27 | - **CUDA**: 11.8 28 | value: | 29 | - OS: 30 | - Python: 31 | - PyTorch: 32 | - CUDA: 33 | render: markdown 34 | validations: 35 | required: true 36 | - type: textarea 37 | attributes: 38 | label: Current Behavior 39 | description: | 40 | A concise description of what you're experiencing, with screenshot attached if possible. 41 | validations: 42 | required: true 43 | - type: textarea 44 | attributes: 45 | label: Expected Behavior 46 | description: A concise description of what you expected to happen. 47 | validations: 48 | required: false 49 | - type: textarea 50 | attributes: 51 | label: Steps to Reproduce 52 | description: Steps to reproduce the behavior. 53 | placeholder: | 54 | 1. In this environment... 55 | 2. With this config... 56 | 3. Run '...' 57 | 4. See error... 58 | validations: 59 | required: true 60 | - type: textarea 61 | attributes: 62 | label: Anything Else? 63 | description: | 64 | Links? References? Anything that will give us more context about the issue you are encountering! 65 | validations: 66 | required: false 67 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/2_feature-request.yml: -------------------------------------------------------------------------------- 1 | name: 🚀 Feature request 2 | description: "Add new feature, improve code, and more" 3 | title: "Features " 4 | labels: ["type/feature-request"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thank you very much for your contribution! 10 | - type: checkboxes 11 | attributes: 12 | label: Reminder 13 | description: > 14 | Please search [Github Discussion](https://github.com/01-ai/Yi/discussions) and [issues](https://github.com/01-ai/Yi/issues) carefully. 15 | options: 16 | - label: > 17 | I have searched the Github Discussion and issues and have not found anything similar to this. 18 | required: true 19 | - type: textarea 20 | attributes: 21 | label: Motivation 22 | description: Describe the motivations for this enhancement. 23 | validations: 24 | required: true 25 | - type: textarea 26 | attributes: 27 | label: Solution 28 | description: Describe the proposed solution and add related materials like links if any. 29 | - type: textarea 30 | attributes: 31 | label: Alternatives 32 | description: Describe other alternative solutions or features you considered, but rejected. 33 | - type: textarea 34 | attributes: 35 | label: Anything Else? 36 | - type: checkboxes 37 | attributes: 38 | label: Are you willing to submit a PR? 39 | description: > 40 | We look forward to your contribution to making Yi better. If you are willing to submit a PR to implement this enhancement, please check the box. 41 | options: 42 | - label: I'm willing to submit a PR! 
43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/3_documentation.yml: -------------------------------------------------------------------------------- 1 | name: 📚 Document 2 | description: Suggest document changes 3 | title: "Doc" 4 | labels: ["doc-required"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thank you very much for your contribution! 10 | - type: checkboxes 11 | attributes: 12 | label: Reminder 13 | description: > 14 | Please search [Github Discussion](https://github.com/01-ai/Yi/discussions) and [issues](https://github.com/01-ai/Yi/issues) carefully. 15 | options: 16 | - label: > 17 | I have searched the Github Discussion and issues and have not found anything similar to this. 18 | required: true 19 | - type: textarea 20 | attributes: 21 | label: What issue do you find in Yi docs? 22 | description: For example, something missing, inaccurate, incomplete, hard to use/understand/find, etc. 23 | validations: 24 | required: true 25 | - type: textarea 26 | attributes: 27 | label: What is your suggestion? 28 | description: For example, add explanations, correct descriptions, delete information, etc. 29 | validations: 30 | required: true 31 | - type: textarea 32 | attributes: 33 | label: Any reference? 34 | description: (For example, website links, etc)? 35 | - type: checkboxes 36 | attributes: 37 | label: Are you willing to submit a PR? 38 | description: > 39 | We look forward to your contribution to making Yi better. If you are willing to submit a PR, please check the box. 40 | options: 41 | - label: I'm willing to submit a PR! 42 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | 3 | contact_links: 4 | - name: 💬 Ask a question or get support 5 | url: https://github.com/01-ai/Yi/discussions 6 | about: Ask a question or request support for using Yi 7 | -------------------------------------------------------------------------------- /.github/workflows/basic_check.yml: -------------------------------------------------------------------------------- 1 | name: Basic Check 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | basic-check: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Set up Python 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: "3.10" 24 | - name: Install linting libraries 25 | run: pip install autoflake isort black 26 | - name: Autoflake 27 | run: | 28 | output="$(autoflake . --remove-all-unused-imports --remove-unused-variables --expand-star-imports --ignore-init-module-imports -r)" 29 | if [[ -n $output ]] 30 | then 31 | printf "%s\n" "$output" 32 | exit 1 33 | else 34 | exit 0 35 | fi 36 | - name: Isort 37 | run: isort . --profile black --check-only --diff 38 | - name: Black 39 | run: black . 
--diff --color --check 40 | -------------------------------------------------------------------------------- /.github/workflows/build_docker_image.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker Image 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | tags: 7 | - '*.*.*' 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | authorize: 15 | environment: 16 | ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.repo.full_name != github.repository && 'external' || 'internal' }} 17 | runs-on: oversea 18 | steps: 19 | - run: true 20 | 21 | build-docker-image: 22 | needs: authorize 23 | strategy: 24 | matrix: 25 | runner: [oversea] # remove public runner, 24-3-6 26 | runs-on: ${{ matrix.runner }} 27 | steps: 28 | - uses: actions/checkout@v4 29 | # try to create a tag 30 | - uses: SebRollen/toml-action@v1.0.2 31 | id: read_version 32 | with: 33 | file: "pyproject.toml" 34 | field: "tool.poetry.version" 35 | - uses: rickstaa/action-create-tag@v1 36 | id: "tag_create" 37 | if: (github.ref_name == github.event.repository.default_branch) && !contains(steps.read_version.outputs.value, '-') 38 | with: 39 | tag: ${{ steps.read_version.outputs.value }} 40 | # build & push docker image 41 | - name: Set proxy 42 | run: | 43 | echo "http_proxy=$http_proxy" >> $GITHUB_ENV 44 | echo "https_proxy=$https_proxy" >> $GITHUB_ENV 45 | echo "no_proxy=$no_proxy" >> $GITHUB_ENV 46 | - name: Set up Docker Buildx 47 | uses: docker/setup-buildx-action@v3 48 | # with: 49 | # driver-opts: | 50 | # env.http_proxy=${{ env.http_proxy }} 51 | # env.https_proxy=${{ env.http_proxy }} 52 | # buildkitd-flags: --debug 53 | - name: Extract docker meta 54 | id: meta 55 | uses: docker/metadata-action@v5 56 | with: 57 | images: | 58 | ${{ matrix.runner == 'public' && format('{0}/ci/{1}', secrets.DEFAULT_REGISTRY, github.repository) || '' }} 59 | ${{ matrix.runner == 'public' && format('{0}/ci/{1}', secrets.PUBLIC_REGISTRY, github.repository) || '' }} 60 | ${{ matrix.runner == 'oversea' && format('ghcr.io/{0}', github.repository) || '' }} 61 | tags: | 62 | # set latest tag for default branch 63 | type=raw,value=latest,enable={{is_default_branch}} 64 | type=raw,value=${{ steps.read_version.outputs.value }},enable={{is_default_branch}} 65 | # pull request event 66 | type=ref,event=pr 67 | - name: Extract more docker meta 68 | id: more-meta 69 | shell: bash 70 | run: | 71 | PRIMARY_TAG=$(echo '${{ steps.meta.outputs.tags }}' | head -n 1) 72 | echo "PRIMARY_TAG=$PRIMARY_TAG" >> "$GITHUB_OUTPUT" 73 | echo "PRIMARY_TAG_SHORT=$(echo $PRIMARY_TAG | cut -d ':' -f1)" >> "$GITHUB_OUTPUT" 74 | - name: Login to default container registry 75 | if: matrix.runner == 'public' 76 | uses: docker/login-action@v2 77 | with: 78 | registry: ${{ secrets.DEFAULT_REGISTRY }} 79 | username: ${{ secrets.DEFAULT_REGISTRY_USER }} 80 | password: ${{ secrets.DEFAULT_REGISTRY_PASSWORD }} 81 | - name: Login to public container registry 82 | if: matrix.runner == 'public' 83 | uses: docker/login-action@v2 84 | with: 85 | registry: ${{ secrets.PUBLIC_REGISTRY }} 86 | username: ${{ secrets.PUBLIC_REGISTRY_USER }} 87 | password: ${{ secrets.PUBLIC_REGISTRY_PASSWORD }} 88 | - name: Login to ghcr.io 89 | if: matrix.runner == 'oversea' 90 | uses: docker/login-action@v2 91 | with: 92 | registry: ghcr.io 93 | username: ${{ github.actor }} 94 | password: ${{ secrets.GITHUB_TOKEN }} 95 | - 
name: Build and push 96 | id: build-image 97 | uses: docker/build-push-action@v5 98 | with: 99 | build-args: | 100 | REGISTRY=${{ matrix.runner == 'public' && format('{0}/ci', secrets.DEFAULT_REGISTRY) || 'nvcr.io'}} 101 | HTTP_PROXY=${{ env.http_proxy }} 102 | HTTPS_PROXY=${{ env.https_proxy }} 103 | NO_PROXY=${{ env.no_proxy }} 104 | push: true 105 | tags: ${{ steps.meta.outputs.tags }} 106 | labels: ${{ steps.meta.outputs.labels }} 107 | cache-from: type=registry,ref=${{ steps.more-meta.outputs.PRIMARY_TAG_SHORT }}:buildcache 108 | cache-to: ${{ github.event_name == 'pull_request_target' && '' || format('type=registry,image-manifest=true,ref={0}:buildcache,mode=max', steps.more-meta.outputs.PRIMARY_TAG_SHORT) }} 109 | -------------------------------------------------------------------------------- /.github/workflows/sync_files.yml: -------------------------------------------------------------------------------- 1 | name: Sync Files 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | env: 9 | GIT_CLONE_PROTECTION_ACTIVE: false 10 | 11 | jobs: 12 | detect-changes: 13 | runs-on: ubuntu-latest 14 | outputs: 15 | is_readme_changed: ${{ steps.changed-files.outputs.readme_any_changed }} 16 | is_license_changed: ${{ steps.changed-files.outputs.license_any_changed }} 17 | steps: 18 | - name: Checkout Repository 19 | uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 22 | 23 | - name: Get changed files 24 | id: changed-files 25 | uses: tj-actions/changed-files@v41 26 | with: 27 | files_yaml: | 28 | readme: 29 | - README.md 30 | - README/** 31 | license: 32 | - MODEL_LICENSE_AGREEMENT.txt 33 | sync-readme: 34 | runs-on: ${{ matrix.platform == 'huggingface' && 'ubuntu-latest' || 'public' }} 35 | needs: detect-changes 36 | if: needs.detect-changes.outputs.is_readme_changed == 'true' || needs.detect-changes.outputs.is_license_changed == 'true' 37 | strategy: 38 | matrix: 39 | platform: 40 | - huggingface 41 | - modelscope 42 | - wisemodel 43 | repo: 44 | - Yi-6B 45 | - Yi-6B-200K 46 | - Yi-6B-Chat 47 | - Yi-6B-Chat-4bits 48 | - Yi-6B-Chat-8bits 49 | - Yi-9B 50 | - Yi-9B-200K 51 | - Yi-34B 52 | - Yi-34B-200K 53 | - Yi-34B-Chat 54 | - Yi-34B-Chat-4bits 55 | - Yi-34B-Chat-8bits 56 | steps: 57 | - uses: actions/checkout@v4 58 | - run: echo "REPO=${{ matrix.platform == 'huggingface' && format('https://{0}:{1}@huggingface.co/01-ai/{2}', secrets.HF_USER, secrets.HF_TOKEN, matrix.repo) || (matrix.platform == 'modelscope' && format('http://oauth2:{0}@www.modelscope.cn/01ai/{1}.git', secrets.MODELSCOPE_TOKEN, matrix.repo) || format('https://oauth2:{0}@www.wisemodel.cn/01.AI/{1}.git', secrets.WISEMODEL_TOKEN, matrix.repo))}}" >> $GITHUB_ENV 59 | - run: GIT_LFS_SKIP_SMUDGE=1 git clone ${{ env.REPO }} 60 | - if: needs.detect-changes.outputs.is_readme_changed == 'true' && matrix.platform == 'huggingface' 61 | run: | 62 | cat README/huggingface_header.md > ${{ matrix.repo }}/README.md 63 | csplit README.md '//' 64 | cat xx01 >> ${{ matrix.repo }}/README.md 65 | - if: needs.detect-changes.outputs.is_readme_changed == 'true' && matrix.platform != 'huggingface' 66 | run: | 67 | cat README/${{matrix.platform}}_header.md > ${{ matrix.repo }}/README.md 68 | cat README.md >> ${{ matrix.repo }}/README.md 69 | - name: Sync LICENSE to ${{ matrix.platform }} ${{ matrix.repo }} 70 | if: needs.detect-changes.outputs.is_license_changed == 'true' 71 | run: | 72 | cat MODEL_LICENSE_AGREEMENT.txt > ${{ matrix.repo }}/LICENSE 73 | - run: | 74 | cd ${{ matrix.repo }} 75 | git config --global user.email "yi@01.ai" 76 | git 
config --global user.name "yi-01-ai" 77 | git add . 78 | git commit --dry-run || exit 0 79 | git commit -m "Auto Sync from ${{ github.repositoryUrl}}/commit/${{ github.sha}}" 80 | git push ${{ env.REPO }} 81 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | .vscode/* 3 | 4 | # Local History for Visual Studio Code 5 | .history/ 6 | 7 | # Built Visual Studio Code Extensions 8 | *.vsix 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | share/python-wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .nox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | *.py,cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | cover/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | db.sqlite3 71 | db.sqlite3-journal 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | .pybuilder/ 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | # For a library or package, you might want to ignore these files since the code is 96 | # intended to run in multiple environments; otherwise, check them in: 97 | # .python-version 98 | 99 | # pipenv 100 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 101 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 102 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 103 | # install all needed dependencies. 104 | #Pipfile.lock 105 | 106 | # poetry 107 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 108 | # This is especially recommended for binary packages to ensure reproducibility, and is more 109 | # commonly ignored for libraries. 110 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 111 | #poetry.lock 112 | 113 | # pdm 114 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 115 | #pdm.lock 116 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 117 | # in version control. 118 | # https://pdm.fming.dev/#use-with-ide 119 | .pdm.toml 120 | 121 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 122 | __pypackages__/ 123 | 124 | # Celery stuff 125 | celerybeat-schedule 126 | celerybeat.pid 127 | 128 | # SageMath parsed files 129 | *.sage.py 130 | 131 | # Environments 132 | .env 133 | .venv 134 | env/ 135 | venv/ 136 | ENV/ 137 | env.bak/ 138 | venv.bak/ 139 | 140 | # Spyder project settings 141 | .spyderproject 142 | .spyproject 143 | 144 | # Rope project settings 145 | .ropeproject 146 | 147 | # mkdocs documentation 148 | /site 149 | 150 | # mypy 151 | .mypy_cache/ 152 | .dmypy.json 153 | dmypy.json 154 | 155 | # Pyre type checker 156 | .pyre/ 157 | 158 | # pytype static type analyzer 159 | .pytype/ 160 | 161 | # Cython debug symbols 162 | cython_debug/ 163 | 164 | # PyCharm 165 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 166 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 167 | # and can be added to the global gitignore or merged into this file. For a more nuclear 168 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 169 | #.idea/ 170 | .DS_Store 171 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 23.7.0 4 | hooks: 5 | - id: black 6 | # It is recommended to specify the latest version of Python 7 | # supported by your project here, or alternatively use 8 | # pre-commit's default_language_version, see 9 | # https://pre-commit.com/#top_level-default_language_version 10 | language_version: python3.10 11 | - repo: https://github.com/pycqa/isort 12 | rev: 5.12.0 13 | hooks: 14 | - id: isort 15 | name: isort (python) 16 | args: ["--profile", "black", "--filter-files"] 17 | - repo: https://github.com/PyCQA/autoflake 18 | rev: v2.2.1 19 | hooks: 20 | - id: autoflake 21 | args: [--remove-all-unused-imports, --remove-unused-variables, --expand-star-imports, --ignore-init-module-imports, -r, --in-place] -------------------------------------------------------------------------------- /Cookbook/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ``` 2 | 记录版本变化 3 | ``` 4 | -------------------------------------------------------------------------------- /Cookbook/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | 3 | We appreciate your interest in contributing to our repository! To ensure a smooth and efficient process, please follow the guidelines below. 4 | 5 | ## Format 6 | 7 | - **Notebook**: Jupyter Notebook (.ipynb) 8 | - **Markdown**: Markdown file (.md) 9 | 10 | ## Guidelines 11 | 12 | 1. **Originality**: Ensure your content is original and not plagiarized. 13 | 2. **Non-promotional**: Avoid excessive marketing of your work. 14 | 3. **Submission Process**: 15 | - Submit your file and a description of your work. 16 | - Ensure your work can be reproduced based on your documentation. 17 | - If you encountered any issues during your experiments, try to include them in your documentation. 18 | 4. **Collaborative Development**: Aim to help and support fellow developers. 19 | 20 | ## How to Contribute 21 | 22 | 1. Fork the repository. 23 | 2. Create a new branch for your work. 24 | 3. Add your files in the appropriate format (ipynb or md). 25 | 4. 
Write clear and concise documentation to ensure reproducibility. 26 | 5. Submit a pull request with a detailed description of your work. 27 | 28 | We look forward to your valuable contributions! 29 | 30 | -------------------------------------------------------------------------------- /Cookbook/CONTRIBUTING_cn.md: -------------------------------------------------------------------------------- 1 | # 贡献指南 2 | 3 | 感谢你对我们仓库的贡献兴趣!为确保流程顺利高效,请遵循以下指南。 4 | 5 | ## 格式 6 | 7 | - **Notebook**:Jupyter Notebook (.ipynb) 8 | - **Markdown**:Markdown 文件 (.md) 9 | ## 指南 10 | 11 | 1. **原创性**:确保你的内容是原创的,不得抄袭。 12 | 2. **非宣传性**:避免过度营销你的作品。 13 | 3. **提交流程**: 14 | - 提交你的文件和作品描述。 15 | - 确保你的作品可以根据你的文档复现。 16 | - 如果你在实验中遇到任何问题,请尽量将其包含在文档中。 17 | 4. **互助开发**:目标是帮助和支持其他开发者。 18 | 19 | ## 如何贡献 20 | 21 | 1. Fork 这个仓库。 22 | 2. 创建一个新分支来进行你的工作。 23 | 3. 添加你的文件,确保格式正确(ipynb 或 md)。 24 | 4. 撰写清晰简洁的文档以确保可复现性。 25 | 5. 提交一个详细描述你工作的 Pull Request。 26 | 27 | 我们期待你的宝贵贡献! 28 | -------------------------------------------------------------------------------- /Cookbook/README_cn.md: -------------------------------------------------------------------------------- 1 | # 👋 Yi Cookbook 2 | 3 |

4 | English | 中文 5 |

6 | 7 | 欢迎来到 Yi Cookbook!这是一个关于 Yi 模型的综合资源库,包含教程、演示和详细文档。无论您是初学者,还是希望深入研究模型高阶用法,这里都能找到有价值的信息。 8 | 9 | ## 最新动态 10 | - **🔥2024-08-09**: 发布 Yi Cookbook 英文版本。 11 | - **🔥2024-07-29**: 发布 Yi Cookbook 1.0 版本,包含中文教程和示例。 12 | 13 | ## OpenSource 14 | 15 | | Category | Description | Notebook, Markdown | 16 | |:----------------------------|:---------------------------------------------|:--------------------------------------------------------------------------------------------------------| 17 | | inference, swift | 利用 Swift 快速进行 Yi 模型推理。 | [Inference_using_swift.ipynb](./cn/opensource/Inference/Inference_using_swift.ipynb) | 18 | | inference, transformers | 使用 Transformers 高效推理 Yi 模型。 | [Inference_using_transformers.ipynb](./cn/opensource/Inference/Inference_using_transformers.ipynb) | 19 | | inference, lmdeploy | 通过 LMDeploy 快速上手 Yi 模型推理。 | [Inference_using_lmdeploy.ipynb](./cn/opensource/Inference/Inference_using_lmdeploy.ipynb) | 20 | | inference, vllm | 使用 vllm 体验 Yi 模型的快速推理。 | [vLLM_Inference_tutorial.ipynb](./cn/opensource/Inference/vLLM_Inference_tutorial.ipynb) | 21 | | skypilot | 通过 SkyPilot 在你的云端高效推理 Yi 模型。| [Running_yi_with_skyPilot.md](https://github.com/Haijian06/skypilot/tree/master/llm/yi) | 22 | | quantization, swift | 使用 Swift 轻松量化您的专属 Yi 模型。 | [swift-yi-quantization.md](./cn/opensource/quantization/swift-yi-quantization.md) | 23 | | quantization, autoawq | 利用 autoawq 量化您的 Yi 模型。 | [autoawq-yi-quantization.md](./cn/opensource/quantization/autoawq-yi-quantization.md) | 24 | | quantization, autogptq | 使用 autogptq 对 Yi 模型进行量化。 | [autogptq-yi-quantization.md](./cn/opensource/quantization/autogptq-yi-quantization.md) | 25 | | fine-tuning, swift | 使用 Swift 微调,打造您的个性化 Yi 模型。 | [finetune-yi-with-swift.md](./cn/opensource/fine_tune/finetune-yi-with-swift.md) | 26 | | fine-tuning, LLaMA-Factory | 使用 LLaMA-Factory 灵活微调您的 Yi 模型。 | [finetune-yi-with-llamafactory.md](./cn/opensource/fine_tune/finetune-yi-with-llamafactory.md) | 27 | | Local Run, ollama | 使用 ollama 在本地环境中运行 Yi 模型。 | [local-ollama.md](./cn/opensource/local/local-ollama.md) | 28 | | Local Run, MLX-LM | 在 MLX-LM 环境下本地运行 Yi 模型。 | [local-mlx.md](./cn/opensource/local/local-mlx.md) | 29 | | Local Run, LM Studio | 使用 LM Studio 轻松本地运行 Yi 模型。 | [local-lm-studio.md](./cn/opensource/local/local-lm-studio.md) | 30 | | Local Run, llama.cpp | 使用 llama.cpp 在本地运行 Yi 模型。 | [local-llama.cpp.md](./cn/opensource/local/local-llama.cpp.md) | 31 | | RAG, LlamaIndex | 基于 Yi 模型和 LlamaIndex 构建强大的 RAG 系统。| [yi_rag_llamaindex.ipynb](./cn/opensource/rag/yi_rag_llamaindex.ipynb) | 32 | | RAG, LangChain | 使用 LangChain 构建灵活的 RAG 系统。 | [yi_rag_langchain.ipynb](./cn/opensource/rag/yi_rag_langchain.ipynb) | 33 | | function calling | 从零开始,实现函数调用。 | [function_calling.ipynb](./cn/opensource/function_calling/function_calling.ipynb) | 34 | | function calling, LlamaIndex | 基于 LlamaIndex,轻松实现函数调用。 | [function_calling_llamaindex.ipynb](./cn/opensource/function_calling/function_calling_llamaindex.ipynb) | 35 | 36 | 37 | ## API 38 | 39 | | Category | Description | Notebook, Markdown | 40 | |:---------------------------|:---------------------------------|:-------------------------------------------------------------------------------------------------| 41 | | RAG, LlamaIndex | 使用 Yi(api) 模型与 LlamaIndex 构建 RAG 应用。 | [yi_rag_llamaindex.ipynb](./cn/api/rag/yi_rag_llamaindex.ipynb) | 42 | | RAG, LangChain | 利用 LangChain,构建基于 Yi API 的 RAG 系统。| [yi_rag_langchain.ipynb](./cn/api/rag/yi_rag_langchain.ipynb) | 43 | | function calling, LlamaIndex | 使用 Yi 模型,基于 LlamaIndex 实现函数调用。 | 
[function_calling_llamaindex.ipynb](./cn/api/function_calling/function_calling_llamaindex.ipynb) | 44 | 45 | ## Ecosystem 46 | 47 | | Category | Description | Notebook, Markdown | 48 | |:------------|:-----------------------------------------------|:-------------------------------------------------------------------------------------------------| 49 | | fine-tuning | 强化 Yi-1.5-6B-Chat 开源模型的数学与逻辑能力。 | [强化Yi-1.5-6B-Chat的数学和逻辑能力.md](./cn/ecosystem/强化Yi-1.5-6B-Chat的数学和逻辑能力.md) | 50 | | RAG | 基于 LlamaIndex 和 Yi-large 构建智能问答系统。 | [基于LlamaIndex和Yi-large构建智能问答系统.md](./cn/ecosystem/基于LlamaIndex和Yi-large构建智能问答系统.md) | 51 | | demo | 大模型玩游戏?探索 Yi 玩转街霸三的奥秘! | [使用Yi大模型玩转街霸三.md](./cn/ecosystem/使用Yi大模型玩转街霸三.md) | 52 | | demo | 基于 yi-large,打造高效的思维导图生成器。 | [基于yi-large构建思维导图生成器.md](./cn/ecosystem/基于yi-large构建思维导图生成器.md) | 53 | | fine-tuning | 掌握 yi-vl 微调的最佳实践,事半功倍。 | [yi-vl最佳实践.md](./cn/ecosystem/yi-vl最佳实践.md) | 54 | ## 社区贡献 55 | 56 | 我们热烈欢迎社区贡献!以下是参与方式: 57 | 58 | - **问题报告**:发现 bug 或有功能建议?请在 [GitHub Issues](https://github.com/01-ai/Yi/issues) 提交。 59 | - **展示你的作品!**:如果你有基于Yi模型做的有趣的、实用的应用或者教学,我们非常欢迎你提交PR到我们的仓库!请遵循我们的 [贡献指南](./CONTRIBUTING_cn.md)。 60 | -------------------------------------------------------------------------------- /Cookbook/assets/LM_Studio-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/LM_Studio-0.png -------------------------------------------------------------------------------- /Cookbook/assets/LM_Studio-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/LM_Studio-1.png -------------------------------------------------------------------------------- /Cookbook/assets/LM_Studio-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/LM_Studio-2.png -------------------------------------------------------------------------------- /Cookbook/assets/llama-cpp-0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/llama-cpp-0.jpg -------------------------------------------------------------------------------- /Cookbook/assets/mind_cn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/mind_cn.png -------------------------------------------------------------------------------- /Cookbook/assets/mind_en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/mind_en.png -------------------------------------------------------------------------------- /Cookbook/assets/ollama-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/ollama-0.png -------------------------------------------------------------------------------- /Cookbook/assets/ollama-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/ollama-1.png -------------------------------------------------------------------------------- /Cookbook/assets/ollama-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/ollama-2.png -------------------------------------------------------------------------------- /Cookbook/assets/ollama-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/ollama-3.png -------------------------------------------------------------------------------- /Cookbook/assets/ollama-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/ollama-4.png -------------------------------------------------------------------------------- /Cookbook/assets/ollama-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/ollama-5.png -------------------------------------------------------------------------------- /Cookbook/assets/ollama-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/ollama-6.png -------------------------------------------------------------------------------- /Cookbook/assets/ollama-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/ollama-7.png -------------------------------------------------------------------------------- /Cookbook/assets/ollama-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/assets/ollama-8.png -------------------------------------------------------------------------------- /Cookbook/cn/api/function_calling/function_calling_llamaindex.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyPi5Ho7I9ICMU0Jnn+/aVjA"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["## Function Calling\n","\n","大模型使用Function Calling的主要用途是扩展模型的功能,使其能够在生成文本、做出决策或执行任务时,集成更广泛的信息和功能。通过定义和调用外部函数,大模型能够处理更复杂的任务,如进行数学运算、下订单或查询数据库等,从而提高模型的实用性和灵活性。\n"],"metadata":{"id":"tY2fLItjnn6U"}},{"cell_type":"markdown","source":["### 🌟使用LlamaIndex\n","\n","LlamaIndex 开发了简单易用的 Function Calling 使用方法,接下来我们使用yi-large进行一个示例\n","\n","首先安装依赖环境,跟着我一步一步来!\n"],"metadata":{"id":"EiQrQO8In5W2"}},{"cell_type":"code","source":["!pip install llama-index\n","!pip install llama-index-llms-huggingface\n","!pip install 
llama-index-llms-yi"],"metadata":{"id":"9-f7jfstpCEe"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["确保你安装完成后我们就开始吧\n","\n","加载依赖"],"metadata":{"id":"3tlgHR5spMhG"}},{"cell_type":"code","execution_count":3,"metadata":{"id":"w9F41myGnfCS","executionInfo":{"status":"ok","timestamp":1721973909886,"user_tz":-480,"elapsed":8874,"user":{"displayName":"haijian wang","userId":"16545674694152900117"}}},"outputs":[],"source":["from typing import Any\n","\n","\n","from llama_index.core.llms import (\n"," CustomLLM,\n"," CompletionResponse,\n"," CompletionResponseGen,\n"," LLMMetadata,\n",")\n","from llama_index.core.llms.callbacks import llm_completion_callback\n","from llama_index.llms.yi import Yi\n","\n","from llama_index.core.tools import FunctionTool\n","from llama_index.core.agent import ReActAgent"]},{"cell_type":"markdown","source":["配置模型,需要前往零一万物[开放平台](https://platform.lingyiwanwu.com/apikeys)申请api_key"],"metadata":{"id":"xBu4fAuapTMS"}},{"cell_type":"code","source":["llm = Yi(model=\"yi-large\", api_key=\"your_api_key\")"],"metadata":{"id":"Lq4QFDl-pLn_"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["定义工具,这里我们定义 `加` `减` `乘` 的三种函数"],"metadata":{"id":"gCnRQ2vNpViR"}},{"cell_type":"code","source":["# 定义工具\n","def multiply(a: int, b: int) -> int:\n"," \"\"\"Multiple two integers and returns the result integer\"\"\"\n"," return a * b\n","\n","def plus(a: int, b: int) -> int:\n"," \"\"\"plus two integers and returns the result integer\"\"\"\n"," return a + b\n","def minus(a: int, b: int) -> int:\n"," \"\"\"Subtracts two integers and returns the result integer.\"\"\"\n"," return a - b\n","multiply_tool = FunctionTool.from_defaults(fn=multiply)\n","plus_tool = FunctionTool.from_defaults(fn=plus)\n","minus_tool = FunctionTool.from_defaults(fn=minus)"],"metadata":{"id":"g5-zAq22pXu6"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["创建Agent"],"metadata":{"id":"cvUFnsAkph-O"}},{"cell_type":"code","source":["agent = ReActAgent.from_tools([multiply_tool,plus_tool,minus_tool], llm=llm, verbose=True)"],"metadata":{"id":"Hurvg2J1pjQB"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["确保运行正确后我们就开始对话"],"metadata":{"id":"kEkZMzaXplsK"}},{"cell_type":"code","source":["while True:\n"," user_input = input(\"user>>\") # \"(1+2)*10\"\n"," agent.chat(user_input)"],"metadata":{"id":"Ki3NKbq_pqTz"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["运行结果展示"],"metadata":{"id":"HjcL_f7npr-K"}},{"cell_type":"markdown","source":["``````\n","user>>(1+2)*10\n","Thought: The user's question is in English. I need to use a combination of the 'plus' and 'multiply' tools to solve the mathematical expression (1+2)*10.\n","Action: plus\n","Action Input: {'a': 1, 'b': 2}\n","Observation: 3\n","Thought: I have the result of the 'plus' operation, which is 3. Now I need to multiply this result by 10 to complete the expression (1+2)*10.\n","Action: multiply\n","Action Input: {'a': 3, 'b': 10}\n","Observation: 30\n","Thought: I have the result of the 'multiply' operation, which is 30. This is the final result of the expression (1+2)*10.\n","Thought: I can answer without using any more tools. 
I'll use the user's language to answer.\n","Answer: The result of the expression (1+2)*10 is 30.\n","``````"],"metadata":{"id":"oBUz2VtkpuUw"}}]} -------------------------------------------------------------------------------- /Cookbook/cn/api/rag/yi_rag_langchain.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 使用Yi的API构建RAG(LangChain)\n", 8 | "\n", 9 | "在本教程中,我们将学习如何使用LangChain和Yi的API构建一个RAG(Retrieval-Augmented Generation)系统。" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## 安装必要的库\n", 17 | "\n", 18 | "首先,我们需要安装LangChain库。" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "metadata": {}, 24 | "source": [ 25 | "!pip install langchain" 26 | ], 27 | "execution_count": null, 28 | "outputs": [] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## 配置环境变量\n", 35 | "\n", 36 | "接下来,我们需要配置LangSmith和Yi的API密钥。请确保您已经在[LangSmith](https://smith.langchain.com/)和零一万物[开放平台](https://platform.lingyiwanwu.com/apikeys)注册并获取了API密钥。" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "metadata": {}, 42 | "source": [ 43 | "import getpass\n", 44 | "import os\n", 45 | "\n", 46 | "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", 47 | "os.environ[\"LANGCHAIN_API_KEY\"] = \"your_langsmith_api_key\"\n", 48 | "os.environ[\"YI_API_KEY\"] = \"your_yi_api_key\"" 49 | ], 50 | "execution_count": null, 51 | "outputs": [] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "## 安装LangChain-OpenAI库\n", 58 | "\n", 59 | "我们还需要安装`langchain-openai`库。" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "metadata": {}, 65 | "source": [ 66 | "!pip install -qU langchain-openai" 67 | ], 68 | "execution_count": null, 69 | "outputs": [] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## 配置LLM\n", 76 | "\n", 77 | "现在我们来配置LLM,使用Yi的大型语言模型。" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "metadata": {}, 83 | "source": [ 84 | "from langchain_openai import ChatOpenAI\n", 85 | "\n", 86 | "llm = ChatOpenAI(\n", 87 | " base_url=\"https://api.lingyiwanwu.com/v1\",\n", 88 | " api_key=os.environ[\"YI_API_KEY\"],\n", 89 | " model=\"yi-large\",\n", 90 | ")" 91 | ], 92 | "execution_count": null, 93 | "outputs": [] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## 加载数据\n", 100 | "\n", 101 | "接下来我们将加载一些示例数据。这里我们使用了LangChain的WebBaseLoader来加载网页数据。" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "metadata": {}, 107 | "source": [ 108 | "import bs4\n", 109 | "from langchain_community.document_loaders import WebBaseLoader\n", 110 | "\n", 111 | "loader = WebBaseLoader(\n", 112 | " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n", 113 | " bs_kwargs=dict(\n", 114 | " parse_only=bs4.SoupStrainer(\n", 115 | " class_=(\"post-content\", \"post-title\", \"post-header\")\n", 116 | " )\n", 117 | " ),\n", 118 | ")\n", 119 | "docs = loader.load()" 120 | ], 121 | "execution_count": null, 122 | "outputs": [] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "## 构建向量数据库\n", 129 | "\n", 130 | "我们将使用HuggingFace的嵌入模型来构建向量数据库,并使用Chroma来存储向量。" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "metadata": {}, 136 | "source": [ 137 | "from langchain.embeddings import HuggingFaceEmbeddings\n", 138 | "from langchain_chroma import Chroma\n", 
139 | "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", 140 | "\n", 141 | "# 加载向量模型\n", 142 | "embedding = HuggingFaceEmbeddings(model_name=\"BAAI/bge-base-en-v1.5\")\n", 143 | "\n", 144 | "# 切分文档\n", 145 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", 146 | "splits = text_splitter.split_documents(docs)\n", 147 | "\n", 148 | "# 构建向量数据库\n", 149 | "vectorstore = Chroma.from_documents(documents=splits, embedding=embedding)\n", 150 | "retriever = vectorstore.as_retriever()" 151 | ], 152 | "execution_count": null, 153 | "outputs": [] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "## 构建RAG链\n", 160 | "\n", 161 | "最后,我们将构建RAG链,并使用LangChain的hub来获取提示。" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "metadata": {}, 167 | "source": [ 168 | "from langchain import hub\n", 169 | "from langchain_core.output_parsers import StrOutputParser\n", 170 | "from langchain_core.runnables import RunnablePassthrough\n", 171 | "\n", 172 | "prompt = hub.pull(\"rlm/rag-prompt\")\n", 173 | "\n", 174 | "def format_docs(docs):\n", 175 | " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", 176 | "\n", 177 | "rag_chain = (\n", 178 | " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", 179 | " | prompt\n", 180 | " | llm\n", 181 | " | StrOutputParser()\n", 182 | ")\n", 183 | "\n", 184 | "response = rag_chain.invoke(\"What is Task Decomposition?\")\n", 185 | "print(response)" 186 | ], 187 | "execution_count": null, 188 | "outputs": [] 189 | } 190 | ], 191 | "metadata": { 192 | "kernelspec": { 193 | "display_name": "Python 3", 194 | "language": "python", 195 | "name": "python3" 196 | }, 197 | "language_info": { 198 | "name": "python", 199 | "version": "3.8" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 4 204 | } 205 | -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/2/img(4-1).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/2/img(4-1).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/2/img(4-2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/2/img(4-2).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/2/img(4-3).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/2/img(4-3).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/2/img(4-4).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/2/img(4-4).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/2/img(4-5).png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/2/img(4-5).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/2/img(4-6).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/2/img(4-6).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/2/img(4-7).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/2/img(4-7).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/3/img(3-1).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/3/img(3-1).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/3/img(3-2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/3/img(3-2).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/3/img(3-3).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/3/img(3-3).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/3/img(3-4).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/3/img(3-4).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/4/img(2-1).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/4/img(2-1).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/4/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/4/img.png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/4/img2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/4/img2.png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/4/img3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/4/img3.png -------------------------------------------------------------------------------- 
/Cookbook/cn/ecosystem/assets/4/img4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/4/img4.png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/4/img_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/4/img_1.png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/4/train_memory(GiB).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/4/train_memory(GiB).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/5/img(5-1).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/5/img(5-1).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/5/img(5-2).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/5/img(5-2).jpg -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/5/img(5-3).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/5/img(5-3).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/assets/5/img(5-4).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/cn/ecosystem/assets/5/img(5-4).png -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/使用Yi大模型玩转街霸三.md: -------------------------------------------------------------------------------- 1 | # 使用Yi大模型玩转街霸三:从安装到应用的详细指南 2 | 3 | ## 引言 4 | 5 | 欢迎来到这篇详细的教学指南!在本文中,我们将探索如何使用Yi大模型(特别是yi-large模型)来玩转经典格斗游戏街霸三。这个有趣的项目将展示大语言模型(LLM)在游戏AI领域的潜力。无论你是AI爱好者还是游戏迷,这个教程都会带给你全新的体验。 6 | 7 | 本教程适合基础较薄弱的读者,我们会逐步详细讲解每个环节,确保你能顺利完成整个过程。 8 | 9 | ## 项目概述 10 | 11 | 在这个项目中,我们将: 12 | 13 | 1. 设置必要的环境和工具 14 | 2. 获取Yi模型的API密钥 15 | 3. 配置和运行游戏环境 16 | 4. 使用Yi模型控制街霸三的角色进行对战 17 | 18 | ![img.png](assets/2/img(4-2).png) 19 | ## 实验环境 20 | 21 | - 操作系统:Windows 或 Mac OS(本教程将以Mac OS为例) 22 | - Python 3.10 23 | - Docker Desktop 24 | 25 | ## 步骤1:环境准备 26 | 27 | ### 1.1 安装Docker Desktop 28 | 29 | 1. 访问 [Docker Desktop官网](https://www.docker.com/products/docker-desktop/) 30 | 2. 下载适合你系统的版本 31 | 3. 按照安装向导完成安装 32 | 4. 安装完成后,重启你的电脑 33 | 34 | ### 1.2 安装Conda 35 | 36 | Conda是一个强大的包管理工具,我们将用它来创建虚拟环境。 37 | 38 | 1. 访问 [Conda官网](https://conda.io/projects/conda/en/latest/index.html) 39 | 2. 下载并安装Miniconda(推荐)或Anaconda 40 | 3. 
安装完成后,打开终端,输入以下命令验证安装: 41 | 42 | ``` 43 | conda --version 44 | ``` 45 | 46 | 如果显示版本号,说明安装成功。 47 | ![img_1.png](assets/2/img(4-1).png) 48 | ### 1.3 注册Diambra账号 49 | 50 | Diambra提供了我们需要的游戏环境。 51 | 52 | 1. 访问 [Diambra注册页面](https://diambra.ai/register) 53 | 2. 填写必要信息并完成注册 54 | 55 | ## 步骤2:获取Yi模型API密钥 56 | 57 | 1. 访问 [Yi大模型开放平台](https://platform.lingyiwanwu.com/) 58 | 2. 注册并登录账号 59 | 3. 在平台上创建新的API密钥 60 | 4. 保存好你的API密钥,我们后面会用到 61 | ![img_2.png](assets/2/img(4-3).png) 62 | ## 步骤3:配置项目环境 63 | 64 | ### 3.1 克隆项目仓库 65 | 66 | 打开终端,执行以下命令: 67 | 68 | ```bash 69 | git clone https://github.com/Yimi81/llm-colosseum.git 70 | cd llm-colosseum 71 | ``` 72 | 73 | ### 3.2 创建并激活虚拟环境 74 | 75 | 在项目目录下,执行: 76 | 77 | ```bash 78 | conda create -n yi python=3.10 -y 79 | conda activate yi 80 | ``` 81 | 82 | ### 3.3 安装依赖 83 | 84 | ```bash 85 | pip install -r requirements.txt 86 | ``` 87 | 88 | ### 3.4 配置环境变量 89 | 90 | 1. 复制示例环境文件: 91 | 92 | ```bash 93 | cp .env.example .env 94 | ``` 95 | 96 | 2. 编辑.env文件: 97 | 98 | 在Mac上,你可能需要显示隐藏文件。使用快捷键 `Command + Shift + .` 切换显示/隐藏隐藏文件。 99 | 100 | 3. 打开.env文件,将`YI_API_KEY`替换为你之前获取的API密钥。 101 | ![img_3.png](assets/2/img(4-4).png) 102 | ## 步骤4:启动游戏 103 | 104 | ### 4.1 找到ROM文件路径 105 | 106 | 在Mac环境中,ROM文件通常位于: 107 | 108 | ``` 109 | /Users/你的用户名/Desktop/code/llm-colosseum/.diambra/rom 110 | ``` 111 | 112 | 记住这个路径,我们称之为``。 113 | 114 | ### 4.2 启动游戏 115 | 116 | 在终端中执行: 117 | 118 | ```bash 119 | diambra -r run -l python script.py 120 | ``` 121 | ![img_4.png](assets/2/img(4-5).png) 122 | 123 | 首次运行时,系统会要求你输入Diambra账号的用户名和密码。之后,游戏镜像会开始下载。 124 | 125 | 然后等待启动就可以了 126 | 127 | ![img_5.png](assets/2/img(4-6).png) 128 | 129 | ## 结语 130 | 131 | 恭喜你!现在你已经成功设置并运行了一个由Yi大模型控制的街霸三AI对战系统。这个项目展示了大语言模型在游戏AI领域的潜力。你可以尝试修改代码,使用不同的Yi模型(如yi-medium),或者调整提示来改变AI的行为。 132 | 133 | 记住,使用API时可能会遇到请求频率限制。如果遇到这种情况,可以考虑升级你的API计划。 134 | ![img_6.png](assets/2/img(4-7).png) 135 | 希望你能从这个项目中学到新知识,并对AI在游戏中的应用产生更多兴趣。继续探索,享受AI带来的无限可能吧! 136 | 137 | -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/基于LlamaIndex和Yi-large构建智能问答系统.md: -------------------------------------------------------------------------------- 1 | # 「Yi x LlamaIndex」基于LlamaIndex和Yi-large构建融合网络与本地知识的智能问答系统 2 | 3 | ## 1. 引言 4 | 5 | 在人工智能迅猛发展的今天,大语言模型(LLM)已成为众多应用的核心。然而,LLM 也面临诸如信息可能过时、缺乏专业领域深度知识等挑战。为应对这些问题,检索增强生成(Retrieval-Augmented Generation, RAG)技术应运而生。 6 | 7 | RAG 通过在生成答案前从知识库中检索相关信息,并利用这些信息指导生成过程,显著提升了内容的准确性和相关性。本文将详细介绍如何利用 LlamaIndex 和 Yi-large 模型构建一个融合网络文档和本地知识库的智能问答系统。 8 | 9 | ## 2. 核心技术栈 10 | 11 | ### 2.1 Yi-large 模型 12 | 13 | Yi-large 是由 01.AI 开发的先进大型语言模型,具备强大的自然语言理解和生成能力。它能处理各种复杂的语言任务,为我们的 RAG 系统提供可靠的基础。 14 | 15 | ### 2.2 LlamaIndex 框架 16 | 17 | LlamaIndex 是一个专为 LLM 应用设计的强大数据框架。它提供了丰富的工具和抽象,极大简化了 RAG 系统的构建过程。LlamaIndex 支持多种数据源接入、高效索引构建和查询优化,是实现我们目标的理想选择。 18 | 19 | ### 2.3 BGE Embedding 20 | 21 | BGE (BAAI General Embedding) 是由智源研究院(BAAI)开发的通用文本嵌入模型。在本项目中,我们采用 BGE-base-en-v1.5 作为嵌入模型,它能将文本转换为高质量的向量表示,为后续的相似度检索奠定坚实基础。 22 | 23 | ## 3. 环境配置与依赖安装 24 | 25 | 首先,我们需要安装必要的依赖库。打开终端,执行以下命令: 26 | 27 | ```shell 28 | pip install llama-index 29 | pip install llama-index-llms-yi 30 | pip install llama-index-core llama-index-readers-file llama-index-embeddings-huggingface 31 | ``` 32 | 33 | ## 4. 
系统实现步骤 34 | 35 | 让我们按照以下步骤来实现这个智能问答系统: 36 | 37 | ### 4.1 导入必要模块 38 | 39 | ```python 40 | from llama_index.llms.yi import Yi 41 | from llama_index.readers.web import SimpleWebPageReader 42 | from llama_index.embeddings.huggingface import HuggingFaceEmbedding 43 | from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings 44 | ``` 45 | 46 | ### 4.2 配置 Yi-large 模型和 BGE 嵌入 47 | 48 | ```python 49 | llm = Yi(model="yi-large", api_key="your_api_key") 50 | Settings.llm = llm 51 | Settings.embed_model = HuggingFaceEmbedding( 52 | model_name="BAAI/bge-base-en-v1.5" 53 | ) 54 | ``` 55 | 56 | 注意:需要从 [01.AI 平台](https://platform.lingyiwanwu.com/) 获取 API 密钥。 57 | 58 | ### 4.3 加载网络文档和本地知识库 59 | 60 | ```python 61 | documents_web = SimpleWebPageReader(html_to_text=True).load_data( 62 | ["https://docs.llamaindex.ai/en/stable/use_cases/q_and_a/"] 63 | ) 64 | documents_loc = SimpleDirectoryReader("data").load_data() 65 | documents = documents_web + documents_loc 66 | ``` 67 | 68 | ### 4.4 构建索引和查询引擎 69 | 70 | ```python 71 | index = VectorStoreIndex.from_documents(documents) 72 | query_engine = index.as_query_engine() 73 | ``` 74 | 75 | ### 4.5 实现交互式问答循环 76 | 77 | ```python 78 | while True: 79 | user_input = input("User>> ") 80 | response = query_engine.query(user_input) 81 | print('Yi-large>>', response) 82 | ``` 83 | 84 | ### 4.6 我们来测试一下 85 | 86 | ```shell 87 | uesr>>Can llama-index query data in SQL and CSV? 88 | ``` 89 | ```shell 90 | 91 | Yi-large>> Yes, llama-index can query data in SQL and CSV formats. 92 | It supports text-to-SQL operations for querying SQL databases and 93 | text-to-Pandas operations for working with CSV files. This allows 94 | users to interact with structured data sources using natural language 95 | queries, making it easier to extract information from these types of data. 96 | ``` 97 | ```shell 98 | uesr>>I want to use the Yi-large model in python, what code do I need? 99 | ``` 100 | ```shell 101 | Yi-large>> To use the Yi-large model in Python, you can use the following code as a guide. This code snippet demonstrates how to set up the OpenAI SDK to work with the Yi-large model: 102 | 103 | ```python 104 | import openai 105 | from openai import OpenAI 106 | 107 | API_BASE = "https://api.01.ai/v1" 108 | API_KEY = "your key" 109 | 110 | client = OpenAI( 111 | api_key=API_KEY, 112 | base_url=API_BASE 113 | ) 114 | 115 | completion = client.chat.completions.create( 116 | model="yi-large", 117 | messages=[{"role": "user", "content": "Hi, who are you?"}] 118 | ) 119 | 120 | print(completion) 121 | Make sure to replace `"your key"` with your actual api key. This code will send a request to the Yi-large model with the message "Hi, who are you?" and print the response. 122 | 123 | ``` 124 | 125 | ## 5. 系统特点与优势 126 | 127 | 1. **知识融合**:通过同时接入网络文档和本地知识库,系统可利用更广泛、更新的信息源。 128 | 2. **高效检索**:利用 LlamaIndex框架优势和BGE 嵌入模型和向量索引,实现快速、精准的相关信息定位。 129 | 3. **强大生成能力**:Yi-large 模型确保了高质量的自然语言理解和生成。 130 | 4. **灵活扩展性**:基于 LlamaIndex 框架,系统易于扩展和定制,可根据需求添加更多功能。 131 | 5. **实时更新**:通过网络文档的动态加载,系统可以获取最新信息,保持知识的时效性。 132 | 133 | ## 6. 应用场景 134 | 135 | - **客户服务**:快速回答客户询问,提供准确、最新的产品信息。 136 | - **教育辅助**:结合教材内容和网络资源,为学生提供全面的学习支持。 137 | - **研究助手**:整合学术文献和最新研究进展,辅助研究人员进行文献综述。 138 | - **技术支持**:融合产品文档和在线论坛信息,为用户提供全面的技术解答。 139 | 140 | 141 | ## 7. 
结语 142 | 143 | 通过结合 LlamaIndex、Yi-large 和 BGE 嵌入,我们构建了一个强大的 RAG 系统,能够无缝融合网络和本地知识,为用户提供准确、相关的回答。这种方法不仅显著提高了回答质量,还大大增强了系统的适应性和可扩展性。 144 | 145 | 在实际应用中,开发者可以根据具体需求调整知识源、优化检索策略,甚至集成更多外部工具,打造更加智能和专业的问答助手。随着技术的不断进步,我们相信这样的系统将在各个领域发挥越来越重要的作用,为人类的知识获取和决策提供有力支持。 146 | 147 | -------------------------------------------------------------------------------- /Cookbook/cn/ecosystem/强化Yi-1.5-6B-Chat的数学和逻辑能力.md: -------------------------------------------------------------------------------- 1 | # 强化Yi-1.5-6B-Chat的数学和逻辑能力 2 | 3 | ## 目录 4 | 1. [简介](#简介) 5 | 2. [环境准备](#环境准备) 6 | 3. [安装SWIFT](#安装swift) 7 | 4. [数据集准备](#数据集准备) 8 | 5. [模型微调](#模型微调) 9 | 6. [LoRA合并](#lora合并) 10 | 7. [损失曲线可视化](#损失曲线可视化) 11 | 8. [模型推理](#模型推理) 12 | 9. [结语](#结语) 13 | 14 | ## 简介 15 | 16 | 本教程将指导您如何使用魔搭的SWIFT框架来微调Yi-1.5-6B-Chat模型。Yi-1.5-6B-Chat是由零一万物开发的一个强大的开源大语言模型,我们将使用SWIFT框架对其数学能力进行进一步的微调,以适应特定的任务需求。 17 | 18 | ## 环境准备 19 | 20 | 在开始之前,请确保您的系统满足以下要求: 21 | - GPU:本教程使用NVIDIA L4 GPU进行实验 22 | 23 | 请注意,大语言模型的训练和推理都需要较高的计算资源。如果您的本地环境不满足要求,可以考虑使用云计算平台提供的GPU实例。 24 | 25 | ## 安装SWIFT 26 | 27 | 首先,我们需要从GitHub克隆SWIFT仓库并安装必要的依赖。请按照以下步骤操作: 28 | 29 | 1. 打开终端,运行以下命令克隆SWIFT仓库: 30 | 31 | ```bash 32 | git clone https://github.com/modelscope/swift.git 33 | ``` 34 | 35 | 2. 进入SWIFT目录: 36 | 37 | ```bash 38 | cd swift 39 | ``` 40 | 41 | 3. 安装SWIFT及其依赖(包括LLM相关的包): 42 | 43 | ```bash 44 | pip install -e '.[llm]' 45 | ``` 46 | 47 | 这个命令会以可编辑模式安装SWIFT,并安装LLM相关的额外依赖。 48 | 49 | 安装过程可能需要几分钟时间,请耐心等待。如果遇到任何错误,请检查您的Python和CUDA版本是否兼容,并确保您有足够的磁盘空间。 50 | 51 | ## 数据集准备 52 | 53 | 在本教程中,我们将使用三个数据集来微调模型: 54 | 55 | 1. ruozhiba:一个中文问答数据集,配比5000条数据以保留模型的常识逻辑能力 56 | 2. AI-ModelScope/blossom-math-v2:一个数学问题数据集(原作者是azure99,原链接点击[这里](https://huggingface.co/datasets/Azure99/blossom-math-v2)) 57 | 3. math.jsonl:自定义数据集,主要是提升高等数学、线性代数、概率论题目的解题能力 58 | 59 | SWIFT框架支持直接使用在线数据集,也支持本地数据集。对于ruozhiba和blossom-math-v2,我们可以直接使用在线版本。对于HJ_math.jsonl,您需要确保它位于正确的目录中(本例中为/content/HJ_math.jsonl)。 60 | 61 | 数据集的格式应该符合SWIFT的要求,通常是JSON Lines格式,每一行包含一个训练样本。例如SWIFT官方示例: 62 | 63 | ```json 64 | {"system": "你是一个数学好能手", "query": "(1+3)*10+99的结果是多少", "response": "首先计算 1+3=4, 4*10=40, 40+99=134, 所以结果为134"} 65 | {"query": "求以下方程的解:x^2 - 4x + 4 = 0", "response": "方程x^2 - 4x + 4 = 0的解为x = 2(重根)。"} 66 | {"query": "求解以下方程组:x + y = 5 和 2x - y = 1", "response": "解这个方程组,得到x = 2,y = 3。"} 67 | ``` 68 | 69 | 确保您的自定义数据集HJ_math.jsonl遵循类似的格式。当然数据集也已经开源在了[HuggingFace](https://huggingface.co/datasets/haijian06/Advanced-Math)并且免费开放下载 70 | 71 | ## 模型微调 72 | 73 | 现在,我们开始微调模型。我们将使用LoRA(Low-Rank Adaptation)技术来微调Yi-1.5-6B-Chat模型。LoRA是一种高效的微调方法,可以显著减少所需的计算资源。 74 | 75 | 以下是微调的命令: 76 | 77 | ```bash 78 | CUDA_VISIBLE_DEVICES=0 swift sft \ 79 | --model_id_or_path 01ai/Yi-1.5-6B-Chat \ 80 | --dataset ruozhiba#5000 AI-ModelScope/blossom-math-v2#5000 /content/HJ_math.jsonl \ 81 | --output_dir output \ 82 | --sft_type lora \ 83 | --num_train_epochs 1 \ 84 | --max_length 1024 \ 85 | --dtype AUTO 86 | ``` 87 | 88 | 这个命令的解释如下(但是更多的也库参考SWIFT官方文档~): 89 | 90 | 1. `CUDA_VISIBLE_DEVICES=0`:指定使用的GPU编号(这里使用第一个GPU)。 91 | 92 | 2. `swift sft`:调用SWIFT的微调(SFT,Supervised Fine-Tuning)功能。 93 | 94 | 3. `--model_id_or_path 01ai/Yi-1.5-6B-Chat`:指定基础模型的ID或路径。 95 | 96 | 4. `--dataset ruozhiba#5000 AI-ModelScope/blossom-math-v2#5000 /content/HJ_math.jsonl`: 97 | - 指定使用的数据集,这里使用了三个数据集 98 | - `#5000`表示从每个数据集中随机选择5000个样本 99 | - 最后一个是本地数据集的路径 100 | 101 | 5. `--output_dir output`:指定输出目录,微调后的模型和日志将保存在这里。 102 | 103 | 6. `--sft_type lora`:指定使用LoRA进行微调。 104 | 105 | 7. `--num_train_epochs 1`:设置训练的轮数(epoch)为1。 106 | 107 | 8. 
`--max_length 1024`:设置输入序列的最大长度为1024个token。 108 | 109 | 9. `--dtype AUTO`:自动选择适合的数据类型(通常是float16或bfloat16,取决于GPU支持)。 110 | 111 | 运行这个命令后,SWIFT将开始微调过程。微调可能需要几个小时甚至更长时间,取决于您的硬件性能和数据集大小。 112 | ## 合并前测试 113 | 微调完成后,我们首先对LoRA权重模型进行测试如下: 114 | ```计算: (15 * 4 - 12) / 3 + 8^2``` 115 | ![img.png](assets/5/img(5-1).png) 116 | ```求极限 lim(x→+∞) ln(1+x)ln(1+1/x) 的值。``` 117 | ![img_1.png](assets/5/img(5-3).png) 118 | 我们可以看到目前模型的数学解题能力很强! 119 | ## LoRA合并 120 | 121 | 微调完成后,我们需要将LoRA权重与原始模型合并。这一步是可选的,但合并后的模型更容易部署和使用。使用以下命令进行合并: 122 | 123 | ```bash 124 | CUDA_VISIBLE_DEVICES=0 swift export \ 125 | --ckpt_dir '/content/swift/output/yi-1_5-6b-chat/v0-20240713-090427/checkpoint-365' \ 126 | --merge_lora true 127 | ``` 128 | 129 | 这个命令的参数解释: 130 | 131 | - `--ckpt_dir`:指定保存LoRA权重的检查点目录。请根据实际输出的路径进行修改。 132 | - `--merge_lora true`:指示SWIFT将LoRA权重与基础模型合并。 133 | 134 | 合并过程完成后,您将在指定的目录中找到合并后的模型文件。 135 | 136 | ## 损失曲线可视化 137 | 138 | 微调过程中,SWIFT会自动记录训练损失。您可以在文件包中进行查看 139 | 140 | 您应该能看到类似下面的损失曲线图: 141 | 142 | ![train_loss(5-1).png](assets/5/img(5-4).png) 143 | 144 | 损失曲线应该呈现下降趋势,表明模型正在学习并改善其性能。 145 | 146 | ## 模型推理 147 | 148 | 现在我们可以使用微调后的模型进行推理。使用以下命令: 149 | 150 | ```bash 151 | swift infer \ 152 | --ckpt_dir /content/yi-1_5-6b-chat/v0-20240717-024536/checkpoint-682-merged \ 153 | --eval_human true \ 154 | --stop_words "Observation:" \ 155 | --infer_backend pt 156 | ``` 157 | 158 | 参数解释: 159 | - `--ckpt_dir`:指定合并后的模型检查点目录。 160 | - `--eval_human true`:启用人机交互模式。 161 | - `--stop_words "Observation:"`:设置停止词,用于控制输出长度。 162 | - `--infer_backend pt`:使用PyTorch作为推理后端。 163 | 164 | 运行此命令后,您将进入交互式推理模式。您可以输入问题,模型会生成回答。以下是一些示例问题: 165 | 166 | 167 | ![img(5-2).jpg](assets/5/img(5-2).jpg) 168 | 169 | 您可以继续输入更多问题来测试模型的性能。记得观察模型在不同类型问题上的表现,特别是在您用于微调的数据集相关的问题上。 170 | 171 | ## 结语 172 | 173 | 恭喜!您已经成功使用SWIFT框架微调了Yi-1.5-6B-Chat模型,并学会了如何进行推理。这个过程涵盖了从环境设置、数据准备、模型微调到最终的模型使用。 174 | 175 | 记住,模型的性能很大程度上取决于您用于微调的数据质量和数量。您可以尝试使用不同的数据集或调整微调参数来进一步改善模型性能。 176 | 177 | 如果您在过程中遇到任何问题,请查阅SWIFT的[官方文档](https://github.com/modelscope/swift/)。继续探索和实验,您将能够根据自己的需求定制出更强大的语言模型! 178 | -------------------------------------------------------------------------------- /Cookbook/cn/opensource/Inference/Inference_using_lmdeploy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 使用lmdeploy进行Yi-1.5-6B-Chat模型的推理\n", 8 | "\n", 9 | "欢迎来到本教程!在这里,我们将指导您如何使用lmdeploy进行Yi-1.5-6B-Chat模型的推理。lmdeploy是一个涵盖了LLM任务的全套轻量化、部署和服务解决方案。让我们开始吧!" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## 🚀 在Colab上运行\n", 17 | "\n", 18 | "我们还提供了一键运行的[Colab脚本](https://colab.research.google.com/drive/1q3ROpne6ulkoybBzemeanHY6vNP9ykjV?usp=drive_link),让开发变得更简单!" 
19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## 安装\n", 26 | "\n", 27 | "首先,我们需要安装相关的依赖:" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "metadata": { 33 | "id": "installation" 34 | }, 35 | "source": [ 36 | "!pip install lmdeploy" 37 | ], 38 | "execution_count": null, 39 | "outputs": [] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## 加载模型并开始推理\n", 46 | "\n", 47 | "我们将使用Yi-1.5-6B-Chat模型进行演示。以下是该模型的显存和硬盘占用情况:\n", 48 | "\n", 49 | "| 模型 | 显存使用 | 硬盘占用 |\n", 50 | "|-------|------------|------------------|\n", 51 | "| Yi-1.5-6B-Chat | 20.3G | 18G |\n", 52 | "\n", 53 | "执行以下命令即可开始推理:\n", 54 | "\n", 55 | "⚠️ 请确保您的计算机拥有足够的显存和硬盘空间。" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "metadata": { 61 | "id": "inference" 62 | }, 63 | "source": [ 64 | "!lmdeploy chat 01-ai/Yi-1.5-6B-Chat" 65 | ], 66 | "execution_count": null, 67 | "outputs": [] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "就是这样!您已经成功地使用lmdeploy进行了Yi-1.5-6B-Chat模型的推理。您可以尝试更换不同的模型或调整配置参数,探索更多可能性。祝您实验愉快!" 74 | ] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.8.8" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 5 98 | } 99 | -------------------------------------------------------------------------------- /Cookbook/cn/opensource/Inference/Inference_using_swift.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 使用 SWIFT 进行推理\n", 8 | "\n", 9 | "SWIFT 是 ModelScope 开源的一款框架,支持大模型的训练、推理、评测和部署。通过 SWIFT,可以轻松实现从模型训练到应用的完整链路。\n", 10 | "\n", 11 | "本教程将详细介绍如何使用 SWIFT 进行推理,包括安装步骤和推理示例。我们将以 Yi-1.5-6B-Chat 为例进行演示。\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## 🚀 使用 Colab 运行\n", 19 | "\n", 20 | "我们提供了一键运行的 [Colab 脚本](https://colab.research.google.com/drive/1R0s7cDNWTNCWjod_z-jVpxiFc-R3_7kc?usp=drive_link),你可以直接在 Colab 中运行这个教程。\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## 安装\n", 28 | "\n", 29 | "首先,我们需要安装相关的依赖。\n", 30 | "\n", 31 | "(可选)可以选择设置 pip 全局镜像以加快下载速度:\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "metadata": {}, 37 | "source": [ 38 | "!pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/" 39 | ], 40 | "outputs": [], 41 | "execution_count": null 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "安装 ms-swift:\n" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "metadata": {}, 53 | "source": [ 54 | "!pip install 'ms-swift[llm]' -U" 55 | ], 56 | "outputs": [], 57 | "execution_count": null 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "## 开始推理\n", 64 | "\n", 65 | "在开始推理之前,需要注意你的电脑显存和内存。如果显存不足,可能会导致报错。\n", 66 | "\n", 67 | "| 模型 | 显存使用 | 硬盘占用 |\n", 68 | "| -------------- | -------- | -------- |\n", 69 | "| Yi-1.5-6B-Chat | 11.5G | 14.7G |\n" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "我们首先设置环境变量:" 77 | ] 78 | }, 79 | { 80 | 
"cell_type": "code", 81 | "metadata": {}, 82 | "source": [ 83 | "import os\n", 84 | "os.environ['CUDA_VISIBLE_DEVICES'] = '0'" 85 | ], 86 | "outputs": [], 87 | "execution_count": null 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "然后加载模型和分词器:" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "metadata": {}, 99 | "source": [ 100 | "from swift.llm import (\n", 101 | " get_model_tokenizer, get_template, inference, ModelType, get_default_template_type,\n", 102 | ")\n", 103 | "from swift.utils import seed_everything\n", 104 | "\n", 105 | "# 选择模型类型,这里使用 Yi-1.5-6B-Chat\n", 106 | "model_type = ModelType.yi_1_5_6b_chat\n", 107 | "template_type = get_default_template_type(model_type)\n", 108 | "print(f'template_type: {template_type}') # 模板类型\n", 109 | "\n", 110 | "# 加载模型和分词器\n", 111 | "kwargs = {}\n", 112 | "model, tokenizer = get_model_tokenizer(model_type, model_kwargs={'device_map': 'auto'}, **kwargs)\n", 113 | "\n", 114 | "# 设置生成配置\n", 115 | "model.generation_config.max_new_tokens = 128\n", 116 | "\n", 117 | "# 获取模板\n", 118 | "template = get_template(template_type, tokenizer)\n", 119 | "\n", 120 | "# 设置随机种子\n", 121 | "seed_everything(42)" 122 | ], 123 | "outputs": [], 124 | "execution_count": null 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "接下来,我们进行推理:" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "metadata": {}, 136 | "source": [ 137 | "# 准备输入查询\n", 138 | "query = '你好!'\n", 139 | "\n", 140 | "# 使用模板进行推理\n", 141 | "response, history = inference(model, template, query)\n", 142 | "\n", 143 | "# 打印查询和响应\n", 144 | "print(f'query: {query}')\n", 145 | "print(f'response: {response}')" 146 | ], 147 | "outputs": [], 148 | "execution_count": null 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "以上代码将会输出类似如下的结果:\n", 155 | "\n", 156 | "```\n", 157 | "query: 你好!\n", 158 | "response: 你好!很高兴见到你。有什么我可以帮忙的吗?\n", 159 | "```\n" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "到这里,你已经学会了如何使用 SWIFT 进行 Yi 系列模型的推理。如果在使用过程中遇到任何问题,可以参考 [SWIFT 的官方文档](https://www.modelscope.cn/models/01-ai/Yi-1.5-6B-Chat) 获取更多帮助。" 167 | ] 168 | } 169 | ], 170 | "metadata": { 171 | "kernelspec": { 172 | "display_name": "Python 3", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.8.10" 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 4 191 | } 192 | -------------------------------------------------------------------------------- /Cookbook/cn/opensource/Inference/vLLM_Inference_tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 使用vLLM进行Yi-1.5-6B-Chat模型的推理\n", 8 | "\n", 9 | "欢迎来到本教程!在这里,我们将指导您如何使用vLLM进行Yi-1.5-6B-Chat模型的推理。vLLM是一个快速且易于使用的大型语言模型(LLM)推理和服务库。让我们开始吧!" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## 🚀 在Colab上运行\n", 17 | "\n", 18 | "我们还提供了一键运行的[Colab脚本](https://colab.research.google.com/drive/1KuydGHHbI31Q0WIpwg7UmH0rfNjii8Wl?usp=drive_link),让开发变得更简单!" 
19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## 安装\n", 26 | "\n", 27 | "首先,我们需要安装相关的依赖。根据官方文档要求,使用pip安装vLLM需要CUDA 12.1。您可以参考官方[文档](https://docs.vllm.ai/en/stable/getting_started/installation.html)获取更多详情。\n", 28 | "\n", 29 | "现在让我们安装vLLM:" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "metadata": { 35 | "id": "installation" 36 | }, 37 | "source": [ 38 | "!pip install vllm" 39 | ], 40 | "execution_count": null, 41 | "outputs": [] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## 加载模型\n", 48 | "\n", 49 | "接下来,我们将加载Yi-1.5-6B-Chat模型。请注意电脑的显存和硬盘占用情况。如果出现错误,可能是由于资源不足引起的。\n", 50 | "\n", 51 | "本教程使用Yi-1.5-6B-Chat模型。以下是该模型的显存和硬盘占用情况:\n", 52 | "\n", 53 | "| 模型 | 显存使用 | 硬盘占用 |\n", 54 | "|-------|------------|------------------|\n", 55 | "| Yi-1.5-6B-Chat | 21G | 15G |" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "metadata": { 61 | "id": "load-model" 62 | }, 63 | "source": [ 64 | "from transformers import AutoTokenizer\n", 65 | "from vllm import LLM, SamplingParams\n", 66 | "\n", 67 | "# 加载分词器\n", 68 | "tokenizer = AutoTokenizer.from_pretrained(\"01-ai/Yi-1.5-6B-Chat\")\n", 69 | "\n", 70 | "# 设置采样参数\n", 71 | "sampling_params = SamplingParams(\n", 72 | " temperature=0.8, \n", 73 | " top_p=0.8)\n", 74 | "\n", 75 | "# 加载模型\n", 76 | "llm = LLM(model=\"01-ai/Yi-1.5-6B-Chat\")" 77 | ], 78 | "execution_count": null, 79 | "outputs": [] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "## 模型推理\n", 86 | "\n", 87 | "现在,让我们准备一个提示词模版并使用模型进行推理。在这个例子中,我们将使用一个简单的问候语提示词。" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "metadata": { 93 | "id": "inference" 94 | }, 95 | "source": [ 96 | "# 准备提示词模版\n", 97 | "prompt = \"你好!\" # 根据需要更改提示词\n", 98 | "messages = [\n", 99 | " {\"role\": \"user\", \"content\": prompt}\n", 100 | "]\n", 101 | "text = tokenizer.apply_chat_template(\n", 102 | " messages,\n", 103 | " tokenize=False,\n", 104 | " add_generation_prompt=True\n", 105 | ")\n", 106 | "print(text)\n", 107 | "\n", 108 | "# 生成回复\n", 109 | "outputs = llm.generate([text], sampling_params)\n", 110 | "\n", 111 | "# 打印输出\n", 112 | "for output in outputs:\n", 113 | " prompt = output.prompt\n", 114 | " generated_text = output.outputs[0].text\n", 115 | " print(f\"Prompt: {prompt!r}, Generated text: {generated_text!r}\")\n", 116 | "# 期望的回复:\"你好!今天见到你很高兴。我能为你做些什么呢?\"" 117 | ], 118 | "execution_count": null, 119 | "outputs": [] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "就是这样!您已经成功地使用vLLM进行了Yi-1.5-6B-Chat模型的推理。请随意尝试不同的提示词并调整采样参数,看看模型会如何响应。祝您实验愉快!" 126 | ] 127 | } 128 | ], 129 | "metadata": { 130 | "kernelspec": { 131 | "display_name": "Python 3", 132 | "language": "python", 133 | "name": "python3" 134 | }, 135 | "language_info": { 136 | "codemirror_mode": { 137 | "name": "ipython", 138 | "version": 3 139 | }, 140 | "file_extension": ".py", 141 | "mimetype": "text/x-python", 142 | "name": "python", 143 | "nbconvert_exporter": "python", 144 | "pygments_lexer": "ipython3", 145 | "version": "3.8.8" 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 5 150 | } 151 | -------------------------------------------------------------------------------- /Cookbook/cn/opensource/fine_tune/finetune-yi-with-llamafactory.md: -------------------------------------------------------------------------------- 1 | ### 🌟使用LLaMA-Factory微调 2 | 3 | LLaMA Factory是一款开源低代码大模型微调框架,集成了业界广泛使用的微调技术,是北航的博士生郑耀威的杰作。微调的过程很方便,跟着我们一步一步来! 
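正式动手之前,可以先用下面这段小脚本确认本机的 GPU 环境(可选步骤,仅作示意,假设你已经安装好 PyTorch;显存不足时后面的微调可能会报错):

```python
import torch

# 检查 CUDA 是否可用,并打印显卡名称与显存大小,方便判断本机能否进行 QLoRA 微调
print("CUDA 可用:", torch.cuda.is_available())
if torch.cuda.is_available():
    props = torch.cuda.get_device_properties(0)
    print("显卡:", props.name)
    print("显存(GiB):", round(props.total_memory / 1024**3, 1))
```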
4 | 5 | #### 安装 6 | 7 | 首先我们拉取LLaMA-Factory到本地: 8 | 9 | `````` 10 | git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git 11 | `````` 12 | 13 | 安装依赖: 14 | 15 | `````` 16 | # ⚠️下面两行命令去终端执行⚠️ 17 | cd LLaMA-Factory 18 | pip install -e ".[torch,metrics]" 19 | `````` 20 | 21 | 如果你还没有下载yi模型建议从**Huggingface**或者**ModelScope**中下载对应的代码如下: 22 | 23 | `````` 24 | # 从ModelScope中下载 25 | git clone https://www.modelscope.cn/01ai/Yi-1.5-6B-Chat.git 26 | # 从Huggingface下载 27 | git clone https://huggingface.co/01-ai/Yi-1.5-6B-Chat 28 | `````` 29 | 30 | #### 开始微调 31 | 32 | 1. 创建微调训练相关的配置文件。 33 | 34 | - 在Llama-Factory的文件夹里,打开examples\train_qlora下提供的`llama3_lora_sft_``awq``.yaml`,复制一份并重命名为`yi_lora_sft_bitsandbytes.yaml`。 35 | 36 | - 这个文件里面写着和微调相关的关键参数:比如使用哪个模型?进行什么样的压缩量化?使用什么数据集(这里是identity)?这个数据集学习几遍(num_train_epochs)?微调后的模型权重保存在哪里? 37 | 38 | 2. `yi_lora_sft_bitsandbytes.yaml`的内容填充为: 39 | 40 | `````` 41 | ### model 42 | model_name_or_path: <你下载的模型位置,不要带括号,比如我写了../Yi-1.5-6B-Chat> 43 | quantization_bit: 4 44 | 45 | ### method 46 | stage: sft 47 | do_train: true 48 | finetuning_type: lora 49 | lora_target: all 50 | 51 | ### dataset 52 | dataset: identity 53 | template: yi 54 | cutoff_len: 1024 55 | max_samples: 1000 56 | overwrite_cache: true 57 | preprocessing_num_workers: 16 58 | 59 | ### output 60 | output_dir: saves/yi-6b/lora/sft 61 | logging_steps: 10 62 | save_steps: 500 63 | plot_loss: true 64 | overwrite_output_dir: true 65 | 66 | ### train 67 | per_device_train_batch_size: 1 68 | gradient_accumulation_steps: 8 69 | learning_rate: 1.0e-4 70 | num_train_epochs: 3.0 71 | lr_scheduler_type: cosine 72 | warmup_ratio: 0.1 73 | fp16: true 74 | 75 | ### eval 76 | val_size: 0.1 77 | per_device_eval_batch_size: 1 78 | eval_strategy: steps 79 | eval_steps: 500 80 | `````` 81 | 82 | 这里我们使用的identity数据集,俗话说就是“自我认知”数据集,也就是说当你问模型“你好你是谁”的时候,模型会告诉你我叫name由author开发。如果你把数据集更改成你自己的名字,那你就可以微调一个属于你自己的大模型啦。 83 | 84 | 3. 打开终端terminal,输入以下命令启动微调脚本(大概需要10分钟): 85 | 86 | ``````bash 87 | llamafactory-cli train examples/train_qlora/yi_lora_sft_bitsandbytes.yaml 88 | `````` 89 | 90 | #### 推理测试 91 | 92 | 1. 请参考Llama-Factory文件夹中,examples\inference下提供的`llama3_lora_sft.yaml`,复制一份,并重命名为`yi_lora_sft.yaml`。 93 | 94 | 内容填充为: 95 | 96 | `````` 97 | model_name_or_path: <和之前一样,你下载的模型位置,比如我写了../Yi-1.5-6B-Chat> 98 | adapter_name_or_path: saves/yi-6b/lora/sft 99 | template: yi 100 | finetuning_type: lora 101 | `````` 102 | 103 | 2. 回到刚刚结束微调的终端Terminal,运行下面的推理命令: 104 | `````` 105 | llamafactory-cli chat examples/inference/yi_lora_sft.yaml 106 | `````` 107 | 108 | 好啦,使用llamafactory微调Yi模型的教程就结束啦,是不是感觉特别有成就感,欢迎继续查看我们其它的教程噢。 -------------------------------------------------------------------------------- /Cookbook/cn/opensource/fine_tune/finetune-yi-with-swift.md: -------------------------------------------------------------------------------- 1 | ### 🌟使用SWIFT微调 2 | SWIFT是ModelScope开源的一款框架,支持多模态大模型的训练、推理、评测和部署。并且可以直接实现模型训练评测到应用的完整链路。 3 | 接下来我们就开始使用SWIFT对Yi模型进行微调! 
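本教程默认使用 ModelScope 上的在线数据集。如果想换成自己的数据,SWIFT 也支持本地 JSON Lines 文件,每行一个样本,大致格式如下(字段名请以 SWIFT 官方文档为准,下面仅为示意):

```json
{"query": "1+1等于几?", "response": "1+1等于2。"}
{"system": "你是一个数学好能手", "query": "(1+3)*10的结果是多少", "response": "首先计算1+3=4,再计算4*10=40,所以结果为40。"}
```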
4 | 5 | #### 安装 6 | 7 | 首先我们拉取SWIFT的代码仓库: 8 | 9 | ``````bash 10 | # 安装ms-swift 11 | git clone https://github.com/modelscope/swift.git 12 | cd swift 13 | pip install -e '.[llm]' 14 | `````` 15 | 16 | #### 开始微调 17 | 18 | 这里我们使用CLI进行微调具体代码如下: 19 | 20 | ``````bash 21 | CUDA_VISIBLE_DEVICES=0 swift sft \ 22 | --model_id_or_path 01ai/Yi-1.5-6B-Chat \ 23 | --dataset AI-ModelScope/blossom-math-v2 \ 24 | --output_dir output \ 25 | `````` 26 | 27 | `--model_id_or_path`可以更换使用的模型 28 | 29 | `--dataset` 选择数据集 30 | 31 | `--output_dir` 微调后模型的存放位置 32 | 33 | 如果你想了解更多的关于SWIFT的内容,你取点击[这里](https://github.com/modelscope/swift)! 34 | -------------------------------------------------------------------------------- /Cookbook/cn/opensource/function_calling/function_calling_llamaindex.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyORfgxDEFPG125uYS1AUVbE"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["## Function Calling\n","\n","大模型使用Function Calling的主要用途是扩展模型的功能,使其能够在生成文本、做出决策或执行任务时,集成更广泛的信息和功能。通过定义和调用外部函数,大模型能够处理更复杂的任务,如进行数学运算、下订单或查询数据库等,从而提高模型的实用性和灵活性。\n"],"metadata":{"id":"tY2fLItjnn6U"}},{"cell_type":"markdown","source":["### 🌟使用LlamaIndex\n","\n","LlamaIndex 开发了简单易用的 Function Calling 使用方法,接下来我们使用yi-large进行一个示例\n","\n","首先安装依赖环境,跟着我一步一步来!\n"],"metadata":{"id":"EiQrQO8In5W2"}},{"cell_type":"code","source":["!pip install llama-index\n","!pip install llama-index-llms-huggingface\n","!pip install llama-index-llms-yi"],"metadata":{"id":"9-f7jfstpCEe"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["确保你安装完成后我们就开始吧\n","\n","加载依赖"],"metadata":{"id":"3tlgHR5spMhG"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"w9F41myGnfCS"},"outputs":[],"source":["from typing import Any\n","\n","\n","from llama_index.core.llms import (\n"," CustomLLM,\n"," CompletionResponse,\n"," CompletionResponseGen,\n"," LLMMetadata,\n",")\n","from llama_index.core.llms.callbacks import llm_completion_callback\n","from llama_index.llms.yi import Yi\n","\n","from llama_index.core.tools import FunctionTool\n","from llama_index.core.agent import ReActAgent"]},{"cell_type":"markdown","source":["配置模型,这里我们通过 huggingface 加载开源模型,同样你也可以直接从Huggingface下载 Yi-1.5-34B-Chat"],"metadata":{"id":"xBu4fAuapTMS"}},{"cell_type":"code","source":["llm = HuggingFaceLLM(\n"," context_window=4096,\n"," max_new_tokens=2048,\n"," generate_kwargs={\"temperature\": 0.0, \"do_sample\": False},\n"," query_wrapper_prompt=query_wrapper_prompt,\n"," tokenizer_name='/model/Yi-1.5-9B-Chat', # 加载本地模型\n"," model_name='/model/Yi-1.5-9B-Chat', # 加载本地模型\n"," device_map=\"auto\",\n"," model_kwargs={\"torch_dtype\": torch.float16},\n",")"],"metadata":{"id":"Lq4QFDl-pLn_"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["定义工具,这里我们定义 `加` `减` `乘` 的三种函数"],"metadata":{"id":"gCnRQ2vNpViR"}},{"cell_type":"code","source":["# 定义工具\n","def multiply(a: int, b: int) -> int:\n"," \"\"\"Multiple two integers and returns the result integer\"\"\"\n"," return a * b\n","\n","def plus(a: int, b: int) -> int:\n"," \"\"\"plus two integers and returns the result integer\"\"\"\n"," return a + b\n","def minus(a: int, b: int) -> int:\n"," \"\"\"Subtracts two integers and returns the result integer.\"\"\"\n"," return a - b\n","multiply_tool = FunctionTool.from_defaults(fn=multiply)\n","plus_tool = 
FunctionTool.from_defaults(fn=plus)\n","minus_tool = FunctionTool.from_defaults(fn=minus)"],"metadata":{"id":"g5-zAq22pXu6"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["创建Agent"],"metadata":{"id":"cvUFnsAkph-O"}},{"cell_type":"code","source":["agent = ReActAgent.from_tools([multiply_tool,plus_tool,minus_tool], llm=llm, verbose=True)"],"metadata":{"id":"Hurvg2J1pjQB"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["确保运行正确后我们就开始对话"],"metadata":{"id":"kEkZMzaXplsK"}},{"cell_type":"code","source":["while True:\n"," user_input = input(\"user>>\") # \"(1+2)*10\"\n"," agent.chat(user_input)"],"metadata":{"id":"Ki3NKbq_pqTz"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["运行结果展示"],"metadata":{"id":"HjcL_f7npr-K"}},{"cell_type":"markdown","source":["``````\n","user>>(1+2)*10\n","Thought: The user's question is in English. I need to use a combination of the 'plus' and 'multiply' tools to solve the mathematical expression (1+2)*10.\n","Action: plus\n","Action Input: {'a': 1, 'b': 2}\n","Observation: 3\n","Thought: I have the result of the 'plus' operation, which is 3. Now I need to multiply this result by 10 to complete the expression (1+2)*10.\n","Action: multiply\n","Action Input: {'a': 3, 'b': 10}\n","Observation: 30\n","Thought: I have the result of the 'multiply' operation, which is 30. This is the final result of the expression (1+2)*10.\n","Thought: I can answer without using any more tools. I'll use the user's language to answer.\n","Answer: The result of the expression (1+2)*10 is 30.\n","``````"],"metadata":{"id":"oBUz2VtkpuUw"}}]} -------------------------------------------------------------------------------- /Cookbook/cn/opensource/local/local-lm-studio.md: -------------------------------------------------------------------------------- 1 | ### 🌟使用LM Studio本地运行 2 | 3 | LM Studio 是一款易于使用的桌面应用程序,用于试用本地和开源的大型语言模型 (LLM),能更好的在本地部署大模型,并且操作相对简单。 4 | 5 | #### 下载和安装 6 | 7 | LM Studio的安装非常简单,只需要前往LM Studio的[官方网站](https://lmstudio.ai/)。 8 | 9 | 按照自己的电脑操作系统进行安装即可 10 | 11 | ![image-20240615144405423](../../../assets/LM_Studio-0.png) 12 | 13 | 等待下载好后打开LM Studio软件,在搜索框搜索yi1.5-6b-chat或其它模型,以下示例使用yi1.5-6b-chat 14 | 15 | LM Studio会友好的帮你评估本地电脑可以运行哪些模型,这可以很好的避免,显存问题 16 | 17 | ![image-20240615144405423](../../../assets/LM_Studio-1.png) 18 | 19 | 同样的选择你想要本地运行的模型,然后点击download下载就可以进行使用了。 20 | -------------------------------------------------------------------------------- /Cookbook/cn/opensource/local/local-mlx.md: -------------------------------------------------------------------------------- 1 | ### 🌟使用MLX-LM本地运行 2 | 3 | MLX-LM是一款适用Mac os进行本地部署大模型的框架,具体内容参考[官方文档](https://github.com/ml-explore/mlx-examples/tree/main?tab=readme-ov-file)。 4 | 5 | ⚠️请注意MLX-LM仅适用于Mac os操作系统。 6 | 7 | #### 下载和安装 8 | 9 | ``````bash 10 | pip install mlx-lm 11 | `````` 12 | 13 | #### 开始使用 14 | 15 | 以下使用mlx-community/Yi-1.5-6B-Chat-8bit作为示例。 16 | 17 | 同样的也可以替换为其它模型,例如 mlx-community/Yi-1.5-34B-Chat-4bit。 18 | 19 | ``````python 20 | from mlx_lm import load, generate 21 | 22 | model, tokenizer = load("mlx-community/Yi-1.5-6B-Chat-8bit") 23 | 24 | response = generate(model, tokenizer, prompt="hello", verbose=True) 25 | `````` -------------------------------------------------------------------------------- /Cookbook/cn/opensource/local/local-ollama.md: -------------------------------------------------------------------------------- 1 | ### 🌟使用ollama本地运行 2 | 3 | ollama是一个开源的大型语言模型服务工具,它允许用户在自己的硬件环境中轻松部署和使用大规模预训练模型。使得用户能够快速地在本地运行这些模型。 4 | 5 | #### 下载和安装 6 | 7 | 
可以直接在ollama[官方网站](https://ollama.com/)进行下载,点击download,选择自己电脑对应的版本进行下载。 8 | 9 | ![ollama](../../../assets/ollama-1.png) 10 | 11 | 12 | 13 | 下载后按照对应的流程安装即可。 14 | 15 | #### 本地使用 16 | 17 | 1.终端运行 18 | 19 | 你可以选择直接在终端中进行运行yi系列模型,官方可选择的模型具体参考[文档](https://ollama.com/library/yi)。 20 | 21 | ``````bash 22 | ollama run yi:6b 23 | `````` 24 | 25 | - 运行命令后ollama会自动下载模型到本地 26 | - 下载完成后即可进行使用 27 | 28 | 下图为运行成功参考示意图: 29 | 30 | ![image-20240615142555895](../../../assets/ollama-2.png) 31 | 32 | 2.使用[OpenWebUI](https://openwebui.com/)运行 33 | 34 | 使用OpenWebUI运行的好处是,能够可视化的进行更多的操作,基本不需要使用命令进行操作,使用体验非常好,操作门槛低。 35 | 36 | 接下来我们开始进行安装操作: 37 | 38 | - 第一步首先确保你已经正确的安装好了ollama 39 | 40 | - 第二步安装docker 41 | 42 | ​ Docker是一种轻量级的虚拟化技术,同时是一个开源的应用容器运行环境搭建平台,可以让开发者以便捷方 式打包应用到一个可移植的容器中,然后安装至任何运行Linux或Windows等系统的服务器上。相较于传统虚 拟机,Docker容器提供轻量化的虚拟化方式、安装便捷、启停速度快。 43 | 44 | ​ 简单来说就是通过docker来运行OpenWebUI。 45 | 46 | ​ 安装也很简单只需要前往docker[官网](https://www.docker.com/get-started/),根据电脑型号,点击Download进行下载。 47 | 48 | ![image-20240615143628193](../../../assets/ollama-4.png) 49 | 50 | - 第三步在终端运行如下指令,等待安装即可 51 | 52 | ``````bash 53 | docker run -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main 54 | `````` 55 | 56 | - 第四步再次启动docker,打开OpenWebUI 57 | 58 | - 第五步下载模型 59 | 60 | ![image-20240615144405423](../../../assets/ollama-5.png) 61 | 62 | 下载完成后就可以在对话中进行使用了 63 | 64 | ![image-20240615144527559](../../../assets/ollama-6.png) -------------------------------------------------------------------------------- /Cookbook/cn/opensource/quantization/autoawq-yi-quantization.md: -------------------------------------------------------------------------------- 1 | ### 🌟使用AutoAWQ量化 2 | 3 | AutoAWQ 是一款易于使用的 4 位量化模型软件包。与 FP16 相比,AutoAWQ 可将模型速度提高 3 倍并将内存需求降低 3 倍。 4 | 5 | AutoAWQ 实现了激活感知权重量化 (AWQ) 算法来量化 LLM。 6 | 7 | 此次所有演示,均采用Yi-1.5-6B-Chat作为示例。 8 | 9 | 下面是此次演示显存和硬盘占用情况: 10 | 11 | | 模型 | 显存使用 | 硬盘占用 | 12 | |--|------|-------| 13 | | Yi-1.5-6B-Chat | 6G | 24.5G | 14 | 15 | #### 安装 16 | AWQ的版本兼容问题比较容易出错 首先我们检查torch和cuda的版本: 17 | 18 | ```python 19 | import torch 20 | print(torch.__version__) 21 | ``` 22 | 这里需要注意如果想要使用pip进行安装,必须满足cuda>=12.1: 23 | ```shell 24 | pip install autoawq 25 | ``` 26 | 对于 CUDA 11.8、ROCm 5.6 和 ROCm 5.7,推荐从源码进行安装: 27 | 28 | ```shell 29 | git clone https://github.com/casper-hansen/AutoAWQ.git 30 | cd AutoAWQ 31 | pip install -e . 
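# (可选)安装完成后可以用下面这条命令简单验证能否正常导入,仅为示意:
python -c "from awq import AutoAWQForCausalLM; print('AutoAWQ 安装成功')"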
32 | ``` 33 | #### 加载模型 34 | AWQ完全兼容transformers,你可以直接粘贴huggingface的模型路径。 35 | 36 | 或者也可以直接将模型路径替换为本地加载下载好的模型或者是你已经微调好的模型: 37 | ```python 38 | from awq import AutoAWQForCausalLM 39 | from transformers import AutoTokenizer 40 | # model_path是模型的路径,这里从huggingface加载Yi模型,如果你有已经微调好的Yi模型,你同样直接替换model_path即可 41 | model_path = '01-ai/Yi-1.5-6B-Chat' 42 | # quant_path为量化后模型的路径 43 | quant_path = 'Yi-1.5-6B-Chat-awq' 44 | quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" } 45 | 46 | # 加载模型和分词器 47 | model = AutoAWQForCausalLM.from_pretrained( 48 | model_path 49 | ) 50 | tokenizer = AutoTokenizer.from_pretrained( 51 | model_path, 52 | trust_remote_code=True 53 | ) 54 | ``` 55 | #### 保存模型 56 | 量化后的模型可以直接保存: 57 | ```python 58 | # 保存模型 59 | model.save_quantized(quant_path) 60 | tokenizer.save_pretrained(quant_path) 61 | 62 | print(f'Model is quantized and saved at "{quant_path}"') 63 | ``` 64 | 同样也可以直接将模型挂载到云盘上,更快速方便的下载: 65 | ```python 66 | from google.colab import drive 67 | import shutil 68 | 69 | # 挂载Google云端硬盘 70 | drive.mount('/content/drive') 71 | 72 | # 将文件或文件夹同步到云端硬盘的指定路径 73 | # 假设你想要同步的文件或文件夹位于Colab的当前工作目录下 74 | # 并且你想要将其同步到云端硬盘的MyDrive/Yi-1.5-6B-Chat-awq文件夹中 75 | 76 | # 定义本地文件或文件夹的路径 77 | local_path = 'Yi-1.5-6B-Chat-awq' 78 | 79 | # 定义云端硬盘的目标路径 80 | drive_path = '/content/drive/MyDrive/Yi-1.5-6B-Chat-awq' 81 | 82 | # 同步操作 83 | # 使用copytree 84 | shutil.copytree(local_path, drive_path) 85 | print(f"文件夹'{local_path}'已同步到'{drive_path}'。") 86 | 87 | ``` 88 | #### 使用量化后的模型 89 | 我们通过transformers直接可以使用量化模型: 90 | ```python 91 | from transformers import AutoModelForCausalLM, AutoTokenizer 92 | # model_path = quant_path 93 | model_path = 'Yi-1.5-6B-Chat-awq' 94 | 95 | tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False) 96 | 97 | model = AutoModelForCausalLM.from_pretrained( 98 | model_path, 99 | device_map="auto", 100 | torch_dtype='auto' 101 | ).eval() 102 | # 提示词 103 | messages = [ 104 | {"role": "user", "content": "hi"} 105 | ] 106 | 107 | input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt') 108 | output_ids = model.generate(input_ids.to('cuda')) 109 | response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True) 110 | 111 | print(response) 112 | ``` -------------------------------------------------------------------------------- /Cookbook/cn/opensource/quantization/autogptq-yi-quantization.md: -------------------------------------------------------------------------------- 1 | ### 🌟使用AutoGPTQ量化 2 | 3 | AutoGPTQ是一个基于 GPTQ 算法,简单易用且拥有用户友好型接口的大语言模型量化工具包。 4 | 5 | 我们可以通过AutoGPTQ量化我们的Yi系列模型。 6 | 7 | | 模型 | 显存使用 | 硬盘占用 | 8 | |--|------|------| 9 | | Yi-1.5-6B-Chat | 7G | 27G | 10 | 11 | #### 安装 12 | 推荐从源码进行安装: 13 | 14 | ```shell 15 | git clone https://github.com/AutoGPTQ/AutoGPTQ 16 | cd AutoGPTQ 17 | pip install . 
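# (可选)安装完成后同样可以先验证能否正常导入,仅为示意:
python -c "from auto_gptq import AutoGPTQForCausalLM; print('AutoGPTQ 安装成功')"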
18 | ``` 19 | #### 加载模型 20 | 21 | 同样的我们还是可以通过transformers加载模型,或许加载微调好的模型。 22 | 23 | 只需要替换模型路径即可,具体代码如下。 24 | 25 | ```python 26 | from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig 27 | from transformers import AutoTokenizer 28 | 29 | # 配置量化超参数 30 | # 加载分词器和模型 31 | model_path = "01-ai/Yi-1.5-6B-Chat" 32 | quant_path = "Yi-1.5-6B-Chat-GPTQ" 33 | quantize_config = BaseQuantizeConfig( 34 | bits=8, # 量化为8-bit 模型 35 | group_size=128, # 推荐128 36 | damp_percent=0.01, 37 | desc_act=False, # 设为 False 可以显著提升推理速度 38 | ) 39 | 40 | tokenizer = AutoTokenizer.from_pretrained(model_path) 41 | model = AutoGPTQForCausalLM.from_pretrained(model_path, quantize_config) 42 | ``` 43 | #### 量化与保存模型 44 | model.quantize(examples)中样本的数据类型应该为 List[Dict],其中字典的键有且仅有 input_ids 和 attention_mask。 45 | 46 | 这里需要注意自己的数据格式! 47 | ```python 48 | import torch 49 | examples = [] 50 | messages = [ 51 | {"role": "user", "content": "hi"}, 52 | {"role": "assistant", "content": "Hello! It's great to see you today. How can I assist you"} 53 | ] 54 | text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False) 55 | model_inputs = tokenizer([text]) 56 | input_ids = torch.tensor(model_inputs.input_ids[:max_len], dtype=torch.int) 57 | examples.append(dict(input_ids=input_ids, attention_mask=input_ids.ne(tokenizer.pad_token_id))) 58 | model.quantize(examples) 59 | 60 | model.save_quantized(quant_path, use_safetensors=True) 61 | tokenizer.save_pretrained(quant_path) 62 | ``` 63 | #### 使用模型 64 | ```python 65 | from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig 66 | 67 | from transformers import AutoTokenizer, GenerationConfig 68 | 69 | quantized_model_dir = 'Yi-1.5-6B-Chat-GPTQ' 70 | 71 | tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, 72 | use_fast=True, 73 | trust_remote_code=True) 74 | 75 | model = AutoGPTQForCausalLM.from_quantized(quantized_model_dir, 76 | device_map="auto", 77 | use_safetensors=True, 78 | trust_remote_code=True) 79 | 80 | 81 | output = tokenizer.decode(model.generate( 82 | **tokenizer("<|im_start|>user Hi!<|im_end|> <|im_start|>assistant", return_tensors="pt").to(model.device), 83 | max_new_tokens=512)[0] 84 | ) 85 | print(output) 86 | ``` -------------------------------------------------------------------------------- /Cookbook/cn/opensource/quantization/swift-yi-quantization.md: -------------------------------------------------------------------------------- 1 | ### 🌟使用SWIFT量化 2 | 3 | SWIFT是modelscope开源的一款框架,支持多模态大模型的训练、推理、评测和部署。并且可以直接实现模型训练评测到应用的完整链路。 4 | 5 | 使用SWIFT量化非常方便,只需要几步即可完成量化,在量化中有许多的参数可以进行调节,比如量化的模型、精度、方式等等,具体的可以参考[官方文档](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM%E9%87%8F%E5%8C%96%E6%96%87%E6%A1%A3.md)。 6 | 7 | #### 安装 8 | 9 | 首先我们先安装ms-swift: 10 | 11 | ``````bash 12 | git clone https://github.com/modelscope/swift.git 13 | cd swift 14 | pip install -e '.[llm]' 15 | `````` 16 | 17 | swift支持使用awq、gptq、bnb、hqq、eetq技术对模型进行量化。 18 | 19 | 如果你想使用哪一种量化方式就可以直接进行安装: 20 | 21 | ``````bash 22 | # 使用awq量化: 23 | # autoawq和cuda版本有对应关系,请按照`https://github.com/casper-hansen/AutoAWQ`选择版本 24 | pip install autoawq -U 25 | 26 | # 使用gptq量化: 27 | # auto_gptq和cuda版本有对应关系,请按照`https://github.com/PanQiWei/AutoGPTQ#quick-installation`选择版本 28 | pip install auto_gptq -U 29 | 30 | # 使用bnb量化: 31 | pip install bitsandbytes -U 32 | 33 | # 使用hqq量化: 34 | # pip install transformers>=4.41 35 | pip install hqq 36 | 37 | # 使用eetq量化: 38 | # pip install transformers>=4.41 39 | 40 | # 参考https://github.com/NetEase-FuXi/EETQ 41 | git clone 
https://github.com/NetEase-FuXi/EETQ.git 42 | cd EETQ/ 43 | git submodule update --init --recursive 44 | pip install . 45 | `````` 46 | 47 | 如果你运行报错可以进行环境对齐(选择): 48 | 49 | ``````bash 50 | # 环境对齐 (通常不需要运行. 如果你运行错误, 可以跑下面的代码, 仓库使用最新环境测试) 51 | pip install -r requirements/framework.txt -U 52 | pip install -r requirements/llm.txt -U 53 | `````` 54 | 55 | #### 使用swift开始量化 56 | 57 | 我们使用awq和hqq量化为示例进行教学。 58 | 59 | ##### 使用swift进行awq量化 60 | 61 | awq量化需要数据集,这里可以使用自定义数据集,这里使用alpaca-zh alpaca-en sharegpt-gpt4:default作为量化数据集: 62 | 63 | ``````bash 64 | CUDA_VISIBLE_DEVICES=0 swift export \ 65 | --model_type yi-1_5-6b-chat --quant_bits 4 \ 66 | --dataset alpaca-zh alpaca-en sharegpt-gpt4:default --quant_method awq 67 | `````` 68 | 69 | 量化完成后进行推理同样也可以使用swift具体如下: 70 | 71 | `model_type` 替换模型的类型 72 | 73 | `model_id_or_path`量化类型 74 | 75 | ``````bash 76 | CUDA_VISIBLE_DEVICES=0 swift infer \ 77 | --model_type yi-1_5-6b-chat \ 78 | --model_id_or_path yi-1_5-6b-chat-awq-int4 79 | `````` 80 | 81 | ##### 使用swift进行hqq量化 82 | 83 | 对于bnb、hqq、eetq,我们只需要使用swift infer来进行快速量化并推理。 84 | 85 | `quant_method`可以修改量化方法 86 | 87 | `model_type` 替换模型的类型 88 | 89 | `quantization_bit`量化类型 90 | 91 | ``````bash 92 | CUDA_VISIBLE_DEVICES=0 swift infer \ 93 | --model_type yi-1_5-6b-chat \ 94 | --quant_method hqq \ 95 | --quantization_bit 4 96 | `````` -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/Mastering_Street_Fighter_III_with_the_Yi_Language_Model.md: -------------------------------------------------------------------------------- 1 | 2 | # Mastering Street Fighter III with Yi Large Language Model 3 | 4 | ## Introduction 5 | 6 | Welcome to this detailed tutorial! In this article, we'll explore how to use the Yi large language model (specifically the yi-large model) to master the classic fighting game Street Fighter III. This exciting project will showcase the potential of large language models (LLMs) in the realm of game AI. Whether you're an AI enthusiast or a gaming fan, this tutorial will offer you a fresh and exciting experience. 7 | 8 | This guide is designed for readers with a basic understanding, and we'll walk through each step in detail to ensure you can successfully complete the entire process. 9 | 10 | ## Project Overview 11 | 12 | In this project, we will: 13 | 14 | 1. Set up the necessary environment and tools 15 | 2. Obtain an API key for the Yi model 16 | 3. Configure and run the game environment 17 | 4. Use the Yi model to control Street Fighter III characters in battles 18 | 19 | ![img.png](assets/2/img(4-2).png) 20 | 21 | ## Experimental Environment 22 | 23 | - Operating System: Windows or Mac OS (this tutorial will use Mac OS as an example) 24 | - Python 3.10 25 | - Docker Desktop 26 | 27 | ## Step 1: Environment Preparation 28 | 29 | ### 1.1 Installing Docker Desktop 30 | 31 | 1. Visit the [Docker Desktop official website](https://www.docker.com/products/docker-desktop/) 32 | 2. Download the version suitable for your system 33 | 3. Follow the installation wizard to complete the setup 34 | 4. After installation, restart your computer 35 | 36 | ### 1.2 Installing Conda 37 | 38 | Conda is a powerful package management tool that we'll use to create virtual environments. 39 | 40 | 1. Visit the [Conda official website](https://conda.io/projects/conda/en/latest/index.html) 41 | 2. Download and install Miniconda (recommended) or Anaconda 42 | 3. 
After installation, open a terminal and enter the following command to verify the installation: 43 | 44 | ``` 45 | conda --version 46 | ``` 47 | 48 | If it displays a version number, the installation was successful. 49 | 50 | ![img_1.png](assets/2/img(4-1).png) 51 | 52 | ### 1.3 Registering a Diambra Account 53 | 54 | Diambra provides the game environment we need. 55 | 56 | 1. Visit the [Diambra registration page](https://diambra.ai/register) 57 | 2. Fill in the required information and complete the registration 58 | 59 | ## Step 2: Obtaining Yi Model API Key 60 | 61 | 1. Visit the [Yi Large Language Model Open Platform](https://platform.01.ai/apikeys) 62 | 2. Register and log in to your account 63 | 3. Create a new API key on the platform 64 | 4. Save your API key securely, as we'll need it later 65 | 66 | ![img.png](assets/2/img(4-3).png) 67 | 68 | ## Step 3: Configuring the Project Environment 69 | 70 | ### 3.1 Cloning the Project Repository 71 | 72 | Open a terminal and execute the following commands: 73 | 74 | ```bash 75 | git clone https://github.com/Yimi81/llm-colosseum.git 76 | cd llm-colosseum 77 | ``` 78 | 79 | ### 3.2 Creating and Activating a Virtual Environment 80 | 81 | In the project directory, execute: 82 | 83 | ```bash 84 | conda create -n yi python=3.10 -y 85 | conda activate yi 86 | ``` 87 | 88 | ### 3.3 Installing Dependencies 89 | 90 | ```bash 91 | pip install -r requirements.txt 92 | ``` 93 | 94 | ### 3.4 Configuring Environment Variables 95 | 96 | 1. Copy the example environment file: 97 | 98 | ```bash 99 | cp .env.example .env 100 | ``` 101 | 102 | 2. Edit the .env file: 103 | 104 | On Mac, you might need to show hidden files. Use the shortcut `Command + Shift + .` to toggle showing/hiding hidden files. 105 | 106 | 3. Open the .env file and replace `YI_API_KEY` with the API key you obtained earlier. 107 | 108 | ## Step 4: Launching the Game 109 | 110 | ### 4.1 Locating the ROM File Path 111 | 112 | On a Mac environment, ROM files are typically located at:``` 113 | /Users/your_username/Desktop/code/llm-colosseum/.diambra/rom 114 | 115 | 116 | Remember this path; we'll refer to it as ``. 117 | 118 | ### 4.2 Starting the Game 119 | 120 | In the terminal, execute:```bash 121 | diambra -r run -l python script.py 122 | ![img_4.png](assets/2/img(4-5).png) 123 | 124 | When running for the first time, the system will prompt you to enter your Diambra account username and password. After that, the game image will start downloading. 125 | 126 | Then, just wait for it to launch.![img_5.png](assets/2/img(4-6).png) 127 | 128 | ## Conclusion 129 | 130 | Congratulations! You've now successfully set up and run a Street Fighter III AI battle system controlled by the Yi large language model. This project demonstrates the potential of large language models in the field of game AI. You can try modifying the code, using different Yi models (such as yi-medium), or adjusting the prompts to change the AI's behavior. 131 | 132 | Remember, when using the API, you might encounter request frequency limitations. If this happens, consider upgrading your API plan.![img_6.png](assets/2/img(4-7).png) 133 | 134 | I hope you've learned something new from this project and developed a greater interest in AI applications in gaming. 
Keep exploring and enjoy the endless possibilities that AI brings!``` 135 | -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/2/img(4-1).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/2/img(4-1).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/2/img(4-2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/2/img(4-2).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/2/img(4-3).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/2/img(4-3).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/2/img(4-4).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/2/img(4-4).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/2/img(4-5).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/2/img(4-5).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/2/img(4-6).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/2/img(4-6).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/2/img(4-7).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/2/img(4-7).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/3/img(3-1).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/3/img(3-1).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/3/img(3-2).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/3/img(3-2).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/3/img(3-3).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/3/img(3-3).png -------------------------------------------------------------------------------- 
/Cookbook/en/ecosystem/assets/3/img(3-4).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/3/img(3-4).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/4/img(2-1).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/4/img(2-1).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/4/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/4/img.png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/4/img2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/4/img2.png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/4/img3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/4/img3.png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/4/img4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/4/img4.png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/4/img_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/4/img_1.png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/4/train_memory(GiB).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/4/train_memory(GiB).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/5/img(5-1).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/5/img(5-1).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/5/img(5-2).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/5/img(5-2).jpg -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/5/img(5-3).png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/5/img(5-3).png -------------------------------------------------------------------------------- /Cookbook/en/ecosystem/assets/5/img(5-4).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/Cookbook/en/ecosystem/assets/5/img(5-4).png -------------------------------------------------------------------------------- /Cookbook/en/opensource/Inference/Inference_using_swift.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"markdown","metadata":{"id":"LUEkKPO5qaYL"},"source":["# Inference with SWIFT\n","\n","SWIFT is an open-source framework from ModelScope that supports large model training, inference, evaluation, and deployment. With SWIFT, you can easily achieve a complete pipeline from model training to application.\n","\n","This tutorial will detail how to use SWIFT for inference, including installation steps and an inference example. We will use Yi-1.5-6B-Chat for demonstration.\n"]},{"cell_type":"markdown","metadata":{"id":"wxnQfFs7qaYM"},"source":["## 🚀 Run with Colab\n"]},{"cell_type":"markdown","metadata":{"id":"6jdKJYrUqaYM"},"source":["## Installation\n","\n","First, we need to install the necessary dependencies.\n","\n","(Optional) You can set the global pip mirror to speed up downloads:\n"]},{"cell_type":"code","metadata":{"id":"c1q_HC8KqaYN"},"source":["!pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"NQThi0GMqaYN"},"source":["Install ms-swift:\n"]},{"cell_type":"code","metadata":{"id":"xRcwIdKNqaYN"},"source":["!pip install 'ms-swift[llm]' -U"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"OcE1ATZwqaYN"},"source":["## Start Inference\n","\n","Before starting inference, note that your computer's memory and GPU memory should be sufficient. 
If not, you might encounter errors.\n","\n","| Model | GPU Memory Usage | Disk Usage |\n","| -------------- | ---------------- | ---------- |\n","| Yi-1.5-6B-Chat | 11.5G | 14.7G |\n"]},{"cell_type":"markdown","metadata":{"id":"cDDy2XaSqaYN"},"source":["First, set the environment variable:\n"]},{"cell_type":"code","metadata":{"id":"dN3HoqrtqaYN"},"source":["import os\n","os.environ['CUDA_VISIBLE_DEVICES'] = '0'"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rDZY2KubqaYO"},"source":["Next, load the model and tokenizer:\n"]},{"cell_type":"code","metadata":{"id":"_UijGj08qaYO"},"source":["from swift.llm import (\n"," get_model_tokenizer, get_template, inference, ModelType, get_default_template_type,\n",")\n","from swift.utils import seed_everything\n","\n","# Select model type, here we use Yi-1.5-6B-Chat\n","model_type = ModelType.yi_1_5_6b_chat\n","template_type = get_default_template_type(model_type)\n","print(f'template_type: {template_type}') # Template type\n","\n","# Load model and tokenizer\n","kwargs = {}\n","model, tokenizer = get_model_tokenizer(model_type, model_kwargs={'device_map': 'auto'}, **kwargs)\n","\n","# Set generation config\n","model.generation_config.max_new_tokens = 128\n","\n","# Get template\n","template = get_template(template_type, tokenizer)\n","\n","# Set random seed\n","seed_everything(42)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"uTWqG_DVqaYO"},"source":["Now, let's perform inference:\n"]},{"cell_type":"code","metadata":{"id":"T2KSI9U6qaYO"},"source":["# Prepare input query\n","query = 'Hello!'\n","\n","# Perform inference using the template\n","response, history = inference(model, template, query)\n","\n","# Print query and response\n","print(f'query: {query}')\n","print(f'response: {response}')"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"6sH-TtA2qaYO"},"source":["The above code will output something like this:\n","\n","```\n","query: Hello!\n","response: Hi! How can I help you today?\n","```\n"]},{"cell_type":"markdown","metadata":{"id":"tyYCjrhjqaYO"},"source":["With this, you have learned how to perform inference using SWIFT with the Yi series models. If you encounter any issues, you can refer to the [SWIFT official documentation](https://www.modelscope.cn/models/01-ai/Yi-1.5-6B-Chat) for more help."]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.10"},"colab":{"provenance":[]}},"nbformat":4,"nbformat_minor":0} -------------------------------------------------------------------------------- /Cookbook/en/opensource/fine_tune/finetune-yi-with-llamafactory.md: -------------------------------------------------------------------------------- 1 | ### 🌟Fine-tuning with LLaMA-Factory 2 | 3 | LLaMA Factory is an open-source, low-code framework for fine-tuning large language models, developed by Yaowei Zheng, a PhD student at Beihang University. It integrates widely-used fine-tuning techniques, making the process straightforward and accessible. Let's get started! 
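Before diving in, one note for later: once the fine-tuning below is finished, you can also load the saved LoRA adapter directly from Python as a quick sanity check, instead of (or in addition to) `llamafactory-cli chat`. The snippet below is only a minimal sketch — it assumes `transformers`, `peft`, and `accelerate` (for `device_map="auto"`) are installed, and the two paths must be adjusted to your own base model location and the `output_dir` from the training config:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_path = "../Yi-1.5-6B-Chat"        # assumed path to the downloaded base model
adapter_path = "saves/yi-6b/lora/sft"  # output_dir used in yi_lora_sft_bitsandbytes.yaml

# Load the base model, then attach the fine-tuned LoRA adapter on top of it
tokenizer = AutoTokenizer.from_pretrained(base_path)
model = AutoModelForCausalLM.from_pretrained(base_path, device_map="auto", torch_dtype="auto")
model = PeftModel.from_pretrained(model, adapter_path)

# Ask the identity question the adapter was trained on
messages = [{"role": "user", "content": "Hello, who are you?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
output_ids = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True))
```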
4 | 5 | #### Installation 6 | 7 | First, clone the LLaMA-Factory repository: 8 | 9 | ```bash 10 | git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git 11 | ``` 12 | 13 | Navigate to the LLaMA-Factory directory and install the dependencies: 14 | 15 | ```bash 16 | cd LLaMA-Factory 17 | pip install -e ".[torch,metrics]" 18 | ``` 19 | 20 | If you haven't already, download the Yi model from either Huggingface or ModelScope. Here's how to download `Yi-1.5-6B-Chat`: 21 | 22 | ```bash 23 | # Download from ModelScope 24 | git clone https://www.modelscope.cn/01ai/Yi-1.5-6B-Chat.git 25 | 26 | # Download from Huggingface 27 | git clone https://huggingface.co/01-ai/Yi-1.5-6B-Chat 28 | ``` 29 | 30 | #### Fine-tuning Steps 31 | 32 | 1. **Create the Configuration File** 33 | 34 | Inside the LLaMA-Factory directory, locate the `llama3_lora_sft_awq.yaml` file under `examples/train_qlora`. Duplicate and rename it to `yi_lora_sft_bitsandbytes.yaml`. 35 | 36 | This file houses the key parameters for fine-tuning: 37 | - `model_name_or_path`: Specify the path to your downloaded Yi model. 38 | - `quantization_bit`: Set the model quantization bits. 39 | - `dataset`: Choose the dataset for fine-tuning. 40 | - `num_train_epochs`: Define the number of training epochs. 41 | - `output_dir`: Specify where to save the fine-tuned model weights. 42 | 43 | 2. **Configure Parameters** 44 | 45 | Open `yi_lora_sft_bitsandbytes.yaml` and adjust the parameters according to your requirements. Here's an example configuration: 46 | 47 | ```yaml 48 | ### model 49 | model_name_or_path: 50 | quantization_bit: 4 51 | 52 | ### method 53 | stage: sft 54 | do_train: true 55 | finetuning_type: lora 56 | lora_target: all 57 | 58 | ### dataset 59 | dataset: identity 60 | template: yi 61 | cutoff_len: 1024 62 | max_samples: 1000 63 | overwrite_cache: true 64 | preprocessing_num_workers: 16 65 | 66 | ### output 67 | output_dir: saves/yi-6b/lora/sft 68 | logging_steps: 10 69 | save_steps: 500 70 | plot_loss: true 71 | overwrite_output_dir: true 72 | 73 | ### train 74 | per_device_train_batch_size: 1 75 | gradient_accumulation_steps: 8 76 | learning_rate: 1.0e-4 77 | num_train_epochs: 3.0 78 | lr_scheduler_type: cosine 79 | warmup_ratio: 0.1 80 | fp16: true 81 | 82 | ### eval 83 | val_size: 0.1 84 | per_device_eval_batch_size: 1 85 | eval_strategy: steps 86 | eval_steps: 500 87 | ``` 88 | 89 | In this example, we use the "identity" dataset, which helps the model recognize itself. If you ask the model "Hello, who are you?", it will respond with its designated name and developer. By replacing this dataset with your own information, you can fine-tune the model to create your personalized AI assistant. 90 | 91 | 3. **Initiate Fine-tuning** 92 | 93 | Open your terminal and run the following command to start the fine-tuning process (it might take around 10 minutes): 94 | 95 | ```bash 96 | llamafactory-cli train examples/train_qlora/yi_lora_sft_bitsandbytes.yaml 97 | ``` 98 | 99 | #### Inference Testing 100 | 101 | 1. **Prepare the Inference Configuration** 102 | 103 | Within the LLaMA-Factory folder, find the `llama3_lora_sft.yaml` file under `examples/inference`. Copy and rename it to `yi_lora_sft.yaml`. 104 | 105 | Populate the file with the following content: 106 | 107 | ```yaml 108 | model_name_or_path: 109 | adapter_name_or_path: saves/yi-6b/lora/sft 110 | template: yi 111 | finetuning_type: lora 112 | ``` 113 | 114 | 2. 
**Run Inference** 115 | 116 | In the terminal where the fine-tuning process finished, execute the inference command: 117 | 118 | ```bash 119 | llamafactory-cli chat examples/inference/yi_lora_sft.yaml 120 | ``` 121 | 122 | Alright, that concludes our tutorial on fine-tuning the Yi model using llamafactory. Feeling accomplished? We invite you to explore our other tutorials as well. 123 | 124 | -------------------------------------------------------------------------------- /Cookbook/en/opensource/fine_tune/finetune-yi-with-swift.md: -------------------------------------------------------------------------------- 1 | ### 🌟Fine-tuning with SWIFT 2 | 3 | SWIFT, developed by ModelScope, is a framework designed for training, inferencing, evaluating, and deploying multimodal large models. It enables a seamless workflow from model training and evaluation to application. Let's explore how to fine-tune the Yi model using SWIFT. 4 | 5 | #### Installation 6 | 7 | Start by cloning the SWIFT repository: 8 | 9 | ```bash 10 | git clone https://github.com/modelscope/swift.git 11 | cd swift 12 | pip install -e '.[llm]' 13 | ``` 14 | 15 | #### Fine-tuning Steps 16 | 17 | We'll use the CLI for fine-tuning. Here's the command: 18 | 19 | ```bash 20 | CUDA_VISIBLE_DEVICES=0 swift sft \ 21 | --model_id_or_path 01ai/Yi-1.5-6B-Chat \ 22 | --dataset AI-ModelScope/blossom-math-v2 \ 23 | --output_dir output \ 24 | ``` 25 | 26 | - `--model_id_or_path`: Replace with the desired model. 27 | - `--dataset`: Specify the dataset for fine-tuning. 28 | - `--output_dir`: Define the directory to save the fine-tuned model. 29 | 30 | For more detailed information on SWIFT, please refer to the official GitHub repository: [https://github.com/modelscope/swift](https://github.com/modelscope/swift). 31 | -------------------------------------------------------------------------------- /Cookbook/en/opensource/local/local-lm-studio.md: -------------------------------------------------------------------------------- 1 | ### 🌟Local Running with LM Studio 2 | 3 | LM Studio is an easy-to-use desktop application for experimenting with local and open-source large language models (LLMs), enabling better local deployment of large models with relatively simple operations. 4 | 5 | #### Download and Installation 6 | 7 | Installing LM Studio is very simple. Just visit the LM Studio [official website](https://lmstudio.ai/). 8 | 9 | Install the version according to your computer's operating system. 10 | 11 | ![image-20240615144405423](../../../assets/LM_Studio-0.png) 12 | 13 | After downloading, open the LM Studio software and search for "yi1.5-6b-chat" or other models in the search bar. The following example uses "yi1.5-6b-chat". 14 | 15 | LM Studio will helpfully assess which models your local computer can run, which can effectively avoid memory issues. 16 | 17 | ![image-20240615144405423](../../../assets/LM_Studio-2.png) 18 | 19 | Similarly, select the model you want to run locally and click "download" to start using it. 20 | -------------------------------------------------------------------------------- /Cookbook/en/opensource/local/local-mlx.md: -------------------------------------------------------------------------------- 1 | ### 🌟Local Running with MLX-LM 2 | 3 | MLX-LM is a framework for local deployment of large models on Mac OS. For detailed information, please refer to the [official documentation](https://github.com/ml-explore/mlx-examples/tree/main?tab=readme-ov-file). 
4 | 5 | ⚠️Please note that MLX-LM is only compatible with the Mac OS operating system. 6 | 7 | #### Download and Installation 8 | 9 | ``````bash 10 | pip install mlx-lm 11 | `````` 12 | 13 | #### Getting Started 14 | 15 | The following example uses "mlx-community/Yi-1.5-6B-Chat-8bit". 16 | 17 | You can also replace it with other models, such as "mlx-community/Yi-1.5-34B-Chat-4bit". 18 | 19 | ``````python 20 | from mlx_lm import load, generate 21 | 22 | model, tokenizer = load("mlx-community/Yi-1.5-6B-Chat-8bit") 23 | 24 | response = generate(model, tokenizer, prompt="hello", verbose=True) 25 | `````` -------------------------------------------------------------------------------- /Cookbook/en/opensource/local/local-ollama.md: -------------------------------------------------------------------------------- 1 | ### 🌟Local Running with ollama 2 | 3 | ollama is an open-source large language model serving tool that allows users to easily deploy and use large pre-trained models in their own hardware environments. This enables users to quickly run these models locally. 4 | 5 | #### Download and Installation 6 | 7 | You can directly download it from the ollama [official website](https://ollama.com/). Click on "download" and choose the version that matches your computer system. 8 | 9 | ![ollama](../../../assets/ollama-1.png) 10 | 11 | After downloading, install it according to the corresponding process. 12 | 13 | #### Local Usage 14 | 15 | There are two ways to use it locally. 16 | 17 | 1. **Terminal Execution** 18 | 19 | You can choose to run the Yi series models directly in the terminal. For the officially available models, please refer to the [documentation](https://ollama.com/library/yi). 20 | 21 | ``````bash 22 | ollama run yi:6b 23 | `````` 24 | 25 | - After running the command, ollama will automatically download the model to your local machine. 26 | - Once the download is complete, you can start using it. 27 | 28 | The following image shows an example of a successful run: 29 | 30 | ![image-20240615142555895](../../../assets/ollama-7.png) 31 | 32 | 2. **Running with [OpenWebUI](https://openwebui.com/)** 33 | 34 | The advantage of using OpenWebUI is that it allows for more visual operations, basically eliminating the need for command-line operations. It provides a very good user experience and has a low barrier to entry. 35 | 36 | Let's proceed with the installation: 37 | 38 | - **Step 1:** Ensure you have correctly installed ollama. 39 | 40 | - **Step 2:** Install Docker. 41 | 42 | ​ Docker is a lightweight virtualization technology and an open-source platform for building application container runtime environments. It allows developers to easily package applications into a portable container that can be installed on any server running Linux or Windows. Compared to traditional virtual machines, Docker containers offer a lightweight virtualization approach, easy installation, and fast startup and shutdown speeds. 43 | 44 | ​ Simply put, we will use Docker to run OpenWebUI. 45 | 46 | ​ Installation is easy. Just go to the Docker [official website](https://www.docker.com/get-started/) and click on "Download" according to your computer model. 
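Once the installer has finished, you can optionally confirm from a terminal that Docker is ready before moving on to Step 3. This is only an illustrative check; it assumes Docker Desktop (or the Docker engine) has been started:

``````bash
# Show the installed Docker version
docker --version

# Confirm the Docker daemon is running and reachable
docker info
``````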
47 | 48 | ![image-20240615143628193](../../../assets/ollama-4.png) 49 | 50 | - **Step 3:** Run the following command in the terminal and wait for the installation to complete: 51 | 52 | ``````bash 53 | docker run -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main 54 | `````` 55 | 56 | - **Step 4:** Start Docker again and open OpenWebUI. 57 | 58 | - **Step 5:** Download the model. 59 | 60 | 61 | Once the download is complete, you can start using it in the conversation interface. 62 | 63 | ![image-20240615144527559](../../../assets/ollama-8.png) -------------------------------------------------------------------------------- /Cookbook/en/opensource/quantization/autoawq-yi-quantization.md: -------------------------------------------------------------------------------- 1 | ### 🌟Quantize with AutoAWQ 2 | 3 | AutoAWQ is an easy-to-use 4-bit quantization model package. Compared to FP16, AutoAWQ can increase model speed by 3 times and reduce memory requirements by 3 times. 4 | 5 | AutoAWQ implements the Activation-aware Weight Quantization (AWQ) algorithm to quantize LLMs. 6 | 7 | All the following demos use Yi-1.5-6B-Chat as an example. 8 | 9 | Here are the memory and disk usage for this demo: 10 | 11 | | Model | Memory Usage | Disk Usage | 12 | |--|------|-------| 13 | | Yi-1.5-6B-Chat | 6G | 24.5G | 14 | 15 | #### Installation 16 | AWQ's version compatibility issues are prone to errors. First we check the version of torch and cuda: 17 | 18 | ```python 19 | import torch 20 | print(torch.__version__) 21 | ``` 22 | It should be noted that to install using pip, you must meet the requirement cuda>=12.1: 23 | ```shell 24 | pip install autoawq 25 | ``` 26 | For CUDA 11.8, ROCm 5.6 and ROCm 5.7, installation from source is recommended: 27 | 28 | ```shell 29 | git clone https://github.com/casper-hansen/AutoAWQ.git 30 | cd AutoAWQ 31 | pip install -e . 32 | ``` 33 | 34 | #### Load the model 35 | AWQ is fully compatible with transformers, you can directly paste the huggingface model path. 
36 | 37 | Or you can directly replace the model path with a locally downloaded model or a model you have fine-tuned: 38 | ```python 39 | from awq import AutoAWQForCausalLM 40 | from transformers import AutoTokenizer 41 | # model_path is the path to the model, here we load the Yi model from huggingface, if you have a fine-tuned Yi model, you can also directly replace model_path 42 | model_path = '01-ai/Yi-1.5-6B-Chat' 43 | # quant_path is the path to the quantized model 44 | quant_path = 'Yi-1.5-6B-Chat-awq' 45 | quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" } 46 | 47 | # Load the model and tokenizer 48 | model = AutoAWQForCausalLM.from_pretrained( 49 | model_path 50 | ) 51 | tokenizer = AutoTokenizer.from_pretrained( 52 | model_path, 53 | trust_remote_code=True 54 | ) 55 | ``` 56 | #### Save the model 57 | The quantized model can be saved directly: 58 | ```python 59 | # Save the model 60 | model.save_quantized(quant_path) 61 | tokenizer.save_pretrained(quant_path) 62 | 63 | print(f'Model is quantized and saved at "{quant_path}"') 64 | ``` 65 | You can also directly mount the model to the cloud drive for faster and more convenient download: 66 | ```python 67 | from google.colab import drive 68 | import shutil 69 | 70 | # Mount Google Drive 71 | drive.mount('/content/drive') 72 | 73 | # Synchronize files or folders to a specific path on Google Drive 74 | # Suppose the file or folder you want to synchronize is in the current working directory of Colab 75 | # And you want to sync it to the MyDrive/Yi-1.5-6B-Chat-awq folder on Google Drive 76 | 77 | # Define the path to the local file or folder 78 | local_path = 'Yi-1.5-6B-Chat-awq' 79 | 80 | # Define the target path on Google Drive 81 | drive_path = '/content/drive/MyDrive/Yi-1.5-6B-Chat-awq' 82 | 83 | # Sync operation 84 | # Use copytree 85 | shutil.copytree(local_path, drive_path) 86 | print(f"Folder '{local_path}' synced to '{drive_path}'.") 87 | 88 | ``` 89 | #### Using the quantized model 90 | We can use the quantized model directly through transformers: 91 | ```python 92 | from transformers import AutoModelForCausalLM, AutoTokenizer 93 | # model_path = quant_path 94 | model_path = 'Yi-1.5-6B-Chat-awq' 95 | 96 | tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False) 97 | 98 | model = AutoModelForCausalLM.from_pretrained( 99 | model_path, 100 | device_map="auto", 101 | torch_dtype='auto' 102 | ).eval() 103 | # Prompt 104 | messages = [ 105 | {"role": "user", "content": "hi"} 106 | ] 107 | 108 | input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt') 109 | output_ids = model.generate(input_ids.to('cuda')) 110 | response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True) 111 | 112 | print(response) 113 | ``` -------------------------------------------------------------------------------- /Cookbook/en/opensource/quantization/autogptq-yi-quantization.md: -------------------------------------------------------------------------------- 1 | ### 🌟Quantize with AutoGPTQ 2 | 3 | AutoGPTQ is a large language model quantization toolkit based on the GPTQ algorithm, which is simple, easy to use and has a user-friendly interface. 4 | 5 | We can quantize our Yi series models through AutoGPTQ. 
6 | 7 | | Model | Memory Usage | Disk Usage | 8 | |--|------|------| 9 | | Yi-1.5-6B-Chat | 7G | 27G | 10 | 11 | #### Installation 12 | Installation from source is recommended: 13 | 14 | ```shell 15 | git clone https://github.com/AutoGPTQ/AutoGPTQ 16 | cd AutoGPTQ 17 | pip install . 18 | ``` 19 | 20 | #### Load the model 21 | 22 | Similarly, we can still load the model through transformers, or perhaps load a fine-tuned model. 23 | 24 | Just replace the model path, the specific code is as follows. 25 | 26 | ```python 27 | from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig 28 | from transformers import AutoTokenizer 29 | 30 | # Configure quantization hyperparameters 31 | # Load tokenizer and model 32 | model_path = "01-ai/Yi-1.5-6B-Chat" 33 | quant_path = "Yi-1.5-6B-Chat-GPTQ" 34 | quantize_config = BaseQuantizeConfig( 35 | bits=8, # Quantize to 8-bit model 36 | group_size=128, # Recommended 128 37 | damp_percent=0.01, 38 | desc_act=False, # Setting to False can significantly improve inference speed 39 | ) 40 | 41 | tokenizer = AutoTokenizer.from_pretrained(model_path) 42 | model = AutoGPTQForCausalLM.from_pretrained(model_path, quantize_config) 43 | ``` 44 | 45 | #### Quantize and save the model 46 | The data type of the samples in model.quantize(examples) should be List[Dict], where the keys of the dictionary are and only input_ids and attention_mask. 47 | 48 | Please pay attention to your data format here! 49 | ```python 50 | import torch 51 | examples = [] 52 | messages = [ 53 | {"role": "user", "content": "hi"}, 54 | {"role": "assistant", "content": "Hello! It's great to see you today. How can I assist you"} 55 | ] 56 | text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False) 57 | model_inputs = tokenizer([text]) 58 | input_ids = torch.tensor(model_inputs.input_ids[:max_len], dtype=torch.int) 59 | examples.append(dict(input_ids=input_ids, attention_mask=input_ids.ne(tokenizer.pad_token_id))) 60 | model.quantize(examples) 61 | 62 | model.save_quantized(quant_path, use_safetensors=True) 63 | tokenizer.save_pretrained(quant_path) 64 | ``` 65 | 66 | #### Using the model 67 | ```python 68 | from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig 69 | 70 | from transformers import AutoTokenizer, GenerationConfig 71 | 72 | quantized_model_dir = 'Yi-1.5-6B-Chat-GPTQ' 73 | 74 | tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, 75 | use_fast=True, 76 | trust_remote_code=True) 77 | 78 | model = AutoGPTQForCausalLM.from_quantized(quantized_model_dir, 79 | device_map="auto", 80 | use_safetensors=True, 81 | trust_remote_code=True) 82 | 83 | 84 | output = tokenizer.decode(model.generate( 85 | **tokenizer("<|im_start|>user Hi!<|im_end|> <|im_start|>assistant", return_tensors="pt").to(model.device), 86 | max_new_tokens=512)[0] 87 | ) 88 | print(output) 89 | ``` -------------------------------------------------------------------------------- /Cookbook/en/opensource/quantization/swift-yi-quantization.md: -------------------------------------------------------------------------------- 1 | ### 🌟Quantize with SWIFT 2 | 3 | SWIFT is a framework open-sourced by modelscope that supports training, inference, evaluation, and deployment of multi-modal large models. It can directly implement the complete chain from model training and evaluation to application. 4 | 5 | Quantization using SWIFT is very convenient, and can be completed in just a few steps. 
There are many parameters that can be adjusted during quantization, such as the model to be quantized, precision, method, etc. For details, please refer to the [official documentation](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM%E9%87%8F%E5%8C%96%E6%96%87%E6%A1%A3.md). 6 | 7 | #### Installation 8 | 9 | First, let's install ms-swift: 10 | 11 | ``````bash 12 | git clone https://github.com/modelscope/swift.git 13 | cd swift 14 | pip install -e '.[llm]' 15 | `````` 16 | 17 | swift supports quantizing models using awq, gptq, bnb, hqq, and eetq techniques. 18 | 19 | You can directly install the quantization method you want to use: 20 | 21 | ``````bash 22 | # Use awq quantization: 23 | # autoawq and cuda versions have a correspondence, please select the version according to `https://github.com/casper-hansen/AutoAWQ` 24 | pip install autoawq -U 25 | 26 | # Use gptq quantization: 27 | # auto_gptq and cuda versions have a correspondence, please select the version according to `https://github.com/PanQiWei/AutoGPTQ#quick-installation` 28 | pip install auto_gptq -U 29 | 30 | # Use bnb quantization: 31 | pip install bitsandbytes -U 32 | 33 | # Use hqq quantization: 34 | # pip install transformers>=4.41 35 | pip install hqq 36 | 37 | # Use eetq quantization: 38 | # pip install transformers>=4.41 39 | 40 | # Refer to https://github.com/NetEase-FuXi/EETQ 41 | git clone https://github.com/NetEase-FuXi/EETQ.git 42 | cd EETQ/ 43 | git submodule update --init --recursive 44 | pip install . 45 | `````` 46 | 47 | If you encounter errors during runtime, you can align the environment (optional): 48 | 49 | ``````bash 50 | # Environment alignment (usually not required to run. If you encounter errors, you can run the following code, the repository uses the latest environment for testing) 51 | pip install -r requirements/framework.txt -U 52 | pip install -r requirements/llm.txt -U 53 | `````` 54 | 55 | #### Start Quantization with swift 56 | 57 | We will use awq and hqq quantization as examples for teaching. 58 | 59 | ##### AWQ Quantization with swift 60 | 61 | awq quantization requires a dataset, you can use a custom dataset here, here we use alpaca-zh alpaca-en sharegpt-gpt4:default as the quantization dataset: 62 | 63 | ``````bash 64 | CUDA_VISIBLE_DEVICES=0 swift export \ 65 | --model_type yi-1_5-6b-chat --quant_bits 4 \ 66 | --dataset alpaca-zh alpaca-en sharegpt-gpt4:default --quant_method awq 67 | `````` 68 | 69 | After quantization, you can also use swift for inference, as follows: 70 | 71 | Replace `model_type` with the type of your model. 72 | 73 | Replace `model_id_or_path` with the path to your quantized model. 74 | 75 | ``````bash 76 | CUDA_VISIBLE_DEVICES=0 swift infer \ 77 | --model_type yi-1_5-6b-chat \ 78 | --model_id_or_path yi-1_5-6b-chat-awq-int4 79 | `````` 80 | 81 | ##### HQQ Quantization with swift 82 | 83 | For bnb, hqq, and eetq, we only need to use swift infer for quick quantization and inference. 84 | 85 | You can modify the quantization method with `quant_method`. 86 | 87 | Replace `model_type` with the type of your model. 88 | 89 | Replace `quantization_bit` with the desired quantization bit. 
90 | 91 | ``````bash 92 | CUDA_VISIBLE_DEVICES=0 swift infer \ 93 | --model_type yi-1_5-6b-chat \ 94 | --quant_method hqq \ 95 | --quantization_bit 4 96 | `````` -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG REGISTRY="nvcr.io" 2 | ARG CUDA_VERSION="11.8.0" 3 | FROM mambaorg/micromamba:1.5.1 as micromamba 4 | FROM ${REGISTRY}/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 as base 5 | 6 | ##### 7 | # Setup micromamba 8 | ##### 9 | 10 | USER root 11 | 12 | ARG MAMBA_USER=yi 13 | ARG MAMBA_USER_ID=1000 14 | ARG MAMBA_USER_GID=100 15 | ENV MAMBA_USER=$MAMBA_USER 16 | ENV MAMBA_ROOT_PREFIX="/opt/conda" 17 | ENV MAMBA_EXE="/bin/micromamba" 18 | ENV ENV_NAME=yi 19 | 20 | ENV DEBIAN_FRONTEND="noninteractive" 21 | ENV TZ="Asia/Shanghai" 22 | ENV LC_ALL=C.UTF-8 23 | ENV LANG=C.UTF-8 24 | 25 | RUN apt-get update -y \ 26 | && apt-get install -y sudo tzdata git ninja-build \ 27 | && useradd -ms /bin/bash -d /home/$MAMBA_USER $MAMBA_USER --uid $MAMBA_USER_ID --gid $MAMBA_USER_GID \ 28 | && usermod -aG sudo $MAMBA_USER \ 29 | && echo "$MAMBA_USER ALL=NOPASSWD: ALL" >> /etc/sudoers \ 30 | && rm -rf /var/lib/apt/lists/* \ 31 | && apt-get clean 32 | 33 | COPY --from=micromamba "$MAMBA_EXE" "$MAMBA_EXE" 34 | COPY --from=micromamba /usr/local/bin/_activate_current_env.sh /usr/local/bin/_activate_current_env.sh 35 | COPY --from=micromamba /usr/local/bin/_dockerfile_shell.sh /usr/local/bin/_dockerfile_shell.sh 36 | COPY --from=micromamba /usr/local/bin/_entrypoint.sh /usr/local/bin/_entrypoint.sh 37 | COPY --from=micromamba /usr/local/bin/_dockerfile_initialize_user_accounts.sh /usr/local/bin/_dockerfile_initialize_user_accounts.sh 38 | COPY --from=micromamba /usr/local/bin/_dockerfile_setup_root_prefix.sh /usr/local/bin/_dockerfile_setup_root_prefix.sh 39 | 40 | RUN /usr/local/bin/_dockerfile_initialize_user_accounts.sh && \ 41 | /usr/local/bin/_dockerfile_setup_root_prefix.sh 42 | 43 | USER $MAMBA_USER 44 | SHELL ["/usr/local/bin/_dockerfile_shell.sh"] 45 | ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"] 46 | CMD ["/bin/bash"] 47 | 48 | # Install dependencies 49 | 50 | WORKDIR /home/${MAMBA_USER}/workspace/Yi 51 | COPY --chown=${MAMBA_USER_ID}:${MAMBA_USER_GID} ./conda-lock.yml . 52 | RUN micromamba create -y -n ${ENV_NAME} -f conda-lock.yml && \ 53 | micromamba clean --all --yes 54 | 55 | COPY --chown=${MAMBA_USER_ID}:${MAMBA_USER_GID} . . -------------------------------------------------------------------------------- /Events/readme.md: -------------------------------------------------------------------------------- 1 | # Community Events 2 | 3 | 4 |

5 | Event Highlights 6 |

7 | 8 | 9 | 👏 Welcome to the Community Events folder! Here, you can find information about all the events related to our open-source project. This folder aims to keep our community engaged and informed about upcoming and past events. 10 | 11 | ## 📅 Upcoming Events 12 | 13 | Stay tuned for our upcoming events! You can find detailed information about each event below: 14 | 15 | - **[AICon Shanghai](https://aicon.infoq.cn/202408/shanghai/)** 16 | 17 | - 📅 Date: August 18-19, 2024 18 | - 📍 Location: 上海中谷小南国花园酒店 19 | - 📝 Info: AICon 全球人工智能开发与应用大会是由极客邦科技旗下 InfoQ 中国主办的人工智能和大模型技术盛会,主要面向各行业对人工智能和大模型感兴趣的资深工程师、产品经理、数据分析师,会议聚焦大模型训练与推理、AI agent、RAG、多模态大模型等热门方向,会议不仅安排了精彩的演讲,还策划了包括闭门会议、圆桌交流、互动展区等多种社交活动,一方面为参会人员提供宝贵的交流学习、拓展人脉的机会,另一方面也为相关企业和机构提供一个展示自身实力和成果的舞台。 20 | 21 | ## 📚 Past Events 22 | 23 | Browse through our past events to see what we've been up to: 24 | 25 | - **[CommunityOverCode Asia 2024](https://asia.communityovercode.org/)** 26 | - 📅 Date: July 26-28, 2024 27 | - 📍 Location: Hangzhou, China 28 | - 📝 Info: CommunityOverCode (formly known as ApacheCon) is the official global conference series of The Apache Software Foundation (ASF). Since 1998 – before the ASF’s incorporation – ApacheCon has been drawing participants at all levels to explore ”Tomorrow’s Technology Today” across 300+ Apache projects and their diverse communities. CommunityOverCode showcases the latest developments in Apache projects and emerging innovations through hands-on sessions, keynotes, real-world case studies, trainings, hackathons, and more. 29 | 30 | - **[ChinaJoy CDEC 2024](https://www.chinajoy.net/#/cdecPage/cdecHome?language=Zh&)** 31 | - 📅 Date: July 25-26, 2024 32 | - 📍 Location: KerryHotel Pudong, Shanghai 33 | - 📝 Info: 中国国际数字娱乐产业大会(以下简称:CDEC)是中国国际数码互动娱乐展览会同期举办的产业权威高端会议。自2003年创立, 至今已有20年的辉煌历史。历届大会得到我国数字出版产业政府主管部门高度重视和深度参与,大会每年邀请海内外知名数字娱乐企业高层参加,已发展成最具专业性、权威性和国际性的数字娱乐产业顶级盛会。 大会彰显数字娱乐跨界融合的独特魅力,全息透视全球网络游戏、影视、网络文学、音乐及动漫等主要数字娱乐产业发展前沿,将娱乐数字化的全新技术趋势、我国数字娱乐产业政策与市场前景全景呈现,让全球数字娱乐界专业听众共享我国数字娱乐产业全生态发展的先机。 34 | 35 | ## How to Get Involved 36 | 37 | 🎉 Students and faculty members are encouraged to collaborate with us. Here's how you can get involved: 38 | 39 | 1. **Organize a Campus Tour**: We are eager to visit college campuses and explore the limitless possibilities of large models with students. 40 | 2. **Collaborate on Projects**: Work with our team on real-world projects involving large models. 41 | 3. **Join Our Community**: Stay connected with us through our online platforms and contribute to the development of open-source projects. 42 | 43 | 44 | ### Updating Event Information 45 | 46 | If you need to update information for an existing event: 47 | 48 | 1. **Locate the Event File**: Find the event file in either the `upcoming_events` or `past_events` folder. 49 | 2. **Edit the File**: Make the necessary changes. 50 | 3. **Submit a Pull Request**: Submit a pull request with your updates. 51 | 52 | ## 📞 Contact Us 53 | 54 | If you have any questions or need assistance, feel free to reach out: 55 | 56 | - **Email**: yi@01.ai 57 | - **Discord**: [Join our Discord Channel](https://discord.com/channels/1197807921187270676/1197807921979985925) 58 | - **GitHub**: [ Visit 01.AI GitHub repo](https://github.com/01-ai/Yi) 59 | 60 | Thank you for being a part of our open-source community! Together, we can make amazing things happen. 61 | 62 | --- 63 | 64 | **Note**: This README is subject to updates. Please check back regularly for the latest information. 
65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 01.AI 2 | 3 | This work, [Your Model Name], is based on the work originally authored by 01.AI. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | 17 | Attribution 18 | 19 | If you create derivative works based on this model, please include the following attribution in your derivative works: 20 | 21 | This work is a derivative of [The Yi Series Model You Base On] by 01.AI, used under the Apache 2.0 License. 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /README/huggingface_header.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: apache-2.0 3 | widget: 4 | - example_title: "Yi-34B-Chat" 5 | text: "hi" 6 | output: 7 | text: " Hello! How can I assist you today?" 8 | - example_title: "Yi-34B" 9 | text: "There's a place where time stands still. A place of breath taking wonder, but also" 10 | output: 11 | text: " an eerie sense that something is just not right…\nBetween the two worlds lies The Forgotten Kingdom - home to creatures long since thought extinct and ancient magic so strong it defies belief! Only here can you find what has been lost for centuries: An Elixir Of Life which will restore youth and vitality if only those who seek its power are brave enough to face up against all manner of dangers lurking in this mysterious land! But beware; some say there may even exist powerful entities beyond our comprehension whose intentions towards humanity remain unclear at best ---- they might want nothing more than destruction itself rather then anything else from their quest after immortality (and maybe someone should tell them about modern medicine)? In any event though – one thing remains true regardless : whether or not success comes easy depends entirely upon how much effort we put into conquering whatever challenges lie ahead along with having faith deep down inside ourselves too ;) So let’s get started now shall We?" 12 | pipeline_tag: text-generation 13 | --- 14 | 15 |
16 | <!-- Yi logo header: light / dark theme image variants (markup stripped from this listing) -->
20 | <!-- specify theme context for images -->
41 | Building the Next Generation of Open-Source and Bilingual LLMs
45 | 🤗 Hugging Face • 🤖 ModelScope • ✡️ WiseModel
49 | 👩‍🚀 Ask questions or discuss ideas on GitHub
53 | 👋 Join us on 👾 Discord or 💬 WeChat
57 | 📝 Check out Yi Tech Report
61 | 📚 Grow at Yi Learning Hub
63 | -------------------------------------------------------------------------------- /README/modelscope_header.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: apache-2.0 3 | --- 4 | -------------------------------------------------------------------------------- /README/wisemodel_header.md: -------------------------------------------------------------------------------- 1 | --- 2 | language: 3 | - en 4 | - zh 5 | license: apache-2.0 6 | tasks: 7 | - text-generation 8 | --- 9 | 10 | -------------------------------------------------------------------------------- /VL/README.md: -------------------------------------------------------------------------------- 1 | # Quick Start 2 | 3 | 1. Download the Yi-VL model. 4 | 5 | | Model | Download | 6 | | --------- | -------------------------------------------------------------------------------------------------------------------------------------- | 7 | | Yi-VL-34B | • [🤗 Hugging Face](https://huggingface.co/01-ai/Yi-VL-34B) • [🤖 ModelScope](https://www.modelscope.cn/models/01ai/Yi-VL-34B/summary) | 8 | | Yi-VL-6B | • [🤗 Hugging Face](https://huggingface.co/01-ai/Yi-VL-6B) • [🤖 ModelScope](https://www.modelscope.cn/models/01ai/Yi-VL-6B/summary) | 9 | 10 | 2. To set up the environment and install the required packages, execute the following command. 11 | 12 | ```bash 13 | git clone https://github.com/01-ai/Yi.git 14 | cd Yi/VL 15 | export PYTHONPATH=$PYTHONPATH:$(pwd) 16 | pip install -r requirements.txt 17 | ``` 18 | 19 | 3. To perform inference of Yi-VL, execute the following command. 20 | 21 | ```python 22 | python single_inference.py --model-path path-to-yi-vl-model --image-file path-to-image --question question-content 23 | ``` 24 | 25 | A quick example: 26 | 27 | ```python 28 | CUDA_VISIBLE_DEVICES=0 python single_inference.py --model-path ../model/Yi-VL-34B --image-file images/cats.jpg --question "Describe the cats and what they are doing in detail." 29 | ``` 30 | 31 | Since the temperature is set to 0.2 by default, the output is not always the same. An example output is: 32 | 33 | ``` 34 | ---------- 35 | question: Describe the cats and what they are doing in detail. 36 | outputs: In the image, there are three cats situated on a stone floor. The first cat, with a mix of black, orange, and white fur, is actively eating from a metal bowl. The second cat, which is entirely black, is also engaged in eating from a separate metal bowl. The third cat, a mix of gray and white, is not eating but is instead looking off to the side, seemingly distracted from the food. The bowls are positioned close to each other, and the cats are all within a similar proximity to the bowls. The scene captures a typical moment of feline behavior, with some cats enjoying their meal while others appear indifferent or distracted. 37 | ---------- 38 | ``` 39 | 40 | ## Web demo 41 | 42 | You can build a web UI demo for **Yi-VL** models. 43 | 44 | ```python 45 | python web_demo.py --model-path path-to-yi-vl-model 46 | ``` 47 | 48 | ## Command Line Interface 49 | 50 | Perform conversational inference via the command line interface. 
51 | 52 | ```python 53 | python cli.py --model-path path-to-yi-vl-model --image path-to-image 54 | ``` 55 | 56 | A quick example: 57 | 58 | ``` 59 | CUDA_VISIBLE_DEVICES=0 python cli.py --model-path ../model/Yi-VL-34B --image-file images/cats.jpg 60 | 61 | Human: what is in this image 62 | Assistant: cats 63 | Human: what are they doing 64 | Assistant: eating 65 | Human: what are they eating 66 | Assistant: cat food 67 | ``` 68 | 69 | ## API 70 | 71 | Deploy an OpenAI-style API that supports the Yi-VL models on your own server. 72 | 73 | ```python 74 | python openai_api.py --model-path path-to-yi-vl-model 75 | ``` 76 | 77 | Test Code: 78 | 79 | ```python 80 | from openai import OpenAI 81 | 82 | client = OpenAI( 83 | api_key="EMPTY", 84 | base_url="http://127.0.0.1:8000/v1/", 85 | ) 86 | 87 | stream = client.chat.completions.create( 88 | messages=[ 89 | { 90 | "role": "user", 91 | "content": [ 92 | { 93 | "type": "text", 94 | "text": "What’s in this image?" 95 | }, 96 | { 97 | "type": "image_url", 98 | "image_url": { 99 | # Either an url or a local path 100 | "url": "https://github.com/01-ai/Yi/blob/main/VL/images/cats.jpg?raw=true" 101 | } 102 | } 103 | ] 104 | } 105 | ], 106 | model="yi-vl", 107 | stream=True, 108 | ) 109 | for part in stream: 110 | print(part.choices[0].delta.content or "", end="", flush=True) 111 | ``` 112 | 113 | ## Major difference with LLaVA 114 | 115 | 1. We change the image token from `` to ``. The system prompt is modified to: 116 | 117 | ``` 118 | This is a chat between an inquisitive human and an AI assistant. Assume the role of the AI assistant. Read all the images carefully, and respond to the human's questions with informative, helpful, detailed and polite answers. 这是一个好奇的人类和一个人工智能助手之间的对话。假设你扮演这个AI助手的角色。仔细阅读所有的图像,并对人类的问题做出信息丰富、有帮助、详细的和礼貌的回答。 119 | ### Human: 120 | Describe the cats and what they are doing in detail. 121 | ### Assistant: 122 | ``` 123 | 124 | 2. We add LayNorm in the two-layer MLP of the projection module. 125 | 3. We train the parameters of ViT and scale up the input image resolution. 126 | 4. We utilize Laion-400M data for pretraining. 
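To make difference 2 above concrete, here is a minimal sketch of the projection module with the added LayerNorm layers. The hidden sizes are placeholders for illustration; the authoritative implementation is `build_vision_projector` in `llava/model/multimodal_projector/builder.py` (included later in this listing).

```python
import torch.nn as nn

# Placeholder sizes; the real values come from the model config
# (mm_hidden_size = vision-tower feature width, hidden_size = LLM width).
mm_hidden_size, hidden_size = 1280, 4096

# Two-layer MLP projector with a LayerNorm after each Linear,
# i.e. the "_Norm" variant built by build_vision_projector.
projector = nn.Sequential(
    nn.Linear(mm_hidden_size, hidden_size),
    nn.LayerNorm(hidden_size),
    nn.GELU(),
    nn.Linear(hidden_size, hidden_size),
    nn.LayerNorm(hidden_size),
)
```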
127 | -------------------------------------------------------------------------------- /VL/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import torch 5 | from llava.conversation import conv_templates 6 | from llava.mm_utils import ( 7 | KeywordsStoppingCriteria, 8 | get_model_name_from_path, 9 | load_pretrained_model, 10 | process_images, 11 | tokenizer_image_token, 12 | ) 13 | from llava.model.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX, key_info 14 | from PIL import Image 15 | 16 | 17 | def load_image(image_file): 18 | image = Image.open(image_file).convert("RGB") 19 | return image 20 | 21 | 22 | def main(args): 23 | model_path = os.path.expanduser(args.model_path) 24 | key_info["model_path"] = model_path 25 | get_model_name_from_path(model_path) 26 | tokenizer, model, image_processor, _ = load_pretrained_model(model_path) 27 | 28 | conv = conv_templates["mm_default"].copy() 29 | roles = conv.roles 30 | 31 | image = load_image(args.image_file) 32 | image_tensor = process_images([image], image_processor, model.config) 33 | image_tensor = image_tensor.to(model.device, dtype=torch.bfloat16) 34 | 35 | while True: 36 | try: 37 | inp = input(f"{roles[0]}: ") 38 | except EOFError: 39 | inp = "" 40 | if not inp: 41 | print("exit...") 42 | break 43 | 44 | print(f"{roles[1]}: ", end="") 45 | 46 | if image is not None: 47 | inp = DEFAULT_IMAGE_TOKEN + "\n" + inp 48 | conv.append_message(conv.roles[0], inp) 49 | image = None 50 | else: 51 | conv.append_message(conv.roles[0], inp) 52 | conv.append_message(conv.roles[1], None) 53 | prompt = conv.get_prompt() 54 | 55 | input_ids = ( 56 | tokenizer_image_token( 57 | prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt" 58 | ) 59 | .unsqueeze(0) 60 | .to(model.device) 61 | ) 62 | stop_str = conv.sep 63 | keywords = [stop_str] 64 | stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids) 65 | 66 | with torch.inference_mode(): 67 | output_ids = model.generate( 68 | input_ids, 69 | images=image_tensor, 70 | do_sample=True if args.temperature > 0 else False, 71 | temperature=args.temperature, 72 | max_new_tokens=args.max_new_tokens, 73 | use_cache=True, 74 | stopping_criteria=[stopping_criteria], 75 | ) 76 | 77 | input_token_len = input_ids.shape[1] 78 | outputs = tokenizer.batch_decode( 79 | output_ids[:, input_token_len:], skip_special_tokens=True 80 | )[0] 81 | outputs = outputs.strip() 82 | if outputs.endswith(stop_str): 83 | outputs = outputs[: -len(stop_str)] 84 | outputs = outputs.strip() 85 | 86 | print(outputs) 87 | 88 | conv.messages[-1][-1] = outputs 89 | 90 | if args.debug: 91 | print("\n", {"prompt": prompt, "outputs": outputs}, "\n") 92 | 93 | 94 | if __name__ == "__main__": 95 | parser = argparse.ArgumentParser() 96 | parser.add_argument("--model-path", type=str, required=True) 97 | parser.add_argument("--image-file", type=str, required=True) 98 | parser.add_argument("--temperature", type=float, default=0.2) 99 | parser.add_argument("--max-new-tokens", type=int, default=512) 100 | parser.add_argument("--debug", action="store_true") 101 | args = parser.parse_args() 102 | main(args) 103 | -------------------------------------------------------------------------------- /VL/images/cats.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/VL/images/cats.jpg 
-------------------------------------------------------------------------------- /VL/images/extreme_ironing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/VL/images/extreme_ironing.jpg -------------------------------------------------------------------------------- /VL/llava/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import LlavaLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /VL/llava/mm_utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from io import BytesIO 3 | 4 | import torch 5 | from llava.model import LlavaLlamaForCausalLM 6 | from llava.model.constants import IMAGE_TOKEN_INDEX 7 | from PIL import Image 8 | from transformers import AutoTokenizer, StoppingCriteria 9 | 10 | 11 | def load_image_from_base64(image): 12 | return Image.open(BytesIO(base64.b64decode(image))) 13 | 14 | 15 | def process_images(images, image_processor, model_cfg): 16 | return image_processor(images, return_tensors="pt")["pixel_values"] 17 | 18 | 19 | def expand2square(pil_img, background_color): 20 | width, height = pil_img.size 21 | if width == height: 22 | return pil_img 23 | elif width > height: 24 | result = Image.new(pil_img.mode, (width, width), background_color) 25 | result.paste(pil_img, (0, (width - height) // 2)) 26 | return result 27 | else: 28 | result = Image.new(pil_img.mode, (height, height), background_color) 29 | result.paste(pil_img, ((height - width) // 2, 0)) 30 | return result 31 | 32 | 33 | def tokenizer_image_token( 34 | prompt, tokenizer, image_token_index=IMAGE_TOKEN_INDEX, return_tensors=None 35 | ): 36 | prompt_chunks = [ 37 | tokenizer(chunk).input_ids for chunk in prompt.split("") 38 | ] 39 | 40 | def insert_separator(X, sep): 41 | return [ele for sublist in zip(X, [sep] * len(X)) for ele in sublist][:-1] 42 | 43 | input_ids = [] 44 | offset = 0 45 | if ( 46 | len(prompt_chunks) > 0 47 | and len(prompt_chunks[0]) > 0 48 | and prompt_chunks[0][0] == tokenizer.bos_token_id 49 | ): 50 | offset = 1 51 | input_ids.append(prompt_chunks[0][0]) 52 | 53 | for x in insert_separator(prompt_chunks, [image_token_index] * (offset + 1)): 54 | input_ids.extend(x[offset:]) 55 | 56 | if return_tensors is not None: 57 | if return_tensors == "pt": 58 | return torch.tensor(input_ids, dtype=torch.long) 59 | raise ValueError(f"Unsupported tensor type: {return_tensors}") 60 | return input_ids 61 | 62 | 63 | def get_model_name_from_path(model_path): 64 | model_path = model_path.strip("/") 65 | model_paths = model_path.split("/") 66 | if model_paths[-1].startswith("checkpoint-"): 67 | return model_paths[-2] + "_" + model_paths[-1] 68 | else: 69 | return model_paths[-1] 70 | 71 | 72 | def load_pretrained_model( 73 | model_path, load_8bit=False, load_4bit=False, device_map="auto", multimodal="IMAGE" 74 | ): 75 | kwargs = {"device_map": device_map} 76 | kwargs["torch_dtype"] = torch.bfloat16 77 | 78 | tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False) 79 | model = LlavaLlamaForCausalLM.from_pretrained( 80 | model_path, low_cpu_mem_usage=True, **kwargs 81 | ) 82 | image_processor = None 83 | model.resize_token_embeddings(len(tokenizer)) 84 | vision_tower = model.get_vision_tower() 85 | 86 | if not vision_tower.is_loaded: 87 | vision_tower.load_model() 88 | vision_tower.to(device="cuda", 
dtype=torch.bfloat16) 89 | image_processor = vision_tower.image_processor 90 | 91 | if hasattr(model.config, "max_sequence_length"): 92 | context_len = model.config.max_sequence_length 93 | else: 94 | context_len = 2048 95 | 96 | return tokenizer, model, image_processor, context_len 97 | 98 | 99 | class KeywordsStoppingCriteria(StoppingCriteria): 100 | def __init__(self, keywords, tokenizer, input_ids): 101 | self.keywords = keywords 102 | self.tokenizer = tokenizer 103 | self.start_len = None 104 | self.input_ids = input_ids 105 | 106 | def __call__( 107 | self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs 108 | ) -> bool: 109 | if self.start_len is None: 110 | self.start_len = self.input_ids.shape[1] 111 | return False 112 | else: 113 | outputs = self.tokenizer.batch_decode( 114 | output_ids[:, self.start_len :], skip_special_tokens=True 115 | ) 116 | flag = True 117 | for output in outputs: 118 | for keyword in self.keywords: 119 | if keyword not in output: 120 | flag = False 121 | return False 122 | return flag 123 | -------------------------------------------------------------------------------- /VL/llava/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .llava_llama import LlavaConfig, LlavaLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /VL/llava/model/clip_encoder/builder.py: -------------------------------------------------------------------------------- 1 | from .clip_encoder import CLIPVisionTower 2 | 3 | 4 | def build_vision_tower(vision_tower_cfg, **kwargs): 5 | vision_tower = getattr( 6 | vision_tower_cfg, 7 | "mm_vision_tower", 8 | getattr(vision_tower_cfg, "vision_tower", None), 9 | ) 10 | 11 | return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs) 12 | -------------------------------------------------------------------------------- /VL/llava/model/clip_encoder/clip_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import CLIPImageProcessor, CLIPVisionConfig, CLIPVisionModel 4 | 5 | 6 | class CLIPVisionTower(nn.Module): 7 | def __init__(self, vision_tower, args, delay_load=False): 8 | super().__init__() 9 | 10 | self.is_loaded = False 11 | 12 | self.vision_tower_name = vision_tower 13 | self.select_layer = args.mm_vision_select_layer 14 | self.select_feature = getattr(args, "mm_vision_select_feature", "patch") 15 | 16 | if not delay_load: 17 | self.load_model() 18 | else: 19 | self.cfg_only = CLIPVisionConfig.from_pretrained(self.vision_tower_name) 20 | 21 | def load_model(self): 22 | self.image_processor = CLIPImageProcessor.from_pretrained( 23 | self.vision_tower_name 24 | ) 25 | self.vision_tower = CLIPVisionModel.from_pretrained( 26 | self.vision_tower_name, ignore_mismatched_sizes=True 27 | ) 28 | 29 | self.is_loaded = True 30 | 31 | def feature_select(self, image_forward_outs): 32 | image_features = image_forward_outs.hidden_states[self.select_layer] 33 | if self.select_feature == "patch": 34 | image_features = image_features[:, 1:] 35 | elif self.select_feature == "cls_patch": 36 | image_features = image_features 37 | else: 38 | raise ValueError(f"Unexpected select feature: {self.select_feature}") 39 | return image_features 40 | 41 | # @torch.no_grad() 42 | def forward(self, images): 43 | if type(images) is list: 44 | image_features = [] 45 | for image in images: 46 | image_forward_out = self.vision_tower( 47 | 
image.to(device=self.device, dtype=self.dtype).unsqueeze(0), 48 | output_hidden_states=True, 49 | ) 50 | image_feature = self.feature_select(image_forward_out).to(image.dtype) 51 | image_features.append(image_feature) 52 | else: 53 | image_forward_outs = self.vision_tower( 54 | images.to(device=self.device, dtype=self.dtype), 55 | output_hidden_states=True, 56 | ) 57 | image_features = self.feature_select(image_forward_outs).to(images.dtype) 58 | 59 | return image_features 60 | 61 | @property 62 | def dummy_feature(self): 63 | return torch.zeros(1, self.hidden_size, device=self.device, dtype=self.dtype) 64 | 65 | @property 66 | def dtype(self): 67 | return self.vision_tower.dtype 68 | 69 | @property 70 | def device(self): 71 | return self.vision_tower.device 72 | 73 | @property 74 | def config(self): 75 | if self.is_loaded: 76 | return self.vision_tower.config 77 | else: 78 | return self.cfg_only 79 | 80 | @property 81 | def hidden_size(self): 82 | return self.config.hidden_size 83 | 84 | @property 85 | def num_patches(self): 86 | return (self.config.image_size // self.config.patch_size) ** 2 87 | -------------------------------------------------------------------------------- /VL/llava/model/constants.py: -------------------------------------------------------------------------------- 1 | # Model Constants 2 | IGNORE_INDEX = -100 3 | IMAGE_TOKEN_INDEX = -200 4 | DEFAULT_IMAGE_TOKEN = "" 5 | 6 | key_info = {"model_path": None} 7 | -------------------------------------------------------------------------------- /VL/llava/model/multimodal_projector/builder.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class IdentityMap(nn.Module): 7 | def __init__(self): 8 | super().__init__() 9 | 10 | def forward(self, x, *args, **kwargs): 11 | return x 12 | 13 | @property 14 | def config(self): 15 | return {"mm_projector_type": "identity"} 16 | 17 | 18 | class SimpleResBlock(nn.Module): 19 | def __init__(self, channels): 20 | super().__init__() 21 | self.pre_norm = nn.LayerNorm(channels) 22 | 23 | self.proj = nn.Sequential( 24 | nn.Linear(channels, channels), nn.GELU(), nn.Linear(channels, channels) 25 | ) 26 | 27 | def forward(self, x): 28 | x = self.pre_norm(x) 29 | return x + self.proj(x) 30 | 31 | 32 | def build_vision_projector(config, delay_load=False, **kwargs): 33 | projector_type = getattr(config, "mm_projector_type", "linear") 34 | 35 | if projector_type == "linear": 36 | return nn.Linear(config.mm_hidden_size, config.hidden_size) 37 | 38 | use_norm = False 39 | if "_Norm" in projector_type: 40 | use_norm = True 41 | projector_type = projector_type.replace("_Norm", "") 42 | mlp_gelu_match = re.match(r"^mlp(\d+)x_gelu$", projector_type) 43 | if mlp_gelu_match: 44 | mlp_depth = int(mlp_gelu_match.group(1)) 45 | if use_norm: 46 | modules = [ 47 | nn.Linear(config.mm_hidden_size, config.hidden_size), 48 | nn.LayerNorm(config.hidden_size), 49 | ] 50 | else: 51 | modules = [nn.Linear(config.mm_hidden_size, config.hidden_size)] 52 | for _ in range(1, mlp_depth): 53 | modules.append(nn.GELU()) 54 | if use_norm: 55 | modules.append(nn.Linear(config.hidden_size, config.hidden_size)) 56 | modules.append(nn.LayerNorm(config.hidden_size)) 57 | else: 58 | modules.append(nn.Linear(config.hidden_size, config.hidden_size)) 59 | return nn.Sequential(*modules) 60 | 61 | if projector_type == "identity": 62 | return IdentityMap() 63 | 64 | raise ValueError(f"Unknown projector type: {projector_type}") 65 | 
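A quick usage sketch of the builder above (illustrative only; the attribute values are placeholders, and the real values come from the Yi-VL model config). The `mm_projector_type` string selects the projector, and the `_Norm` suffix switches on the extra LayerNorm layers:

```python
import types
import torch
from llava.model.multimodal_projector.builder import build_vision_projector

# Placeholder config for illustration only
cfg = types.SimpleNamespace(
    mm_projector_type="mlp2x_gelu_Norm",  # "_Norm" suffix -> LayerNorm after each Linear
    mm_hidden_size=1280,                  # vision-tower feature width (placeholder)
    hidden_size=4096,                     # language-model hidden width (placeholder)
)

projector = build_vision_projector(cfg)
features = torch.randn(1, 576, cfg.mm_hidden_size)   # e.g. 576 patch features per image
print(projector)                  # Sequential(Linear, LayerNorm, GELU, Linear, LayerNorm)
print(projector(features).shape)  # torch.Size([1, 576, 4096])
```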
-------------------------------------------------------------------------------- /VL/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.36.2 2 | gradio>=4.13.0 3 | protobuf>=4.25.1 4 | torch>=2.0.1 5 | openai==1.8.0 6 | uvicorn 7 | loguru 8 | sse_starlette 9 | torchvision 10 | accelerate 11 | sentencepiece 12 | deepspeed 13 | datasets -------------------------------------------------------------------------------- /VL/single_inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import torch 5 | from llava.conversation import conv_templates 6 | from llava.mm_utils import ( 7 | KeywordsStoppingCriteria, 8 | expand2square, 9 | get_model_name_from_path, 10 | load_pretrained_model, 11 | tokenizer_image_token, 12 | ) 13 | from llava.model.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX, key_info 14 | from PIL import Image 15 | 16 | 17 | def disable_torch_init(): 18 | """ 19 | Disable the redundant torch default initialization to accelerate model creation. 20 | """ 21 | import torch 22 | 23 | setattr(torch.nn.Linear, "reset_parameters", lambda self: None) 24 | setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None) 25 | 26 | 27 | def single_infer(args): 28 | disable_torch_init() 29 | model_path = os.path.expanduser(args.model_path) 30 | key_info["model_path"] = model_path 31 | get_model_name_from_path(model_path) 32 | tokenizer, model, image_processor, context_len = load_pretrained_model(model_path) 33 | 34 | image_file = args.image_file 35 | qs = args.question 36 | qs = DEFAULT_IMAGE_TOKEN + "\n" + qs 37 | conv = conv_templates[args.conv_mode].copy() 38 | conv.append_message(conv.roles[0], qs) 39 | conv.append_message(conv.roles[1], None) 40 | prompt = conv.get_prompt() 41 | 42 | input_ids = ( 43 | tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt") 44 | .unsqueeze(0) 45 | .cuda() 46 | ) 47 | 48 | image = Image.open(image_file) 49 | if getattr(model.config, "image_aspect_ratio", None) == "pad": 50 | image = expand2square( 51 | image, tuple(int(x * 255) for x in image_processor.image_mean) 52 | ) 53 | image_tensor = image_processor.preprocess(image, return_tensors="pt")[ 54 | "pixel_values" 55 | ][0] 56 | 57 | stop_str = conv.sep 58 | keywords = [stop_str] 59 | stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids) 60 | model = model.to(dtype=torch.bfloat16) 61 | with torch.inference_mode(): 62 | output_ids = model.generate( 63 | input_ids, 64 | images=image_tensor.unsqueeze(0).to(dtype=torch.bfloat16).cuda(), 65 | do_sample=True, 66 | temperature=args.temperature, 67 | top_p=args.top_p, 68 | num_beams=args.num_beams, 69 | stopping_criteria=[stopping_criteria], 70 | max_new_tokens=1024, 71 | use_cache=True, 72 | ) 73 | 74 | input_token_len = input_ids.shape[1] 75 | n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item() 76 | if n_diff_input_output > 0: 77 | print( 78 | f"[Warning] {n_diff_input_output} output_ids are not the same as the input_ids" 79 | ) 80 | outputs = tokenizer.batch_decode( 81 | output_ids[:, input_token_len:], skip_special_tokens=True 82 | )[0] 83 | outputs = outputs.strip() 84 | 85 | if outputs.endswith(stop_str): 86 | outputs = outputs[: -len(stop_str)] 87 | outputs = outputs.strip() 88 | print("----------") 89 | print("question:", args.question) 90 | print("outputs:", outputs) 91 | print("----------") 92 | 93 | 94 | if __name__ == 
"__main__": 95 | parser = argparse.ArgumentParser() 96 | parser.add_argument("--model-path", type=str, default="01-ai/Yi-VL-6B") 97 | parser.add_argument("--image-file", type=str, default="images/cats.jpg") 98 | parser.add_argument( 99 | "--question", 100 | type=str, 101 | default="Describe the cats and what they are doing in detail.", 102 | ) 103 | parser.add_argument("--model-base", type=str, default=None) 104 | parser.add_argument("--conv-mode", type=str, default="mm_default") 105 | parser.add_argument("--temperature", type=float, default=0.2) 106 | parser.add_argument("--top_p", type=float, default=None) 107 | parser.add_argument("--num_beams", type=int, default=1) 108 | args = parser.parse_args() 109 | 110 | single_infer(args) 111 | -------------------------------------------------------------------------------- /assets/img/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/1.png -------------------------------------------------------------------------------- /assets/img/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/2.png -------------------------------------------------------------------------------- /assets/img/9b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/9b.png -------------------------------------------------------------------------------- /assets/img/Apply_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/Apply_04.png -------------------------------------------------------------------------------- /assets/img/Yi-9B_benchmark_code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/Yi-9B_benchmark_code.png -------------------------------------------------------------------------------- /assets/img/Yi-9B_benchmark_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/Yi-9B_benchmark_details.png -------------------------------------------------------------------------------- /assets/img/Yi-9B_benchmark_math.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/Yi-9B_benchmark_math.png -------------------------------------------------------------------------------- /assets/img/Yi-9B_benchmark_overall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/Yi-9B_benchmark_overall.png -------------------------------------------------------------------------------- /assets/img/Yi-9B_benchmark_text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/Yi-9B_benchmark_text.png 
-------------------------------------------------------------------------------- /assets/img/Yi_logo_icon_dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /assets/img/Yi_logo_icon_light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /assets/img/benchmark_base.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/benchmark_base.png -------------------------------------------------------------------------------- /assets/img/benchmark_chat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/benchmark_chat.png -------------------------------------------------------------------------------- /assets/img/coder.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/coder.gif -------------------------------------------------------------------------------- /assets/img/coder/bench.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/coder/bench.webp -------------------------------------------------------------------------------- /assets/img/coder/bench1.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/coder/bench1.webp -------------------------------------------------------------------------------- /assets/img/coder/bench2.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/coder/bench2.webp -------------------------------------------------------------------------------- /assets/img/coder/bench3.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/coder/bench3.webp -------------------------------------------------------------------------------- /assets/img/coder/bench31.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/coder/bench31.webp -------------------------------------------------------------------------------- /assets/img/coder/bench4.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/coder/bench4.webp -------------------------------------------------------------------------------- /assets/img/coder/bench5.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/coder/bench5.webp 
-------------------------------------------------------------------------------- /assets/img/coder/bench6.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/coder/bench6.webp -------------------------------------------------------------------------------- /assets/img/coder/bench7.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/coder/bench7.webp -------------------------------------------------------------------------------- /assets/img/coder/demo1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/coder/demo1.gif -------------------------------------------------------------------------------- /assets/img/coder/demo2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/coder/demo2.gif -------------------------------------------------------------------------------- /assets/img/coder/test: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /assets/img/coder/yi-coder-calculator-demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/coder/yi-coder-calculator-demo.gif -------------------------------------------------------------------------------- /assets/img/events/1: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /assets/img/events/a.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/events/a.jpeg -------------------------------------------------------------------------------- /assets/img/events/b.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/events/b.jpeg -------------------------------------------------------------------------------- /assets/img/events/c.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/events/c.jpeg -------------------------------------------------------------------------------- /assets/img/events/down.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/events/down.gif -------------------------------------------------------------------------------- /assets/img/events/pic.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/events/pic.mp4 -------------------------------------------------------------------------------- /assets/img/fireworksai.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/fireworksai.png -------------------------------------------------------------------------------- /assets/img/gh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/gh.png -------------------------------------------------------------------------------- /assets/img/quick_start_path.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/quick_start_path.png -------------------------------------------------------------------------------- /assets/img/quick_start_path_CN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/quick_start_path_CN.png -------------------------------------------------------------------------------- /assets/img/yi_34b_chat_web_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/yi_34b_chat_web_demo.gif -------------------------------------------------------------------------------- /assets/img/yi_llama_cpp1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/yi_llama_cpp1.png -------------------------------------------------------------------------------- /assets/img/yi_llama_cpp2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/01-ai/Yi/f73194cc16e82df438bc01240b8e4b622209f3eb/assets/img/yi_llama_cpp2.png -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | # Text Generation Task 2 | 3 | To run the text generation task in streaming mode: 4 | 5 | ```shell 6 | python text_generation.py \ 7 | --model 01-ai/Yi-6B \ 8 | --max-tokens 512 \ 9 | --streaming 10 | ``` 11 | 12 | 13 | 14 | You can also provide an extra `--prompt` argument to try other prompts (a sketch follows the next paragraph). 15 | 16 | When dealing with extremely long input sequences, you may need multiple GPUs and tensor-parallel inference to avoid out-of-memory errors.
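As a quick illustration of the `--prompt` option mentioned above, here is a minimal sketch; the prompt string itself is just a made-up example, and every flag comes from `text_generation.py`'s own argument parser:

```bash
python text_generation.py \
    --model 01-ai/Yi-6B \
    --max-tokens 512 \
    --streaming \
    --prompt "Write a short story about a robot learning to paint,"
```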
17 | 18 | To run text generation task using tensor parallelism acceleration with 2 GPU devices: 19 | 20 | ```shell 21 | torchrun --nproc_per_node 2 \ 22 | text_generation_tp.py \ 23 | --model 01-ai/Yi-6B \ 24 | --max-tokens 512 \ 25 | --eos-token $'\n' \ 26 | --streaming 27 | 28 | ``` 29 | -------------------------------------------------------------------------------- /demo/text_generation.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer 4 | 5 | 6 | def parse_inputs(): 7 | parser = argparse.ArgumentParser(description="Yi-6B text generation demo") 8 | parser.add_argument( 9 | "--model", 10 | type=str, 11 | default="01-ai/Yi-6B", 12 | help="pretrained model path locally or name on huggingface", 13 | ) 14 | parser.add_argument( 15 | "--max-tokens", 16 | type=int, 17 | default=256, 18 | help="max number of tokens to generate", 19 | ) 20 | parser.add_argument( 21 | "--streaming", 22 | action="store_true", 23 | help="whether to enable streaming text generation", 24 | ) 25 | parser.add_argument( 26 | "--prompt", 27 | type=str, 28 | default="Let me tell you an interesting story about cat Tom and mouse Jerry,", 29 | help="The prompt to start with", 30 | ) 31 | parser.add_argument("--cpu", action="store_true", help="Run demo with CPU only") 32 | args = parser.parse_args() 33 | return args 34 | 35 | 36 | def main(args): 37 | print(args) 38 | 39 | if args.cpu: 40 | device_map = "cpu" 41 | else: 42 | device_map = "auto" 43 | 44 | model = AutoModelForCausalLM.from_pretrained( 45 | args.model, device_map=device_map, torch_dtype="auto" 46 | ) 47 | tokenizer = AutoTokenizer.from_pretrained(args.model) 48 | inputs = tokenizer( 49 | args.prompt, 50 | return_tensors="pt", 51 | ).to(model.device) 52 | 53 | streamer = TextStreamer(tokenizer) if args.streaming else None 54 | outputs = model.generate( 55 | **inputs, 56 | max_new_tokens=args.max_tokens, 57 | streamer=streamer, 58 | # do_sample=True, 59 | # repetition_penalty=1.3, 60 | # no_repeat_ngram_size=5, 61 | # temperature=0.7, 62 | # top_k=40, 63 | # top_p=0.8, 64 | ) 65 | 66 | if streamer is None: 67 | print(tokenizer.decode(outputs[0], skip_special_tokens=True)) 68 | 69 | 70 | if __name__ == "__main__": 71 | args = parse_inputs() 72 | main(args) 73 | -------------------------------------------------------------------------------- /demo/text_generation_tp.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import deepspeed 5 | import torch 6 | from deepspeed.module_inject import auto_tp 7 | from torch import distributed, nn 8 | from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer 9 | 10 | 11 | def parse_inputs(): 12 | parser = argparse.ArgumentParser( 13 | description="Yi-6B text generation demo with tensor parallelism acceleration" 14 | ) 15 | parser.add_argument( 16 | "--model", 17 | type=str, 18 | default="01-ai/Yi-6B", 19 | help="pretrained model path locally or name on huggingface", 20 | ) 21 | parser.add_argument( 22 | "--tokenizer", 23 | type=str, 24 | default="", 25 | help="tokenizer path locally or name on huggingface", 26 | ) 27 | parser.add_argument( 28 | "--max-tokens", 29 | type=int, 30 | default=512, 31 | help="max number of tokens to generate", 32 | ) 33 | parser.add_argument( 34 | "--streaming", 35 | action="store_true", 36 | help="whether to enable streaming text generation", 37 | ) 38 | 
parser.add_argument( 39 | "--prompt", 40 | type=str, 41 | default="Let me tell you an interesting story about cat Tom and mouse Jerry,", 42 | help="The prompt to start with", 43 | ) 44 | parser.add_argument( 45 | "--eos-token", 46 | type=str, 47 | default="<|endoftext|>", 48 | help="End of sentence token", 49 | ) 50 | args = parser.parse_args() 51 | return args 52 | 53 | 54 | def main(args): 55 | # module_inject for model Yi 56 | def is_load_module(module): 57 | load_layers = [nn.Linear, nn.Embedding, nn.LayerNorm] 58 | load_layer_names = [ 59 | "LPLayerNorm", 60 | "SharedEmbedding", 61 | "OPTLearnedPositionalEmbedding", 62 | "LlamaRMSNorm", 63 | "YiRMSNorm", 64 | ] 65 | return module.__class__ in load_layers or module._get_name() in load_layer_names 66 | 67 | auto_tp.Loading.is_load_module = is_load_module 68 | 69 | def on_finalized_text(self, text: str, stream_end: bool = False): 70 | """Prints the new text to stdout. If the stream is ending, also prints a newline.""" 71 | if distributed.get_rank() == 0: 72 | print(text, flush=True, end="" if not stream_end else None) 73 | 74 | TextStreamer.on_finalized_text = on_finalized_text 75 | 76 | torch.cuda.set_device(int(os.environ["LOCAL_RANK"])) 77 | model = AutoModelForCausalLM.from_pretrained( 78 | args.model, device_map="cuda", torch_dtype="auto", trust_remote_code=True 79 | ) 80 | 81 | model = deepspeed.init_inference( 82 | model, mp_size=int(os.environ["WORLD_SIZE"]), replace_with_kernel_inject=False 83 | ) 84 | 85 | # reserve GPU memory for the following long context 86 | torch.cuda.empty_cache() 87 | 88 | tokenizer = AutoTokenizer.from_pretrained( 89 | args.tokenizer or args.model, trust_remote_code=True 90 | ) 91 | inputs = tokenizer( 92 | args.prompt, 93 | return_tensors="pt", 94 | ) 95 | streamer = ( 96 | TextStreamer(tokenizer, skip_special_tokens=True) if args.streaming else None 97 | ) 98 | outputs = model.generate( 99 | inputs.input_ids.cuda(), 100 | max_new_tokens=args.max_tokens, 101 | streamer=streamer, 102 | eos_token_id=tokenizer.convert_tokens_to_ids(args.eos_token), 103 | do_sample=True, 104 | repetition_penalty=1.3, 105 | no_repeat_ngram_size=5, 106 | temperature=0.7, 107 | top_k=40, 108 | top_p=0.8, 109 | ) 110 | if distributed.get_rank() == 0 and streamer is None: 111 | print(tokenizer.decode(outputs[0], skip_special_tokens=True)) 112 | 113 | 114 | if __name__ == "__main__": 115 | args = parse_inputs() 116 | main(args) 117 | -------------------------------------------------------------------------------- /docs/README_llama.cpp.md: -------------------------------------------------------------------------------- 1 | # Load Yi Series Chat Model with `llama.cpp` 2 | [`llama.cpp`](https://github.com/ggerganov/llama.cpp) is a library that allows you to convert and run LLaMa models using 4-bit integer quantization on MacBook. 3 | 4 | ## 1. Download `llama.cpp` 5 | Please skip this step if `llama.cpp` is already build. For simplicity, only one building option is shown below. Check the [website](https://github.com/ggerganov/llama.cpp#usage) for more details. 6 | ```bash 7 | git clone https://github.com/ggerganov/llama.cpp 8 | cd llama.cpp 9 | make 10 | ``` 11 | The folder should be like: 12 | ```bash 13 | |-- llama.cpp 14 | | |-- convert.py 15 | | |-- gguf-py 16 | | | |-- examples 17 | | | |-- gguf 18 | | | |-- scripts 19 | | | |-- ... 20 | | |-- ... 21 | ``` 22 | 23 | ## 2. Download Yi Series Model 24 | Please skip this step if the model is already downloaded. 
Again, other options are provided on the [website](https://github.com/01-ai/Yi#-models). 25 | ```bash 26 | # Make sure you have git-lfs installed (https://git-lfs.com) 27 | git lfs install 28 | git clone https://huggingface.co/01-ai/Yi-6B-Chat 29 | ``` 30 | If git-lfs is not installed yet, you can install it with Homebrew: 31 | ```bash 32 | brew install git-lfs 33 | ``` 34 | A typical model folder looks like this: 35 | ```bash 36 | |-- $MODEL_PATH 37 | | |-- config.json 38 | | |-- generation_config.json 39 | | |-- LICENSE 40 | | |-- main.py 41 | | |-- model-00001-of-00003.safetensors 42 | | |-- model-00002-of-00003.safetensors 43 | | |-- model-00003-of-00003.safetensors 44 | | |-- model.safetensors.index.json 45 | | |-- tokenizer_config.json 46 | | |-- tokenizer.model 47 | | |-- ... 48 | ``` 49 | 50 | ## 3. Convert and Quantize the Model to 4-bit 51 | Make sure all Python dependencies required by `llama.cpp` are installed: 52 | ```bash 53 | cd llama.cpp 54 | python3 -m pip install -r requirements.txt 55 | ``` 56 | Then, convert the model to gguf FP16 format: 57 | ```bash 58 | python3 convert.py $MODEL_PATH 59 | ``` 60 | Lastly, quantize the model to 4-bit (using the q4_0 method): 61 | ```bash 62 | ./quantize $MODEL_PATH/ggml-model-f16.gguf q4_0 63 | ``` 64 | 65 | ## 4. Override EOS Token ID 66 | The EOS token may be converted incorrectly, so one additional step is needed to reset the EOS token ID. 67 | ```bash 68 | python3 ./gguf-py/scripts/gguf-set-metadata.py $MODEL_PATH/ggml-model-q4_0.gguf tokenizer.ggml.eos_token_id 7 69 | ``` 70 | 71 | ## 5. Run the Model 72 | ```bash 73 | ./main -m $MODEL_PATH/ggml-model-q4_0.gguf --chatml 74 | ``` 75 | Finally, you should be able to type your prompts and interact with the model. 76 | -------------------------------------------------------------------------------- /finetune/README.md: -------------------------------------------------------------------------------- 1 | # Finetune code for Yi 6B and 34B 2 | 3 | ## Preparation 4 | 5 | ### From Image 6 | 7 | By default, we use a small dataset from [BAAI/COIG](https://huggingface.co/datasets/BAAI/COIG) to finetune the base model. 8 | You can also prepare your own dataset in the following `jsonl` format: 9 | 10 | ```json 11 | { "prompt": "Human: Who are you? Assistant:", "chosen": "I'm Yi." } 12 | ``` 13 | 14 | Then mount your files into the container to replace the default ones: 15 | 16 | ```bash 17 | docker run -it \ 18 | -v /path/to/save/finetuned/model/:/finetuned-model \ 19 | -v /path/to/train.jsonl:/yi/finetune/data/train.json \ 20 | -v /path/to/eval.jsonl:/yi/finetune/data/eval.json \ 21 | ghcr.io/01-ai/yi:latest \ 22 | bash finetune/scripts/run_sft_Yi_6b.sh 23 | ``` 24 | 25 | ### From Local Server 26 | 27 | Make sure you have conda installed. If not, install it with: 28 | 29 | ```bash 30 | mkdir -p ~/miniconda3 31 | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh 32 | bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 33 | rm -rf ~/miniconda3/miniconda.sh 34 | ~/miniconda3/bin/conda init bash 35 | source ~/.bashrc 36 | ``` 37 | 38 | Then create a conda environment: 39 | 40 | ```bash 41 | conda create -n dev_env python=3.10 -y 42 | conda activate dev_env 43 | pip install torch==2.0.1 deepspeed==0.10 tensorboard transformers datasets sentencepiece accelerate ray==2.7 44 | ``` 45 | 46 | ## Hardware Setup 47 | 48 | For the Yi-6B model, a node with 4 GPUs, each with more than 60 GB of GPU memory, is recommended.
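A quick way to check whether a node meets this requirement (assuming the NVIDIA driver and `nvidia-smi` are available) is to list each GPU's total memory:

```bash
nvidia-smi --query-gpu=name,memory.total --format=csv
```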
49 | 50 | For the Yi-34B model, the zero-offload technique consumes a lot of CPU memory, so be careful to limit the number of GPUs used for 34B finetune training. Use CUDA_VISIBLE_DEVICES to limit the number of GPUs (as shown in scripts/run_sft_Yi_34b.sh). 51 | 52 | A typical hardware setup for finetuning the 34B model is a node with 8 GPUs (limited to 4 at runtime via CUDA_VISIBLE_DEVICES=0,1,2,3), each with more than 80 GB of GPU memory, and with more than 900 GB of total CPU memory. 53 | 54 | ## Quick Start 55 | 56 | Download an LLM base model (6B or 34B) to MODEL_PATH. A typical model folder looks like this: 57 | 58 | ```bash 59 | |-- $MODEL_PATH 60 | | |-- config.json 61 | | |-- pytorch_model-00001-of-00002.bin 62 | | |-- pytorch_model-00002-of-00002.bin 63 | | |-- pytorch_model.bin.index.json 64 | | |-- tokenizer_config.json 65 | | |-- tokenizer.model 66 | | |-- ... 67 | ``` 68 | 69 | Download a dataset from Hugging Face to local storage DATA_PATH, e.g. Dahoas/rm-static: 70 | 71 | ```bash 72 | |-- $DATA_PATH 73 | | |-- data 74 | | | |-- train-00000-of-00001-2a1df75c6bce91ab.parquet 75 | | | |-- test-00000-of-00001-8c7c51afc6d45980.parquet 76 | | |-- dataset_infos.json 77 | | |-- README.md 78 | ``` 79 | 80 | `finetune/yi_example_dataset` contains example datasets, which are modified from [BAAI/COIG](https://huggingface.co/datasets/BAAI/COIG): 81 | 82 | ```bash 83 | |-- $DATA_PATH 84 | |--data 85 | |-- train.jsonl 86 | |-- eval.jsonl 87 | ``` 88 | 89 | `cd` into the scripts folder, set MODEL_PATH and DATA_PATH in the script, and run it. For example: 90 | 91 | ```bash 92 | cd finetune/scripts 93 | 94 | bash run_sft_Yi_6b.sh 95 | ``` 96 | 97 | For the Yi-6B base model, setting training_debug_steps=20 and num_train_epochs=4 produces a chat model, which takes about 20 minutes. 98 | 99 | For the Yi-34B base model, initialization takes a relatively long time. Please be patient. 100 | 101 | ## Evaluation 102 | 103 | ```bash 104 | cd finetune/scripts 105 | 106 | bash run_eval.sh 107 | ``` 108 | 109 | Then you'll see the answers from both the base model and the finetuned model. 110 | -------------------------------------------------------------------------------- /finetune/README_CN.md: -------------------------------------------------------------------------------- 1 | # Yi-6B及Yi-34B的微调代码 2 | 3 | ## 准备 4 | 5 | ### 使用镜像 6 | 7 | ### 使用本地服务器 8 | 9 | 推荐使用conda进行开发环境配置,如果您还没有安装conda,参考如下 10 | 11 | ```bash 12 | mkdir -p ~/miniconda3 13 | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh 14 | bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 15 | rm -rf ~/miniconda3/miniconda.sh 16 | ~/miniconda3/bin/conda init bash 17 | source ~/.bashrc 18 | ``` 19 | 20 | 然后创建conda环境 21 | 22 | ```bash 23 | conda create -n dev_env python=3.10 -y 24 | conda activate dev_env 25 | pip install torch==2.0.1 deepspeed==0.10 tensorboard transformers datasets sentencepiece accelerate ray==2.7 26 | ``` 27 | 28 | ## 硬件设置 29 | 30 | 对于Yi-6B模型,推荐使用4卡,单卡显存大于60GB的节点。 31 | 32 | 对于Yi-34B模型,由于训练设置采用了zero-offload,会消耗大量CPU内存。因此请使用CUDA_VISIBLE_DEVICES参数来限制GPU使用量(如scripts/run_sft_Yi_34b.sh所示) 33 | 34 | 对于Yi-34B模型微调训练的典型配置是8卡(仅4卡使用,CUDA_VISIBLE_DEVICES=0,1,2,3),单卡显存大于80GB,整个节点的CPU内存大于900GB。 35 | 36 | ## 快速开始 37 | 38 | 将模型下载到本地的MODEL_PATH,典型的模型文件夹如下: 39 | 40 | ```bash 41 | |-- $MODEL_PATH 42 | | |-- config.json 43 | | |-- pytorch_model-00001-of-00002.bin 44 | | |-- pytorch_model-00002-of-00002.bin 45 | | |-- pytorch_model.bin.index.json 46 | | |-- tokenizer_config.json 47 | | |-- tokenizer.model 48 | | |-- ...
49 | ``` 50 | 51 | 从Huggingface上下载一个数据集到本地DATA_PATH,如Dahoas/rm-static 52 | 53 | ```bash 54 | |-- $DATA_PATH 55 | | |-- data 56 | | | |-- train-00000-of-00001-2a1df75c6bce91ab.parquet 57 | | | |-- test-00000-of-00001-8c7c51afc6d45980.parquet 58 | | |-- dataset_infos.json 59 | | |-- README.md 60 | ``` 61 | 62 | /finetune/yi_example_dataset也有一个样例数据集,修改自BAAI/COIG 63 | 64 | ```bash 65 | |-- $DATA_PATH 66 | | |--data 67 | | | |-- train.jsonl 68 | | | |-- eval.jsonl 69 | ``` 70 | 71 | cd进入脚本文件夹,修改脚本当中的MODEL_PATH和DATA_PATH,运行脚本,例如: 72 | 73 | ```bash 74 | cd Yi/finetune/scripts 75 | 76 | bash run_sft_Yi_6b.sh 77 | ``` 78 | 79 | 对于Yi-6B基础模型,将training_debug_steps=20和num_train_epochs=4设置为如此的值,就可以输出一个chat模型,端到端预计消耗20分钟。 80 | 81 | 对于Yi-34B基础模型,初始化阶段会消耗较长时间,请保持耐心。 82 | 83 | ## 评估输出模型 84 | 85 | ```bash 86 | cd Yi/finetune/scripts 87 | 88 | bash run_eval.sh 89 | ``` 90 | 91 | 可以分别打印出base模型和finetune之后模型的输出。 92 | -------------------------------------------------------------------------------- /finetune/constant.py: -------------------------------------------------------------------------------- 1 | SFT = 0 2 | RM = 1 3 | PPO = 2 4 | RSFT = 3 5 | DPO = 4 6 | -------------------------------------------------------------------------------- /finetune/scripts/run_eval.sh: -------------------------------------------------------------------------------- 1 | #/usr/bin/env bash 2 | 3 | cd "$(dirname "${BASH_SOURCE[0]}")/../sft/" 4 | 5 | python prompt_eval.py \ 6 | --model_name_or_path_base=/base_model \ 7 | --model_name_or_path_finetune=/finetuned_model \ 8 | --language Chinese 9 | -------------------------------------------------------------------------------- /finetune/scripts/run_sft_Yi_34b.sh: -------------------------------------------------------------------------------- 1 | #/usr/bin/env bash 2 | 3 | cd "$(dirname "${BASH_SOURCE[0]}")/../sft/" 4 | 5 | export CUDA_VISIBLE_DEVICES=0,1,2,3 #limit parallelism to avoid cpu oom 6 | 7 | deepspeed main.py \ 8 | --data_path /DATA_PATH/ \ 9 | --model_name_or_path /MODEL_PATH/ \ 10 | --per_device_train_batch_size 1 \ 11 | --per_device_eval_batch_size 1 \ 12 | --max_seq_len 4096 \ 13 | --learning_rate 2e-6 \ 14 | --weight_decay 0. \ 15 | --num_train_epochs 4 \ 16 | --training_debug_steps 50 \ 17 | --gradient_accumulation_steps 1 \ 18 | --lr_scheduler_type cosine \ 19 | --num_warmup_steps 0 \ 20 | --seed 1234 \ 21 | --gradient_checkpointing \ 22 | --zero_stage 2 \ 23 | --deepspeed \ 24 | --offload \ 25 | --output_dir /finetuned_model 26 | -------------------------------------------------------------------------------- /finetune/scripts/run_sft_Yi_6b.sh: -------------------------------------------------------------------------------- 1 | #/usr/bin/env bash 2 | 3 | cd "$(dirname "${BASH_SOURCE[0]}")/../sft/" 4 | 5 | deepspeed main.py \ 6 | --data_path ../yi_example_dataset/ \ 7 | --model_name_or_path /base_model \ 8 | --per_device_train_batch_size 1 \ 9 | --per_device_eval_batch_size 1 \ 10 | --max_seq_len 4096 \ 11 | --learning_rate 2e-6 \ 12 | --weight_decay 0. 
\ 13 | --num_train_epochs 4 \ 14 | --training_debug_steps 20 \ 15 | --gradient_accumulation_steps 1 \ 16 | --lr_scheduler_type cosine \ 17 | --num_warmup_steps 0 \ 18 | --seed 1234 \ 19 | --gradient_checkpointing \ 20 | --zero_stage 2 \ 21 | --deepspeed \ 22 | --offload \ 23 | --output_dir ./finetuned_model 24 | -------------------------------------------------------------------------------- /finetune/scripts/run_sft_lora_Yi_6b.sh: -------------------------------------------------------------------------------- 1 | #/usr/bin/env bash 2 | 3 | cd ../sft/ 4 | 5 | deepspeed main.py \ 6 | --data_path /DATA_PATH/ \ 7 | --model_name_or_path /MODEL_PATH/ \ 8 | --per_device_train_batch_size 4 \ 9 | --per_device_eval_batch_size 4 \ 10 | --max_seq_len 4096 \ 11 | --learning_rate 2e-6 \ 12 | --weight_decay 0. \ 13 | --num_train_epochs 4 \ 14 | --training_debug_steps 20 \ 15 | --gradient_accumulation_steps 1 \ 16 | --lr_scheduler_type cosine \ 17 | --num_warmup_steps 0 \ 18 | --seed 1234 \ 19 | --gradient_checkpointing \ 20 | --zero_stage 2 \ 21 | --deepspeed \ 22 | --offload \ 23 | --lora_dim 128 \ 24 | --lora_module_name "layers." \ 25 | --output_dir ./output_Yi_6b_chat_sft_lora 26 | -------------------------------------------------------------------------------- /finetune/sft/prompt_eval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import sys 5 | 6 | import torch 7 | from transformers import AutoModelForCausalLM 8 | 9 | sys.path.append( 10 | os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) 11 | ) 12 | from utils.model.model_utils import create_hf_model 13 | from utils.utils import load_hf_tokenizer 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def parse_args(): 19 | parser = argparse.ArgumentParser(description="Eval the finetued SFT model") 20 | parser.add_argument( 21 | "--model_name_or_path_baseline", 22 | type=str, 23 | help="Path to baseline model", 24 | required=True, 25 | ) 26 | parser.add_argument( 27 | "--model_name_or_path_finetune", 28 | type=str, 29 | help="Path to pretrained model", 30 | required=True, 31 | ) 32 | parser.add_argument( 33 | "--num_beams", 34 | type=int, 35 | default=1, 36 | help="Specify num of beams", 37 | ) 38 | parser.add_argument( 39 | "--num_beam_groups", 40 | type=int, 41 | default=1, 42 | help="Specify num of beams", 43 | ) 44 | parser.add_argument( 45 | "--top_k", 46 | type=int, 47 | default=4, 48 | help="Specify num of beams", 49 | ) 50 | parser.add_argument( 51 | "--penalty_alpha", 52 | type=float, 53 | default=0.6, 54 | help="Specify num of beams", 55 | ) 56 | parser.add_argument( 57 | "--num_return_sequences", 58 | type=int, 59 | default=1, 60 | help="Specify num of return sequences", 61 | ) 62 | parser.add_argument( 63 | "--max_new_tokens", 64 | type=int, 65 | default=200, 66 | help="Specify num of return sequences", 67 | ) 68 | parser.add_argument( 69 | "--language", type=str, default="Chinese", choices=["English", "Chinese"] 70 | ) 71 | parser.add_argument("--eos", type=str, default="<|endoftext|>") 72 | 73 | args = parser.parse_args() 74 | 75 | return args 76 | 77 | 78 | def generate( 79 | model, 80 | tokenizer, 81 | inputs, 82 | num_beams=1, 83 | num_beam_groups=1, 84 | do_sample=False, 85 | num_return_sequences=1, 86 | max_new_tokens=200, 87 | eos="<|endoftext|>", 88 | ): 89 | stop_token_id = tokenizer.convert_tokens_to_ids(eos) 90 | # by default, stop_token_id = tokenizer.eos_token_id 91 | 92 | generate_ids = 
model.generate( 93 | inputs.input_ids, max_new_tokens=max_new_tokens, eos_token_id=stop_token_id 94 | ) 95 | 96 | result = tokenizer.batch_decode( 97 | generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False 98 | ) 99 | return result 100 | 101 | 102 | def print_utils(gen_output): 103 | for i in range(len(gen_output)): 104 | print() 105 | print(gen_output[i]) 106 | print() 107 | 108 | 109 | def prompt_eval(args, model_baseline, model_fintuned, tokenizer, device, prompts): 110 | for prompt in prompts: 111 | inputs = tokenizer(prompt, return_tensors="pt").to(device) 112 | print("==========Baseline: Greedy=========") 113 | r_base = generate( 114 | model_baseline, 115 | tokenizer, 116 | inputs, 117 | num_beams=1, 118 | num_return_sequences=args.num_return_sequences, 119 | max_new_tokens=args.max_new_tokens, 120 | eos=args.eos, 121 | ) 122 | print_utils(r_base) 123 | print("==========finetune: Greedy=========") 124 | r_finetune_g = generate( 125 | model_fintuned, 126 | tokenizer, 127 | inputs, 128 | num_beams=1, 129 | num_return_sequences=args.num_return_sequences, 130 | max_new_tokens=args.max_new_tokens, 131 | eos=args.eos, 132 | ) 133 | print_utils(r_finetune_g) 134 | print("====================prompt end=============================") 135 | print() 136 | print() 137 | 138 | 139 | def main(): 140 | args = parse_args() 141 | 142 | device = torch.device("cuda:0") 143 | 144 | tokenizer = load_hf_tokenizer( 145 | args.model_name_or_path_baseline, fast_tokenizer=False 146 | ) 147 | 148 | model_baseline = create_hf_model( 149 | AutoModelForCausalLM, 150 | args.model_name_or_path_baseline, 151 | tokenizer, 152 | None, 153 | eval_mode=True, 154 | ) 155 | model_fintuned = create_hf_model( 156 | AutoModelForCausalLM, 157 | args.model_name_or_path_finetune, 158 | tokenizer, 159 | None, 160 | eval_mode=True, 161 | ) 162 | 163 | if args.language == "English": 164 | prompts = [ 165 | "Human: Hello. Assistant:", 166 | "Human: Please explain Large Language Model. Assistant:", 167 | ] 168 | elif args.language == "Chinese": 169 | prompts = [ 170 | "Human: 你好。 Assistant:", 171 | "Human: 请介绍一下大语言模型? 
Assistant:", 172 | ] 173 | else: 174 | # TODO: 175 | prompts = [] 176 | 177 | prompt_eval(args, model_baseline, model_fintuned, tokenizer, device, prompts) 178 | 179 | 180 | if __name__ == "__main__": 181 | main() 182 | -------------------------------------------------------------------------------- /finetune/utils/ds_utils.py: -------------------------------------------------------------------------------- 1 | import deepspeed.comm as dist 2 | import torch 3 | 4 | GLOBAL_BATCH_SIZE = 32 5 | MICRO_BATCH_SIZE = 4 6 | 7 | 8 | def get_train_ds_config( 9 | offload, 10 | stage=2, 11 | enable_hybrid_engine=False, 12 | inference_tp_size=1, 13 | release_inference_cache=False, 14 | pin_parameters=True, 15 | tp_gather_partition_size=8, 16 | max_out_tokens=512, 17 | enable_tensorboard=False, 18 | enable_mixed_precision_lora=False, 19 | tb_path="", 20 | tb_name="", 21 | ): 22 | device = "cpu" if offload else "none" 23 | zero_opt_dict = { 24 | "stage": stage, 25 | "offload_param": {"device": device}, 26 | "offload_optimizer": {"device": device}, 27 | "stage3_param_persistence_threshold": 1e4, 28 | "stage3_max_live_parameters": 3e7, 29 | "stage3_prefetch_bucket_size": 3e7, 30 | "memory_efficient_linear": False, 31 | } 32 | if enable_mixed_precision_lora: 33 | zero_opt_dict["zero_quantized_nontrainable_weights"] = True 34 | if dist.get_world_size() != torch.cuda.device_count(): 35 | zero_opt_dict["zero_hpz_partition_size"] = torch.cuda.device_count() 36 | return { 37 | "train_batch_size": GLOBAL_BATCH_SIZE, 38 | "train_micro_batch_size_per_gpu": MICRO_BATCH_SIZE, 39 | "steps_per_print": 10, 40 | "zero_optimization": zero_opt_dict, 41 | "fp16": {"enabled": True, "loss_scale_window": 100}, 42 | "gradient_clipping": 1.0, 43 | "prescale_gradients": False, 44 | "wall_clock_breakdown": False, 45 | "hybrid_engine": { 46 | "enabled": enable_hybrid_engine, 47 | "max_out_tokens": max_out_tokens, 48 | "inference_tp_size": inference_tp_size, 49 | "release_inference_cache": release_inference_cache, 50 | "pin_parameters": pin_parameters, 51 | "tp_gather_partition_size": tp_gather_partition_size, 52 | }, 53 | "tensorboard": { 54 | "enabled": enable_tensorboard, 55 | "output_path": f"{tb_path}/ds_tensorboard_logs/", 56 | "job_name": f"{tb_name}_tensorboard", 57 | }, 58 | } 59 | 60 | 61 | def get_eval_ds_config(offload, stage=0): 62 | device = "cpu" if offload else "none" 63 | zero_opt_dict = { 64 | "stage": stage, 65 | "stage3_param_persistence_threshold": 1e4, 66 | "offload_param": {"device": device}, 67 | "memory_efficient_linear": False, 68 | } 69 | return { 70 | "train_batch_size": GLOBAL_BATCH_SIZE, 71 | "train_micro_batch_size_per_gpu": MICRO_BATCH_SIZE, 72 | "steps_per_print": 10, 73 | "zero_optimization": zero_opt_dict, 74 | "fp16": {"enabled": True}, 75 | "gradient_clipping": 1.0, 76 | "prescale_gradients": False, 77 | "wall_clock_breakdown": False, 78 | } 79 | -------------------------------------------------------------------------------- /finetune/utils/model/model_utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from transformers import AutoConfig 4 | from transformers.deepspeed import HfDeepSpeedConfig 5 | 6 | 7 | def create_hf_model( 8 | model_class, 9 | model_name_or_path, 10 | tokenizer, 11 | ds_config=None, 12 | rlhf_training=False, 13 | disable_dropout=False, 14 | eval_mode=False, 15 | ): 16 | model_config = AutoConfig.from_pretrained( 17 | model_name_or_path, trust_remote_code=True 18 | ) 19 | # print(model_config) 20 | if 
disable_dropout: 21 | model_config.dropout = 0.0 22 | # Note: dschf is defined in function scope to avoid global effects 23 | # https://huggingface.co/docs/transformers/main_classes/deepspeed#nontrainer-deepspeed-integration 24 | if ds_config is not None and ds_config["zero_optimization"]["stage"] == 3: 25 | HfDeepSpeedConfig(ds_config) 26 | else: 27 | pass 28 | 29 | if not eval_mode: 30 | model = model_class.from_pretrained( 31 | model_name_or_path, 32 | from_tf=bool(".ckpt" in model_name_or_path), 33 | config=model_config, 34 | trust_remote_code=True, 35 | use_flash_attention_2=True, 36 | ) 37 | else: 38 | model = model_class.from_pretrained( 39 | model_name_or_path, 40 | from_tf=bool(".ckpt" in model_name_or_path), 41 | config=model_config, 42 | trust_remote_code=True, 43 | device_map="auto", 44 | torch_dtype="auto", 45 | ) 46 | 47 | model.config.end_token_id = tokenizer.eos_token_id 48 | model.config.pad_token_id = model.config.eos_token_id 49 | 50 | model.resize_token_embeddings( 51 | int(8 * math.ceil(len(tokenizer) / 8.0)) 52 | ) # make the vocab size multiple of 8 53 | 54 | print("length of tokenizer is {}".format(len(tokenizer))) 55 | print( 56 | "resize_token_embeddings is {}".format(int(8 * math.ceil(len(tokenizer) / 8.0))) 57 | ) 58 | 59 | return model 60 | -------------------------------------------------------------------------------- /finetune/utils/module/lora.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import deepspeed 4 | import torch 5 | import torch.nn.functional as F 6 | from deepspeed.compression.helper import recursive_getattr, recursive_setattr 7 | from torch import nn 8 | 9 | 10 | class LinearLayer_LoRA(nn.Module): 11 | # an simple implementation of LoRA 12 | # for now only support Linear Layer 13 | def __init__(self, weight, lora_dim=0, lora_scaling=1, lora_droppout=0, bias=None): 14 | super(LinearLayer_LoRA, self).__init__() 15 | self.weight = weight 16 | self.bias = bias 17 | 18 | if lora_dim <= 0: 19 | raise ValueError( 20 | "You are training to use LoRA, whose reduced dim should be larger than 1" 21 | ) 22 | 23 | try: 24 | # for zero stage 3 25 | rows, columns = weight.ds_shape 26 | except: 27 | rows, columns = weight.shape 28 | self.lora_right_weight = nn.Parameter( 29 | torch.zeros(columns, lora_dim) 30 | ) # apply transpose so in forward we do not need to 31 | self.lora_left_weight = nn.Parameter(torch.zeros(lora_dim, rows)) 32 | self.lora_scaling = lora_scaling / lora_dim 33 | 34 | if lora_droppout > 0: 35 | self.lora_dropout = nn.Dropout(lora_droppout) 36 | else: 37 | self.lora_dropout = nn.Identity() 38 | 39 | self.reset_parameters() 40 | # disable the original weight gradient 41 | self.weight.requires_grad = False 42 | # fuse LoRA to the original weight 43 | self.fuse_lora = False 44 | 45 | def eval(self): 46 | self.lora_dropout.eval() 47 | 48 | # self.fuse_lora_weight() 49 | 50 | def train(self, mode=True): 51 | self.lora_dropout.train(mode) 52 | # self.unfuse_lora_weight() 53 | 54 | def reset_parameters(self): 55 | nn.init.kaiming_uniform_(self.lora_right_weight, a=math.sqrt(5)) 56 | nn.init.zeros_(self.lora_left_weight) 57 | 58 | def fuse_lora_weight(self): 59 | if not self.fuse_lora: 60 | self.weight.data += self.lora_scaling * torch.matmul( 61 | self.lora_left_weight.t(), self.lora_right_weight.t() 62 | ) 63 | self.fuse_lora = True 64 | 65 | def unfuse_lora_weight(self): 66 | if self.fuse_lora: 67 | self.weight.data -= self.lora_scaling * torch.matmul( 68 | self.lora_left_weight.t(), 
self.lora_right_weight.t() 69 | ) 70 | self.fuse_lora = False 71 | 72 | def forward(self, input): 73 | if self.fuse_lora: 74 | return F.linear(input, self.weight, self.bias) 75 | else: 76 | return ( 77 | F.linear(input, self.weight, self.bias) 78 | + ( 79 | self.lora_dropout(input) 80 | @ self.lora_right_weight 81 | @ self.lora_left_weight 82 | ) 83 | * self.lora_scaling 84 | ) 85 | 86 | 87 | # convert the linear layer to LoRA 88 | def convert_linear_layer_to_lora( 89 | model, part_module_name, lora_dim=0, lora_scaling=1, lora_droppout=0 90 | ): 91 | replace_name = [] 92 | for name, module in model.named_modules(): 93 | if isinstance(module, nn.Linear) and part_module_name in name: 94 | replace_name.append(name) 95 | for name in replace_name: 96 | module = recursive_getattr(model, name) 97 | tmp = ( 98 | LinearLayer_LoRA( 99 | module.weight, lora_dim, lora_scaling, lora_droppout, module.bias 100 | ) 101 | .to(module.weight.device) 102 | .to(module.weight.dtype) 103 | ) 104 | recursive_setattr(model, name, tmp) 105 | return model 106 | 107 | 108 | def _z3_params_to_fetch(param_list): 109 | return [ 110 | p 111 | for p in param_list 112 | if hasattr(p, "ds_id") 113 | and p.ds_status 114 | == deepspeed.runtime.zero.partition_parameters.ZeroParamStatus.NOT_AVAILABLE 115 | ] 116 | 117 | 118 | # convert the LoRA layer to linear layer 119 | def convert_lora_to_linear_layer(model): 120 | replace_name = [] 121 | for name, module in model.named_modules(): 122 | if isinstance(module, LinearLayer_LoRA): 123 | replace_name.append(name) 124 | for name in replace_name: 125 | module = recursive_getattr(model, name) 126 | zero_stage_3 = hasattr(module.weight, "ds_id") 127 | with deepspeed.zero.GatheredParameters( 128 | _z3_params_to_fetch( 129 | [ 130 | module.weight, 131 | module.bias, 132 | module.lora_left_weight, 133 | module.lora_right_weight, 134 | ] 135 | ), 136 | modifier_rank=0, 137 | enabled=zero_stage_3, 138 | ): 139 | module.fuse_lora_weight() 140 | return model 141 | 142 | 143 | def only_optimize_lora_parameters(model): 144 | # turn off the gradient of all the parameters except the LoRA parameters 145 | for name, param in model.named_parameters(): 146 | if "lora_right_weight" in name or "lora_left_weight" in name: 147 | param.requires_grad = True 148 | else: 149 | param.requires_grad = False 150 | return model 151 | 152 | 153 | def make_model_gradient_checkpointing_compatible(model): 154 | # Higgingface added this enable input require grads function to make gradient checkpointing work for lora-only optimization 155 | if hasattr(model, "enable_input_require_grads"): 156 | model.enable_input_require_grads() 157 | elif hasattr(model, "get_input_embeddings"): 158 | 159 | def make_inputs_require_grad(module, input, output): 160 | output.requires_grad_(True) 161 | 162 | model.get_input_embeddings().register_forward_hook(make_inputs_require_grad) 163 | return model 164 | -------------------------------------------------------------------------------- /finetune/utils/perf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | # This function can be used to print throughput for Step 1 and 2 only 5 | def print_throughput(hf_model, args, e2e_time, rank=0): 6 | if rank <= 0: 7 | hf_config = hf_model.config 8 | num_layers, hidden_size, vocab_size = get_hf_configs(hf_config) 9 | 10 | gpus_per_model = torch.distributed.get_world_size() 11 | seq_length = args.max_seq_len 12 | batch_size = args.per_device_train_batch_size 13 | samples_per_second = 
batch_size / e2e_time 14 | checkpoint_activations_factor = 4 if args.gradient_checkpointing else 3 15 | if args.lora_dim > 0: 16 | k = args.lora_dim * 2 / hidden_size 17 | checkpoint_activations_factor -= 1 - k 18 | 19 | hf_model._num_params = sum( 20 | [ 21 | p.ds_numel if hasattr(p, "ds_tensor") else p.numel() 22 | for p in hf_model.parameters() 23 | ] 24 | ) 25 | params_in_billions = hf_model._num_params / (1e9) 26 | 27 | # Megatron paper's formula to calculate training flops 28 | train_flops_per_iteration = calculate_flops( 29 | checkpoint_activations_factor, batch_size, seq_length, hf_config 30 | ) 31 | 32 | train_tflops = train_flops_per_iteration / ( 33 | e2e_time * gpus_per_model * (10**12) 34 | ) 35 | 36 | param_string = ( 37 | f"{params_in_billions:.3f} B" if params_in_billions != 0 else "NA" 38 | ) 39 | print( 40 | f"Model Parameters: {param_string}, Latency: {e2e_time:.2f}s, TFLOPs: {train_tflops:.2f}, Samples/sec: {samples_per_second:.2f}, Time/seq {e2e_time/batch_size:.2f}s, Batch Size: {batch_size}, Sequence Length: {seq_length}" 41 | ) 42 | 43 | 44 | # Helper function to calculate FLOPs using the Megatron-LM paper's formula 45 | def calculate_flops(checkpoint_activations_factor, batch_size, seq_length, hf_config): 46 | num_layers, hidden_size, vocab_size = get_hf_configs(hf_config) 47 | # TODO: check hidden_size is not None 48 | flops_per_iteration = ( 49 | 24 50 | * checkpoint_activations_factor 51 | * batch_size 52 | * seq_length 53 | * num_layers 54 | * (hidden_size**2) 55 | ) * ( 56 | 1.0 57 | + (seq_length / (6.0 * hidden_size)) 58 | + (vocab_size / (16.0 * num_layers * hidden_size)) 59 | ) 60 | return flops_per_iteration 61 | 62 | 63 | def get_hf_configs(hf_config): 64 | num_layers = getattr( 65 | hf_config, "num_hidden_layers", getattr(hf_config, "n_layer", None) 66 | ) 67 | hidden_size = getattr(hf_config, "hidden_size", getattr(hf_config, "n_embd", None)) 68 | vocab_size = getattr(hf_config, "vocab_size", None) 69 | assert all( 70 | (num_layers, hidden_size, vocab_size) 71 | ), "Could not determine number of layers, hidden size, and vocab size of the model" 72 | 73 | return num_layers, hidden_size, vocab_size 74 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "yi-models" 3 | version = "0.1.1-dev" 4 | description = "" 5 | authors = ["Yi Team "] 6 | license = "Apache 2.0" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "~3.10" 11 | pytorch = "2.0.1" 12 | pytorch-cuda = "11.8" 13 | deepspeed = "0.12.2" 14 | transformers = "4.36.0" 15 | sentencepiece = "0.1.99" 16 | accelerate = "0.24.1" 17 | datasets = "2.14.5" 18 | optimum = "1.13.2" 19 | einops = "0.7.0" 20 | flash-attn = {version = "2.3.6", source = "pypi"} 21 | 22 | [tool.conda-lock] 23 | channels = ["conda-forge", "pytorch", "nvidia", "nodefaults"] 24 | platforms = ["linux-64"] 25 | 26 | [tool.conda-lock.dependencies] 27 | pip = ">=23.3" 28 | 29 | [build-system] 30 | requires = ["poetry-core"] 31 | build-backend = "poetry.core.masonry.api" 32 | -------------------------------------------------------------------------------- /quantization/awq/README.md: -------------------------------------------------------------------------------- 1 | # AWQ quantization 2 | 3 | 4 | [AWQ](https://github.com/mit-han-lab/llm-awq) is a PTQ(Post-Training Quantization) 5 | method. 
It's an efficient and accurate low-bit weight quantization (INT3/4) for LLMs. 6 | 7 | Yi models can be AWQ quantized without a lot of efforts. 8 | We provide a step-by-step tutorial below. 9 | 10 | To run AWQ, we will use [AutoAWQ](https://github.com/casper-hansen/AutoAWQ). 11 | 12 | ## Do Quantization 13 | 14 | The `quant_autoawq.py` script is provided for you to perform AWQ quantization: 15 | 16 | ```bash 17 | python quant_autoawq.py --model /base_model \ 18 | --output_dir /quantized_model --bits 4 --group_size 128 --trust_remote_code 19 | ``` 20 | 21 | ## Run Quantized Model 22 | 23 | You can run a quantized model using the `eval_quantized_model.py`: 24 | 25 | ```bash 26 | python eval_quantized_model.py --model /quantized_model --trust_remote_code 27 | ``` 28 | -------------------------------------------------------------------------------- /quantization/awq/eval_quantized_model.py: -------------------------------------------------------------------------------- 1 | from awq import AutoAWQForCausalLM 2 | from transformers import AutoTokenizer 3 | 4 | 5 | def run_quantization(args): 6 | # Load model 7 | tokenizer = AutoTokenizer.from_pretrained( 8 | args.model, 9 | trust_remote_code=args.trust_remote_code, 10 | local_files_only=True, 11 | legacy=True, 12 | use_fast=False, 13 | ) 14 | model = ( 15 | AutoAWQForCausalLM.from_quantized( 16 | args.model, 17 | trust_remote_code=args.trust_remote_code, 18 | fuse_layers=True, 19 | batch_size=args.batch, 20 | ) 21 | .cuda() 22 | .eval() 23 | ) 24 | 25 | prompt = "count to 1000: 0 1 2 3" 26 | prompts = [prompt] * args.batch 27 | inputs = tokenizer(prompts, return_tensors="pt", add_special_tokens=False).to( 28 | "cuda:0" 29 | ) 30 | output_ids = model.generate( 31 | **inputs, 32 | do_sample=False, 33 | max_new_tokens=4096, 34 | ) 35 | generate_tokens = tokenizer.batch_decode(output_ids) 36 | print(generate_tokens) 37 | 38 | 39 | if __name__ == "__main__": 40 | import argparse 41 | 42 | parser = argparse.ArgumentParser(description="Run AWQ quantized model") 43 | parser.add_argument("--model", type=str, help="The quantized name") 44 | parser.add_argument( 45 | "--trust_remote_code", action="store_true", help="Trust remote code" 46 | ) 47 | parser.add_argument("--batch", type=int, default=4) 48 | 49 | args = parser.parse_args() 50 | run_quantization(args) 51 | -------------------------------------------------------------------------------- /quantization/awq/quant_autoawq.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | from awq import AutoAWQForCausalLM 5 | from transformers import AutoTokenizer 6 | 7 | 8 | def run_quantization(args): 9 | # Load model 10 | tokenizer = AutoTokenizer.from_pretrained( 11 | args.model, trust_remote_code=args.trust_remote_code 12 | ) 13 | model = AutoAWQForCausalLM.from_pretrained(args.model) 14 | 15 | quant_config = { 16 | "zero_point": True, 17 | "q_group_size": args.group_size, 18 | "w_bit": args.bits, 19 | } 20 | # Quantize 21 | model.quantize(tokenizer, quant_config=quant_config) 22 | 23 | # Save quantized model 24 | model.save_quantized(args.output_dir, safetensors=True) 25 | tokenizer.save_pretrained(args.output_dir) 26 | 27 | 28 | if __name__ == "__main__": 29 | logger = logging.getLogger() 30 | logging.basicConfig( 31 | format="%(asctime)s %(levelname)s [%(name)s] %(message)s", 32 | level=logging.INFO, 33 | datefmt="%Y-%m-%d %H:%M:%S", 34 | ) 35 | 36 | parser = argparse.ArgumentParser(description="AutoAWQ quantize") 37 | 
parser.add_argument( 38 | "--model", 39 | type=str, 40 | default="01-ai/Yi-6b", 41 | help="Pretrained model path locally or name on huggingface", 42 | ) 43 | parser.add_argument("--output_dir", type=str, help="Output base folder") 44 | parser.add_argument( 45 | "--trust_remote_code", action="store_true", help="Trust remote code" 46 | ) 47 | parser.add_argument("--bits", type=int, default=4, help="Quantize bit(s)") 48 | parser.add_argument( 49 | "--group_size", type=int, default=128, help="Quantize group size(s)" 50 | ) 51 | 52 | args = parser.parse_args() 53 | run_quantization(args) 54 | -------------------------------------------------------------------------------- /quantization/gptq/README.md: -------------------------------------------------------------------------------- 1 | # GPT-Q quantization 2 | 3 | [GPT-Q](https://github.com/IST-DASLab/gptq) is a PTQ (Post-Training Quantization) 4 | method. It saves memory and can provide speedups while retaining the accuracy 5 | of the model. 6 | 7 | Yi models can be GPT-Q quantized with little effort. 8 | We provide a step-by-step tutorial below. 9 | 10 | To run GPT-Q, we will use [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ) and 11 | [exllama](https://github.com/turboderp/exllama). 12 | Hugging Face transformers has integrated optimum and auto-gptq to perform 13 | GPTQ quantization on language models. 14 | 15 | ## Do Quantization 16 | 17 | The `quant_autogptq.py` script is provided for you to perform GPT-Q quantization: 18 | 19 | ```bash 20 | python quant_autogptq.py --model /base_model \ 21 | --output_dir /quantized_model --bits 4 --group_size 128 --trust_remote_code 22 | ``` 23 | 24 | 25 | ## Run Quantized Model 26 | 27 | You can run a quantized model using `eval_quantized_model.py`: 28 | 29 | ```bash 30 | python eval_quantized_model.py --model /quantized_model --trust_remote_code 31 | ``` 32 | 33 | -------------------------------------------------------------------------------- /quantization/gptq/eval_quantized_model.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoModelForCausalLM, AutoTokenizer 2 | 3 | 4 | def run_quantization(args): 5 | # Load model 6 | tokenizer = AutoTokenizer.from_pretrained( 7 | args.model, 8 | trust_remote_code=args.trust_remote_code, 9 | local_files_only=True, 10 | legacy=True, 11 | use_fast=False, 12 | ) 13 | 14 | model = AutoModelForCausalLM.from_pretrained( 15 | args.model, 16 | device_map="cuda:0", 17 | trust_remote_code=args.trust_remote_code, 18 | use_safetensors=True, 19 | ).eval() 20 | 21 | prompt = "count to 1000: 0 1 2 3" 22 | prompts = [prompt] * 4 23 | inputs = tokenizer(prompts, return_tensors="pt", add_special_tokens=False).to( 24 | model.device 25 | ) 26 | output_ids = model.generate( 27 | **inputs, 28 | do_sample=False, 29 | max_new_tokens=4096, 30 | ) 31 | generate_tokens = tokenizer.batch_decode(output_ids) 32 | print(generate_tokens) 33 | 34 | 35 | if __name__ == "__main__": 36 | import argparse 37 | 38 | parser = argparse.ArgumentParser(description="Run GPTQ quantized model") 39 | parser.add_argument("--model", type=str, help="Path or name of the quantized model") 40 | parser.add_argument( 41 | "--trust_remote_code", action="store_true", help="Trust remote code" 42 | ) 43 | 44 | args = parser.parse_args() 45 | run_quantization(args) 46 | -------------------------------------------------------------------------------- /quantization/gptq/quant_autogptq.py:
-------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, GPTQConfig 5 | 6 | 7 | def run_quantization(args): 8 | # Load model 9 | tokenizer = AutoTokenizer.from_pretrained( 10 | args.model, 11 | trust_remote_code=args.trust_remote_code, 12 | local_files_only=True, 13 | legacy=True, 14 | use_fast=False, 15 | ) 16 | 17 | model_config = AutoConfig.from_pretrained( 18 | args.model, 19 | trust_remote_code=args.trust_remote_code, 20 | ) 21 | model_config.max_position_embeddings = 4096 # to avoid OOM 22 | # Quantize 23 | quantization_config = GPTQConfig( 24 | bits=args.bits, 25 | group_size=args.group_size, 26 | dataset="wikitext2", 27 | tokenizer=tokenizer, 28 | disable_exllama=False, 29 | ) 30 | model = AutoModelForCausalLM.from_pretrained( 31 | args.model, 32 | config=model_config, 33 | device_map="auto", 34 | trust_remote_code=args.trust_remote_code, 35 | quantization_config=quantization_config, 36 | ) 37 | 38 | # Save quantized model 39 | model.save_pretrained(args.output_dir, safe_serialization=True) 40 | tokenizer.save_pretrained(args.output_dir) 41 | 42 | 43 | if __name__ == "__main__": 44 | logger = logging.getLogger() 45 | logging.basicConfig( 46 | format="%(asctime)s %(levelname)s [%(name)s] %(message)s", 47 | level=logging.INFO, 48 | datefmt="%Y-%m-%d %H:%M:%S", 49 | ) 50 | 51 | parser = argparse.ArgumentParser(description="GPT-Q quantize") 52 | parser.add_argument( 53 | "--model", 54 | type=str, 55 | default="01-ai/Yi-6b", 56 | help="Pretrained model path locally or name on huggingface", 57 | ) 58 | parser.add_argument("--output_dir", type=str, help="Output base folder") 59 | parser.add_argument( 60 | "--trust_remote_code", action="store_true", help="Trust remote code" 61 | ) 62 | parser.add_argument("--bits", type=int, default=4, help="Quantize bit(s)") 63 | parser.add_argument( 64 | "--group_size", type=int, default=128, help="Quantize group size(s)" 65 | ) 66 | 67 | args = parser.parse_args() 68 | run_quantization(args) 69 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.36.2 2 | gradio>=4.13.0 3 | protobuf>=4.25.1 4 | torch==2.0.1 5 | accelerate 6 | sentencepiece 7 | deepspeed 8 | datasets --------------------------------------------------------------------------------
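Putting the pieces together, a minimal sketch for trying the repository with the top-level requirements file. This assumes Python 3.10 and a CUDA-capable machine; the exact `torch` build for your platform may differ from the pinned `torch==2.0.1`:

```bash
pip install -r requirements.txt
python demo/text_generation.py --model 01-ai/Yi-6B --max-tokens 256 --streaming
```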