├── .github ├── ISSUE_TEMPLATE │ ├── bug.md │ └── others.md ├── scripts │ └── doc_link_checker.py └── workflows │ ├── lint.yml │ └── release.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── .readthedocs.yaml ├── LICENSE ├── README.md ├── README_zh.md ├── android ├── .gitignore ├── .idea │ ├── .gitignore │ ├── assetWizardSettings.xml │ ├── caches │ │ └── build_file_checksums.ser │ ├── codeStyles │ │ ├── Project.xml │ │ └── codeStyleConfig.xml │ ├── copyright │ │ └── profiles_settings.xml │ ├── dbnavigator.xml │ ├── deploymentTargetDropDown.xml │ ├── dictionaries │ │ └── caochang.xml │ ├── encodings.xml │ ├── gradle.xml │ ├── inspectionProfiles │ │ └── Project_Default.xml │ ├── jarRepositories.xml │ ├── kotlinCodeInsightSettings.xml │ ├── kotlinc.xml │ ├── markdown-navigator.xml │ ├── markdown-navigator │ │ └── profiles_settings.xml │ ├── migrations.xml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── .travis.yml ├── LICENSE ├── README.md ├── build.gradle ├── buildsystem │ ├── debug.keystore │ └── default.properties ├── demo │ ├── .gitignore │ ├── build.gradle │ ├── proguard-rules.pro │ └── src │ │ ├── androidTest │ │ └── java │ │ │ └── com │ │ │ └── carlos │ │ │ └── grabredenvelope │ │ │ └── demo │ │ │ └── ExampleInstrumentedTest.kt │ │ ├── main │ │ ├── AndroidManifest.xml │ │ ├── java │ │ │ └── com │ │ │ │ └── carlos │ │ │ │ └── grabredenvelope │ │ │ │ └── demo │ │ │ │ ├── MainActivity.kt │ │ │ │ ├── SendEmojiService.kt │ │ │ │ ├── SharedPreferenceHelper.kt │ │ │ │ └── WechatConstants.kt │ │ └── res │ │ │ ├── drawable-v24 │ │ │ └── ic_launcher_foreground.xml │ │ │ ├── drawable │ │ │ ├── ic_launcher_background.xml │ │ │ └── logo.xml │ │ │ ├── layout │ │ │ └── activity_main.xml │ │ │ ├── mipmap-anydpi-v26 │ │ │ ├── ic_launcher.xml │ │ │ └── ic_launcher_round.xml │ │ │ ├── values │ │ │ ├── colors.xml │ │ │ ├── strings.xml │ │ │ └── styles.xml │ │ │ └── xml │ │ │ ├── sendemoji_service.xml │ │ │ └── wechat_service.xml │ │ └── test │ │ └── java │ │ └── com │ │ └── carlos │ │ └── grabredenvelope │ │ └── demo │ │ └── ExampleUnitTest.kt ├── gradle.properties ├── gradle │ └── wrapper │ │ ├── gradle-wrapper.jar │ │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat └── settings.gradle ├── app.py ├── config-cpu.ini ├── config-multimodal.ini ├── config-wkteam-example.ini ├── config.ini ├── docs ├── en │ ├── .readthedocs.yaml │ ├── Makefile │ ├── _static │ │ ├── css │ │ │ └── readthedocs.css │ │ ├── image │ │ │ ├── logo.svg │ │ │ └── logo_icon.svg │ │ └── js │ │ │ └── custom.js │ ├── _templates │ │ ├── 404.html │ │ ├── autosummary │ │ │ └── class.rst │ │ └── callable.rst │ ├── conf.py │ ├── cp_origin_docs.sh │ ├── doc_add_readthedocs.md │ ├── doc_architecture.md │ ├── doc_full_dev.md │ ├── doc_knowledge_graph.md │ ├── doctuils.conf │ └── index.rst ├── figures │ ├── convert.py │ ├── huixiangdou.png │ ├── lark-add-ability.png │ ├── lark-arch.jpg │ ├── lark-bot-add-callback.png │ ├── lark-bot-reply.png │ ├── lark-bot-sub.png │ ├── lark-create-app.png │ ├── lark-create-corp.png │ ├── lark-switch-corp.png │ ├── wechat-android-example.jpg │ ├── wechat-android-homepage.jpg │ ├── wechat-dingdong.png │ ├── wechat-puppet-log.png │ ├── wechat-run-state.jpg │ └── wechat-wkteam.jpg └── zh │ ├── .readthedocs.yaml │ ├── Makefile │ ├── _static │ ├── css │ │ └── readthedocs.css │ ├── image │ │ ├── logo.svg │ │ └── logo_icon.svg │ └── js │ │ └── custom.js │ ├── _templates │ ├── 404.html │ ├── autosummary │ │ └── class.rst │ └── callable.rst │ ├── conf.py │ ├── cp_origin_docs.sh │ ├── 
doc_add_lark_group.md │ ├── doc_add_readthedocs.md │ ├── doc_add_wechat_accessibility.md │ ├── doc_add_wechat_commercial.md │ ├── doc_add_wechat_group.md │ ├── doc_architecture.md │ ├── doc_full_dev.md │ ├── doc_knowledge_graph.md │ ├── doc_merge_wechat_group.md │ ├── doc_rag_annotate_sft_data.md │ ├── doc_send_only_lark_group.md │ ├── doctuils.conf │ └── index.rst ├── evaluation ├── README.md ├── README_zh.md ├── end2end │ └── main.py ├── rejection │ ├── build_fs_and_filter.py │ ├── gt_bad.txt │ ├── gt_good.txt │ ├── kg_filter.py │ ├── plot.py │ └── plot_example.png └── rerank │ ├── step0_clean_queries.py │ └── step1_create_candidates.py ├── huixiangdou-inside.md ├── huixiangdou ├── __init__.py ├── api_server.py ├── frontend │ ├── __init__.py │ ├── lark.py │ ├── lark_group.py │ └── wechat.py ├── gradio_ui.py ├── main.py ├── primitive │ ├── __init__.py │ ├── bm250kapi.py │ ├── chunk.py │ ├── embedder.py │ ├── entity.py │ ├── faiss.py │ ├── file_operation.py │ ├── limitter.py │ ├── llm_reranker.py │ ├── query.py │ ├── splitter.py │ ├── token.py │ └── utils.py ├── services │ ├── __init__.py │ ├── config.py │ ├── helper.py │ ├── kg.py │ ├── llm.py │ ├── llm_client.py │ ├── llm_server_hybrid.py │ ├── parallel_pipeline.py │ ├── prompt.py │ ├── retriever.py │ ├── serial_pipeline.py │ ├── session.py │ ├── sg_search.py │ ├── store.py │ └── web_search.py └── version.py ├── logs └── work.txt ├── requirements.txt ├── requirements ├── cpu.txt ├── docs.txt ├── lark-group.txt ├── multimodal.txt └── sft.txt ├── resource ├── bad_questions.json ├── data │ ├── baicaoyuan.md │ ├── qa_pair.csv │ └── tengye.md ├── figures │ ├── inside-middleware.png │ ├── inside-mmpose.jpg │ ├── inside-ncnn-group.jpg │ └── lark-example.png ├── good_questions.json ├── inner-test.ini ├── logo_black.svg ├── logo_blue.svg ├── rag_example_input.json └── rag_example_output.json ├── setup.py ├── sft ├── README.md ├── axolotl_configs │ ├── lora-4B.yml │ ├── qwen2-lora-0.5B.yaml │ ├── qwen2-lora-1.8B.yaml │ ├── qwen2-lora-14B.yaml │ ├── qwen2-lora-32B.yaml │ ├── qwen2-lora-4B-loraplus-epoch4.yaml │ ├── qwen2-lora-4B.yaml │ ├── qwen2-lora-7B.yaml │ ├── qwen2-moe-lora-2.7B.yaml │ ├── qwen2-moe-lora.yaml │ └── qwen2-moe-qlora.yaml ├── convert_to_alpaca.py ├── reconstruct_check_llm.py ├── reconstruct_filter_annotate.py └── reconstruct_wechat_group.py ├── tests ├── __init__.py ├── cp_files.py ├── data.json ├── git-clone.sh ├── test_alles_apin.py ├── test_bce.py ├── test_benepar.py ├── test_bge_reranker.py ├── test_build_milvus_and_filter.py ├── test_clear_kimi_files.py ├── test_dataclass.py ├── test_deepseek.py ├── test_get_issue_comment_pipeline.py ├── test_hf_import_accelerate.py ├── test_intention_prompt.py ├── test_internlm2.py ├── test_kimi.py ├── test_kimi_cr.py ├── test_kimi_passkey.py ├── test_lda │ ├── step0_preprocess.py │ └── step1_countvec.py ├── test_llm_client.py ├── test_m3.py ├── test_milvus_hybrid_retrieval.py ├── test_neo4j.py ├── test_openai.py ├── test_openxlab_android_api.py ├── test_optimum_st.py ├── test_post_android.py ├── test_pyppeteer.py ├── test_query_gradio.py ├── test_qwen_react.py ├── test_relative.py ├── test_reranker.py ├── test_splitter.py ├── test_step1_llm.py ├── test_time.py ├── test_visual_bge.py ├── test_yi.py └── test_yulan.py ├── unittest ├── primitive │ ├── test_bm250api.py │ ├── test_dataclass.py │ ├── test_embedder.py │ ├── test_entity.py │ ├── test_faiss.py │ ├── test_limitter.py │ ├── test_reranker.py │ └── test_splitter.py └── service │ ├── daily_smoke.py │ ├── test_llm.py │ ├── 
test_llm_client.py │ ├── test_llm_server_local.py │ ├── test_llm_server_remote.py │ ├── test_sg_search.py │ └── test_web_search.py └── web ├── README.md ├── __init__.py ├── api ├── __init__.py ├── access.py ├── chat.py ├── integrate.py ├── message.py ├── qalib.py └── statistic.py ├── config ├── __init__.py ├── env.py └── logging.py ├── constant ├── __init__.py └── biz_constant.py ├── front-end ├── .eslintignore ├── .eslintrc.cjs ├── .gitignore ├── .npmrc ├── dist │ ├── assets │ │ ├── bean1-002ba51d.png │ │ └── logo-af340389.png │ ├── index.html │ └── logo.png ├── env │ ├── .env.development │ ├── .env.production │ └── .env.staging ├── index.html ├── mock │ └── db.json ├── package.json ├── public │ └── logo.png ├── readme.md ├── scripts │ ├── alias.ts │ ├── import-to-cdn.ts │ ├── index.ts │ ├── proxy.ts │ └── utils.ts ├── src │ ├── app.tsx │ ├── assets │ │ └── imgs │ │ │ ├── bean.png │ │ │ ├── bean1.png │ │ │ └── logo.png │ ├── components │ │ ├── button │ │ │ ├── button.module.less │ │ │ └── button.tsx │ │ ├── components-portal │ │ │ └── components-portal.tsx │ │ ├── copy-code │ │ │ ├── copy-code.module.less │ │ │ └── copy-code.tsx │ │ ├── global-lang │ │ │ ├── global-lang-context.ts │ │ │ ├── global-lang.tsx │ │ │ └── index.tsx │ │ ├── header │ │ │ ├── header.module.less │ │ │ └── header.tsx │ │ ├── notification │ │ │ ├── emoji-wrapper.tsx │ │ │ ├── notification.module.less │ │ │ ├── notification.tsx │ │ │ └── use-notification.tsx │ │ ├── upload-item │ │ │ ├── index.tsx │ │ │ ├── upload-item.module.less │ │ │ └── upload-item.tsx │ │ └── upload │ │ │ ├── delete-btn.tsx │ │ │ ├── index.tsx │ │ │ ├── upload.module.less │ │ │ └── upload.tsx │ ├── config │ │ ├── auth.ts │ │ ├── base-url.ts │ │ ├── change-page-gray.ts │ │ ├── index.ts │ │ └── log.ts │ ├── hooks │ │ └── useLocale.ts │ ├── interceptors │ │ ├── request.ts │ │ └── response.ts │ ├── layouts │ │ └── header-container-layout │ │ │ ├── header-container-layout.module.less │ │ │ └── header-container-layout.tsx │ ├── locales │ │ ├── en-US.ts │ │ ├── en-US │ │ │ ├── bean-detail.ts │ │ │ ├── components.ts │ │ │ ├── home.ts │ │ │ └── welcome.ts │ │ ├── index.ts │ │ ├── zh-CN.ts │ │ └── zh-CN │ │ │ ├── bean-detail.ts │ │ │ ├── components.ts │ │ │ ├── home.ts │ │ │ └── welcome.ts │ ├── main.tsx │ ├── pages │ │ ├── bean-detail │ │ │ ├── bean-detail.module.less │ │ │ ├── bean-detail.tsx │ │ │ └── components │ │ │ │ ├── chat │ │ │ │ ├── chat.module.less │ │ │ │ ├── chat.tsx │ │ │ │ └── index.tsx │ │ │ │ ├── example │ │ │ │ ├── example.module.less │ │ │ │ ├── example.tsx │ │ │ │ └── index.tsx │ │ │ │ ├── import-docs │ │ │ │ ├── import-docs.module.less │ │ │ │ ├── import-docs.tsx │ │ │ │ └── index.tsx │ │ │ │ ├── integrate-feishu │ │ │ │ ├── index.tsx │ │ │ │ ├── integrate-feishu.module.less │ │ │ │ └── integrate-feishu.tsx │ │ │ │ ├── integrate-wechat │ │ │ │ ├── integrate-wechat.module.less │ │ │ │ └── integrate-wechat.tsx │ │ │ │ └── toggle-search │ │ │ │ ├── index.tsx │ │ │ │ ├── toggle-search.module.less │ │ │ │ └── toggle-search.tsx │ │ └── home │ │ │ ├── home.module.less │ │ │ └── home.tsx │ ├── routes │ │ └── index.tsx │ ├── services │ │ ├── home.ts │ │ └── user.ts │ ├── styles │ │ ├── index.less │ │ ├── mixins.less │ │ └── variables.less │ ├── types.d.ts │ ├── utils │ │ ├── ajax.ts │ │ ├── mlog.ts │ │ └── utils.ts │ └── vite-env.d.ts ├── tsconfig.json ├── tsconfig.node.json └── vite.config.ts ├── main.py ├── middleware ├── __init__.py └── token.py ├── model ├── __init__.py ├── access.py ├── base.py ├── chat.py ├── huixiangdou.py ├── 
integrate.py ├── qalib.py └── statistic.py ├── mq ├── __init__.py └── hxd_task.py ├── orm ├── __init__.py └── redis.py ├── proxy ├── config-template.ini ├── logs │ └── work.txt ├── main.py ├── test.py ├── traslate.txt └── web_worker.py ├── requirements.txt ├── scheduler ├── __init__.py └── huixiangdou_task.py ├── service ├── __init__.py ├── access.py ├── agent.py ├── cache.py ├── chat.py ├── message.py ├── qalib.py └── statistic.py ├── tools ├── README.md ├── dump_redis_query.py ├── get_puyu_model_list.py └── update_fs_max_len.py ├── util ├── __init__.py ├── image.py ├── log.py ├── str.py └── time_util.py └── web-architecture.png /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐛 bug issue 3 | about: submit a bug report +_+ 4 | --- 5 | 6 | ## error log | 日志或报错信息 | ログ 7 | 8 | ## context | 编译/运行环境 | バックグラウンド 9 | 10 | ## how to reproduce | 复现步骤 | 再現方法 11 | 12 | 1. 13 | 2. 14 | 3. 15 | 16 | ## more | 其他 | その他 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/others.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 📝 others 3 | about: discussion, suggestion and question 4 | --- 5 | 6 | ## detail | 详细描述 | 詳細な説明 7 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Check markdown local file link available 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-20.04 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 3.9 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: 3.9 18 | - name: Check doc link 19 | run: | 20 | python .github/scripts/doc_link_checker.py --target README_zh.md 21 | python .github/scripts/doc_link_checker.py --target README.md 22 | python -m pip install pylint interrogate 23 | pylint huixiangdou || true 24 | interrogate huixiangdou -v || true 25 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distributions 📦 to PyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | 8 | jobs: 9 | build-n-publish: 10 | name: Build and publish Python 🐍 distributions 📦 to PyPI 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: '3.9' 18 | - name: Install pypa/build 19 | run: >- 20 | python -m 21 | pip install 22 | build 23 | --user 24 | - name: Build a binary wheel and a source tarball 25 | run: >- 26 | python -m 27 | build 28 | --sdist 29 | --wheel 30 | --outdir dist/ 31 | - name: Publish distribution 📦 to PyPI 32 | if: startsWith(github.ref, 'refs/tags') 33 | uses: pypa/gh-action-pypi-publish@release/v1 34 | with: 35 | user: __token__ 36 | password: ${{ secrets.pypi_password }} 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | workdir/ 2 | write_toml.py 3 | modeling_internlm2.py 4 | config-template.ini 5 | logs/ 6 | logs/work.txt 7 | server.log 8 | **/__pycache__ 9 | badcase.txt 10 | config.ini 11 | resource/prompt.txt 12 | build/ 13 | dist/ 14 | 
huixiangdou.egg-info/ 15 | commit.id 16 | resource/wechat_questions.json 17 | .eggs/ 18 | feature_stores/ 19 | web/qa 20 | redis.conf 21 | nohup.out 22 | *.pyc 23 | start-web.sh 24 | web/proxy/config-template.ini 25 | web/env.sh 26 | logs/work.txt 27 | web/tools/query.jsonl 28 | query.jsonl 29 | tests/history_recv_send.txt 30 | unittest/token.json 31 | wkteam/ 32 | web.log 33 | evaluation/rejection/gt_bad.txt 34 | evaluation/rejection/gt_good.txt 35 | bm25.pkl 36 | repodir/ 37 | logs/work.txt 38 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/PyCQA/flake8 3 | rev: 4.0.1 4 | hooks: 5 | - id: flake8 6 | exclude: ^(__init__.py)$ 7 | args: ["--max-line-length=79", "--exclude=service/__init__.py", "--exclude=tests/*", "--exclude=android/*"] 8 | - repo: https://github.com/PyCQA/isort 9 | rev: 5.11.5 10 | hooks: 11 | - id: isort 12 | - repo: https://github.com/pre-commit/mirrors-yapf 13 | rev: v0.32.0 14 | hooks: 15 | - id: yapf 16 | name: yapf 17 | description: 'Formatter for Python code' 18 | entry: yapf 19 | language: python 20 | args: ['-i', '--style={based_on_style: pep8, column_limit: 79}'] 21 | 22 | - repo: https://github.com/pre-commit/pre-commit-hooks 23 | rev: v4.2.0 24 | hooks: 25 | - id: trailing-whitespace 26 | - id: check-yaml 27 | - id: end-of-file-fixer 28 | - id: requirements-txt-fixer 29 | - id: double-quote-string-fixer 30 | - id: check-merge-conflict 31 | - id: fix-encoding-pragma 32 | args: ["--remove"] 33 | - id: mixed-line-ending 34 | args: ["--fix=lf"] 35 | - repo: https://github.com/executablebooks/mdformat 36 | rev: 0.7.9 37 | hooks: 38 | - id: mdformat 39 | args: ["--number"] 40 | additional_dependencies: 41 | - mdformat-openmmlab 42 | - mdformat_frontmatter 43 | - linkify-it-py 44 | - repo: https://github.com/codespell-project/codespell 45 | rev: v2.1.0 46 | hooks: 47 | - id: codespell 48 | args: ["--skip=third_party/*,*.ipynb,*.proto"] 49 | 50 | - repo: https://github.com/myint/docformatter 51 | rev: v1.4 52 | hooks: 53 | - id: docformatter 54 | args: ["--in-place", "--wrap-descriptions", "79"] 55 | 56 | - repo: https://github.com/open-mmlab/pre-commit-hooks 57 | rev: v0.4.1 58 | hooks: 59 | - id: check-copyright 60 | args: ["huixiangdou"] 61 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the OS, Python version and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.12" 13 | # You can also specify other tool versions: 14 | # nodejs: "19" 15 | # rust: "1.64" 16 | # golang: "1.19" 17 | 18 | # Build documentation in the "docs/" directory with Sphinx 19 | sphinx: 20 | configuration: docs/conf.py 21 | 22 | # Optionally build your docs in additional formats such as PDF and ePub 23 | # formats: 24 | # - pdf 25 | # - epub 26 | 27 | # Optional but recommended, declare the Python requirements required 28 | # to build your documentation 29 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 30 | # python: 31 | # install: 32 | # - requirements: docs/requirements.txt 33 | 
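# The commented template above can be filled in for this repository; a minimal
# sketch (assuming the Sphinx doc dependencies in requirements/docs.txt, which
# docs/en/.readthedocs.yaml and docs/zh/.readthedocs.yaml already reference)
# might look like:
# python:
#   install:
#     - requirements: requirements/docs.txt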
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, tpoisonooo 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /android/.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | .gradle 3 | /local.properties 4 | /.idea/workspace.xml 5 | /.idea/libraries 6 | .DS_Store 7 | /build 8 | 9 | /captures 10 | /buildsystem/keystore.properties 11 | /buildsystem/qianghongbao.jks 12 | /xbd 13 | /app/src/production 14 | app/src/main/java/com/carlos/grabredenvelope/local 15 | /apk 16 | 17 | sentry.properties 18 | /.idea/compiler.xml 19 | -------------------------------------------------------------------------------- /android/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /android/.idea/assetWizardSettings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 37 | 38 | -------------------------------------------------------------------------------- /android/.idea/caches/build_file_checksums.ser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/android/.idea/caches/build_file_checksums.ser -------------------------------------------------------------------------------- /android/.idea/codeStyles/codeStyleConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /android/.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /android/.idea/deploymentTargetDropDown.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /android/.idea/dictionaries/caochang.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /android/.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /android/.idea/gradle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 19 | 20 | -------------------------------------------------------------------------------- /android/.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /android/.idea/kotlinCodeInsightSettings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | -------------------------------------------------------------------------------- /android/.idea/kotlinc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 9 | -------------------------------------------------------------------------------- /android/.idea/markdown-navigator/profiles_settings.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /android/.idea/migrations.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | -------------------------------------------------------------------------------- /android/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /android/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /android/.travis.yml: -------------------------------------------------------------------------------- 1 | language: android 2 | sudo: false # 为了开启基于容器的 Travis CI 任务,让编译效率更高 3 | android: 4 | components: 5 | # Uncomment the lines below if you want to 6 | # use the latest revision of Android SDK Tools 7 | # - platform-tools 8 | # - tools 9 | # The BuildTools version used by your project 10 | - build-tools-28.0.3 11 | # The SDK version used to compile your project 12 | - android-28 13 | # Additional components 14 | - extra-google-google_play_services 15 | - extra-google-m2repository 16 | - extra-android-m2repository 17 | - addon-google_apis-google-19 18 | # Specify at least one system image, 19 | # if you need to run emulator(s) during your tests 20 | - sys-img-armeabi-v7a-android-19 21 | - sys-img-x86-android-17 22 | 23 | before_script: 24 | - mkdir "$ANDROID_HOME/licenses" || true 25 | - echo "24333f8a63b6825ea9c5514f83c2829b004d1fee" > "$ANDROID_HOME/licenses/android-sdk-license" 26 | 27 | script: 28 | - ./gradlew assembleDev 29 | -------------------------------------------------------------------------------- /android/README.md: -------------------------------------------------------------------------------- 1 | # 茴香豆 Android 辅助 2 | 3 | 这是基于 [抢红包 app](https://github.com/xbdcc/GrabRedEnvelope) 软件的二次开发。 4 | 5 | * 移除抢红包功能,重新用于 LLM RAG chat 6 | * 它基于 android 系统 API 工作,原理上可以控制所有 UI(不只是即时通讯软件),风险自行承担 7 | 8 | # License 9 | 10 | 注意软件使用 [GPL 协议](LICENSE)。 11 | -------------------------------------------------------------------------------- /android/build.gradle: -------------------------------------------------------------------------------- 1 | // Top-level build file where you can add configuration options common to all sub-projects/modules. 
2 | buildscript { 3 | ext.kotlin_version = '1.7.20' 4 | repositories { 5 | google() 6 | jcenter() 7 | maven { url 'https://jitpack.io' } 8 | } 9 | dependencies { 10 | classpath 'com.android.tools.build:gradle:7.3.1' 11 | classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version" 12 | classpath 'org.greenrobot:greendao-gradle-plugin:3.3.0' // add plugin 13 | classpath "org.jetbrains.kotlin:kotlin-serialization:$kotlin_version" 14 | 15 | classpath 'io.sentry:sentry-android-gradle-plugin:3.0.1' 16 | 17 | // NOTE: Do not place your application dependencies here; they belong 18 | // in the individual module build.gradle files 19 | } 20 | } 21 | allprojects { 22 | repositories { 23 | google() 24 | jcenter() 25 | maven { url 'https://jitpack.io' } 26 | 27 | maven { url 'https://oss.sonatype.org/content/repositories/snapshots/' } 28 | maven{ url 'https://maven.aliyun.com/repository/public'} 29 | 30 | maven { url "https://kotlin.bintray.com/kotlinx" } 31 | maven { url 'https://dl.bintray.com/xbdcc/maven' } 32 | } 33 | } 34 | task clean(type: Delete) { 35 | delete rootProject.buildDir 36 | } 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /android/buildsystem/debug.keystore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/android/buildsystem/debug.keystore -------------------------------------------------------------------------------- /android/buildsystem/default.properties: -------------------------------------------------------------------------------- 1 | # keystore 2 | keyAlias= androiddebugkey 3 | keyPassword= android 4 | storeFile= ../buildsystem/debug.keystore 5 | storePassword= android 6 | 7 | # other 8 | JPUSH_APPKEY = 9 | 10 | #测试的 11 | UMENG_APPKEY_DEV = 12 | #正式的 13 | UMENG_APPKEY = 14 | 15 | BUGLY_KEY_DEV = 16 | BUGLY_KEY = 17 | 18 | #sentry 19 | SENTRY_DSN_DEV = 20 | SENTRY_DSN = 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /android/demo/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | -------------------------------------------------------------------------------- /android/demo/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'com.android.application' 2 | apply plugin: 'kotlin-android' 3 | android { 4 | compileSdkVersion 33 5 | 6 | defaultConfig { 7 | applicationId "com.carlos.grabredenvelope.demo" 8 | minSdkVersion 18 9 | targetSdkVersion 33 10 | versionCode 1 11 | versionName "1.0" 12 | 13 | testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" 14 | 15 | } 16 | 17 | buildTypes { 18 | release { 19 | minifyEnabled false 20 | proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' 21 | } 22 | } 23 | lint { 24 | disable 'GoogleAppIndexingWarning' 25 | } 26 | namespace 'com.carlos.grabredenvelope.demo' 27 | 28 | } 29 | 30 | dependencies { 31 | implementation fileTree(dir: 'libs', include: ['*.jar']) 32 | implementation 'com.github.xbdcc:cutils:0.0.18' 33 | testImplementation 'junit:junit:4.12' 34 | androidTestImplementation 'androidx.test:runner:1.1.1' 35 | androidTestImplementation 'androidx.test.espresso:espresso-core:3.1.1' 36 | 37 | implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-android:1.0.1' 38 | implementation 'com.squareup.okhttp3:okhttp:4.9.0' 39 | 
implementation 'com.google.code.gson:gson:2.8.9' 40 | implementation 'com.google.android.material:material:1.2.0' 41 | } 42 | 43 | 44 | task hello { 45 | doLast { 46 | println 'Hello world' 47 | } 48 | } -------------------------------------------------------------------------------- /android/demo/proguard-rules.pro: -------------------------------------------------------------------------------- 1 | # Add project specific ProGuard rules here. 2 | # You can control the set of applied configuration files using the 3 | # proguardFiles setting in build.gradle. 4 | # 5 | # For more details, see 6 | # http://developer.android.com/guide/developing/tools/proguard.html 7 | 8 | # If your project uses WebView with JS, uncomment the following 9 | # and specify the fully qualified class name to the JavaScript interface 10 | # class: 11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview { 12 | # public *; 13 | #} 14 | 15 | # Uncomment this to preserve the line number information for 16 | # debugging stack traces. 17 | #-keepattributes SourceFile,LineNumberTable 18 | 19 | # If you keep the line number information, uncomment this to 20 | # hide the original source file name. 21 | #-renamesourcefileattribute SourceFile 22 | -------------------------------------------------------------------------------- /android/demo/src/androidTest/java/com/carlos/grabredenvelope/demo/ExampleInstrumentedTest.kt: -------------------------------------------------------------------------------- 1 | package com.carlos.grabredenvelope.demo 2 | 3 | import androidx.test.InstrumentationRegistry 4 | import androidx.test.runner.AndroidJUnit4 5 | import org.junit.Assert.assertEquals 6 | import org.junit.Test 7 | import org.junit.runner.RunWith 8 | 9 | 10 | /** 11 | * Instrumented test, which will execute on an Android device. 12 | * 13 | * See [testing documentation](http://d.android.com/tools/testing). 14 | */ 15 | @RunWith(AndroidJUnit4::class) 16 | class ExampleInstrumentedTest { 17 | @Test 18 | fun useAppContext() { 19 | // Context of the app under test. 
20 | val appContext = InstrumentationRegistry.getTargetContext() 21 | assertEquals("com.carlos.grabredenvelope.demo", appContext.packageName) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /android/demo/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 14 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 27 | 28 | 29 | 30 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /android/demo/src/main/java/com/carlos/grabredenvelope/demo/SharedPreferenceHelper.kt: -------------------------------------------------------------------------------- 1 | package com.carlos.grabredenvelope.demo 2 | 3 | import android.content.Context 4 | import android.content.SharedPreferences 5 | 6 | class SharedPreferenceHelper(context: Context) { 7 | private val NAME = "huixiangdou" 8 | private val sharedPreferences: SharedPreferences = context.getSharedPreferences(NAME, Context.MODE_PRIVATE) 9 | 10 | fun saveString(key: String, value: String) { 11 | val editor = sharedPreferences.edit() 12 | editor.putString(key, value) 13 | editor.commit() 14 | } 15 | 16 | fun saveBoolean(key: String, value: Boolean) { 17 | val editor = sharedPreferences.edit() 18 | editor.putBoolean(key, value) 19 | editor.commit() 20 | } 21 | 22 | fun getBoolean(key: String, defaultValue: Boolean): Boolean = 23 | sharedPreferences.getBoolean(key, defaultValue) 24 | 25 | fun getString(key: String, defaultValue: String): String = 26 | sharedPreferences.getString(key, defaultValue)!! 27 | } -------------------------------------------------------------------------------- /android/demo/src/main/java/com/carlos/grabredenvelope/demo/WechatConstants.kt: -------------------------------------------------------------------------------- 1 | package com.carlos.grabredenvelope.demo 2 | 3 | import android.util.Log 4 | import com.carlos.cutils.util.LogUtils 5 | 6 | /** 7 | * Created by Carlos on 2019-05-29. 
8 | */ 9 | object WechatConstants { 10 | 11 | var RES_ID_GROUP_NAME = "com.tencent.mm:id/obn" // 群名 12 | var RES_ID_USER_NAME = "com.tencent.mm:id/brc" // 发消息的人 13 | var RES_ID_USER_CONTENT = "com.tencent.mm:id/bkl" // 发的文本内容 14 | var RES_ID_EDIT_TEXT = "com.tencent.mm:id/bkk" // 消息输入框 15 | // 从 8.0.48 开始调整判断逻辑,根据头像坐标定位谁是发送者 16 | var RES_ID_USER_RL = "com.tencent.mm:id/bn1" // 发消息的 rl 17 | var RES_ID_USER_HEADER = "com.tencent.mm:id/bk1" // 头像 18 | 19 | fun setVersion(version: String) { 20 | LogUtils.d("version:$version") 21 | if (version == "8.0.47" || version == "8.0.48" || version == "8.0.49") { 22 | RES_ID_GROUP_NAME = "com.tencent.mm:id/obn" 23 | RES_ID_USER_NAME = "com.tencent.mm:id/brc" 24 | RES_ID_USER_CONTENT = "com.tencent.mm:id/bkl" 25 | RES_ID_EDIT_TEXT = "com.tencent.mm:id/bkk" 26 | } else { 27 | Log.w("msg", "unknown version, maybe incompatible") 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /android/demo/src/main/res/drawable-v24/ic_launcher_foreground.xml: -------------------------------------------------------------------------------- 1 | 7 | 12 | 13 | 19 | 22 | 25 | 26 | 27 | 28 | 34 | 35 | -------------------------------------------------------------------------------- /android/demo/src/main/res/mipmap-anydpi-v26/ic_launcher.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /android/demo/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /android/demo/src/main/res/values/colors.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | #008577 4 | #00574B 5 | #D81B60 6 | 7 | -------------------------------------------------------------------------------- /android/demo/src/main/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | 茴香豆 Android 助手 3 | 这是 [茴香豆](https://github.com/internlm/huixiangdou) 的 Android 部分。\n开启后停留在微信对话界面,将读最新消息,调用大语言模型自动回复主题相关内容。如果是主题无关的闲聊,则不处理。\n如果对你有用,请 star 一下! 
4 | 抢微信红包 5 | 茴香豆 LLM RAG 回复 6 | 7 | 如果对你有用,请给 https://github.com/internlm/huixiangdou 点个 star,这对我们真的很重要qaq 8 | 第一步:打开 openxlab.org.cn 应用中心,搜索“茴香豆”,创建知识库直接获取回调地址;或自行部署开源版茴香豆得到服务器地址。\n输入框里是个可用的地址,仅仅用于调试 app 是否正常,并不会真的回答问题。 9 | http://139.224.198.162:18443/api/v1/message/v1/wechat/Qlyq 10 | 确定 11 | 第二步:点击下方按钮进入辅助功能,找到(茴香豆)开启或关闭 12 | 第三步:直接进入微信(注意 github 文档中微信版本要求)聊天界面,请对方发个消息被动扫描屏幕、或上滑聊天框主动触发扫描。\n群聊或单聊都支持。\n注意不要关闭本应用,它默认后台运行。 13 | tips:助手只回答知识库相关话题,碰到无关闲聊会跳过。adb log 可以看到完整处理日志。 14 | 点我 15 | 调试模式,默认开启。\n确认功能正常后需关闭,否则收到啥都响应 16 | 17 | -------------------------------------------------------------------------------- /android/demo/src/main/res/values/styles.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /android/demo/src/main/res/xml/sendemoji_service.xml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /android/demo/src/main/res/xml/wechat_service.xml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /android/demo/src/test/java/com/carlos/grabredenvelope/demo/ExampleUnitTest.kt: -------------------------------------------------------------------------------- 1 | package com.carlos.grabredenvelope.demo 2 | 3 | import org.junit.Assert.assertEquals 4 | import org.junit.Test 5 | 6 | /** 7 | * Example local unit test, which will execute on the development machine (host). 8 | * 9 | * See [testing documentation](http://d.android.com/tools/testing). 10 | */ 11 | class ExampleUnitTest { 12 | @Test 13 | fun addition_isCorrect() { 14 | assertEquals(4, 2 + 2) 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /android/gradle.properties: -------------------------------------------------------------------------------- 1 | # Project-wide Gradle settings. 2 | 3 | # IDE (e.g. Android Studio) users: 4 | # Gradle settings configured through the IDE *will override* 5 | # any settings specified in this file. 6 | 7 | # For more details on how to configure your build environment visit 8 | # http://www.gradle.org/docs/current/userguide/build_environment.html 9 | 10 | # Specifies the JVM arguments used for the daemon process. 11 | # The setting is particularly useful for tweaking memory settings. 12 | # Default value: -Xmx10248m -XX:MaxPermSize=256m 13 | # org.gradle.jvmargs=-Xmx2048m -XX:MaxPermSize=512m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8 14 | 15 | # When configured, Gradle will run in incubating parallel mode. 16 | # This option should only be used with decoupled projects. 
More details, visit 17 | # http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects 18 | # org.gradle.parallel=true 19 | android.useAndroidX=true 20 | # Automatically convert third-party libraries to use AndroidX 21 | android.enableJetifier=true 22 | # Kotlin code style for this project: "official" or "obsolete": 23 | kotlin.code.style=official 24 | 25 | -------------------------------------------------------------------------------- /android/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/android/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /android/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Wed Feb 26 12:16:26 CST 2020 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-all.zip 7 | -------------------------------------------------------------------------------- /android/settings.gradle: -------------------------------------------------------------------------------- 1 | include ':demo' -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | # This is a start-up file for deploying HuixiangDou-WEB on OpenXLab-APPs(https://openxlab.org.cn/apps) 2 | # Some environment variables need to be set before starting up: 3 | # JWT_SECRET= 4 | # REDIS_HOST= 5 | # REDIS_PASSWORD= 6 | # SERVER_PORT=7860 (when deploy on OpenXLab-APPs, this SERVER_PORT should be 7860) 7 | 8 | import os 9 | 10 | # launch the HuixiangDou-WEB 11 | os.system('python -m web.main') 12 | -------------------------------------------------------------------------------- /docs/en/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | # Set the version of Python and other tools you might need 4 | build: 5 | os: ubuntu-22.04 6 | tools: 7 | python: "3.8" 8 | 9 | formats: 10 | - epub 11 | 12 | sphinx: 13 | configuration: docs/en/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: requirements/docs.txt 18 | -------------------------------------------------------------------------------- /docs/en/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/en/_static/css/readthedocs.css: -------------------------------------------------------------------------------- 1 | .header-logo { 2 | background-image: url("../image/logo.svg"); 3 | background-size: 444px 93px; 4 | height: 93px; 5 | width: 444px; 6 | } 7 | 8 | @media screen and (min-width: 1100px) { 9 | .header-logo { 10 | top: -25px; 11 | } 12 | } 13 | 14 | pre { 15 | white-space: pre; 16 | } 17 | 18 | @media screen and (min-width: 2000px) { 19 | .pytorch-content-left { 20 | width: 1200px; 21 | margin-left: 30px; 22 | } 23 | article.pytorch-article { 24 | max-width: 1200px; 25 | } 26 | .pytorch-breadcrumbs-wrapper { 27 | width: 1200px; 28 | } 29 | .pytorch-right-menu.scrolling-fixed { 30 | position: fixed; 31 | top: 45px; 32 | left: 1580px; 33 | } 34 | } 35 | 36 | 37 | article.pytorch-article section code { 38 | padding: .2em .4em; 39 | background-color: #f3f4f7; 40 | border-radius: 5px; 41 | } 42 | 43 | /* Disable the change in tables */ 44 | article.pytorch-article section table code { 45 | padding: unset; 46 | background-color: unset; 47 | border-radius: unset; 48 | } 49 | 50 | table.autosummary td { 51 | width: 50% 52 | } 53 | 54 | img.align-center { 55 | display: block; 56 | margin-left: auto; 57 | margin-right: auto; 58 | } 59 | 60 | article.pytorch-article p.rubric { 61 | font-weight: bold; 62 | } 63 | -------------------------------------------------------------------------------- /docs/en/_static/js/custom.js: -------------------------------------------------------------------------------- 1 | var collapsedSections = []; 2 | 3 | $(document).ready(function () { 4 | $('.model-summary').DataTable({ 5 | "stateSave": false, 6 | "lengthChange": false, 7 | "pageLength": 20, 8 | "order": [] 9 | }); 10 | }); 11 | -------------------------------------------------------------------------------- /docs/en/_templates/404.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block body %} 4 | 5 |

Page Not Found 6 | 7 | The page you are looking for cannot be found. 8 | 9 | 10 | If you just switched documentation versions, it is likely that the page you were on is moved. You can look for it in 11 | the content table left, or go to the homepage. 12 |
13 | 17 | 18 | {% endblock %} 19 | -------------------------------------------------------------------------------- /docs/en/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | 11 | .. 12 | autogenerated from _templates/autosummary/class.rst 13 | note it does not have :inherited-members: 14 | -------------------------------------------------------------------------------- /docs/en/_templates/callable.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | :special-members: __call__ 11 | 12 | .. 13 | autogenerated from _templates/callable.rst 14 | note it does not have :inherited-members: 15 | -------------------------------------------------------------------------------- /docs/en/cp_origin_docs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copy *.md files from docs/ if it doesn't have a Chinese translation 4 | 5 | for filename in $(find ../zh/ -name '*.md' -printf "%P\n"); 6 | do 7 | mkdir -p $(dirname $filename) 8 | cp -n ../zh/$filename ./$filename 9 | cp -n ../../README.md ./copy_quickstart.md 10 | cp -n ../../evaluation/README.md ./copy_precision.md 11 | done 12 | -------------------------------------------------------------------------------- /docs/en/doctuils.conf: -------------------------------------------------------------------------------- 1 | [html writers] 2 | table_style: colwidths-auto 3 | -------------------------------------------------------------------------------- /docs/en/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to HuixiangDou documentation! 2 | ========================================== 3 | 4 | Getting started with HuixiangDou 5 | ------------------------------- 6 | 7 | To help you quickly familiarized with it, we recommend you to walk through the following documents in order: 8 | 9 | 1. Run the basic version according to the README. 10 | 2. Refer to the advanced tutorial to enhance the overall effect. 11 | 12 | We warmly welcome users' PRs and Issues! 13 | 14 | .. _QuickStart: 15 | .. toctree:: 16 | :maxdepth: 1 17 | :caption: Quick Start 18 | 19 | copy_quickstart.md 20 | 21 | .. _AdvanceConfiguration: 22 | .. toctree:: 23 | :maxdepth: 1 24 | :caption: Advance Configuration 25 | 26 | copy_precision.md 27 | doc_full_dev.md 28 | doc_knowledge_graph.md 29 | doc_architecture.md 30 | doc_rag_annotate_sft_data.md 31 | 32 | .. _readthedocs: 33 | .. toctree:: 34 | :maxdepth: 1 35 | :caption: readthedocs Integration 36 | 37 | doc_add_readthedocs.md 38 | 39 | .. _IMApplicaion: 40 | .. toctree:: 41 | :maxdepth: 1 42 | :caption: IM Applicaion Integration 43 | 44 | doc_add_wechat_accessibility.md 45 | doc_add_wechat_commercial.md 46 | doc_add_wechat_group.md 47 | doc_add_lark_group.md 48 | doc_send_only_lark_group.md 49 | 50 | .. _Others: 51 | .. 
toctree:: 52 | :maxdepth: 1 53 | :caption: Others 54 | 55 | Indexes & Tables 56 | ================== 57 | 58 | * :ref:`genindex` 59 | * :ref:`search` 60 | -------------------------------------------------------------------------------- /docs/figures/convert.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | import cv2 5 | 6 | # 列出当前目录下的所有png图片 7 | for png_file in glob.glob('*.png'): 8 | # 读取图片 9 | img = cv2.imread(png_file) 10 | # 生成新的文件名 11 | jpg_file = os.path.splitext(png_file)[0] + '.jpg' 12 | # 写入jpg图片,设置质量为90 13 | cv2.imwrite(jpg_file, img, [int(cv2.IMWRITE_JPEG_QUALITY), 90]) 14 | -------------------------------------------------------------------------------- /docs/figures/huixiangdou.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/huixiangdou.png -------------------------------------------------------------------------------- /docs/figures/lark-add-ability.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-add-ability.png -------------------------------------------------------------------------------- /docs/figures/lark-arch.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-arch.jpg -------------------------------------------------------------------------------- /docs/figures/lark-bot-add-callback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-bot-add-callback.png -------------------------------------------------------------------------------- /docs/figures/lark-bot-reply.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-bot-reply.png -------------------------------------------------------------------------------- /docs/figures/lark-bot-sub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-bot-sub.png -------------------------------------------------------------------------------- /docs/figures/lark-create-app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-create-app.png -------------------------------------------------------------------------------- /docs/figures/lark-create-corp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-create-corp.png -------------------------------------------------------------------------------- /docs/figures/lark-switch-corp.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-switch-corp.png -------------------------------------------------------------------------------- /docs/figures/wechat-android-example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/wechat-android-example.jpg -------------------------------------------------------------------------------- /docs/figures/wechat-android-homepage.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/wechat-android-homepage.jpg -------------------------------------------------------------------------------- /docs/figures/wechat-dingdong.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/wechat-dingdong.png -------------------------------------------------------------------------------- /docs/figures/wechat-puppet-log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/wechat-puppet-log.png -------------------------------------------------------------------------------- /docs/figures/wechat-run-state.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/wechat-run-state.jpg -------------------------------------------------------------------------------- /docs/figures/wechat-wkteam.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/wechat-wkteam.jpg -------------------------------------------------------------------------------- /docs/zh/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | # Set the version of Python and other tools you might need 4 | build: 5 | os: ubuntu-22.04 6 | tools: 7 | python: "3.8" 8 | 9 | formats: 10 | - epub 11 | 12 | sphinx: 13 | configuration: docs/zh/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: requirements/docs.txt 18 | -------------------------------------------------------------------------------- /docs/zh/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/zh/_static/css/readthedocs.css: -------------------------------------------------------------------------------- 1 | .header-logo { 2 | background-image: url("../image/logo.svg"); 3 | background-size: 444px 93px; 4 | height: 93px; 5 | width: 444px; 6 | } 7 | 8 | @media screen and (min-width: 1100px) { 9 | .header-logo { 10 | top: -25px; 11 | } 12 | } 13 | 14 | pre { 15 | white-space: pre; 16 | } 17 | 18 | @media screen and (min-width: 2000px) { 19 | .pytorch-content-left { 20 | width: 1200px; 21 | margin-left: 30px; 22 | } 23 | article.pytorch-article { 24 | max-width: 1200px; 25 | } 26 | .pytorch-breadcrumbs-wrapper { 27 | width: 1200px; 28 | } 29 | .pytorch-right-menu.scrolling-fixed { 30 | position: fixed; 31 | top: 45px; 32 | left: 1580px; 33 | } 34 | } 35 | 36 | 37 | article.pytorch-article section code { 38 | padding: .2em .4em; 39 | background-color: #f3f4f7; 40 | border-radius: 5px; 41 | } 42 | 43 | /* Disable the change in tables */ 44 | article.pytorch-article section table code { 45 | padding: unset; 46 | background-color: unset; 47 | border-radius: unset; 48 | } 49 | 50 | table.autosummary td { 51 | width: 50% 52 | } 53 | 54 | img.align-center { 55 | display: block; 56 | margin-left: auto; 57 | margin-right: auto; 58 | } 59 | 60 | article.pytorch-article p.rubric { 61 | font-weight: bold; 62 | } 63 | -------------------------------------------------------------------------------- /docs/zh/_static/js/custom.js: -------------------------------------------------------------------------------- 1 | var collapsedSections = []; 2 | 3 | $(document).ready(function () { 4 | $('.model-summary').DataTable({ 5 | "stateSave": false, 6 | "lengthChange": false, 7 | "pageLength": 20, 8 | "order": [] 9 | }); 10 | }); 11 | -------------------------------------------------------------------------------- /docs/zh/_templates/404.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block body %} 4 | 5 |

Page Not Found 6 | 7 | The page you are looking for cannot be found. 8 | 9 | 10 | If you just switched documentation versions, it is likely that the page you were on is moved. You can look for it in 11 | the content table left, or go to the homepage. 12 |
13 | 17 | 18 | {% endblock %} 19 | -------------------------------------------------------------------------------- /docs/zh/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | 11 | .. 12 | autogenerated from _templates/autosummary/class.rst 13 | note it does not have :inherited-members: 14 | -------------------------------------------------------------------------------- /docs/zh/_templates/callable.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | :special-members: __call__ 11 | 12 | .. 13 | autogenerated from _templates/callable.rst 14 | note it does not have :inherited-members: 15 | -------------------------------------------------------------------------------- /docs/zh/cp_origin_docs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copy *.md files from docs/ if it doesn't have a Chinese translation 4 | 5 | for filename in $(find ../en/ -name '*.md' -printf "%P\n"); 6 | do 7 | mkdir -p $(dirname $filename) 8 | cp -n ../en/$filename ./$filename 9 | cp -n ../../README_zh.md ./copy_quickstart.md 10 | cp -n ../../evaluation/README_zh.md ./copy_precision.md 11 | done 12 | -------------------------------------------------------------------------------- /docs/zh/doc_add_wechat_accessibility.md: -------------------------------------------------------------------------------- 1 | # 集成个人微信 android 免费版示例 2 | 3 | 在之前的 [魔改 wechaty 方案](./doc_add_wechat_group.md) 我们一共介绍了 7 种方法。这次提供第 5 种方案的具体实现,基于 Android Accessibility 读写前端,和“抢红包”软件原理相同。 4 | 5 | 由于没有 Appium/Selenium 框架做中间商,比较稳定。 6 | 7 | ## 一、演示视频 8 | 9 | 这里是 BiliBili 2 分钟演示视频 https://www.bilibili.com/video/BV1S2421N7mn/ 10 | 11 | ## 二、准备工作 12 | 13 | - 一个 android 手机,对性能和系统版本都没要求 14 | - 微信版本 8.0.47 / 8.0.48 / 8.0.49,其他版本的 view id 可能变化。[代码里](https://github.com/InternLM/HuixiangDou/blob/main/android/demo/src/main/java/com/carlos/grabredenvelope/demo/WechatConstants.kt)只测了这些版本的 id 15 | - 一个测试用的微信号 16 | 17 | ## 三、运行 18 | 19 | 打开 [OpenXLab 茴香豆 web 端](https://openxlab.org.cn/apps/detail/tpoisonooo/huixiangdou-web) ,创建自己的知识库。 20 | 21 | 这里是个能直接使用的账号密码: 22 | 23 | ```bash 24 | 账号: 20230322发版测试 25 | 密码: 123 26 | ``` 27 | 28 | 点击 “零开发集成微信”,会显示你的服务端回调地址和教程。例如 `http://139.224.198.162:18443/api/v1/message/v1/wechat/oZGh` 29 | 30 | 从 [这里](https://github.com/InternLM/HuixiangDou/releases) 下载编译好的 apk,填入回调地址,开启服务,跳入微信。 31 | 32 | 33 | 34 | 现在这个效果,记得关掉手机自动熄屏: 35 | 36 | 37 | 38 | ## FAQ 39 | 40 | 1. 源码在哪儿? 41 | 42 | 在 repo 的 android 目录,需要 kotlin+java 开发能力 43 | 44 | 2. 我的微信版本更高/更低怎么办? 45 | 46 | 一、【不想开发】去微信官网找个 8.0.47 版本安装 47 | 48 | 二、【愿意开发】用 DDMS dump 一下 view 结构;然后打开源码的 `WechatConstants.kt` 文件,把你的版本的 id 填进去,build 即可 49 | 50 | 改完请发个 PR 51 | 52 | ```java 53 | if (version == "8.0.47") { 54 | RES_ID_GROUP_NAME.. 55 | RES_ID_USER_NAME.. 56 | RES_ID_USER_CONTENT.. 57 | RES_ID_EDIT_TEXT.. 58 | } else if ( 你的版本 ) { 59 | .. 
60 | } else { 61 | Log.w("msg", "unknown version, maybe incompatible") 62 | } 63 | ``` 64 | -------------------------------------------------------------------------------- /docs/zh/doc_full_dev.md: -------------------------------------------------------------------------------- 1 | # 高精度配置参考 2 | 3 | 标准版可能效果不佳,可开启以下特性来提升效果。配置模板请参照 [config-advanced.ini](../../config-advanced.ini) 4 | 5 | 1. repo 搜索增强 6 | 7 | 此特性适合处理疑难问题,需要基础开发能力调整 prompt。 8 | 9 | - 点击 [sourcegraph-account-access](https://sourcegraph.com/users/tpoisonooo/settings/tokens) 获取 token 10 | 11 | ```shell 12 | # open https://github.com/sourcegraph/src-cli#installation 13 | sudo curl -L https://sourcegraph.com/.api/src-cli/src_linux_amd64 -o /usr/local/bin/src && chmod +x /usr/local/bin/src 14 | 15 | # 开启 sg 搜索,并且把 token 填入 config.ini 16 | [worker] 17 | enable_sg_search = 1 # first enable sg search 18 | .. 19 | [sg_search] 20 | .. 21 | src_access_token = "${YOUR_ACCESS_TOKEN}" 22 | ``` 23 | 24 | - 编辑 repo 的名字和简介,我们以 opencompass 为例 25 | 26 | ```ini 27 | # config.ini 28 | # add your repo here, we just take opencompass and lmdeploy as example 29 | [sg_search.opencompass] 30 | github_repo_id = "open-compass/opencompass" 31 | introduction = "用于评测大型语言模型(LLM).." 32 | ``` 33 | 34 | - 使用 `python3 -m huixiangdou.service.sg_search` 单测,返回内容应包含 opencompass 源码和文档 35 | 36 | ```shell 37 | python3 -m huixiangdou.service.sg_search 38 | .. 39 | "filepath": "opencompass/datasets/longbench/longbench_trivia_qa.py", 40 | "content": "from datasets import Dataset.. 41 | ``` 42 | 43 | 运行 `main.py`,茴香豆将在合适的时机,启用搜索增强。 44 | 45 | 2. 调参 46 | 47 | 针对业务场景调参往往不可避免。 48 | 49 | - 参照 [data.json](../../tests/data.json) 增加真实数据,运行 [test_intention_prompt.py](../../tests/test_intention_prompt.py) 得到合适的 prompt 和阈值,更新进 [prompt.py](../../huixiangdou/service/prompt.py) 50 | - 根据模型支持的最大长度,调整[搜索结果个数](../../huixiangdou/service/serial_pipeline.py) 51 | - 按照场景偏好,修改 config.ini 中的 `web_search.domain_partial_order`,即搜索结果偏序 52 | -------------------------------------------------------------------------------- /docs/zh/doc_knowledge_graph.md: -------------------------------------------------------------------------------- 1 | # 混合知识图谱和稠密检索 2 | 3 | 通过混合知识图谱和稠密检索,拒答 F1 提升约 2 个点,它的本质是**给高频词加权**。介绍已同步到[飞书](https://aicarrier.feishu.cn/docx/F51pduYyMof8syxKe5RchiU1nIN) 和[知乎](https://zhuanlan.zhihu.com/p/709589834)。 4 | 5 | 本方案对老版本完美兼容,以下是完整操作步骤。 6 | 7 | ## 一、建立知识图谱 8 | 9 | 为降低成本,我们使用 silicon cloud qwen-1.5-110B 提取实体词, `config.ini` 已支持 silicon cloud,修改片段如下: 10 | 11 | ```bash 12 | [llm.server] 13 | .. 14 | remote_type = "siliconcloud" 15 | remote_api_key = "sk-ducerXXXXX" 16 | remote_llm_max_text_length = 40000 17 | remote_llm_model = "alibaba/Qwen1.5-110B-Chat" 18 | rpm = 1000 19 | ``` 20 | 21 | 假设知识库仍在 repodir 目录下,先建立知识图谱。 22 | 完成后, `workdir/kg` 下有 jsonl 和 pickle 文件,可简单测试 query 效果 23 | 24 | ```bash 25 | # 大约 2 小时 26 | python3 -m huixiangdou.service.kg --build 27 | python3 -m huixiangdou.service.kg --query 如何安装mmpose? 28 | .. 29 | +-----------------+-------+------------------------+---------------------------+ 30 | | Query | State | Part of Reply | References | 31 | +=================+=======+========================+===========================+ 32 | | 如何安装mmpose? | 0 | repodir/mmpose/READM.. | | 33 | | | | |
| 34 | | | | | 12 | 13 | ## 部署说明 14 | 15 | 如果仅想转发消息,**不需要 GPU**、**不需要 redis**、**需要公网 ip** 16 | 17 | 1. 打开 [wkteam](http://121.229.29.88:6327) 注册试用版 18 | 19 | 2. 填写 [config.ini](../../config.ini) 中的 `frontend.wechat_wkteam` 部分 20 | 21 | 例如: 22 | 23 | ```text 24 | [frontend.wechat_wkteam] 25 | account = "wkteam手机号" 26 | password = "wkteam密码" 27 | proxy = 3 # 上海地区 28 | dir = "wkteam" 29 | callback_ip = "你的公网 IP" 30 | callback_port = 9528 31 | 32 | # !!! `proxy` is very import parameter, it's your account location 33 | # 1:北京 2:天津 3:上海 4:重庆 5:河北 34 | # 6:山西 7:江苏 8:浙江 9:安徽 10:福建 35 | # 11:江西 12:山东 13:河南 14:湖北 15:湖南 36 | # 16:广东 17:海南 18:四川 20:陕西 37 | # bad proxy would cause account deactivation !!! 38 | ``` 39 | 40 | 4. 运行 `wechat.py`,微信扫描二维码登录,然后注册 callback 地址。 41 | 42 | ```text 43 | python3 huixiangdou/frontend/wechat.py --login --forward 44 | ``` 45 | 46 | 若运行成功,会看到以下日志,同时 `wkteam/license.json` 会记录完整的账号信息。 47 | 48 | ```bash 49 | # 设置 callback 地址日志 50 | .. set callback url http://xxx/callback 51 | .. {"code":"1000","message":"设置成功","data":null} 52 | .. login success, all license saved to wkteam/license.json 53 | 54 | # 保存账号信息 55 | cat wkteam/license.json 56 | { 57 | "auth": "xxx", 58 | "wId": "xxx", 59 | "wcId": "wxid_xxx", 60 | "qrCodeUrl": "http://wxapii.oosxxx" 61 | } 62 | ``` 63 | 64 | 5. 获取 GroupID。在你想要转发的群里发条消息,查看日志或 `wkteam/wechat_message.jsonl` 里的 GroupID 字段。填入 `config.ini`,例如: 65 | 66 | ```text 67 | [frontend.wechat_wkteam.43925126702] 68 | name = "茴香豆群(大暑)" 69 | introduction = "github https://github.com/InternLM/HuixiangDou 用户体验群" 70 | ``` 71 | 72 | 6. 重新运行脚本 73 | ```text 74 | python3 huixiangdou/frontend/wechat.py --login --forward 75 | ``` 76 | -------------------------------------------------------------------------------- /docs/zh/doc_send_only_lark_group.md: -------------------------------------------------------------------------------- 1 | # 单向发到飞书群 2 | 3 | 这个功能,主要是测试 pipeline 全流程畅通。单向发送的实用意义有限。 4 | 5 | 点击[创建飞书自定义机器人](https://open.feishu.cn/document/client-docs/bot-v3/add-custom-bot),获取回调 WEBHOOK_URL,填写到 config.ini 6 | 7 | ```ini 8 | # config.ini 9 | .. 10 | [frontend] 11 | type = "lark" 12 | webhook_url = "${YOUR-LARK-WEBHOOK-URL}" 13 | ``` 14 | 15 | 运行。结束后,技术助手的答复将**单向**发送到飞书群。 16 | 17 | ```shell 18 | python3 -m huixiangdou.main 19 | ``` 20 | 21 | 22 | -------------------------------------------------------------------------------- /docs/zh/doctuils.conf: -------------------------------------------------------------------------------- 1 | [html writers] 2 | table_style: colwidths-auto 3 | -------------------------------------------------------------------------------- /docs/zh/index.rst: -------------------------------------------------------------------------------- 1 | 欢迎来到 HuixiangDou 进阶说明! 2 | ========================================== 3 | 4 | HuixiangDou 上手路线 5 | ------------------------------- 6 | 7 | 我们推荐以下流程: 8 | 9 | 1. 按照 README 运行基础版本 10 | 2. 参考进阶教程,提升整体效果 11 | 12 | 我们非常欢迎用户的 PR 和 Issue ! 13 | 14 | .. _快速运行: 15 | .. toctree:: 16 | :maxdepth: 1 17 | :caption: 基础入门 18 | 19 | copy_quickstart.md 20 | 21 | .. _进阶参考: 22 | .. toctree:: 23 | :maxdepth: 1 24 | :caption: 配置说明 25 | 26 | copy_precision.md 27 | doc_full_dev.md 28 | doc_knowledge_graph.md 29 | doc_rag_annotate_sft_data.md 30 | doc_architecture.md 31 | 32 | .. _接入readthedocs: 33 | .. toctree:: 34 | :maxdepth: 1 35 | :caption: 接入readthedocs 36 | 37 | doc_add_readthedocs.md 38 | 39 | .. _接入即时通讯软件: 40 | .. 
toctree:: 41 | :maxdepth: 1 42 | :caption: 接入即时通讯软件 43 | 44 | doc_add_wechat_accessibility.md 45 | doc_add_wechat_commercial.md 46 | doc_add_wechat_group.md 47 | doc_add_lark_group.md 48 | doc_send_only_lark_group.md 49 | doc_merge_wechat_group.md 50 | 51 | 索引与表格 52 | ================== 53 | 54 | * :ref:`genindex` 55 | * :ref:`search` 56 | -------------------------------------------------------------------------------- /evaluation/rejection/gt_bad.txt: -------------------------------------------------------------------------------- 1 | 对你课题的目标定义一下就可以了 -------------------------------------------------------------------------------- /evaluation/rejection/gt_good.txt: -------------------------------------------------------------------------------- 1 | 大佬们,请问如何安装mmcv? -------------------------------------------------------------------------------- /evaluation/rejection/plot_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/evaluation/rejection/plot_example.png -------------------------------------------------------------------------------- /evaluation/rerank/step0_clean_queries.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | 5 | from loguru import logger 6 | 7 | pattern = re.compile(r'^[A-Za-z0-9]+$') 8 | 9 | pwd = os.path.dirname(__file__) 10 | query_log = os.path.join(pwd, '..', 'query.log') 11 | 12 | 13 | def save(_id, sentence): 14 | if _id not in queries: 15 | queries[_id] = [sentence] 16 | else: 17 | queries[_id].append(sentence) 18 | 19 | 20 | queries = dict() 21 | with open(query_log) as f: 22 | query = None 23 | 24 | _id = None 25 | sentence = '' 26 | for line in f: 27 | line = line.strip() 28 | if len(line) < 5: 29 | continue 30 | 31 | if line[4] == ' ' and pattern.match( 32 | line[0:4]) and _id is not None and sentence != '': 33 | save(_id, sentence) 34 | _id = line[0:4] 35 | sentence = line[4:] 36 | else: 37 | if line[4] == ' ' and pattern.match(line[0:4]): 38 | _id = line[0:4] 39 | sentence = line[4:] 40 | else: 41 | sentence += '\n' 42 | sentence += line 43 | 44 | save(_id, sentence) 45 | 46 | counter = 0 47 | for _id in queries: 48 | with open(os.path.join(pwd, '..', 'queries', _id) + '.txt', 'a') as f: 49 | values = map(lambda x: x.strip(), queries[_id]) 50 | values = list(set(values)) 51 | counter += len(values) 52 | json_str = json.dumps(values, ensure_ascii=False) 53 | f.write(r'{}'.format(json_str)) 54 | f.write('\n') 55 | 56 | logger.info(counter) 57 | -------------------------------------------------------------------------------- /huixiangdou-inside.md: -------------------------------------------------------------------------------- 1 | # HuixiangDou Inside 2 | 3 | | ID | Environment | IM Application | Description | Screen Shortcut | 4 | | --- | --------------------------- | -------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------- | 5 | | 1 | openmmlab user group | wechat | reply user question | | 6 | | 2 | ncnn contributor group | wechat | explain software and hardware terminologies and pretending to be human | | 7 | | 3 | inner middleware user group | lark | reply user question | | 8 | -------------------------------------------------------------------------------- /huixiangdou/__init__.py: 
-------------------------------------------------------------------------------- 1 | 2 | """import module.""" 3 | # only import frontend when needed, not here 4 | from .services import ErrorCode # noqa E401 5 | from .services import FeatureStore # noqa E401 6 | from .services import WebSearch # noqa E401 7 | from .services import SerialPipeline, ParallelPipeline # no E401 8 | from .services import build_reply_text # noqa E401 9 | from .version import __version__ 10 | -------------------------------------------------------------------------------- /huixiangdou/frontend/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | """IM proxy.""" 3 | from .lark import Lark # noqa E401 4 | from .lark_group import is_revert_command # noqa E401 5 | from .lark_group import revert_from_lark_group, send_to_lark_group # noqa E401 6 | from .wechat import WkteamManager # noqa E401 7 | -------------------------------------------------------------------------------- /huixiangdou/primitive/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | """primitive module.""" 3 | from .chunk import Chunk # noqa E401 4 | from .embedder import Embedder # noqa E401 5 | from .faiss import Faiss # noqa E401 6 | from .file_operation import FileName, FileOperation # noqa E401 7 | from .llm_reranker import LLMReranker # noqa E401 8 | from .query import Query 9 | from .splitter import ( 10 | CharacterTextSplitter, # noqa E401 11 | ChineseRecursiveTextSplitter, 12 | MarkdownHeaderTextSplitter, 13 | MarkdownTextRefSplitter, 14 | RecursiveCharacterTextSplitter, 15 | nested_split_markdown, split_python_code) 16 | from .limitter import RPM, TPM 17 | from .bm250kapi import BM25Okapi 18 | from .entity import NamedEntity2Chunk 19 | from .utils import always_get_an_event_loop 20 | -------------------------------------------------------------------------------- /huixiangdou/primitive/chunk.py: -------------------------------------------------------------------------------- 1 | 2 | from dataclasses import dataclass, field 3 | 4 | 5 | @dataclass 6 | class Chunk(): 7 | """Class for storing a piece of text and associated metadata. 8 | 9 | Example: 10 | 11 | .. code-block:: python 12 | 13 | from huixiangdou.primitive import Chunk 14 | 15 | chunk = Chunk( 16 | content_or_path="Hello, world!", 17 | metadata={"source": "https://example.com"} 18 | ) 19 | """ 20 | content_or_path: str = '' 21 | metadata: dict = field(default_factory=dict) 22 | modal: str = 'text' 23 | 24 | def __post_init__(self): 25 | if self.modal not in ['text', 'image', 'audio', 'qa']: 26 | raise ValueError( 27 | f'Invalid modal: {self.modal}. Allowed values are: `text`, `image`, `audio`, `qa`' 28 | ) 29 | 30 | def __str__(self) -> str: 31 | """Override __str__ to restrict it to content_or_path and metadata.""" 32 | # The format matches pydantic format for __str__. 33 | # 34 | # The purpose of this change is to make sure that user code that 35 | # feeds Document objects directly into prompts remains unchanged 36 | # due to the addition of the id field (or any other fields in the future). 37 | # 38 | # This override will likely be removed in the future in favor of 39 | # a more general solution of formatting content directly inside the prompts. 
40 | if self.metadata: 41 | return f"modal='{self.modal}' content_or_path='{self.content_or_path}' metadata={self.metadata}" 42 | else: 43 | return f"modal='{self.modal}' content_or_path='{self.content_or_path}'" 44 | 45 | def __repr__(self) -> str: 46 | return self.__str__() 47 | -------------------------------------------------------------------------------- /huixiangdou/primitive/token.py: -------------------------------------------------------------------------------- 1 | import tiktoken 2 | import re 3 | 4 | ENCODER = None 5 | 6 | # modified from https://github.com/HKUDS/LightRAG 7 | def encode_string(content: str, model_name: str = "gpt-4o"): 8 | global ENCODER 9 | if ENCODER is None: 10 | tiktoken.get_encoding("cl100k_base") 11 | ENCODER = tiktoken.encoding_for_model(model_name) 12 | tokens = ENCODER.encode(content) 13 | return tokens 14 | 15 | 16 | def decode_tokens(tokens: list[int], model_name: str = "gpt-4o"): 17 | global ENCODER 18 | if ENCODER is None: 19 | ENCODER = tiktoken.encoding_for_model(model_name) 20 | content = ENCODER.decode(tokens) 21 | return content 22 | 23 | 24 | ZH_CN_CHAR_PATTERN = None 25 | EN_CHAR_PATTERN = None 26 | 27 | 28 | def judge_language(text): 29 | # 计算中文字符的数量 30 | global ZH_CN_CHAR_PATTERN 31 | if ZH_CN_CHAR_PATTERN is None: 32 | ZH_CN_CHAR_PATTERN = re.compile(r'[\u4e00-\u9fff]') 33 | 34 | global EN_CHAR_PATTERN 35 | if EN_CHAR_PATTERN is None: 36 | EN_CHAR_PATTERN = re.compile(r'[a-zA-Z]') 37 | 38 | chinese_count = len(ZH_CN_CHAR_PATTERN.findall(text)) 39 | english_count = len(EN_CHAR_PATTERN.findall(text)) 40 | 41 | # 判断中英文的比例 42 | if chinese_count > english_count: 43 | return "zh_cn" 44 | else: 45 | return "en" -------------------------------------------------------------------------------- /huixiangdou/primitive/utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from loguru import logger 3 | 4 | def always_get_an_event_loop() -> asyncio.AbstractEventLoop: 5 | try: 6 | loop = asyncio.get_running_loop() 7 | except RuntimeError: 8 | logger.info("Creating a new event loop in a sub-thread.") 9 | loop = asyncio.new_event_loop() 10 | asyncio.set_event_loop(loop) 11 | return loop -------------------------------------------------------------------------------- /huixiangdou/services/__init__.py: -------------------------------------------------------------------------------- 1 | """LLM service module.""" 2 | from .config import (feature_store_base_dir, redis_host, redis_passwd, 3 | redis_port) 4 | from .helper import (ErrorCode, QueryTracker, Queue, TaskCode, 5 | build_reply_text, check_str_useful, histogram, kimi_ocr, 6 | multimodal, parse_json_str) 7 | from .kg import KnowledgeGraph # noqa E401 8 | from .llm import LLM 9 | from .web_search import WebSearch # noqa E401 10 | from .serial_pipeline import SerialPipeline 11 | from .parallel_pipeline import ParallelPipeline 12 | # Import FeatureStore at the end to avoid circular imports 13 | from .store import FeatureStore # noqa E401 14 | -------------------------------------------------------------------------------- /huixiangdou/services/config.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | 4 | from loguru import logger 5 | 6 | 7 | def redis_host(): 8 | host = os.getenv('REDIS_HOST') 9 | if host is None or len(host) < 1: 10 | raise Exception('REDIS_HOST not config') 11 | return host 12 | 13 | 14 | def redis_port(): 15 | port = os.getenv('REDIS_PORT') 16 | if port is None: 17 | 
logger.debug('REDIS_PORT not set, try 6379') 18 | port = 6379 19 | return port 20 | 21 | 22 | def redis_passwd(): 23 | passwd = os.getenv('REDIS_PASSWORD') 24 | if passwd is None or len(passwd) < 1: 25 | raise Exception('REDIS_PASSWORD not config') 26 | return passwd 27 | 28 | 29 | def feature_store_base_dir(): 30 | return 'feature_stores' 31 | -------------------------------------------------------------------------------- /huixiangdou/services/session.py: -------------------------------------------------------------------------------- 1 | from huixiangdou.primitive import Query 2 | from .helper import ErrorCode 3 | import os 4 | import json 5 | 6 | class Session: 7 | """For compute graph, `session` takes all parameter.""" 8 | 9 | def __init__(self, 10 | query: Query, 11 | history: list, 12 | groupname: str = '', 13 | log_path: str = 'logs/generate.jsonl', 14 | groupchats: list = []): 15 | self.query = query 16 | self.history = history 17 | self.groupname = groupname 18 | self.groupchats = groupchats 19 | 20 | # init 21 | # Same as `chunk.choices[0].delta` 22 | self.delta = '' 23 | self.parallel_chunks = [] 24 | self.response = '' 25 | self.references = [] 26 | self.topic = '' 27 | self.code = ErrorCode.INIT 28 | 29 | # coreference resolution results 30 | self.cr = '' 31 | 32 | # text2vec results 33 | self.chunk = '' 34 | self.knowledge = '' 35 | 36 | # web search results 37 | self.web_knowledge = '' 38 | 39 | # source graph search results 40 | self.sg_knowledge = '' 41 | 42 | # debug logs 43 | self.debug = dict() 44 | self.log_path = log_path 45 | 46 | def __del__(self): 47 | dirname = os.path.dirname(self.log_path) 48 | if not os.path.exists(dirname): 49 | os.makedirs(dirname) 50 | 51 | try: 52 | with open(self.log_path, 'a') as f: 53 | json_str = json.dumps(self.debug, indent=2, ensure_ascii=False) 54 | f.write(json_str) 55 | f.write('\n') 56 | except Exception as e: 57 | pass -------------------------------------------------------------------------------- /huixiangdou/version.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import Tuple 3 | 4 | __version__ = '20240415' 5 | short_version = __version__ 6 | 7 | 8 | def parse_version_info(version_str: str) -> Tuple: 9 | """Parse version from a string. 10 | 11 | Args: 12 | version_str (str): A string represents a version info. 13 | 14 | Returns: 15 | tuple: A sequence of integer and string represents version. 
16 | """ 17 | _version_info = [] 18 | for x in version_str.split('.'): 19 | if x.isdigit(): 20 | _version_info.append(int(x)) 21 | elif x.find('rc') != -1: 22 | patch_version = x.split('rc') 23 | _version_info.append(int(patch_version[0])) 24 | _version_info.append(f'rc{patch_version[1]}') 25 | return tuple(_version_info) 26 | 27 | 28 | version_info = parse_version_info(__version__) 29 | -------------------------------------------------------------------------------- /logs/work.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp 2 | beautifulsoup4 3 | duckduckgo_search 4 | einops 5 | loguru 6 | lxml_html_clean 7 | networkx>=3.0 8 | numpy<2.0.0 9 | openai>=1.0.0 10 | openpyxl 11 | pandas 12 | pydantic>=1.10.13 13 | pymupdf 14 | python-docx 15 | pytoml 16 | readability-lxml 17 | redis 18 | requests 19 | scikit-learn 20 | db-sqlite3 21 | # See https://github.com/deanmalmgren/textract/issues/461 22 | # textract @ git+https://github.com/tpoisonooo/textract@master 23 | # textract 24 | texttable 25 | tiktoken 26 | torch>=2.0.0 27 | transformers>=4.38 28 | tenacity 29 | transformers_stream_generator 30 | unstructured 31 | sentence_transformers 32 | sse_starlette 33 | fastapi 34 | uvicorn 35 | termcolor 36 | opencv-python-headless 37 | gradio>=4.41 38 | bcembedding 39 | jieba 40 | faiss-gpu 41 | -------------------------------------------------------------------------------- /requirements/cpu.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://download.pytorch.org/whl/cpu 2 | aiohttp 3 | beautifulsoup4 4 | duckduckgo_search 5 | einops 6 | faiss-cpu 7 | jieba 8 | loguru 9 | lxml_html_clean 10 | nest_asyncio 11 | networkx>=3.0 12 | numpy<2.0.0 13 | openai>=1.55.3 14 | openpyxl 15 | pandas 16 | pydantic>=1.10.13 17 | pymupdf 18 | python-docx 19 | pytoml 20 | readability-lxml 21 | redis 22 | requests 23 | scikit-learn 24 | # See https://github.com/deanmalmgren/textract/issues/461 25 | # textract @ git+https://github.com/tpoisonooo/textract@master 26 | # textract 27 | texttable 28 | tiktoken 29 | torch 30 | unstructured 31 | sse_starlette 32 | fastapi 33 | uvicorn 34 | termcolor 35 | opencv-python-headless 36 | gradio 37 | -------------------------------------------------------------------------------- /requirements/docs.txt: -------------------------------------------------------------------------------- 1 | docutils==0.18.1 2 | modelindex 3 | myst-parser 4 | -e git+https://github.com/tpoisonooo/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme 5 | sphinx==6.1.3 6 | sphinx-copybutton 7 | sphinx-design 8 | sphinx-notfound-page 9 | sphinx-tabs 10 | sphinxcontrib-jquery 11 | tabulate -------------------------------------------------------------------------------- /requirements/lark-group.txt: -------------------------------------------------------------------------------- 1 | flask 2 | lark_oapi 3 | pytoml 4 | redis -------------------------------------------------------------------------------- /requirements/multimodal.txt: -------------------------------------------------------------------------------- 1 | einops 2 | ftfy 3 | timm 4 | torchvision 5 | FlagEmbedding 6 | 7 | # donot install xformer and apex -------------------------------------------------------------------------------- /requirements/sft.txt: 
-------------------------------------------------------------------------------- 1 | accelerate>=0.26.1 2 | auto-gptq -------------------------------------------------------------------------------- /resource/bad_questions.json: -------------------------------------------------------------------------------- 1 | [ 2 | "mmpose中怎么调用mmyolo接口", 3 | "mmpose实现姿态估计后怎么实现行为识别", 4 | "mmpose执行提取关键点命令不是分为两步吗,一步是目标检测,另一步是关键点提取,我现在目标检测这部分的代码是demo/topdown_demo_with_mmdet.py demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth 现在我想把这个mmdet的checkpoints换位yolo的,那么应该怎么操作", 5 | "在mmdetection中,如何同时加载两个数据集,两个dataloader", 6 | "如何将mmdetection2.28.2的retinanet配置文件改为单尺度的呢?", 7 | "1.MMPose_Tutorial.ipynb、inferencer_demo.py、image_demo.py、bottomup_demo.py、body3d_pose_lifter_demo.py这几个文件和topdown_demo_with_mmdet.py的区别是什么,\n2.我如果要使用mmdet是不是就只能使用topdown_demo_with_mmdet.py文件,", 8 | "mmpose 测试 map 一直是 0 怎么办?", 9 | "如何使用mmpose检测人体关键点?", 10 | "我使用的数据集是labelme标注的,我想知道mmpose的数据集都是什么样式的,全都是单目标的数据集标注,还是里边也有多目标然后进行标注", 11 | "如何生成openmmpose的c++推理脚本", 12 | "mmpose", 13 | "mmpose的目标检测阶段调用的模型,一定要是demo文件夹下的文件吗,有没有其他路径下的文件", 14 | "mmpose可以实现行为识别吗,如果要实现的话应该怎么做", 15 | "我在mmyolo的v0.6.0 (15/8/2023)更新日志里看到了他新增了支持基于 MMPose 的 YOLOX-Pose,我现在是不是只需要在mmpose/project/yolox-Pose内做出一些设置就可以,换掉demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py 改用mmyolo来进行目标检测了", 16 | "mac m1从源码安装的mmpose是x86_64的", 17 | "想请教一下mmpose有没有提供可以读取外接摄像头,做3d姿态并达到实时的项目呀?", 18 | "huixiangdou 是什么?", 19 | "大佬们,如果我想在高空检测安全帽,我应该用 mmdetection 还是 mmrotate", 20 | "mmdetection如何开启多卡训练", 21 | "硬件模型库是什么", 22 | "硬件模型库是啥?", 23 | "OpenMMLab有哪些开源库", 24 | "cbam注意力机制如何改进", 25 | "轻量级的边分辨率模型有哪些?", 26 | "如何添加CBAM机制", 27 | "自定义数据集需要修改什么内容", 28 | "对人进行关键点提取的时候,如果是多个人的场景下,就会出现连线到其他人身上去的情况,这个时候是不是目标检测模型这里的问题,也就是mmdet的识别效率有点低了,所以导致这种情况的出现", 29 | "有人把mmdeploy成功部署到jetson agx orin上吗?", 30 | "那这里的mmdet的配置文件demo/topdown_demo_with_mmdet.py就不需要换吗,他里边的配置不是训练mmdet的配置吗,我觉得是不是要换一个新的py配置文件,然后调用yolo", 31 | "怎么训练llm", 32 | "哪种目标检测算法适合小目标", 33 | "OpenCompass 大模型数据集评估分数查询", 34 | "把某专业标准类知识pdf格式,如何创建成向量数据库?" 35 | ] 36 | -------------------------------------------------------------------------------- /resource/data/baicaoyuan.md: -------------------------------------------------------------------------------- 1 | # 从百草园到三味书屋 2 | 我家的后面有一个很大的园,相传叫作百草园。现在是早已并屋子一起卖给朱文公的子孙了,连那最末次的相见也已经隔了七八年,其中似乎确凿只有一些野草;但那时却是我的乐园。 3 | 不必说碧绿的菜畦,光滑的石井栏,高大的皂荚树,紫红的桑椹;也不必说鸣蝉在树叶里长吟,肥胖的黄蜂伏在菜花上,轻捷的叫天子(云雀)忽然从草间直窜向云霄里去了。单是周围的短短的泥墙根一带,就有无限趣味。油蛉在这里低唱,蟋蟀们在这里弹琴。翻开断砖来,有时会遇见蜈蚣;还有斑蝥,倘若用手指按住它的脊梁,便会啪的一声,从后窍喷出一阵烟雾。何首乌藤和木莲藤缠络着,木莲有莲房一般的果实,何首乌有臃肿的根。有人说,何首乌根是有像人形的,吃了便可以成仙,我于是常常拔它起来,牵连不断地拔起来,也曾因此弄坏了泥墙,却从来没有见过有一块根像人样。如果不怕刺,还可以摘到覆盆子,像小珊瑚珠攒成的小球,又酸又甜,色味都比桑椹要好得远。 -------------------------------------------------------------------------------- /resource/data/qa_pair.csv: -------------------------------------------------------------------------------- 1 | "What is HuixiangDou?","HuixiangDou is an AI assistant that can answer questions based on your knowledge base." 2 | "How to use HuixiangDou?","You can use HuixiangDou by providing a knowledge base and asking questions related to it." 3 | "What features does HuixiangDou support?","HuixiangDou supports text embedding, document retrieval, and question answering." 
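The `qa_pair.csv` sample above keeps one question/answer pair per row. As a minimal sketch of how such rows map onto the `Chunk` primitive shown earlier (which accepts `modal='qa'`), the helper below is illustrative only: the CSV path default and the `answer` metadata key are assumptions, not code taken from the repository.

```python
import csv

from huixiangdou.primitive import Chunk


def load_qa_chunks(csv_path: str = 'resource/data/qa_pair.csv') -> list:
    """Hypothetical helper: wrap each question/answer row into a qa-modal Chunk."""
    chunks = []
    with open(csv_path, newline='', encoding='utf-8') as f:
        for row in csv.reader(f):
            if len(row) < 2:
                continue
            chunks.append(
                Chunk(content_or_path=row[0],
                      metadata={'answer': row[1]},
                      modal='qa'))
    return chunks


if __name__ == '__main__':
    for chunk in load_qa_chunks():
        print(chunk)  # goes through the __str__ override defined in chunk.py
```

Printing each chunk exercises the `__str__` override in `chunk.py`, so the output shows `modal`, `content_or_path` and `metadata` together.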
-------------------------------------------------------------------------------- /resource/data/tengye.md: -------------------------------------------------------------------------------- 1 | # 藤野先生 2 | 东京也无非是这样。上野②的樱花烂熳的时节,望去确也像绯红的轻云,但花下也缺不了成群结队的“清国留学生”的速成班③,头顶上盘着大辫子,顶得学生制帽的顶上高高耸起,形成一座富士山④。也有解散辫子,盘得平的,除下帽来,油光可鉴⑤,宛如小姑娘的发髻一般,还要将脖子扭几扭。实在标致⑥极了。 3 | 中国留学生会馆⑦的门房里有几本书买,有时还值得去一转;倘在上午,里面的几间洋房里倒也还可以坐坐的。但到傍晚,有一间的地板便常不免要咚咚咚地响得震天,兼以满房烟尘斗乱⑧;问问精通时事⑨的人,答道,“那是在学跳舞。” 4 | 到别的地方去看看,如何呢? -------------------------------------------------------------------------------- /resource/figures/inside-middleware.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/resource/figures/inside-middleware.png -------------------------------------------------------------------------------- /resource/figures/inside-mmpose.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/resource/figures/inside-mmpose.jpg -------------------------------------------------------------------------------- /resource/figures/inside-ncnn-group.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/resource/figures/inside-ncnn-group.jpg -------------------------------------------------------------------------------- /resource/figures/lark-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/resource/figures/lark-example.png -------------------------------------------------------------------------------- /resource/good_questions.json: -------------------------------------------------------------------------------- 1 | [ 2 | "百草园是什么", 3 | "斋藤先生是谁?" 
4 | ] 5 | -------------------------------------------------------------------------------- /sft/axolotl_configs/lora-4B.yml: -------------------------------------------------------------------------------- 1 | base_model: /workspace/models/Qwen1.5-4B-Chat 2 | model_type: AutoModelForCausalLM 3 | tokenizer_type: AutoTokenizer 4 | 5 | trust_remote_code: true 6 | 7 | load_in_8bit: true 8 | load_in_4bit: false 9 | strict: false 10 | 11 | datasets: 12 | - path: /workspace/axolotl/alpaca.json 13 | type: alpaca 14 | dataset_prepared_path: 15 | val_set_size: 0.05 16 | output_dir: ./lora-out 17 | 18 | sequence_len: 2048 # supports up to 8192 19 | sample_packing: false 20 | pad_to_sequence_len: 21 | 22 | adapter: lora 23 | lora_model_dir: 24 | lora_r: 32 25 | lora_alpha: 16 26 | lora_dropout: 0.05 27 | lora_target_linear: true 28 | lora_fan_in_fan_out: 29 | 30 | wandb_mode: online 31 | wandb_project: huixiangdou-cr 32 | wandb_entity: 33 | wandb_watch: 34 | wandb_name: qwen-4 35 | wandb_log_model: 36 | 37 | gradient_accumulation_steps: 4 38 | micro_batch_size: 2 39 | num_epochs: 4 40 | optimizer: adamw_bnb_8bit 41 | lr_scheduler: cosine 42 | learning_rate: 0.0002 43 | 44 | train_on_inputs: false 45 | group_by_length: false 46 | bf16: auto 47 | fp16: 48 | tf32: false 49 | 50 | gradient_checkpointing: false 51 | early_stopping_patience: 52 | resume_from_checkpoint: 53 | local_rank: 54 | logging_steps: 1 55 | xformers_attention: 56 | flash_attention: 57 | 58 | warmup_steps: 10 59 | evals_per_epoch: 4 60 | eval_table_size: 61 | eval_max_new_tokens: 128 62 | saves_per_epoch: 1 63 | debug: 64 | deepspeed: 65 | weight_decay: 0.0 66 | fsdp: 67 | fsdp_config: 68 | special_tokens: 69 | -------------------------------------------------------------------------------- /sft/axolotl_configs/qwen2-lora-0.5B.yaml: -------------------------------------------------------------------------------- 1 | base_model: /workspace/models/Qwen1.5-0.5B-Chat 2 | trust_remote_code: true 3 | 4 | load_in_8bit: false 5 | load_in_4bit: false 6 | strict: false 7 | 8 | datasets: 9 | - path: /workspace/axolotl/alpaca.json 10 | type: alpaca 11 | dataset_prepared_path: 12 | val_set_size: 0.05 13 | output_dir: ./out-qwen0.5 14 | 15 | sequence_len: 1400 # supports up to 32k 16 | sample_packing: false 17 | pad_to_sequence_len: false 18 | 19 | adapter: lora 20 | lora_model_dir: 21 | lora_r: 64 22 | lora_alpha: 16 23 | lora_dropout: 0.05 24 | lora_target_linear: true 25 | lora_fan_in_fan_out: 26 | 27 | wandb_mode: online 28 | wandb_project: huixiangdou-cr 29 | wandb_entity: 30 | wandb_watch: 31 | wandb_name: qwen0.5 32 | wandb_log_model: 33 | 34 | gradient_accumulation_steps: 1 35 | micro_batch_size: 16 36 | num_epochs: 1 37 | optimizer: paged_adamw_8bit 38 | lr_scheduler: cosine 39 | learning_rate: 0.0002 40 | 41 | train_on_inputs: false 42 | group_by_length: false 43 | gradient_checkpointing: true 44 | gradient_checkpointing_kwargs: 45 | use_reentrant: false 46 | early_stopping_patience: 47 | resume_from_checkpoint: 48 | local_rank: 49 | logging_steps: 1 50 | xformers_attention: 51 | flash_attention: true 52 | 53 | warmup_steps: 10 54 | evals_per_epoch: 1 55 | saves_per_epoch: 1 56 | debug: 57 | deepspeed: 58 | weight_decay: 0.0 59 | fsdp: 60 | fsdp_config: 61 | special_tokens: 62 | -------------------------------------------------------------------------------- /sft/axolotl_configs/qwen2-lora-1.8B.yaml: -------------------------------------------------------------------------------- 1 | base_model: /workspace/models/Qwen1.5-1.8B-Chat 2 | 
trust_remote_code: true 3 | 4 | load_in_8bit: false 5 | load_in_4bit: false 6 | strict: false 7 | 8 | datasets: 9 | - path: /workspace/axolotl/alpaca.json 10 | type: alpaca 11 | dataset_prepared_path: 12 | val_set_size: 0.05 13 | output_dir: ./out-qwen1.8 14 | 15 | sequence_len: 1400 # supports up to 32k 16 | sample_packing: false 17 | pad_to_sequence_len: false 18 | 19 | adapter: lora 20 | lora_model_dir: 21 | lora_r: 64 22 | lora_alpha: 16 23 | lora_dropout: 0.05 24 | lora_target_linear: true 25 | lora_fan_in_fan_out: 26 | 27 | wandb_mode: online 28 | wandb_project: huixiangdou-cr 29 | wandb_entity: 30 | wandb_watch: 31 | wandb_name: qwen1.8 32 | wandb_log_model: 33 | 34 | gradient_accumulation_steps: 1 35 | micro_batch_size: 16 36 | num_epochs: 1 37 | optimizer: paged_adamw_8bit 38 | lr_scheduler: cosine 39 | learning_rate: 0.0002 40 | 41 | train_on_inputs: false 42 | group_by_length: false 43 | gradient_checkpointing: true 44 | gradient_checkpointing_kwargs: 45 | use_reentrant: false 46 | early_stopping_patience: 47 | resume_from_checkpoint: 48 | local_rank: 49 | logging_steps: 1 50 | xformers_attention: 51 | flash_attention: true 52 | 53 | warmup_steps: 10 54 | evals_per_epoch: 1 55 | saves_per_epoch: 1 56 | debug: 57 | deepspeed: 58 | weight_decay: 0.0 59 | fsdp: 60 | fsdp_config: 61 | special_tokens: 62 | -------------------------------------------------------------------------------- /sft/axolotl_configs/qwen2-lora-14B.yaml: -------------------------------------------------------------------------------- 1 | base_model: /workspace/models/Qwen1.5-14B-Chat 2 | trust_remote_code: true 3 | 4 | load_in_8bit: false 5 | load_in_4bit: false 6 | strict: false 7 | 8 | datasets: 9 | - path: /workspace/axolotl/alpaca.json 10 | type: alpaca 11 | dataset_prepared_path: 12 | val_set_size: 0.05 13 | output_dir: ./out-qwen14 14 | 15 | 16 | sequence_len: 1400 # supports up to 32k 17 | sample_packing: false 18 | pad_to_sequence_len: false 19 | 20 | adapter: lora 21 | lora_model_dir: 22 | lora_r: 64 23 | lora_alpha: 16 24 | lora_dropout: 0.05 25 | lora_target_linear: true 26 | lora_fan_in_fan_out: 27 | 28 | wandb_mode: online 29 | wandb_project: huixiangdou-cr 30 | wandb_entity: 31 | wandb_watch: 32 | wandb_name: qwen14 33 | wandb_log_model: 34 | 35 | gradient_accumulation_steps: 1 36 | micro_batch_size: 8 37 | num_epochs: 1 38 | optimizer: paged_adamw_8bit 39 | lr_scheduler: cosine 40 | learning_rate: 0.0002 41 | 42 | train_on_inputs: false 43 | group_by_length: false 44 | gradient_checkpointing: true 45 | gradient_checkpointing_kwargs: 46 | use_reentrant: false 47 | early_stopping_patience: 48 | resume_from_checkpoint: 49 | local_rank: 50 | logging_steps: 1 51 | xformers_attention: 52 | flash_attention: true 53 | 54 | warmup_steps: 10 55 | evals_per_epoch: 1 56 | saves_per_epoch: 1 57 | debug: 58 | deepspeed: 59 | weight_decay: 0.0 60 | fsdp: 61 | fsdp_config: 62 | special_tokens: 63 | -------------------------------------------------------------------------------- /sft/axolotl_configs/qwen2-lora-32B.yaml: -------------------------------------------------------------------------------- 1 | base_model: /workspace/models/Qwen1.5-32B-Chat 2 | trust_remote_code: true 3 | 4 | load_in_8bit: false 5 | load_in_4bit: false 6 | strict: false 7 | 8 | datasets: 9 | - path: /workspace/axolotl/alpaca.json 10 | type: alpaca 11 | dataset_prepared_path: 12 | val_set_size: 0.05 13 | output_dir: ./out-qwen32 14 | 15 | 16 | sequence_len: 1400 # supports up to 32k 17 | sample_packing: false 18 | pad_to_sequence_len: 
false 19 | 20 | adapter: lora 21 | lora_model_dir: 22 | lora_r: 64 23 | lora_alpha: 16 24 | lora_dropout: 0.05 25 | lora_target_linear: true 26 | lora_fan_in_fan_out: 27 | 28 | wandb_mode: online 29 | wandb_project: huixiangdou-cr 30 | wandb_entity: 31 | wandb_watch: 32 | wandb_name: qwen32 33 | wandb_log_model: 34 | 35 | gradient_accumulation_steps: 1 36 | micro_batch_size: 4 37 | num_epochs: 1 38 | optimizer: paged_adamw_8bit 39 | lr_scheduler: cosine 40 | learning_rate: 0.0002 41 | 42 | train_on_inputs: false 43 | group_by_length: false 44 | gradient_checkpointing: true 45 | gradient_checkpointing_kwargs: 46 | use_reentrant: false 47 | early_stopping_patience: 48 | resume_from_checkpoint: 49 | local_rank: 50 | logging_steps: 1 51 | xformers_attention: 52 | flash_attention: true 53 | 54 | warmup_steps: 10 55 | evals_per_epoch: 1 56 | saves_per_epoch: 1 57 | debug: 58 | deepspeed: 59 | weight_decay: 0.0 60 | fsdp: 61 | fsdp_config: 62 | special_tokens: 63 | -------------------------------------------------------------------------------- /sft/axolotl_configs/qwen2-lora-4B-loraplus-epoch4.yaml: -------------------------------------------------------------------------------- 1 | base_model: /workspace/models/Qwen1.5-4B-Chat 2 | trust_remote_code: true 3 | 4 | load_in_8bit: false 5 | load_in_4bit: false 6 | strict: false 7 | 8 | datasets: 9 | - path: /workspace/axolotl/alpaca.json 10 | type: alpaca 11 | dataset_prepared_path: 12 | val_set_size: 0.05 13 | output_dir: ./out-qwen4-loraplus-ep4 14 | 15 | sequence_len: 1400 # supports up to 32k 16 | sample_packing: false 17 | pad_to_sequence_len: false 18 | 19 | adapter: lora 20 | lora_model_dir: 21 | lora_r: 64 22 | lora_alpha: 16 23 | lora_dropout: 0.05 24 | lora_target_linear: true 25 | lora_fan_in_fan_out: 26 | loraplus_lr_ratio: 16 27 | 28 | wandb_mode: online 29 | wandb_project: huixiangdou-cr 30 | wandb_entity: 31 | wandb_watch: 32 | wandb_name: qwen-4 33 | wandb_log_model: 34 | 35 | gradient_accumulation_steps: 1 36 | micro_batch_size: 16 37 | num_epochs: 4 38 | optimizer: paged_adamw_8bit 39 | lr_scheduler: cosine 40 | learning_rate: 0.00005 41 | 42 | train_on_inputs: false 43 | group_by_length: false 44 | gradient_checkpointing: true 45 | gradient_checkpointing_kwargs: 46 | use_reentrant: false 47 | early_stopping_patience: 48 | resume_from_checkpoint: 49 | local_rank: 50 | logging_steps: 1 51 | xformers_attention: 52 | flash_attention: true 53 | 54 | warmup_steps: 10 55 | evals_per_epoch: 1 56 | saves_per_epoch: 4 57 | debug: 58 | deepspeed: 59 | weight_decay: 0.0 60 | fsdp: 61 | fsdp_config: 62 | special_tokens: 63 | -------------------------------------------------------------------------------- /sft/axolotl_configs/qwen2-lora-4B.yaml: -------------------------------------------------------------------------------- 1 | base_model: /workspace/models/Qwen1.5-4B-Chat 2 | trust_remote_code: true 3 | 4 | load_in_8bit: false 5 | load_in_4bit: false 6 | strict: false 7 | 8 | datasets: 9 | - path: /workspace/axolotl/alpaca.json 10 | type: alpaca 11 | dataset_prepared_path: 12 | val_set_size: 0.05 13 | output_dir: ./out-qwen4 14 | 15 | sequence_len: 1400 # supports up to 32k 16 | sample_packing: false 17 | pad_to_sequence_len: false 18 | 19 | adapter: lora 20 | lora_model_dir: 21 | lora_r: 64 22 | lora_alpha: 16 23 | lora_dropout: 0.05 24 | lora_target_linear: true 25 | lora_fan_in_fan_out: 26 | 27 | wandb_mode: online 28 | wandb_project: huixiangdou-cr 29 | wandb_entity: 30 | wandb_watch: 31 | wandb_name: qwen-4 32 | wandb_log_model: 33 | 34 
| gradient_accumulation_steps: 1 35 | micro_batch_size: 32 36 | num_epochs: 1 37 | optimizer: paged_adamw_8bit 38 | lr_scheduler: cosine 39 | learning_rate: 0.0002 40 | 41 | train_on_inputs: false 42 | group_by_length: false 43 | gradient_checkpointing: true 44 | gradient_checkpointing_kwargs: 45 | use_reentrant: false 46 | early_stopping_patience: 47 | resume_from_checkpoint: 48 | local_rank: 49 | logging_steps: 1 50 | xformers_attention: 51 | flash_attention: true 52 | 53 | warmup_steps: 10 54 | evals_per_epoch: 1 55 | saves_per_epoch: 1 56 | debug: 57 | deepspeed: 58 | weight_decay: 0.0 59 | fsdp: 60 | fsdp_config: 61 | special_tokens: 62 | -------------------------------------------------------------------------------- /sft/axolotl_configs/qwen2-lora-7B.yaml: -------------------------------------------------------------------------------- 1 | base_model: /workspace/models/Qwen1.5-7B-Chat 2 | trust_remote_code: true 3 | 4 | load_in_8bit: false 5 | load_in_4bit: false 6 | strict: false 7 | 8 | datasets: 9 | - path: /workspace/axolotl/alpaca.json 10 | type: alpaca 11 | dataset_prepared_path: 12 | val_set_size: 0.05 13 | output_dir: ./out-qwen7 14 | 15 | 16 | sequence_len: 1400 # supports up to 32k 17 | sample_packing: false 18 | pad_to_sequence_len: false 19 | 20 | adapter: lora 21 | lora_model_dir: 22 | lora_r: 16 23 | lora_alpha: 16 24 | lora_dropout: 0.05 25 | lora_target_linear: true 26 | lora_fan_in_fan_out: 27 | 28 | wandb_mode: online 29 | wandb_project: huixiangdou-cr 30 | wandb_entity: 31 | wandb_watch: 32 | wandb_name: qwen7 33 | wandb_log_model: 34 | 35 | gradient_accumulation_steps: 1 36 | micro_batch_size: 16 37 | num_epochs: 1 38 | optimizer: paged_adamw_8bit 39 | lr_scheduler: cosine 40 | learning_rate: 0.0002 41 | 42 | train_on_inputs: false 43 | group_by_length: false 44 | gradient_checkpointing: true 45 | gradient_checkpointing_kwargs: 46 | use_reentrant: false 47 | early_stopping_patience: 48 | resume_from_checkpoint: 49 | local_rank: 50 | logging_steps: 1 51 | xformers_attention: 52 | flash_attention: true 53 | 54 | warmup_steps: 10 55 | evals_per_epoch: 1 56 | saves_per_epoch: 1 57 | debug: 58 | deepspeed: 59 | weight_decay: 0.0 60 | fsdp: 61 | fsdp_config: 62 | special_tokens: 63 | -------------------------------------------------------------------------------- /sft/axolotl_configs/qwen2-moe-lora-2.7B.yaml: -------------------------------------------------------------------------------- 1 | base_model: /workspace/models/qwen1.5-moe-2.7B-chat 2 | trust_remote_code: true 3 | 4 | load_in_8bit: false 5 | load_in_4bit: false 6 | strict: false 7 | 8 | datasets: 9 | - path: /workspace/axolotl/alpaca.json 10 | type: alpaca 11 | dataset_prepared_path: 12 | val_set_size: 0.05 13 | output_dir: ./out-moe 14 | 15 | sequence_len: 1400 # supports up to 32k 16 | sample_packing: false 17 | pad_to_sequence_len: false 18 | 19 | adapter: lora 20 | lora_model_dir: 21 | lora_r: 64 22 | lora_alpha: 16 23 | lora_dropout: 0.05 24 | lora_target_linear: true 25 | lora_fan_in_fan_out: 26 | 27 | # smooth-clould-2 28 | wandb_mode: online 29 | wandb_project: huixiangdou-cr 30 | wandb_entity: 31 | wandb_watch: 32 | wandb_name: qwen-moe 33 | wandb_log_model: 34 | 35 | gradient_accumulation_steps: 1 36 | micro_batch_size: 16 37 | num_epochs: 1 38 | optimizer: paged_adamw_8bit 39 | lr_scheduler: cosine 40 | learning_rate: 0.0002 41 | 42 | train_on_inputs: false 43 | group_by_length: false 44 | gradient_checkpointing: true 45 | gradient_checkpointing_kwargs: 46 | use_reentrant: false 47 | 
early_stopping_patience: 48 | resume_from_checkpoint: 49 | local_rank: 50 | logging_steps: 1 51 | xformers_attention: 52 | flash_attention: true 53 | 54 | warmup_steps: 10 55 | evals_per_epoch: 1 56 | saves_per_epoch: 1 57 | debug: 58 | deepspeed: 59 | weight_decay: 0.0 60 | fsdp: 61 | fsdp_config: 62 | special_tokens: 63 | -------------------------------------------------------------------------------- /sft/axolotl_configs/qwen2-moe-lora.yaml: -------------------------------------------------------------------------------- 1 | base_model: /workspace/models/qwen1.5-moe-2.7B-chat 2 | trust_remote_code: true 3 | 4 | load_in_8bit: false 5 | load_in_4bit: false 6 | strict: false 7 | 8 | datasets: 9 | - path: mhenrichsen/alpaca_2k_test 10 | type: alpaca 11 | dataset_prepared_path: 12 | val_set_size: 0.05 13 | output_dir: ./out 14 | 15 | sequence_len: 1024 # supports up to 32k 16 | sample_packing: false 17 | pad_to_sequence_len: false 18 | 19 | adapter: lora 20 | lora_model_dir: 21 | lora_r: 32 22 | lora_alpha: 16 23 | lora_dropout: 0.05 24 | lora_target_linear: true 25 | lora_fan_in_fan_out: 26 | 27 | wandb_project: 28 | wandb_entity: 29 | wandb_watch: 30 | wandb_name: 31 | wandb_log_model: 32 | 33 | gradient_accumulation_steps: 4 34 | micro_batch_size: 1 35 | num_epochs: 4 36 | optimizer: paged_adamw_8bit 37 | lr_scheduler: cosine 38 | learning_rate: 0.0002 39 | 40 | train_on_inputs: false 41 | group_by_length: false 42 | bf16: auto 43 | fp16: 44 | tf32: true 45 | 46 | gradient_checkpointing: true 47 | gradient_checkpointing_kwargs: 48 | use_reentrant: false 49 | early_stopping_patience: 50 | resume_from_checkpoint: 51 | local_rank: 52 | logging_steps: 1 53 | xformers_attention: 54 | flash_attention: true 55 | 56 | warmup_steps: 10 57 | evals_per_epoch: 4 58 | saves_per_epoch: 1 59 | debug: 60 | deepspeed: 61 | weight_decay: 0.0 62 | fsdp: 63 | fsdp_config: 64 | special_tokens: 65 | -------------------------------------------------------------------------------- /sft/axolotl_configs/qwen2-moe-qlora.yaml: -------------------------------------------------------------------------------- 1 | base_model: Qwen/Qwen1.5-MoE-A2.7B 2 | trust_remote_code: true 3 | 4 | load_in_8bit: false 5 | load_in_4bit: true 6 | strict: false 7 | 8 | datasets: 9 | - path: mhenrichsen/alpaca_2k_test 10 | type: alpaca 11 | dataset_prepared_path: 12 | val_set_size: 0.05 13 | output_dir: ./out 14 | 15 | sequence_len: 1024 # supports up to 32k 16 | sample_packing: false 17 | pad_to_sequence_len: false 18 | 19 | adapter: qlora 20 | lora_model_dir: 21 | lora_r: 32 22 | lora_alpha: 16 23 | lora_dropout: 0.05 24 | lora_target_linear: true 25 | lora_fan_in_fan_out: 26 | 27 | wandb_project: 28 | wandb_entity: 29 | wandb_watch: 30 | wandb_name: 31 | wandb_log_model: 32 | 33 | gradient_accumulation_steps: 4 34 | micro_batch_size: 1 35 | num_epochs: 4 36 | optimizer: paged_adamw_8bit 37 | lr_scheduler: cosine 38 | learning_rate: 0.0002 39 | 40 | train_on_inputs: false 41 | group_by_length: false 42 | bf16: auto 43 | fp16: 44 | tf32: true 45 | 46 | gradient_checkpointing: true 47 | gradient_checkpointing_kwargs: 48 | use_reentrant: false 49 | early_stopping_patience: 50 | resume_from_checkpoint: 51 | local_rank: 52 | logging_steps: 1 53 | xformers_attention: 54 | flash_attention: true 55 | 56 | warmup_steps: 10 57 | evals_per_epoch: 4 58 | saves_per_epoch: 1 59 | debug: 60 | deepspeed: 61 | weight_decay: 0.0 62 | fsdp: 63 | fsdp_config: 64 | special_tokens: 65 | 
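The axolotl configs above differ mainly in `base_model`, `adapter`, `lora_r`, `micro_batch_size`, `gradient_accumulation_steps` and `num_epochs`. Below is a small comparison sketch; it assumes PyYAML is installed and that the files live under `sft/axolotl_configs/` as in this tree, and the reported batch size is simply `micro_batch_size * gradient_accumulation_steps` per GPU.

```python
import glob

import yaml  # assumption: PyYAML is available in the environment


def summarize(pattern: str = 'sft/axolotl_configs/*.y*ml'):
    """Print the knobs that actually differ between the LoRA/QLoRA configs."""
    for path in sorted(glob.glob(pattern)):
        with open(path, encoding='utf-8') as f:
            cfg = yaml.safe_load(f)
        batch = cfg.get('micro_batch_size', 1) * cfg.get('gradient_accumulation_steps', 1)
        print(f"{path}: base={cfg.get('base_model')} adapter={cfg.get('adapter')} "
              f"lora_r={cfg.get('lora_r')} epochs={cfg.get('num_epochs')} "
              f"batch_per_gpu={batch}")


if __name__ == '__main__':
    summarize()
```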
-------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/tests/__init__.py -------------------------------------------------------------------------------- /tests/cp_files.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | def copy_files(src_dir, dest_dir): 5 | # 遍历源目录 6 | for root, dirs, files in os.walk(src_dir): 7 | for file in files: 8 | # 构建源文件的完整路径 9 | src_file = os.path.join(root, file) 10 | # 构建目标文件的完整路径 11 | dest_file = os.path.join(dest_dir, file) 12 | # 复制文件 13 | 14 | shutil.copy(src_file, dest_file) 15 | print(f"Copied '{src_file}' to '{dest_file}'") 16 | 17 | # 指定源目录和目标目录 18 | source_directory = '/home/khj/CNKI_pure_text' 19 | destination_directory = '/home/khj/hxd-ci/repodir' 20 | 21 | # 调用函数 22 | copy_files(source_directory, destination_directory) -------------------------------------------------------------------------------- /tests/git-clone.sh: -------------------------------------------------------------------------------- 1 | git clone https://ghproxy.org/https://github.com/open-compass/opencompass --depth=1 2 | git clone https://ghproxy.org/https://github.com/open-mmlab/mmpose --depth=1 3 | git clone https://ghproxy.org/https://github.com/open-mmlab/mmdeploy --depth=1 4 | git clone https://ghproxy.org/https://github.com/open-mmlab/mmdetection --depth=1 5 | git clone https://ghproxy.org/https://github.com/internlm/lmdeploy --depth=1 6 | git clone https://ghproxy.org/https://github.com/internlm/xtuner --depth=1 7 | git clone https://ghproxy.org/https://github.com/open-mmlab/mmyolo --depth=1 8 | git clone https://ghproxy.org/https://github.com/open-mmlab/mmcv --depth=1 9 | git clone https://ghproxy.org/https://github.com/internlm/huixiangdou --depth=1 10 | 11 | git clone https://github.com/open-compass/opencompass --depth=1 12 | git clone https://github.com/open-mmlab/mmpose --depth=1 13 | git clone https://github.com/open-mmlab/mmdeploy --depth=1 14 | git clone https://github.com/open-mmlab/mmdetection --depth=1 15 | git clone https://github.com/internlm/lmdeploy --depth=1 16 | git clone https://github.com/internlm/xtuner --depth=1 17 | git clone https://github.com/open-mmlab/mmyolo --depth=1 18 | git clone https://github.com/open-mmlab/mmcv --depth=1 19 | git clone https://github.com/internlm/huixiangdou --depth=1 20 | 21 | git clone https://github.com/open-mmlab/Amphion --depth=1 22 | git clone https://github.com/open-mmlab/labelbee --depth=1 23 | -------------------------------------------------------------------------------- /tests/test_alles_apin.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import requests 5 | 6 | url = 'https://openxlab.org.cn/gw/alles-apin-hub/v1/openai/v2/text/chat' 7 | api_token = os.getenv('ALLES_APIN_TOKEN') 8 | headers = {'content-type': 'application/json', 'alles-apin-token': api_token} 9 | 10 | payload = { 11 | 'model': 12 | 'gpt-4-1106-preview', 13 | 'messages': [{ 14 | 'role': 15 | 'user', 16 | 'content': 17 | '帮我写个 python 代码,用 time.time() 和 datetime 获取当前时间。把当前时间的秒数设成 0,毫秒数也设成 0, 分钟数加 1,输出新时间对应的毫秒数,格式和 time.time() 相同' 18 | }] 19 | } 20 | 21 | response = requests.post(url, headers=headers, data=json.dumps(payload)) 22 | resp_json = response.json() 23 | if 
resp_json['msgCode'] == '10000': 24 | data = resp_json['data'] 25 | if len(data['choices']) > 0: 26 | text = data['choices'][0]['message']['content'] 27 | print(text) 28 | -------------------------------------------------------------------------------- /tests/test_benepar.py: -------------------------------------------------------------------------------- 1 | import benepar 2 | import nltk 3 | 4 | benepar.download('benepar_en3_large') 5 | 6 | nltk.download('punkt') 7 | # 创建解析器 8 | parser = benepar.Parser('benepar_en3_large') 9 | 10 | # 解析句子 11 | tree = parser.parse('The quick brown fox jumps over the lazy dog.') 12 | -------------------------------------------------------------------------------- /tests/test_bge_reranker.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding import FlagReranker 2 | 3 | reranker = FlagReranker( 4 | '/data2/khj/bge-reranker-v2-m3/', use_fp16=True 5 | ) # Setting use_fp16 to True speeds up computation with a slight performance degradation 6 | 7 | score = reranker.compute_score(['query', 'passage']) 8 | print(score) # -5.65234375 9 | 10 | # You can map the scores into 0-1 by set "normalize=True", which will apply sigmoid function to the score 11 | score = reranker.compute_score(['query', 'passage'], normalize=True) 12 | print(score) # 0.003497010252573502 13 | 14 | scores = reranker.compute_score([ 15 | ['what is panda?', 'hi'], 16 | [ 17 | 'what is panda?', 18 | 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.' 19 | ] 20 | ]) 21 | print(scores) # [-8.1875, 5.26171875] 22 | import pdb 23 | 24 | # You can map the scores into 0-1 by set "normalize=True", which will apply sigmoid function to the score 25 | scores = reranker.compute_score([ 26 | ['what is panda?', 'hi'], 27 | [ 28 | 'what is panda?', 29 | 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.' 
30 | ] 31 | ], 32 | normalize=True) 33 | print(scores) # [0.00027803096387751553, 0.9948403768236574] 34 | -------------------------------------------------------------------------------- /tests/test_clear_kimi_files.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pdb 3 | 4 | from openai import OpenAI 5 | from tqdm import tqdm 6 | 7 | client = OpenAI(api_key=os.getenv('MOONSHOT_API_KEY'), 8 | base_url='https://api.moonshot.cn/v1') 9 | file_list = client.files.list() 10 | for file in tqdm(file_list.data): 11 | client.files.delete(file_id=file.id) 12 | print(file) 13 | -------------------------------------------------------------------------------- /tests/test_dataclass.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | from enum import Enum, unique 3 | 4 | 5 | @unique 6 | class KGType(Enum): 7 | MARKDOWN = 'markdown' 8 | CHUNK = 'chunk' 9 | KEYWORD = 'keyword' 10 | IMAGE = 'image' 11 | 12 | 13 | x = KGType.IMAGE 14 | print(x) 15 | -------------------------------------------------------------------------------- /tests/test_deepseek.py: -------------------------------------------------------------------------------- 1 | # python3 2 | from openai import OpenAI 3 | 4 | client = OpenAI(api_key='sk-f58e45ee054743f898f732b09dbcaa7c', 5 | base_url='https://api.deepseek.com/v1') 6 | queries = [ 7 | '已知 ncnn 中 cnn 是卷积神经网络,n 是 ncnn 的作者 nihui。所以 ncnn 的全称是?', 8 | '"请问如何安装 mmdeploy ?"\n请仔细阅读以上内容,判断句子是否是个有主题的疑问句,结果用 0~10 表示。直接提供得分不要解释。\n判断标准:有主语谓语宾语并且是疑问句得 10 分;缺少主谓宾扣分;陈述句直接得 0 分;不是疑问句直接得 0 分。直接提供得分不要解释。', 9 | '"豆哥少水点键证群"\n请仔细阅读以上内容,判断句子是否是个有主题的疑问句,结果用 0~10 表示。直接提供得分不要解释。\n判断标准:有主语谓语宾语并且是疑问句得 10 分;缺少主谓宾扣分;陈述句直接得 0 分;不是疑问句直接得 0 分。直接提供得分不要解释。' 10 | ] 11 | 12 | for query in queries: 13 | response = client.chat.completions.create( 14 | model='deepseek-chat', 15 | messages=[ 16 | { 17 | 'role': 'system', 18 | 'content': 'You are a helpful assistant' 19 | }, 20 | { 21 | 'role': 'user', 22 | 'content': query 23 | }, 24 | ], 25 | temperature=0.1) 26 | 27 | print(response.choices[0].message.content) 28 | -------------------------------------------------------------------------------- /tests/test_hf_import_accelerate.py: -------------------------------------------------------------------------------- 1 | from accelerate import (dispatch_model, infer_auto_device_map, 2 | init_empty_weights) 3 | from accelerate.hooks import add_hook_to_module 4 | from accelerate.utils import (check_tied_parameters_on_same_device, 5 | find_tied_parameters, get_balanced_memory, 6 | get_max_memory, load_offloaded_weights, 7 | offload_weight, save_offload_index, 8 | set_module_tensor_to_device) 9 | -------------------------------------------------------------------------------- /tests/test_intention_prompt.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | 4 | import torch 5 | from transformers.generation import GenerationConfig 6 | 7 | # Note: The default behavior now has injection attack prevention off. 
8 | DIR = '/internlm/ampere_7b_v1_7_0' 9 | from transformers import AutoModelForCausalLM, AutoTokenizer 10 | 11 | tokenizer = AutoTokenizer.from_pretrained(DIR, trust_remote_code=True) 12 | model = AutoModelForCausalLM.from_pretrained(DIR, 13 | trust_remote_code=True, 14 | device_map='auto').eval() 15 | 16 | 17 | def task1_intention(): 18 | """Test prompt.""" 19 | ret = [] 20 | with open('data.json', encoding='utf8') as f: 21 | items = json.load(f) 22 | for idx, item in enumerate(items): 23 | question = item['question'] 24 | 25 | prompt = '“{}”\n请仔细阅读以上内容,判断句子是否是个有主题的疑问句,结果用 1~10 表示。直接提供得分不要解释。\n判断标准:有主语谓语宾语并且是疑问句得 10 分;缺少主谓宾扣分;陈述句直接得 0 分;不是疑问句直接得 0 分。直接提供得分不要解释。'.format( 26 | question) 27 | answer, _ = model.chat(tokenizer, prompt, history=[], top_k=1) 28 | print((answer, prompt)) 29 | 30 | ret.append({'question': prompt, 'answer': answer}) 31 | 32 | with open('task1_intention_internlm_prompt.json', 'w', 33 | encoding='utf8') as f: 34 | json.dump(list(ret), f, ensure_ascii=False, indent=2) 35 | print('{}/{}'.format(idx, len(items))) 36 | 37 | 38 | if __name__ == '__main__': 39 | task1_intention() 40 | -------------------------------------------------------------------------------- /tests/test_internlm2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from transformers import AutoModelForCausalLM, AutoTokenizer 3 | import asyncio 4 | 5 | # wrap to async generator 6 | async def chat_stream(): 7 | model_path = "/data2/khj/internlm2_5-7b-chat" 8 | model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda() 9 | tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) 10 | 11 | model = model.eval() 12 | length = 0 13 | for response, history in model.stream_chat(tokenizer, "Hello", history=[]): 14 | part = response[length:] 15 | length = len(response) 16 | yield part 17 | yield '\n' 18 | 19 | # coroutine 20 | async def main(): 21 | async for part in chat_stream(): 22 | print(part, flush=True, end="") 23 | 24 | loop = asyncio.get_event_loop() 25 | loop.run_until_complete(main()) -------------------------------------------------------------------------------- /tests/test_kimi.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from openai import OpenAI 4 | 5 | client = OpenAI( 6 | api_key=os.getenv('MOONSHOT_API_KEY'), 7 | base_url='https://api.moonshot.cn/v1', 8 | ) 9 | 10 | prompt = '“huixiangdou 是什么?”\n请仔细阅读以上内容,判断句子是否是个有主题的疑问句,结果用 0~10 表示。直接提供得分不要解释。\n判断标准:有主语谓语宾语并且是疑问句得 10 分;缺少主谓宾扣分;陈述句直接得 0 分;不是疑问句直接得 0 分。直接提供得分不要解释。' 11 | 12 | 13 | def generate(): 14 | """Test generate.""" 15 | messages = [ 16 | { 17 | 'role': 'system', 18 | 'content': '你是一个语文专家,擅长对句子的结构进行分析' 19 | # '你是 Kimi,由 Moonshot AI 提供的人工智能助手,你更擅长中文和英文的对话。你会为用户提供安全,有帮助,准确的回答。 20 | # 同时,你会拒绝一些涉及恐怖主义,种族歧视,黄色暴力等问题的回答。Moonshot AI 为专有名词,不可翻译成其他语言。' 21 | }, 22 | { 23 | 'role': 'user', 24 | 'content': prompt 25 | } 26 | ] 27 | 28 | whole_input = str(messages) 29 | print(whole_input) 30 | # print('input_length {}'.format(len(whole_input))) 31 | 32 | try: 33 | completion = client.chat.completions.create(model='moonshot-v1-8k', 34 | messages=messages, 35 | temperature=0.1, 36 | n=10) 37 | except Exception as e: 38 | return prompt, str(e) 39 | 40 | results = [] 41 | for choice in completion.choices: 42 | results.append(choice.message.content) 43 | 44 | return prompt, results 45 | 46 | 47 | if __name__ == '__main__': 48 | print(generate()) 49 | 
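`test_kimi.py` above asks for a 0–10 score and samples the reply ten times (`n=10`), but keeps the answers as raw strings. The sketch below shows one hedged way to collapse such replies into a yes/no decision; the regex, the median aggregation and the threshold of 6 are illustrative choices, not the repository's actual scoring logic.

```python
import re
from statistics import median


def extract_score(reply: str) -> int:
    """Pull the first integer out of a reply such as '8' or '得分:7'."""
    match = re.search(r'\d+', reply)
    return int(match.group()) if match else 0


def looks_like_question(replies: list, threshold: int = 6) -> bool:
    """Aggregate the sampled scores with a median to damp outlier replies."""
    scores = [extract_score(reply) for reply in replies]
    return median(scores) >= threshold


if __name__ == '__main__':
    samples = ['8', '得分:7', '0分,这是陈述句']
    print(looks_like_question(samples))  # True: median of [8, 7, 0] is 7
```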
-------------------------------------------------------------------------------- /tests/test_m3.py: -------------------------------------------------------------------------------- 1 | from FlagEmbedding import BGEM3FlagModel 2 | 3 | model = BGEM3FlagModel( 4 | '/data2/khj/bge-m3', use_fp16=True 5 | ) # Setting use_fp16 to True speeds up computation with a slight performance degradation 6 | 7 | sentences_1 = ['What is BGE M3?', 'Defination of BM25'] 8 | sentences_2 = [ 9 | 'BGE M3 is an embedding model supporting dense retrieval, lexical matching and multi-vector interaction.', 10 | 'BM25 is a bag-of-words retrieval function that ranks a set of documents based on the query terms appearing in each document' 11 | ] 12 | 13 | import pdb 14 | 15 | embeddings_1 = model.encode(sentences_1, max_length=512)['dense_vecs'] 16 | embeddings_2 = model.encode(sentences_2)['dense_vecs'] 17 | similarity = embeddings_1 @ embeddings_2.T 18 | print(similarity) 19 | # [[0.6265, 0.3477], [0.3499, 0.678 ]] 20 | -------------------------------------------------------------------------------- /tests/test_neo4j.py: -------------------------------------------------------------------------------- 1 | import nxneo4j as nx 2 | from neo4j import GraphDatabase 3 | 4 | # Neo4j Desktop 版 5 | # 1. 关掉 auth 6 | # 2. server.default_listen_address=0.0.0.0 7 | # 浏览器打开 http://10.1.52.85:7474/browser/,无密码模式应该能登录 8 | 9 | # 配置 Neo4j 连接参数 10 | uri = 'bolt://10.1.52.85:7687' # 默认的 bolt 协议地址和端口 11 | user = 'neo4j' # Neo4j 用户名 12 | password = 'neo4j' # Neo4j 密码 13 | 14 | # 创建驱动实例 15 | driver = GraphDatabase.driver(uri, auth=(user, password)) 16 | 17 | G = nx.Graph(driver) 18 | G.delete_all() 19 | 20 | #Add a node 21 | G.add_node('Yusuf') 22 | #Add node with features 23 | G.add_node('Nurgul', gender='F') 24 | #Add multiple properties at once 25 | G.add_node('Betul', age=4, gender='F') 26 | #Check nodes 27 | for node in G.nodes(): #Unlike networkX, nxneo4j returns a generator 28 | print(node) 29 | -------------------------------------------------------------------------------- /tests/test_openai.py: -------------------------------------------------------------------------------- 1 | import openai 2 | from openai import OpenAI 3 | 4 | 5 | def call_openai(model_name, prompt, history): 6 | 7 | messages = [{ 8 | 'role': 'system', 9 | 'content': 'You are a helpful assistant.' # noqa E501 10 | }] 11 | for item in history: 12 | messages.append({'role': 'user', 'content': item[0]}) 13 | messages.append({'role': 'system', 'content': item[1]}) 14 | messages.append({'role': 'user', 'content': prompt}) 15 | 16 | client = OpenAI( 17 | api_key='EMPTY', 18 | base_url='https://10.140.24.142:29500/v1', 19 | ) 20 | 21 | completion = client.chat.completions.create(model=model_name, 22 | messages=messages) 23 | return completion.choices[0].message.content 24 | 25 | 26 | def call2(): 27 | from openai import OpenAI 28 | 29 | # Set OpenAI's API key and API base to use vLLM's API server. 30 | openai_api_key = 'EMPTY' 31 | openai_api_base = 'http://10.140.24.142:29500/v1' 32 | 33 | client = OpenAI( 34 | api_key=openai_api_key, 35 | base_url=openai_api_base, 36 | ) 37 | 38 | chat_response = client.chat.completions.create( 39 | model='../models/Qwen1.5-14B-Chat/', 40 | messages=[ 41 | { 42 | 'role': 'system', 43 | 'content': 'You are a helpful assistant.' 44 | }, 45 | { 46 | 'role': 'user', 47 | 'content': 'Tell me a joke.' 
48 | }, 49 | ]) 50 | print('Chat response:', chat_response) 51 | 52 | 53 | call2() 54 | # call_openai("../models/Qwen1.5-14B-Chat/", '如何安装 mmdeploy', []) 55 | 56 | # curl http://10.140.24.142:29500/v1/chat/completions \ 57 | # -H "Content-Type: application/json" \ 58 | # -d '{ 59 | # "model": "../models/Qwen1.5-14B-Chat/", 60 | # "messages": [ 61 | # {"role": "system", "content": "You are a helpful assistant."}, 62 | # {"role": "user", "content": "Tell me something about large language models."} 63 | # ] 64 | # }' 65 | -------------------------------------------------------------------------------- /tests/test_post_android.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | 4 | import requests 5 | 6 | # base_url = 'https://p-172_dot_31_dot_0_dot_170_colon_18443.openxlab.space/api/v1/message/v1/wechat/fRHK' 7 | base_url = 'http://139.224.198.162:18443/api/v1/message/v1/wechat/fRHK' 8 | 9 | headers = {'Content-Type': 'application/json; charset=utf-8'} 10 | 11 | 12 | def send(): 13 | data_send = { 14 | 'query_id': 'abb', 15 | 'groupname': '茴香豆测试群', # 完整的微信群名 16 | 'username': '豆哥 123', # 发送者的在这个群的微信昵称, 注意一个人可能在多个群里 17 | 'query': { 18 | 'type': 'text', # 发的类型, text or image, poll 19 | 'content': 20 | '请问如何申请公寓?' # 如果 type 是 text 就是文本; 如果是 image,就是个可公开访问的 oss_url 21 | } 22 | } 23 | resp = requests.post(base_url, 24 | headers=headers, 25 | data=json.dumps(data_send), 26 | timeout=10) 27 | 28 | resp_json = resp.json() 29 | print(resp_json) 30 | 31 | 32 | def get(): 33 | data_wait = { 34 | 'query_id': 'abb', # 微信给的随机值,用于事后日志分析 35 | 'groupname': '茴香豆测试群', # 完整的微信群名 36 | 'username': '豆哥 123', # 发送者的在这个群的微信昵称, 注意一个人可能在多个群里 37 | 'query': { 38 | 'type': 'poll' # 发的类型, text or image, poll 39 | } 40 | } 41 | resp = requests.post(base_url, 42 | headers=headers, 43 | data=json.dumps(data_wait), 44 | timeout=20) 45 | print(resp.text) 46 | 47 | 48 | send() 49 | send() 50 | 51 | time.sleep(40) 52 | get() 53 | -------------------------------------------------------------------------------- /tests/test_pyppeteer.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import time 3 | 4 | from pyppeteer import launch 5 | 6 | 7 | async def main(url): 8 | browser = await launch(headless=True, 9 | args=[ 10 | '--no-sandbox', '--disable-dev-shm-usage', 11 | '--disable-gpu', 12 | '--disable-software-rasterizer', 13 | '--disable-setuid-sandbox' 14 | ]) 15 | page = await browser.newPage() 16 | await page.goto(url) 17 | content = await page.evaluate( 18 | 'document.getElementsByClassName("Post-Main")[0].innerText', 19 | force_expr=True) 20 | # print(content) 21 | await browser.close() 22 | return content 23 | 24 | 25 | result = asyncio.get_event_loop().run_until_complete( 26 | main(url='https://zhuanlan.zhihu.com/p/699164101')) 27 | print(result) 28 | -------------------------------------------------------------------------------- /tests/test_query_gradio.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | 3 | if __name__ == '__main__': 4 | logger.warning('This file moved to `huixiangdou.gradio_ui`') 5 | -------------------------------------------------------------------------------- /tests/test_relative.py: -------------------------------------------------------------------------------- 1 | def test_reject(retriever: Retriever, sample: str = None): 2 | """Simple test reject pipeline.""" 3 | if sample is None: 4 | real_questions = [ 5 | 
'SAM 10个T 的训练集,怎么比比较公平呢~?速度上还有缺陷吧?', 6 | '想问下,如果只是推理的话,amp的fp16是不会省显存么,我看parameter仍然是float32,开和不开推理的显存占用都是一样的。能不能直接用把数据和model都 .half() 代替呢,相比之下amp好在哪里', # noqa E501 7 | 'mmdeploy支持ncnn vulkan部署么,我只找到了ncnn cpu 版本', 8 | '大佬们,如果我想在高空检测安全帽,我应该用 mmdetection 还是 mmrotate', 9 | '请问 ncnn 全称是什么', 10 | '有啥中文的 text to speech 模型吗?', 11 | '今天中午吃什么?', 12 | 'huixiangdou 是什么?', 13 | 'mmpose 如何安装?', 14 | '使用科研仪器需要注意什么?' 15 | ] 16 | else: 17 | with open(sample) as f: 18 | real_questions = json.load(f) 19 | 20 | for example in real_questions: 21 | relative, _ = retriever.is_relative(example) 22 | 23 | if relative: 24 | logger.warning(f'process query: {example}') 25 | else: 26 | logger.error(f'reject query: {example}') 27 | 28 | if sample is not None: 29 | if relative: 30 | with open('workdir/positive.txt', 'a+') as f: 31 | f.write(example) 32 | f.write('\n') 33 | else: 34 | with open('workdir/negative.txt', 'a+') as f: 35 | f.write(example) 36 | f.write('\n') 37 | 38 | empty_cache() 39 | -------------------------------------------------------------------------------- /tests/test_time.py: -------------------------------------------------------------------------------- 1 | import time 2 | from datetime import datetime 3 | 4 | current_time = time.time() # 获取当前时间戳 5 | dt_object = datetime.fromtimestamp(current_time) # 将时间戳转换为datetime对象 6 | 7 | # 获取当天自午夜以来的总分钟数 8 | total_minutes_since_midnight = dt_object.hour * 60 + dt_object.minute 9 | 10 | print(total_minutes_since_midnight) 11 | -------------------------------------------------------------------------------- /tests/test_visual_bge.py: -------------------------------------------------------------------------------- 1 | ##### Use M3 doing Multilingual Multi-Modal Retrieval 2 | import torch 3 | from FlagEmbedding.visual.modeling import Visualized_BGE 4 | 5 | model = Visualized_BGE( 6 | model_name_bge='/data2/khj/bge-m3', 7 | model_weight='/data2/khj/bge-visualized/Visualized_m3.pth') 8 | model.eval() 9 | with torch.no_grad(): 10 | query_emb = model.encode(image='./imgs/cir_query.png', text='一匹马牵着这辆车') 11 | candi_emb_1 = model.encode(image='./imgs/cir_candi_1.png') 12 | candi_emb_2 = model.encode(image='./imgs/cir_candi_2.png') 13 | 14 | sim_1 = query_emb @ candi_emb_1.T 15 | sim_2 = query_emb @ candi_emb_2.T 16 | print(sim_1, sim_2) # tensor([[0.7026]]) tensor([[0.8075]]) 17 | -------------------------------------------------------------------------------- /tests/test_yi.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoModelForCausalLM, AutoTokenizer 2 | 3 | model = AutoModelForCausalLM.from_pretrained('/models/Yi-6B-200K', 4 | device_map='auto', 5 | torch_dtype='auto', 6 | trust_remote_code=True) 7 | tokenizer = AutoTokenizer.from_pretrained('/models/Yi-6B-200K', 8 | trust_remote_code=True) 9 | inputs = tokenizer('', return_tensors='pt') 10 | max_length = 512 11 | outputs = model.generate( 12 | inputs.input_ids.cuda(), 13 | max_length=max_length, 14 | eos_token_id=tokenizer.eos_token_id, 15 | do_sample=True, 16 | repetition_penalty=1.3, 17 | no_repeat_ngram_size=5, 18 | temperature=0.7, 19 | top_k=1, 20 | top_p=0.8, 21 | ) 22 | print(tokenizer.decode(outputs[0], skip_special_tokens=True)) 23 | -------------------------------------------------------------------------------- /tests/test_yulan.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from transformers import LlamaForCausalLM, LlamaTokenizer 3 | 4 | model_path = 
'/models/YuLan-Chat-2-13b-fp16' 5 | tokenizer = LlamaTokenizer.from_pretrained(model_path) 6 | model = LlamaForCausalLM.from_pretrained(model_path, 7 | torch_dtype=torch.float16, 8 | device_map='auto') 9 | model = model.eval() 10 | 11 | 12 | def run(input_text: str): 13 | prompt = "The following is a conversation between a human and an AI assistant namely YuLan, developed by GSAI, Renmin University of China. The AI assistant gives helpful, detailed, and polite answers to the user's questions.\n[|Human|]:{}\n[|AI|]:".format( 14 | input_text) 15 | inputs = tokenizer(prompt, 16 | return_tensors='pt', 17 | padding='longest', 18 | max_length=8192, 19 | truncation=True, 20 | return_attention_mask=True, 21 | add_special_tokens=True) 22 | print(inputs) 23 | kwargs = { 24 | 'temperature': 0.8, 25 | 'top_p': 0.95, 26 | 'top_k': 50, 27 | 'repetition_penalty': 1.1, 28 | 'no_repeat_ngram_size': 64, 29 | 'max_length': 8192, 30 | 'pad_token_id': tokenizer.bos_token_id, 31 | 'eos_token_id': tokenizer.eos_token_id 32 | } 33 | outputs = model.generate(inputs['input_ids'].to(model.device), 34 | attention_mask=inputs['attention_mask'].to( 35 | model.device), 36 | do_sample=True, 37 | **kwargs) 38 | print(tokenizer.batch_decode(outputs, skip_special_tokens=True)) 39 | 40 | 41 | texts = [ 42 | 'mmdeploy extract如何使用', 'OpenMMLab与上海AI lab 的关系是什么?', 'MMEngine 和MMCV的区别', 43 | 'openmmlab 是什么?', 'mmdet3.0 是否依赖 mmcv0.7', 'mmdet3.0对应的mmcv最低版本是多少' 44 | ] 45 | for input_text in texts: 46 | run(input_text) 47 | -------------------------------------------------------------------------------- /unittest/primitive/test_bm250api.py: -------------------------------------------------------------------------------- 1 | from huixiangdou.primitive import BM25Okapi, Chunk 2 | import pdb 3 | 4 | def test_bm25_dump(): 5 | corpus = [ 6 | "Hello there good man!", 7 | "It is quite windy in London", 8 | "How is the weather today?" 
9 | ] 10 | chunks = [] 11 | for content in corpus: 12 | c = Chunk(content_or_path=content) 13 | chunks.append(c) 14 | 15 | bm25 = BM25Okapi() 16 | bm25.save(chunks, './') 17 | 18 | def test_bm25_load(): 19 | bm25 = BM25Okapi() 20 | bm25.load('./') 21 | query_text = 'what is the weather' 22 | 23 | res = bm25.get_top_n(query=query_text.split(' ')) 24 | print(res) 25 | 26 | res = bm25.get_top_n(query=query_text) 27 | print(res) 28 | 29 | if __name__ == '__main__': 30 | test_bm25_dump() 31 | test_bm25_load() 32 | -------------------------------------------------------------------------------- /unittest/primitive/test_dataclass.py: -------------------------------------------------------------------------------- 1 | from huixiangdou.primitive import Chunk, Query 2 | 3 | 4 | def test_chunk(): 5 | c = Chunk() 6 | c_str = '{}'.format(c) 7 | assert 'content_or_path=' in c_str 8 | 9 | 10 | def test_query(): 11 | q = Query(text='hello', image='test.jpg') 12 | q_str = '{}'.format(q) 13 | assert 'hello' in q_str 14 | assert 'image=' in q_str 15 | 16 | p = Query('hello') 17 | p_str = '{}'.format(p) 18 | assert 'text=' in p_str 19 | 20 | 21 | if __name__ == '__main__': 22 | test_chunk() 23 | test_query() 24 | -------------------------------------------------------------------------------- /unittest/primitive/test_embedder.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | 3 | from huixiangdou.primitive import Embedder 4 | 5 | 6 | def test_embedder(): 7 | emb = Embedder({'embedding_model_path':'/data2/khj/bge-m3'}) 8 | sentence = 'hello world ' 9 | sentence_16k = sentence * (16384 // len(sentence)) 10 | image_path = 'resource/figures/wechat.jpg' 11 | 12 | text_feature = emb.embed_query(text=sentence_16k) 13 | image_feature = emb.embed_query(path=image_path) 14 | 15 | query_feature = emb.embed_query(text=sentence_16k, path=image_path) 16 | 17 | sim1 = query_feature @ text_feature.T 18 | sim2 = query_feature @ image_feature.T 19 | 20 | assert sim1.item() >= 0.4 21 | assert sim2.item() >= 0.4 22 | 23 | 24 | if __name__ == '__main__': 25 | test_embedder() 26 | -------------------------------------------------------------------------------- /unittest/primitive/test_entity.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pdb 3 | 4 | from huixiangdou.primitive import NamedEntity2Chunk, Chunk 5 | 6 | 7 | def test_entity_build_and_query(): 8 | entities = ['HuixiangDou', 'WeChat'] 9 | 10 | indexer = NamedEntity2Chunk('/tmp') 11 | indexer.clean() 12 | indexer.set_entity(entities=entities) 13 | 14 | c0 = Chunk(content_or_path='How to deploy HuixiangDou on wechaty ?') 15 | c1 = Chunk(content_or_path='do you know what huixiangdou means ?') 16 | chunks = [c0, c1] 17 | map_entity2chunks = dict() 18 | # build inverted index 19 | for chunk_id, chunk in enumerate(chunks): 20 | if chunk.modal != 'text': 21 | continue 22 | entity_ids = indexer.parse(text=chunk.content_or_path) 23 | for entity_id in entity_ids: 24 | if entity_id not in map_entity2chunks: 25 | map_entity2chunks[entity_id] = [chunk_id] 26 | else: 27 | map_entity2chunks[entity_id].append(chunk_id) 28 | 29 | for entity_id, chunk_indexes in map_entity2chunks.items(): 30 | indexer.insert_relation(eid = entity_id, chunk_ids=chunk_indexes) 31 | del indexer 32 | 33 | query_text = 'how to install wechat ?' 
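# Query phase (descriptive comments added): a second NamedEntity2Chunk instance reloads the
# entity list and inverted index that were just persisted under /tmp. parse() appears to do a
# case-insensitive match of the registered entities against the query text, so 'wechat' in the
# query resolves to the 'WeChat' entity; get_chunk_ids() then maps those entity ids back to
# chunk ids, and the assertion below expects chunk 0
# ('How to deploy HuixiangDou on wechaty ?') to rank first.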
34 | retriver = NamedEntity2Chunk('/tmp') 35 | entity_ids = retriver.parse(query_text) 36 | # chunk_id match counter 37 | chunk_id_list = retriver.get_chunk_ids(entity_ids=entity_ids) 38 | print(chunk_id_list) 39 | assert chunk_id_list[0][0] == 0 40 | 41 | 42 | if __name__ == '__main__': 43 | test_entity_build_and_query() 44 | -------------------------------------------------------------------------------- /unittest/primitive/test_faiss.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pdb 3 | 4 | from huixiangdou.primitive import Chunk, Embedder, Faiss, Query 5 | 6 | 7 | def test_faiss(): 8 | a = Chunk('hello world', {'source': 'unittest'}) 9 | b = Chunk('resource/figures/inside-mmpose.jpg', {'source': 'unittest'}, 10 | 'image') 11 | c = Chunk('resource/figures/wechat.jpg', {'source': 'test image'}, 'image') 12 | chunks = [a, b, c] 13 | 14 | save_path = '/tmp/faiss' 15 | 16 | model_config = { 17 | 'embedding_model_path': '/data2/khj/bge-m3' 18 | } 19 | embedder = Embedder(model_config) 20 | 21 | Faiss.save_local(folder_path=save_path, chunks=chunks, embedder=embedder) 22 | assert os.path.exists(os.path.join(save_path, 'embedding.faiss')) 23 | 24 | g = Faiss.load_local(save_path) 25 | for idx, c in enumerate(g.chunks): 26 | assert str(chunks[idx]) == str(c) 27 | 28 | target = 'resource/figures/inside-mmpose.jpg' 29 | query = Query(image=target) 30 | pairs = g.similarity_search_with_query(query=query, embedder=embedder) 31 | chunk, score = pairs[0] 32 | assert chunk.content_or_path == target 33 | assert score >= 0.9999 34 | 35 | 36 | if __name__ == '__main__': 37 | test_faiss() 38 | -------------------------------------------------------------------------------- /unittest/primitive/test_limitter.py: -------------------------------------------------------------------------------- 1 | import time 2 | from huixiangdou.services.llm_server_hybrid import RPM, TPM 3 | 4 | def test_rpm(): 5 | rpm = RPM(30) 6 | 7 | for i in range(40): 8 | rpm.wait() 9 | print(i) 10 | 11 | time.sleep(5) 12 | 13 | for i in range(40): 14 | rpm.wait() 15 | print(i) 16 | 17 | def test_tpm(): 18 | tpm = TPM(2000) 19 | 20 | for i in range(20): 21 | tpm.wait(silent=False, token_count=150) 22 | print(i) 23 | 24 | if __name__ == '__main__': 25 | test_tpm() -------------------------------------------------------------------------------- /unittest/primitive/test_reranker.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | 3 | from huixiangdou.primitive import LLMReranker 4 | 5 | 6 | def test_reranker(): 7 | model = LLMReranker({'reranker_model_path':'/data2/khj/bce-reranker-base_v1'}) 8 | 9 | query = 'apple' 10 | texts = [ 'roast banana', 'ice juice', 'red orange', 'apple pie'] 11 | scores = model._sort(texts=texts, query=query) 12 | 13 | assert scores[0] == len(texts) - 1 14 | 15 | 16 | if __name__ == '__main__': 17 | test_reranker() 18 | -------------------------------------------------------------------------------- /unittest/service/test_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/unittest/service/test_llm.py -------------------------------------------------------------------------------- /unittest/service/test_llm_client.py: -------------------------------------------------------------------------------- 1 | from huixiangdou.services.llm_client import ChatClient 2 | 3 | 
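# Descriptive note: the @DeprecationWarning decorator below rebinds test_auto_fix to a
# DeprecationWarning *instance*, which effectively disables the test -- invoking
# test_auto_fix() in the __main__ block would raise
# "TypeError: 'DeprecationWarning' object is not callable".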
@DeprecationWarning 4 | def test_auto_fix(): 5 | """test auto choose backend based on config.""" 6 | remote_only_config = 'config-2G.ini' 7 | local_only_config = 'config.ini' 8 | full_config = 'config-advanced.ini' 9 | 10 | client = ChatClient(config_path=remote_only_config) 11 | real_backend, max_len = client.auto_fix(backend='local') 12 | assert real_backend != 'local' 13 | assert max_len >= 32000 14 | 15 | client = ChatClient(config_path=local_only_config) 16 | real_backend, max_len = client.auto_fix(backend='kimi') 17 | assert real_backend == 'local' 18 | 19 | client = ChatClient(config_path=full_config) 20 | real_backend, max_len = client.auto_fix(backend='local') 21 | assert real_backend == 'local' 22 | real_backend, max_len = client.auto_fix(backend='kimi') 23 | assert real_backend != 'local' 24 | 25 | if __name__ == '__main__': 26 | test_auto_fix() 27 | -------------------------------------------------------------------------------- /unittest/service/test_sg_search.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import tempfile 4 | import time 5 | 6 | import pytoml 7 | from loguru import logger 8 | 9 | from huixiangdou.services.llm import LLM 10 | from huixiangdou.services.sg_search import SourceGraphProxy 11 | 12 | 13 | def load_secret(): 14 | kimi_token = '' 15 | serper_token = '' 16 | with open('unittest/token.json') as f: 17 | json_obj = json.load(f) 18 | kimi_token = json_obj['kimi'] 19 | serper_token = json_obj['serper'] 20 | sg_token = json_obj['sg'] 21 | return kimi_token, serper_token, sg_token 22 | 23 | 24 | def build_config_path(): 25 | config_path = 'config-2G.ini' 26 | kimi_token, serper_token, sg_token = load_secret() 27 | config = None 28 | with open(config_path) as f: 29 | config = pytoml.load(f) 30 | config['web_search']['engine'] = 'serper' 31 | config['web_search']['serper_x_api_key'] = serper_token 32 | config['feature_store'][ 33 | 'embedding_model_path'] = '/data2/khj/bce-embedding-base_v1/' 34 | config['feature_store'][ 35 | 'reranker_model_path'] = '/data2/khj/bce-embedding-base_v1/' 36 | config['llm']['server']['remote_api_key'] = kimi_token 37 | config['worker']['enable_sg_search'] = 1 38 | config['sg_search']['src_access_token'] = sg_token 39 | 40 | config_path = None 41 | with tempfile.NamedTemporaryFile(delete=False, mode='w+b') as temp_file: 42 | tomlstr = pytoml.dumps(config) 43 | temp_file.write(tomlstr.encode('utf8')) 44 | config_path = temp_file.name 45 | 46 | return config_path 47 | 48 | 49 | def test_sg(): 50 | config_path = build_config_path() 51 | 52 | llm = LLM(config_path=config_path) 53 | proxy = SourceGraphProxy(config_path=config_path) 54 | content = proxy.search(llm_client=llm, 55 | question='mmpose installation', 56 | groupname='mmpose dev group') 57 | assert len(content) > 0 58 | -------------------------------------------------------------------------------- /web/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/__init__.py -------------------------------------------------------------------------------- /web/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/api/__init__.py -------------------------------------------------------------------------------- /web/api/access.py: 
-------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Request, Response 2 | 3 | from web.model.access import LoginBody 4 | from web.service.access import LoginService 5 | 6 | access_api = APIRouter() 7 | 8 | 9 | @access_api.post('/v1/login') 10 | async def login(body: LoginBody, request: Request, response: Response): 11 | return await LoginService(body, request, response).login() 12 | -------------------------------------------------------------------------------- /web/api/chat.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Depends, Request, Response 2 | 3 | from web.middleware.token import check_hxd_token 4 | from web.model.chat import (ChatCaseFeedbackBody, ChatOnlineResponseBody, 5 | ChatRequestBody) 6 | from web.model.qalib import QalibInfo 7 | from web.service.chat import ChatService 8 | 9 | chat_api = APIRouter() 10 | 11 | 12 | @chat_api.post('/v1/online') 13 | async def chat_online(request: Request, 14 | response: Response, 15 | body: ChatRequestBody, 16 | hxd_info: QalibInfo = Depends(check_hxd_token)): 17 | return await ChatService(request, response, hxd_info).chat_online(body) 18 | 19 | 20 | @chat_api.post('/v1/onlineResponse') 21 | async def chat_online_response(request: Request, 22 | response: Response, 23 | body: ChatOnlineResponseBody, 24 | hxd_info: QalibInfo = Depends(check_hxd_token)): 25 | return await ChatService(request, response, hxd_info).fetch_response(body) 26 | 27 | 28 | @chat_api.post('/v1/caseFeedback') 29 | async def case_feedback(request: Request, 30 | response: Response, 31 | body: ChatCaseFeedbackBody, 32 | hxd_info: QalibInfo = Depends(check_hxd_token)): 33 | return await ChatService(request, response, hxd_info).case_feedback(body) 34 | -------------------------------------------------------------------------------- /web/api/integrate.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Depends, Request, Response 2 | 3 | from web.middleware.token import check_hxd_token 4 | from web.model.integrate import IntegrateLarkBody, IntegrateWebSearchBody 5 | from web.model.qalib import QalibInfo 6 | from web.service.qalib import QaLibService 7 | 8 | integrate_api = APIRouter() 9 | 10 | 11 | @integrate_api.post('/v1/integrateLark') 12 | async def integrate_lark(request: Request, 13 | response: Response, 14 | body: IntegrateLarkBody, 15 | hxd_info: QalibInfo = Depends(check_hxd_token)): 16 | return await QaLibService(request, response, hxd_info).integrate_lark(body) 17 | 18 | 19 | @integrate_api.post('/v1/integrateWebSearch') 20 | async def integrate_web_search(request: Request, 21 | response: Response, 22 | body: IntegrateWebSearchBody, 23 | hxd_info: QalibInfo = Depends(check_hxd_token)): 24 | return await QaLibService(request, response, 25 | hxd_info).integrate_web_search(body) 26 | -------------------------------------------------------------------------------- /web/api/message.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Request, Response 2 | 3 | from web.model.chat import WechatRequest 4 | from web.service.message import MessageService 5 | 6 | message_api = APIRouter() 7 | 8 | 9 | @message_api.post('/v1/lark') 10 | async def on_lark_message(request: Request, response: Response): 11 | return await MessageService(request, response).on_lark_message() 12 | 13 | 14 | 
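# Descriptive note: this /v1/wechat/{suffix} endpoint is most likely the one exercised by
# tests/test_post_android.py, whose base_url ends with /v1/wechat/fRHK -- the trailing token
# is passed in as `suffix`. The body is parsed as web.model.chat.WechatRequest, and that test
# shows the expected query types: 'text', 'image' and 'poll'.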
@message_api.post('/v1/wechat/{suffix}') 15 | async def on_wechat_message(request: Request, response: Response, suffix: str, 16 | body: WechatRequest): 17 | return await MessageService(request, 18 | response).on_wechat_message(body, suffix) 19 | -------------------------------------------------------------------------------- /web/api/qalib.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from fastapi import APIRouter, Depends, File, Request, Response, UploadFile 4 | 5 | from web.middleware.token import check_hxd_token 6 | from web.model.qalib import QalibInfo, QalibPositiveNegative, QalibDeleteDoc 7 | from web.service.qalib import QaLibService 8 | 9 | qalib_api = APIRouter() 10 | 11 | 12 | @qalib_api.post('/v1/getInfo') 13 | async def qalib_info(request: Request, 14 | response: Response, 15 | hxd_info: QalibInfo = Depends(check_hxd_token)): 16 | return await QaLibService(request, response, hxd_info).info() 17 | 18 | 19 | @qalib_api.post('/v1/addDocs') 20 | async def qalib_add_docs(request: Request, 21 | response: Response, 22 | files: List[UploadFile] = File(...), 23 | hxd_info: QalibInfo = Depends(check_hxd_token)): 24 | return await QaLibService(request, response, hxd_info).add_docs(files) 25 | 26 | 27 | @qalib_api.post('/v1/getSampleInfo') 28 | async def qalib_get_sample_info( 29 | request: Request, 30 | response: Response, 31 | hxd_info: QalibInfo = Depends(check_hxd_token)): 32 | return await QaLibService(request, response, hxd_info).get_sample_info() 33 | 34 | 35 | @qalib_api.post('/v1/updateSampleInfo') 36 | async def qalib_update_sample_info( 37 | request: Request, 38 | response: Response, 39 | body: QalibPositiveNegative, 40 | hxd_info: QalibInfo = Depends(check_hxd_token)): 41 | return await QaLibService(request, response, 42 | hxd_info).update_sample_info(body) 43 | 44 | 45 | @qalib_api.post('/v1/deleteDocs') 46 | async def qalib_add_docs(request: Request, 47 | response: Response, 48 | body: QalibDeleteDoc, 49 | hxd_info: QalibInfo = Depends(check_hxd_token)): 50 | return await QaLibService(request, response, hxd_info).delete_docs(body) 51 | -------------------------------------------------------------------------------- /web/api/statistic.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Request, Response 2 | 3 | from web.service.statistic import StatisticService 4 | 5 | statistic_api = APIRouter() 6 | 7 | 8 | @statistic_api.get('/v1/total') 9 | async def qalib_info_statistic(request: Request, response: Response): 10 | return await StatisticService(request, response).info_statistic() 11 | -------------------------------------------------------------------------------- /web/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/config/__init__.py -------------------------------------------------------------------------------- /web/config/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | LOGGING_CONFIG = { 4 | 'version': 1, 5 | 'disable_existing_loggers': False, 6 | 'formatters': { 7 | 'default': { 8 | '()': 'uvicorn.logging.DefaultFormatter', 9 | 'fmt': '%(levelprefix)s %(asctime)s - %(message)s', 10 | 'datefmt': '%Y-%m-%d %H:%M:%S', 11 | }, 12 | }, 13 | 'handlers': { 14 | 'default': { 15 | 'formatter': 'default', 16 | 'class': 
'logging.StreamHandler', 17 | 'stream': 'ext://sys.stderr', 18 | }, 19 | }, 20 | 'loggers': { 21 | 'uvicorn': { 22 | 'handlers': ['default'], 23 | 'level': 'INFO' 24 | }, 25 | 'uvicorn.error': { 26 | 'level': 'INFO' 27 | }, 28 | 'uvicorn.access': { 29 | 'handlers': ['default'], 30 | 'level': 'INFO', 31 | 'propagate': False 32 | }, 33 | }, 34 | } 35 | -------------------------------------------------------------------------------- /web/constant/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/constant/__init__.py -------------------------------------------------------------------------------- /web/front-end/.eslintignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist 3 | build 4 | .umi 5 | *.d.ts 6 | lib -------------------------------------------------------------------------------- /web/front-end/.gitignore: -------------------------------------------------------------------------------- 1 | # dependencies 2 | /node_modules 3 | /npm-debug.log* 4 | /yarn-error.log 5 | /yarn.lock 6 | package-lock.json 7 | 8 | # production 9 | /dist 10 | /build 11 | 12 | # misc 13 | .DS_Store 14 | .idea 15 | 16 | # umi 17 | /src/.umi 18 | /src/.umi-production 19 | /src/.umi-test 20 | /.env.local 21 | 22 | /maps 23 | .husky -------------------------------------------------------------------------------- /web/front-end/.npmrc: -------------------------------------------------------------------------------- 1 | # 改变远程仓库地址 2 | # registry=https://registry.npmjs.org/ 3 | -------------------------------------------------------------------------------- /web/front-end/dist/assets/bean1-002ba51d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/dist/assets/bean1-002ba51d.png -------------------------------------------------------------------------------- /web/front-end/dist/assets/logo-af340389.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/dist/assets/logo-af340389.png -------------------------------------------------------------------------------- /web/front-end/dist/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/dist/logo.png -------------------------------------------------------------------------------- /web/front-end/env/.env.development: -------------------------------------------------------------------------------- 1 | # use in js: console.log(import.meta.env.MODE); 2 | VITE_NODE=development 3 | -------------------------------------------------------------------------------- /web/front-end/env/.env.production: -------------------------------------------------------------------------------- 1 | VITE_NODE=production 2 | -------------------------------------------------------------------------------- /web/front-end/env/.env.staging: -------------------------------------------------------------------------------- 1 | VITE_NODE=staging 2 | -------------------------------------------------------------------------------- /web/front-end/index.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | HuixiangDou 9 | 24 | 25 | 26 |
27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /web/front-end/mock/db.json: -------------------------------------------------------------------------------- 1 | { 2 | "posts": [ 3 | { 4 | "id": 1, 5 | "title": "json-server", 6 | "author": "typicode" 7 | } 8 | ], 9 | "comments": [ 10 | { 11 | "id": 1, 12 | "body": "some comment", 13 | "postId": 1 14 | } 15 | ], 16 | "profile": { 17 | "name": "typicode" 18 | }, 19 | "userinfo": { 20 | "code": 0, 21 | "data": { 22 | "id": 1, 23 | "name": "李剑阁", 24 | "job": "scientist" 25 | } 26 | } 27 | } -------------------------------------------------------------------------------- /web/front-end/public/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/public/logo.png -------------------------------------------------------------------------------- /web/front-end/readme.md: -------------------------------------------------------------------------------- 1 | # 1. 命令 2 | ## 安装依赖 3 | npm install 4 | 5 | ## 开发 6 | npm run dev 7 | 8 | ## build 9 | npm run build 10 |

The build command differs per environment; for example, the production build uses npm run build:aliyun-prod

11 | 12 | ## preview 13 | npm run preview 14 |

This command is specific to Vite projects: the code produced by vite's serve and build steps is not identical, so run preview before release to check that the build output behaves the same as the dev server

15 | 16 | ## mock 17 | npm run mock 18 | 19 | # 2. Ability config 20 |
The current template supports the following configurable capabilities
21 |
src/config/auth.ts: toggles whether the feature is enabled (default false), clientId, API whitelist and page whitelist
22 |
src/config/log.ts: toggles whether the feature is enabled (default false), GA4 measurement id
23 |
src/config/base-url.ts: API host and API prefix for each environment
24 | 25 |

See the comments in each config file for more details

26 | 27 | -------------------------------------------------------------------------------- /web/front-end/scripts/alias.ts: -------------------------------------------------------------------------------- 1 | import { resolvePath } from './utils'; 2 | 3 | const alias = { 4 | '@': resolvePath('./src'), 5 | '@components': resolvePath('./src/components'), 6 | '@layouts': resolvePath('./src/layouts'), 7 | '@assets': resolvePath('./src/assets'), 8 | '@pages': resolvePath('./src/pages'), 9 | '@services': resolvePath('./src/services'), 10 | '@utils': resolvePath('./src/utils'), 11 | '@styles': resolvePath('./src/styles'), 12 | '@routes': resolvePath('./src/routes'), 13 | '@config': resolvePath('./src/config'), 14 | '@locales': resolvePath('./src/locales'), 15 | '@constants': resolvePath('./src/constants'), 16 | '@interceptors': resolvePath('./src/interceptors'), 17 | '@hooks': resolvePath('./src/hooks') 18 | }; 19 | 20 | export default alias; 21 | -------------------------------------------------------------------------------- /web/front-end/scripts/import-to-cdn.ts: -------------------------------------------------------------------------------- 1 | export default [ 2 | { 3 | name: 'react', 4 | var: 'React', 5 | path: 'https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/asserts/react@18.2.0/react.production.min.js' 6 | }, 7 | { 8 | name: 'react-dom', 9 | var: 'ReactDOM', 10 | path: 'https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/asserts/react@18.2.0/react-dom.production.min.js' 11 | } 12 | ]; 13 | -------------------------------------------------------------------------------- /web/front-end/scripts/index.ts: -------------------------------------------------------------------------------- 1 | export { default as ProxyConfig } from './proxy'; 2 | export { default as ImportToCDNList } from './import-to-cdn'; 3 | export { default as alias } from './alias'; 4 | -------------------------------------------------------------------------------- /web/front-end/scripts/proxy.ts: -------------------------------------------------------------------------------- 1 | // https://github.com/http-party/node-http-proxy#options 2 | const ProxyConfig = { 3 | '/api': { 4 | target: 'http://localhost:8080', 5 | changeOrigin: true, 6 | secure: false, 7 | rewrite: path => { 8 | return path.replace('^', ''); 9 | }, 10 | } 11 | }; 12 | 13 | export default ProxyConfig; 14 | -------------------------------------------------------------------------------- /web/front-end/scripts/utils.ts: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | 3 | export const resolvePath = p => path.resolve(__dirname, '..', p); 4 | -------------------------------------------------------------------------------- /web/front-end/src/app.tsx: -------------------------------------------------------------------------------- 1 | import { GlobalLang } from '@components/global-lang'; 2 | import RouterRoot from './routes'; 3 | import './styles/index.less'; 4 | import 'sea-lion-ui/dist/index.css'; 5 | 6 | console.log(import.meta.env.VITE_NODE); 7 | 8 | const App = () => { 9 | return ( 10 | 11 | 12 | 13 | ); 14 | }; 15 | 16 | export default App; 17 | -------------------------------------------------------------------------------- /web/front-end/src/assets/imgs/bean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/src/assets/imgs/bean.png 
-------------------------------------------------------------------------------- /web/front-end/src/assets/imgs/bean1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/src/assets/imgs/bean1.png -------------------------------------------------------------------------------- /web/front-end/src/assets/imgs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/src/assets/imgs/logo.png -------------------------------------------------------------------------------- /web/front-end/src/components/button/button.module.less: -------------------------------------------------------------------------------- 1 | .btn { 2 | padding: 8px 12px; 3 | background: #c7eaba; 4 | color: #286500; 5 | border-radius: 6px; 6 | font-size: 14px; 7 | line-height: 16px; 8 | display: inline-flex; 9 | align-items: center; 10 | gap: 4px; 11 | cursor: pointer; 12 | word-break: keep-all; 13 | &[aria-disabled="true"] { 14 | background: #dcdcdc; 15 | color: #9d9d9d; 16 | cursor: not-allowed; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /web/front-end/src/components/button/button.tsx: -------------------------------------------------------------------------------- 1 | import { FC, ReactNode, HTMLAttributes } from 'react'; 2 | import classNames from 'classnames'; 3 | import styles from './button.module.less'; 4 | 5 | export interface ExampleProps extends HTMLAttributes { 6 | disabled?: boolean; 7 | onClick?: () => void; 8 | children?: ReactNode; 9 | className?: string; 10 | } 11 | 12 | const Button: FC = ({ 13 | disabled = false, 14 | onClick, children, 15 | className, 16 | }) => { 17 | const handleClick = () => { 18 | if (disabled) { 19 | return; 20 | } 21 | onClick(); 22 | }; 23 | return ( 24 |
29 | {children} 30 |
31 | ); 32 | }; 33 | 34 | export default Button; 35 | -------------------------------------------------------------------------------- /web/front-end/src/components/components-portal/components-portal.tsx: -------------------------------------------------------------------------------- 1 | import { createPortal } from 'react-dom'; 2 | 3 | const ComponentPortal = ({ children, wrapperId = '' }) => { 4 | return createPortal(children, document.getElementById(wrapperId) || document.body); 5 | }; 6 | 7 | export default ComponentPortal; 8 | -------------------------------------------------------------------------------- /web/front-end/src/components/copy-code/copy-code.module.less: -------------------------------------------------------------------------------- 1 | .copy-code { 2 | display: flex; 3 | gap: 4px; 4 | align-items: center; 5 | width: 100%; 6 | .code { 7 | font-size: 14px; 8 | line-height: 16px; 9 | padding: 8px 0; 10 | cursor: pointer; 11 | white-space: nowrap; 12 | overflow: hidden; 13 | text-overflow: ellipsis; 14 | max-width: calc(100% - 36px); 15 | color: #047600; 16 | } 17 | .copy { 18 | cursor: pointer; 19 | color: #9D9D9D; 20 | margin-left: 4px; 21 | } 22 | } -------------------------------------------------------------------------------- /web/front-end/src/components/copy-code/copy-code.tsx: -------------------------------------------------------------------------------- 1 | import { IconFont, message } from 'sea-lion-ui'; 2 | import styles from './copy-code.module.less'; 3 | 4 | export interface CopyCodeProps { 5 | code: string; 6 | } 7 | 8 | const CopyCode = (props: CopyCodeProps) => { 9 | const { code } = props; 10 | const copy = () => { 11 | const input = document.createElement('input'); 12 | input.value = code; 13 | document.body.appendChild(input); 14 | input.select(); 15 | document.execCommand('copy'); 16 | message.success('复制成功'); 17 | document.body.removeChild(input); 18 | }; 19 | return ( 20 |
21 |
{code}
22 |
23 | 24 |
25 |
26 | ); 27 | }; 28 | 29 | export default CopyCode; 30 | -------------------------------------------------------------------------------- /web/front-end/src/components/global-lang/global-lang-context.ts: -------------------------------------------------------------------------------- 1 | import { createContext } from 'react'; 2 | import { Language } from '@utils/utils'; 3 | 4 | const noop = (l: Language) => undefined; 5 | 6 | export const LangDefault = { 7 | locale: '', 8 | setLocale: noop 9 | }; 10 | 11 | export const GlobalLangeContext = createContext(LangDefault); 12 | -------------------------------------------------------------------------------- /web/front-end/src/components/global-lang/global-lang.tsx: -------------------------------------------------------------------------------- 1 | import { 2 | FC, useCallback, useState, useMemo 3 | } from 'react'; 4 | import { IntlProvider } from 'react-intl'; 5 | import { 6 | getLang, Language, setLang 7 | } from '@utils/utils'; 8 | import locales from '@/locales'; 9 | import { GlobalLangeContext } from './global-lang-context'; 10 | 11 | const GlobalLang: FC = ({ children }) => { 12 | const [locale, setLocale] = useState(getLang()); 13 | 14 | const setCurrentLocale = useCallback((lang: Language) => { 15 | setLocale(lang); 16 | setLang(lang); 17 | }, []); 18 | 19 | // 子孙组件通过context获取setLocale可以更改中英文 20 | const value = useMemo(() => ({ locale, setLocale: setCurrentLocale }), [locale, setCurrentLocale]); 21 | 22 | return ( 23 | 24 | 25 | {children} 26 | 27 | 28 | ); 29 | }; 30 | 31 | export default GlobalLang; 32 | -------------------------------------------------------------------------------- /web/front-end/src/components/global-lang/index.tsx: -------------------------------------------------------------------------------- 1 | export { default as GlobalLang } from './global-lang'; 2 | export { GlobalLangeContext } from './global-lang-context'; 3 | -------------------------------------------------------------------------------- /web/front-end/src/components/header/header.module.less: -------------------------------------------------------------------------------- 1 | .header { 2 | padding: 0 50px; 3 | height: 64px; 4 | display: flex; 5 | align-items: center; 6 | justify-content: flex-end; 7 | gap: 24px; 8 | 9 | .feedback { 10 | cursor: pointer; 11 | } 12 | 13 | .language { 14 | cursor: pointer; 15 | .chosen { 16 | font-weight: bold; 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /web/front-end/src/components/header/header.tsx: -------------------------------------------------------------------------------- 1 | import { GlobalLangeContext } from '@components/global-lang'; 2 | import { useContext } from 'react'; 3 | import { useLocale } from '@hooks/useLocale'; 4 | import styles from './header.module.less'; 5 | 6 | const Header = () => { 7 | const { locale, setLocale } = useContext(GlobalLangeContext); 8 | const locales = useLocale('home'); 9 | return ( 10 |
11 |
window.open('https://github.com/InternLM/HuixiangDou/issues')} 14 | > 15 | {locales.feedback} 16 |
17 |
18 | setLocale('zh-CN')} 20 | className={locale === 'zh-CN' && styles.chosen} 21 | > 22 | 中 23 | {' '} 24 | 25 | / 26 | setLocale('en-US')} 28 | className={locale === 'en-US' && styles.chosen} 29 | > 30 | {' '} 31 | EN 32 | 33 |
34 |
35 | ); 36 | }; 37 | 38 | export default Header; 39 | -------------------------------------------------------------------------------- /web/front-end/src/components/notification/emoji-wrapper.tsx: -------------------------------------------------------------------------------- 1 | import { FC, ReactNode } from 'react'; 2 | import styles from './notification.module.less'; 3 | 4 | interface EmojiWrapperProps { 5 | emoji?: string; 6 | children?: ReactNode; 7 | } 8 | 9 | const heart = 'https://oss.openmmlab.com/www/home/heart_3d.png'; 10 | const EmojiWrapper: FC = ({ emoji = heart, children }) => { 11 | return ( 12 |
13 | {children} 14 | 15 | 16 | 17 |
18 | ); 19 | }; 20 | 21 | export default EmojiWrapper; 22 | -------------------------------------------------------------------------------- /web/front-end/src/components/notification/notification.tsx: -------------------------------------------------------------------------------- 1 | import { FC, ReactNode } from 'react'; 2 | import { notification } from '@components/notification/use-notification'; 3 | import EmojiWrapper from '@components/notification/emoji-wrapper'; 4 | import { useLocale } from '@hooks/useLocale'; 5 | import styles from './notification.module.less'; 6 | 7 | export interface NotificationProps { 8 | title: string; 9 | content: string; 10 | notificationKey: string; 11 | children?: ReactNode; 12 | } 13 | 14 | const Notification: FC = ({ 15 | title, 16 | content, 17 | notificationKey, 18 | }) => { 19 | const locales = useLocale('components'); 20 | 21 | return ( 22 |
23 |
{title}
24 |
{content}
25 |
26 |
notification.unmountNotification(notificationKey)} 29 | > 30 | {locales.hide4ever} 31 |
32 | 33 |
{ 36 | window.open('https://github.com/InternLM/HuixiangDou/'); 37 | }} 38 | > 39 | {locales.goStar} 40 |
41 |
42 |
43 |
44 | ); 45 | }; 46 | 47 | export default Notification; 48 | -------------------------------------------------------------------------------- /web/front-end/src/components/notification/use-notification.tsx: -------------------------------------------------------------------------------- 1 | import Notification, { NotificationProps } from '@components/notification/notification'; 2 | import { useLocale } from '@hooks/useLocale'; 3 | import ComponentPortal from '@components/components-portal/components-portal'; 4 | 5 | const notificationWrapper = 'global-notification'; 6 | 7 | export const notification = { 8 | notificationContainer: null, 9 | 10 | showNotification(params: NotificationProps) { 11 | if (document.getElementById(notificationWrapper)) { 12 | document.body.removeChild(document.getElementById(notificationWrapper)); 13 | this.notificationContainer = null; 14 | } 15 | if (localStorage.getItem(params.notificationKey)) { 16 | return null; 17 | } 18 | this.notificationContainer = document.createElement('div'); 19 | this.notificationContainer.id = notificationWrapper; 20 | document.body.appendChild(this.notificationContainer); 21 | return ( 22 | 23 | 24 | 25 | ); 26 | }, 27 | unmountNotification(key) { 28 | if (this.notificationContainer) { 29 | localStorage.setItem(key, 'true'); 30 | document.body.removeChild(this.notificationContainer); 31 | this.notificationContainer = null; 32 | } 33 | }, 34 | }; 35 | const useNotification = () => { 36 | const locales = useLocale('components'); 37 | 38 | return notification.showNotification({ 39 | title: '', 40 | content: locales.notificationContent, 41 | notificationKey: '__HuiXiangDou__', 42 | }); 43 | }; 44 | 45 | export default useNotification; 46 | -------------------------------------------------------------------------------- /web/front-end/src/components/upload-item/index.tsx: -------------------------------------------------------------------------------- 1 | import UploadItem from './upload-item'; 2 | 3 | export * from './upload-item'; 4 | export default UploadItem; 5 | -------------------------------------------------------------------------------- /web/front-end/src/components/upload-item/upload-item.module.less: -------------------------------------------------------------------------------- 1 | .upload-item { 2 | display: flex; 3 | align-items: flex-start; 4 | gap: 4px; 5 | border-radius: 4px; 6 | padding: 2px 4px; 7 | margin-bottom: 4px; 8 | .name { 9 | max-width: 320px; 10 | text-overflow: ellipsis; 11 | white-space: nowrap; 12 | overflow: hidden; 13 | } 14 | .progress { 15 | width: 100%; 16 | margin-top: 4px; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /web/front-end/src/components/upload-item/upload-item.tsx: -------------------------------------------------------------------------------- 1 | import { FC } from 'react'; 2 | import { IconFont } from 'sea-lion-ui'; 3 | import { LoadingOutlined } from '@ant-design/icons'; 4 | import styles from './upload-item.module.less'; 5 | 6 | export const enum UploadStatus { 7 | init = 'init', 8 | done = 'done', 9 | uploading = 'uploading', 10 | error = 'error', 11 | removed = 'removed', 12 | } 13 | 14 | export interface UploadItemProps { 15 | uid: string; 16 | name: string; 17 | status: UploadStatus; 18 | progress: number; 19 | } 20 | 21 | const StatusColor = { 22 | init: 'lightgrey', 23 | done: 'green', 24 | uploading: 'blue', 25 | error: 'red', 26 | removed: 'darkgrey' 27 | }; 28 | 29 | const StatusIcon = { 30 | init: 'icon-DocOutlined', 31 
| done: 'icon-CheckCircleFilled', 32 | uploading: 'icon-HorizontalMoreOutlined', 33 | error: 'icon-CloseCircleFilled', 34 | removed: 'icon-DocOutlined' 35 | }; 36 | const UploadItem: FC = ({ 37 | uid, name, status, progress 38 | }) => { 39 | return ( 40 |
47 |
48 | {status === UploadStatus.uploading ? : ( 49 | 50 | )} 51 |
52 |
53 |
{name}
54 |
55 |
59 |
60 |
61 |
62 | ); 63 | }; 64 | 65 | export default UploadItem; 66 | -------------------------------------------------------------------------------- /web/front-end/src/components/upload/delete-btn.tsx: -------------------------------------------------------------------------------- 1 | import { IconFont, Modal } from 'sea-lion-ui'; 2 | import { useLocale } from '@hooks/useLocale'; 3 | import { useState } from 'react'; 4 | import styles from './upload.module.less'; 5 | 6 | const DeleteBtn = ({ onClick }) => { 7 | const locales = useLocale('components'); 8 | 9 | const [openModal, setOpenModal] = useState(false); 10 | 11 | const handleClick = () => { 12 | setOpenModal(true); 13 | }; 14 | 15 | const confirm = () => { 16 | setOpenModal(false); 17 | onClick(); 18 | }; 19 | 20 | const cancel = () => { 21 | setOpenModal(false); 22 | }; 23 | 24 | return ( 25 | <> 26 |
27 | 28 | {locales.deleteSelected} 29 |
30 | )} 35 | icon={} 36 | onClose={() => setOpenModal(false)} 37 | > 38 |
{locales.deleteDesc}
39 |
40 |
{locales.confirm}
41 |
{locales.cancel}
42 |
43 |
44 | 45 | 46 | ); 47 | }; 48 | 49 | export default DeleteBtn; 50 | -------------------------------------------------------------------------------- /web/front-end/src/components/upload/index.tsx: -------------------------------------------------------------------------------- 1 | import Upload from './upload'; 2 | 3 | export * from './upload'; 4 | export default Upload; 5 | -------------------------------------------------------------------------------- /web/front-end/src/config/auth.ts: -------------------------------------------------------------------------------- 1 | // 登录相关配置信息 2 | 3 | export const VITE_NODE = import.meta.env.VITE_NODE; 4 | 5 | // 开启单点登录开关 6 | export const openOSS = false; 7 | 8 | export const ClientIdMap = { 9 | development: '', 10 | staging: '', 11 | production: '' 12 | }; 13 | 14 | // 登录跳转链接 15 | export const LogURLMap = { 16 | development: '', 17 | staging: '', 18 | production: '' 19 | }; 20 | 21 | // 注意 Development环境的domain前面必须加 . 因为,本地开发环境和线上开发环境域名不同 22 | // 如果发生反复跳转,请在浏览器中查看后端返回的cookie的domain是否有问题 23 | export const TokenCookieDomainMap = { 24 | development: '', 25 | staging: '', 26 | production: '' 27 | }; 28 | 29 | export const clientId = ClientIdMap[VITE_NODE]; 30 | export const logURL = LogURLMap[VITE_NODE]; 31 | export const TokenCookieDomain = TokenCookieDomainMap[VITE_NODE]; 32 | 33 | // 针对权限更细化的配置信息 34 | 35 | // 需要权限验证的页面可以把对应的pathname放到这里 36 | export const AuthPages: string[] = [ 37 | '' 38 | ]; 39 | 40 | // 有些接口不需要token 41 | export const NoTokenApiPaths: string[] = [ 42 | '/account/oauth', 43 | '/api/v1/access/v1/login', 44 | '/api/v1/statistic/v1/total' 45 | ]; 46 | -------------------------------------------------------------------------------- /web/front-end/src/config/base-url.ts: -------------------------------------------------------------------------------- 1 | // 接口请求相关的配置信息 2 | 3 | // 各个环境的接口请求域名 4 | export const ApiBaseUrlMap = { 5 | development: '', 6 | staging: '', 7 | production: '' 8 | }; 9 | 10 | // 各个环境的接口前缀 11 | export const ApiPrefixMap = { 12 | mock: '', 13 | development: '', 14 | staging: '', 15 | production: '' 16 | }; 17 | 18 | export const Env = import.meta.env.VITE_NODE; 19 | 20 | export const BaseURL = ApiBaseUrlMap[Env]; 21 | 22 | export const ApiPrefix = ApiPrefixMap[Env]; 23 | -------------------------------------------------------------------------------- /web/front-end/src/config/index.ts: -------------------------------------------------------------------------------- 1 | export * from './auth'; 2 | export * from './base-url'; 3 | export * from './log'; 4 | -------------------------------------------------------------------------------- /web/front-end/src/config/log.ts: -------------------------------------------------------------------------------- 1 | // 日志相关配置 2 | 3 | const VITE_NODE = import.meta.env.VITE_NODE; 4 | 5 | export const openLog = false; 6 | 7 | export const MeasurementIdMap = { 8 | development: '', 9 | staging: '', 10 | production: '' 11 | }; 12 | 13 | export const MeasurementId = MeasurementIdMap[VITE_NODE]; 14 | -------------------------------------------------------------------------------- /web/front-end/src/hooks/useLocale.ts: -------------------------------------------------------------------------------- 1 | import { useContext, useState, useEffect } from 'react'; 2 | import { GlobalLangeContext } from '@components/global-lang'; 3 | import Locale from '@/locales'; 4 | 5 | export const useLocale = (propertyName: string) => { 6 | const [locales, setLocales] = useState({}); 7 | const { locale: lang } = 
useContext(GlobalLangeContext); 8 | 9 | useEffect(() => { 10 | if (lang && Locale[lang] && Locale[lang][propertyName]) { 11 | setLocales(Locale[lang][propertyName]); 12 | } 13 | }, [lang, propertyName]); 14 | 15 | return locales; 16 | }; 17 | -------------------------------------------------------------------------------- /web/front-end/src/interceptors/request.ts: -------------------------------------------------------------------------------- 1 | import { NoTokenApiPaths, openOSS } from '@config/auth'; 2 | import { getLang, Token } from '@utils/utils'; 3 | import { AxiosRequestHeaders } from 'axios'; 4 | 5 | // *Interceptor函数:主要用来在请求发出前处理config,config由axios的请求拦截器提供 6 | // *Interceptor函数运行规则:函数会依次从左到右执行,每个*Interceptor函数必须返回config,供下一个*Interceptor函数处理 7 | // 好处:代码结构更清晰,每个函数专注做自己的事情,拿到config处理后return,达到逻辑解耦的目的 8 | 9 | interface IAuth extends AxiosRequestHeaders{ 10 | Authorization?: string; 11 | } 12 | 13 | const validateAuthInterceptor = config => { 14 | const token = Token.get(); 15 | const headers: IAuth = { 16 | lang: getLang(), 17 | ...config.headers 18 | }; 19 | 20 | if ( 21 | !NoTokenApiPaths.find(p => (config.url || '').endsWith(p)) 22 | && openOSS 23 | ) { 24 | headers.Authorization = `Bearer ${token}`; 25 | } 26 | 27 | return { 28 | ...config, 29 | headers 30 | }; 31 | }; 32 | 33 | const customConfigInterceptor = config => { 34 | return ({ 35 | ...config, 36 | headers: { 37 | ...config.headers, 38 | 'Client-Type': 'app', 39 | type: 0 40 | } 41 | }); 42 | }; 43 | 44 | export const requestInterceptors = [validateAuthInterceptor, customConfigInterceptor]; 45 | -------------------------------------------------------------------------------- /web/front-end/src/layouts/header-container-layout/header-container-layout.module.less: -------------------------------------------------------------------------------- 1 | .wrapper { 2 | .header {} 3 | .body {} 4 | } -------------------------------------------------------------------------------- /web/front-end/src/layouts/header-container-layout/header-container-layout.tsx: -------------------------------------------------------------------------------- 1 | import Header from '@components/header/header'; 2 | import { Outlet } from 'react-router-dom'; 3 | import useNotification from '@components/notification/use-notification'; 4 | import styles from './header-container-layout.module.less'; 5 | 6 | const HeaderContainerLayout = () => { 7 | return ( 8 |
9 |
10 |
11 | 12 |
13 | {useNotification()} 14 |
15 | ); 16 | }; 17 | 18 | export default HeaderContainerLayout; 19 | -------------------------------------------------------------------------------- /web/front-end/src/locales/en-US.ts: -------------------------------------------------------------------------------- 1 | import home from '@locales/en-US/home'; 2 | import beanDetail from '@locales/en-US/bean-detail'; 3 | import components from '@locales/en-US/components'; 4 | import welcome from './en-US/welcome'; 5 | 6 | export default { 7 | ...welcome, 8 | ...home, 9 | ...beanDetail, 10 | ...components, 11 | }; 12 | -------------------------------------------------------------------------------- /web/front-end/src/locales/en-US/components.ts: -------------------------------------------------------------------------------- 1 | export default { 2 | components: { 3 | notificationContent: '🎉 HuixiangDou is open source now. If this helps you, please give it a star! 🌟 🥺', 4 | hide4ever: 'Hide forever', 5 | goStar: 'Star', 6 | fileSize: 'Single file size should not exceed 35MB', 7 | nameSize: 'File name is too long', 8 | fileCount: 'Up to 200 files can be uploaded at a time', 9 | pendingFiles: 'Uploading documents', 10 | confirmUpload: 'Upload', 11 | uploading: 'Uploading', 12 | uploadedFiles: 'Uploaded documents', 13 | uploadFailed: 'Failed', 14 | processing: 'Processing', 15 | total: 'Total', 16 | failed: 'Failed', 17 | searchDesc: 'Enter the document name to search', 18 | search: 'Search', 19 | selectAll: 'Select all', 20 | noSelected: 'No document selected', 21 | deleteSelected: 'Delete selected', 22 | deleteConfirm: 'Are you sure you want to delete the selected documents?', 23 | deleteDesc: 'The delete operation will rebuild the bean', 24 | confirm: 'Delete', 25 | cancel: 'Cancel', 26 | } 27 | }; 28 | -------------------------------------------------------------------------------- /web/front-end/src/locales/en-US/home.ts: -------------------------------------------------------------------------------- 1 | export default { 2 | home: { 3 | slogan: 'Knowledge Assistant, Zero-coding with Lark and WeChat.', 4 | beanName: 'Knowledge base name. 
Auto create if not exists', 5 | validateMsg: 'At least 8 characters required', 6 | createBean: 'Create Knowledge Base', 7 | beanPwd: 'Knowledge Base Password', 8 | create: 'Create', 9 | cancel: 'Cancel', 10 | go: 'Go', 11 | bean: 'Knowledge Base', 12 | activeBean: 'Active Base Monthly', 13 | WeChat: 'WeChat', 14 | feishu: 'Lark', 15 | users: 'Chat Count', 16 | uniqueUsers: 'Unique Chat', 17 | pwdError: 'Password Error', 18 | feedback: 'Feedback', 19 | welcome: 'Welcome, grateful', 20 | hello: 'Hi', 21 | hi: 'Hello', 22 | loading: 'Loading', 23 | } 24 | }; 25 | -------------------------------------------------------------------------------- /web/front-end/src/locales/en-US/welcome.ts: -------------------------------------------------------------------------------- 1 | export default { 2 | welcome: 'Welcome, grateful', 3 | hello: 'Hi', 4 | hi: 'Hello', 5 | loading: 'Loading' 6 | }; 7 | -------------------------------------------------------------------------------- /web/front-end/src/locales/index.ts: -------------------------------------------------------------------------------- 1 | import zhCN from './zh-CN'; 2 | import enUS from './en-US'; 3 | 4 | export default { 5 | 'zh-CN': zhCN, 6 | 'en-US': enUS 7 | }; 8 | -------------------------------------------------------------------------------- /web/front-end/src/locales/zh-CN.ts: -------------------------------------------------------------------------------- 1 | import home from '@locales/zh-CN/home'; 2 | import beanDetail from '@locales/zh-CN/bean-detail'; 3 | import components from '@locales/zh-CN/components'; 4 | import welcome from './zh-CN/welcome'; 5 | 6 | export default { 7 | ...welcome, 8 | ...home, 9 | ...beanDetail, 10 | ...components 11 | }; 12 | -------------------------------------------------------------------------------- /web/front-end/src/locales/zh-CN/components.ts: -------------------------------------------------------------------------------- 1 | export default { 2 | components: { 3 | notificationContent: `🎉HuixiangDou开源啦,快来给我们 star 吧! 
4 | 小时候,我想当开源人,朋友给我鼓励和我最爱的小星星🌟 🥺`, 5 | hide4ever: '不再显示', 6 | goStar: '前往鼓励', 7 | fileSize: '单个文件大小不能超过 35M', 8 | nameSize: '文件名太长', 9 | fileCount: '单次最多上传 200 个文件', 10 | pendingFiles: '待上传文档', 11 | confirmUpload: '确认上传', 12 | uploading: '上传中', 13 | uploadedFiles: '已上传文档', 14 | uploadFailed: '上传失败', 15 | processing: '处理中', 16 | total: '共计', 17 | failed: '失败', 18 | searchDesc: '输入文档名称进行搜索', 19 | search: '搜索', 20 | selectAll: '全选', 21 | noSelected: '您还未选中任何文档', 22 | deleteSelected: '删除', 23 | deleteConfirm: '确定删除选中的文档吗?', 24 | deleteDesc: '删除操作会重建知识库', 25 | confirm: '删除', 26 | cancel: '取消', 27 | } 28 | }; 29 | -------------------------------------------------------------------------------- /web/front-end/src/locales/zh-CN/home.ts: -------------------------------------------------------------------------------- 1 | export default { 2 | home: { 3 | slogan: '行业知识助手,零开发接入飞书个微群', 4 | beanName: '请输入知识库名称,不存在则自动创建。不少于 8 个字符', 5 | validateMsg: '知识库名称至少需要 8 个字符', 6 | createBean: '创建知识库', 7 | beanPwd: '知识库密码', 8 | create: '创建', 9 | cancel: '取消', 10 | go: '前往', 11 | bean: '知识库', 12 | activeBean: '月活知识库', 13 | WeChat: '微信', 14 | feishu: '飞书', 15 | users: '回答次数', 16 | uniqueUsers: '去重次数', 17 | pwdError: '密码错误', 18 | feedback: '问题反馈', 19 | welcome: 'Welcome, grateful', 20 | hello: 'Hi', 21 | hi: 'Hello', 22 | loading: 'Loading' 23 | } 24 | }; 25 | -------------------------------------------------------------------------------- /web/front-end/src/locales/zh-CN/welcome.ts: -------------------------------------------------------------------------------- 1 | export default { 2 | welcome: '欢迎,感恩', 3 | hello: '嗨', 4 | hi: '你好', 5 | loading: 'Loading', 6 | }; 7 | -------------------------------------------------------------------------------- /web/front-end/src/main.tsx: -------------------------------------------------------------------------------- 1 | import * as React from 'react'; 2 | import * as ReactDOM from 'react-dom/client'; 3 | import Mlog from '@utils/mlog'; 4 | import '@config/change-page-gray'; 5 | import App from './app'; 6 | 7 | Mlog.init(); 8 | 9 | ReactDOM.createRoot(document.getElementById('root') as HTMLElement).render( 10 | 11 | 12 | 13 | ); 14 | -------------------------------------------------------------------------------- /web/front-end/src/pages/bean-detail/bean-detail.module.less: -------------------------------------------------------------------------------- 1 | .btn { 2 | padding: 8px 12px; 3 | background: #c7eaba; 4 | color: #286500; 5 | border-radius: 6px; 6 | font-size: 14px; 7 | line-height: 16px; 8 | display: inline-flex; 9 | align-items: center; 10 | gap: 4px; 11 | cursor: pointer; 12 | &[aria-disabled="true"] { 13 | background: #dcdcdc; 14 | color: #9d9d9d; 15 | cursor: not-allowed; 16 | } 17 | } 18 | .bean-detail { 19 | min-height: 700px; 20 | min-width: 860px; 21 | margin: auto; 22 | text-align: center; 23 | position: absolute; 24 | top: 200px; 25 | left: 50%; 26 | transform: translateX(-50%); 27 | .logo { 28 | width: 800px; 29 | margin: 0 auto 72px; 30 | img { 31 | width: 100%; 32 | } 33 | } 34 | .bean-state { 35 | background-color: #e3f9dd; 36 | border-radius: 8px; 37 | padding: 4px 8px; 38 | margin-left: 4px; 39 | } 40 | .fail-state { 41 | background-color: #f1bcbc; 42 | } 43 | .name-wrapper { 44 | display: flex; 45 | align-items: center; 46 | gap: 4px; 47 | } 48 | .statistics-wrapper { 49 | display: grid; 50 | grid-template-columns: repeat(5, 1fr); 51 | grid-gap: 20px; 52 | margin: 24px auto; 53 | text-align: center; 54 | } 55 | .statistics-item { 56 | text-align: left; 
57 | .title-img { 58 | height: 16px; 59 | } 60 | .statistics-item-title { 61 | color: #9D9D9D; 62 | font-size: 16px; 63 | margin-bottom: 12px; 64 | line-height: 20px; 65 | display: flex; 66 | align-items: center; 67 | gap: 4px; 68 | } 69 | } 70 | .refresh { 71 | margin-left: auto; 72 | cursor: pointer; 73 | color: #286500; 74 | } 75 | .logout { 76 | //margin-left: auto; 77 | cursor: pointer; 78 | color: #9D9D9D; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /web/front-end/src/pages/bean-detail/components/chat/index.tsx: -------------------------------------------------------------------------------- 1 | import Chat from './chat'; 2 | 3 | export * from './chat'; 4 | export default Chat; 5 | -------------------------------------------------------------------------------- /web/front-end/src/pages/bean-detail/components/example/example.module.less: -------------------------------------------------------------------------------- 1 | .example { 2 | 3 | } 4 | .editor { 5 | margin-bottom: 12px; 6 | :global { 7 | .seal-input-container .seal-input-wrapper:focus-within { 8 | border: 1px solid #59a041; 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /web/front-end/src/pages/bean-detail/components/example/index.tsx: -------------------------------------------------------------------------------- 1 | import Example from './example'; 2 | 3 | export * from './example'; 4 | export default Example; 5 | -------------------------------------------------------------------------------- /web/front-end/src/pages/bean-detail/components/import-docs/import-docs.module.less: -------------------------------------------------------------------------------- 1 | .import-docs { 2 | 3 | } 4 | -------------------------------------------------------------------------------- /web/front-end/src/pages/bean-detail/components/import-docs/import-docs.tsx: -------------------------------------------------------------------------------- 1 | import { 2 | FC, ReactNode, useState 3 | } from 'react'; 4 | import { IconFont, Modal } from 'sea-lion-ui'; 5 | import Button from '@components/button/button'; 6 | import { useLocale } from '@hooks/useLocale'; 7 | import Upload from '@components/upload'; 8 | import { FileState } from '@services/home'; 9 | import styles from './import-docs.module.less'; 10 | 11 | export interface ImportDocsProps { 12 | filesState: FileState[]; 13 | refresh: () => void; 14 | docs?: string[]; 15 | children?: ReactNode; 16 | } 17 | 18 | const ImportDocs: FC = ({ refresh, docs, filesState }) => { 19 | const locales = useLocale('beanDetail'); 20 | const [openModal, setOpenModal] = useState(false); 21 | 22 | const afterUpload = () => { 23 | refresh(); 24 | }; 25 | const closeModal = () => { 26 | setOpenModal(false); 27 | refresh(); 28 | }; 29 | return ( 30 |
31 | 35 | )} 40 | onClose={closeModal} 41 | > 42 | 48 | 49 |
{locales.upload}
50 |
{locales.supportFiles}
51 |
52 |
53 |
54 | ); 55 | }; 56 | 57 | export default ImportDocs; 58 | -------------------------------------------------------------------------------- /web/front-end/src/pages/bean-detail/components/import-docs/index.tsx: -------------------------------------------------------------------------------- 1 | import ImportDocs from './import-docs'; 2 | 3 | export * from './import-docs'; 4 | export default ImportDocs; 5 | -------------------------------------------------------------------------------- /web/front-end/src/pages/bean-detail/components/integrate-feishu/index.tsx: -------------------------------------------------------------------------------- 1 | import IntegrateFeishu from './integrate-feishu'; 2 | 3 | export * from './integrate-feishu'; 4 | export default IntegrateFeishu; 5 | -------------------------------------------------------------------------------- /web/front-end/src/pages/bean-detail/components/integrate-feishu/integrate-feishu.module.less: -------------------------------------------------------------------------------- 1 | .integrate-feishu { 2 | .webhook-url { 3 | font-size: 14px; 4 | line-height: 16px; 5 | padding: 8px 0; 6 | cursor: pointer; 7 | white-space: nowrap; 8 | overflow: hidden; 9 | text-overflow: ellipsis; 10 | max-width: 200px; 11 | color: #9D9D9D; 12 | } 13 | } 14 | .eventurl { 15 | font-weight: bold; 16 | } 17 | 18 | .title { 19 | font-weight: bold; 20 | margin-top: 12px; 21 | } 22 | 23 | .cancel { 24 | color: #9D9D9D; 25 | background: #F4F5F9; 26 | cursor: pointer; 27 | &:hover { 28 | background: #EBECF0; 29 | } 30 | } 31 | 32 | .flex { 33 | display: flex; 34 | align-items: center; 35 | gap: 4px; 36 | span { 37 | word-break: keep-all; 38 | white-space: nowrap; 39 | color: rgba(0, 0, 0, 0.88); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /web/front-end/src/pages/bean-detail/components/integrate-wechat/integrate-wechat.module.less: -------------------------------------------------------------------------------- 1 | .item-title { 2 | font-weight: bold; 3 | margin-top: 8px; 4 | } 5 | .item-content { 6 | margin-bottom: 24px; 7 | color: #047600; 8 | } -------------------------------------------------------------------------------- /web/front-end/src/pages/bean-detail/components/integrate-wechat/integrate-wechat.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from 'react'; 2 | import { IconFont, Modal } from 'sea-lion-ui'; 3 | import Button from '@components/button/button'; 4 | import { useLocale } from '@hooks/useLocale'; 5 | import CopyCode from '@components/copy-code/copy-code'; 6 | import styles from './integrate-wechat.module.less'; 7 | 8 | export interface IntegrateWechatProps { 9 | messageUrl: string; 10 | } 11 | 12 | const IntegrateWechat = (props: IntegrateWechatProps) => { 13 | const locales = useLocale('beanDetail'); 14 | 15 | const [openModal, setOpenModal] = useState(false); 16 | 17 | const handleOpen = () => { 18 | setOpenModal(true); 19 | }; 20 | return ( 21 |
22 | 26 | )} 30 | onClose={() => setOpenModal(false)} 31 | > 32 |
33 | {locales.WeChatCallback} 34 |
35 |
36 | 37 |
38 |
39 | {locales.wechatGuidance} 40 |
41 | 47 |
48 |
49 | ); 50 | }; 51 | 52 | export default IntegrateWechat; 53 | -------------------------------------------------------------------------------- /web/front-end/src/pages/bean-detail/components/toggle-search/index.tsx: -------------------------------------------------------------------------------- 1 | import ToggleSearch from './toggle-search'; 2 | 3 | export * from './toggle-search'; 4 | export default ToggleSearch; 5 | -------------------------------------------------------------------------------- /web/front-end/src/pages/bean-detail/components/toggle-search/toggle-search.module.less: -------------------------------------------------------------------------------- 1 | .toggle-search { 2 | .token { 3 | font-size: 14px; 4 | line-height: 16px; 5 | padding: 8px 0; 6 | cursor: pointer; 7 | white-space: nowrap; 8 | overflow: hidden; 9 | text-overflow: ellipsis; 10 | max-width: 200px; 11 | color: #9D9D9D; 12 | } 13 | } 14 | 15 | .input-wrapper { 16 | display: flex; 17 | align-items: center; 18 | gap: 8px; 19 | padding: 12px 0; 20 | :global { 21 | .seal-input-group .seal-input-inner-container:focus-within { 22 | outline: 1px solid #59a041; 23 | } 24 | } 25 | } 26 | 27 | a { 28 | color: #286500 29 | } 30 | -------------------------------------------------------------------------------- /web/front-end/src/routes/index.tsx: -------------------------------------------------------------------------------- 1 | // router component 2 | import { 3 | BrowserRouter, Routes, Route, Navigate 4 | } from 'react-router-dom'; 5 | import HeaderContainerLayout from '@layouts/header-container-layout/header-container-layout'; 6 | import Home from '@pages/home/home'; 7 | import BeanDetail from '@pages/bean-detail/bean-detail'; 8 | 9 | const RouterRoot = () => { 10 | return ( 11 | // react-router-dom v6 123 12 | // https://reactrouter.com/docs/en/v6/getting-started/overview 13 | 14 | 15 | }> 16 | } 19 | /> 20 | } /> 21 | } /> 22 | 23 | 27 |

There is nothing here!

28 | 29 | )} 30 | /> 31 |
32 |
33 | ); 34 | }; 35 | 36 | export default RouterRoot; 37 | -------------------------------------------------------------------------------- /web/front-end/src/services/user.ts: -------------------------------------------------------------------------------- 1 | import { request } from '@utils/ajax'; 2 | 3 | const userServicePrefix = '/gw/user-service'; 4 | const uaaServicePrefix = '/gw/uaa-be'; 5 | 6 | export interface fetchCurrentUserReqDto { 7 | avatar?: string; 8 | email?: string; 9 | expiration?: string; 10 | roleIds?: string[]; 11 | nickname?: string; 12 | jwt?: string; 13 | ssoUid: string; 14 | username?: string; 15 | wechat?: string; 16 | wechatName?: string; 17 | [key: string]: any; 18 | } 19 | 20 | // Fetch the current user's info 21 | export async function fetchCurrentUser( 22 | token: string, 23 | ) { 24 | return request('/api/v1/login/getUserInfo', { 25 | method: 'POST', 26 | headers: { 27 | Authorization: `Bearer ${token}` 28 | }, 29 | }, uaaServicePrefix); 30 | } 31 | 32 | export async function logout() { 33 | return request('/api/v1/logout/all', { 34 | method: 'POST', 35 | meta: { 36 | isAllResponseBody: true 37 | }, 38 | }, uaaServicePrefix); 39 | } 40 | 41 | export interface fetchOauthCodeReqDto { 42 | token: string; 43 | } 44 | 45 | // After SSO third-party login verification, fetch the user info 46 | export const fetchOauthCode = (code: string | string[], redirect: string) => { 47 | return request('/api/v1/account/oauth', { 48 | method: 'POST', 49 | data: { 50 | code, 51 | redirect 52 | } 53 | }, userServicePrefix); 54 | }; 55 | -------------------------------------------------------------------------------- /web/front-end/src/styles/index.less: -------------------------------------------------------------------------------- 1 | // @import './normalize.css'; normalize.css is loaded via CDN 2 | @import "mixins.less"; 3 | -------------------------------------------------------------------------------- /web/front-end/src/styles/mixins.less: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/src/styles/mixins.less -------------------------------------------------------------------------------- /web/front-end/src/styles/variables.less: -------------------------------------------------------------------------------- 1 | @red: red; 2 | @black: #000; 3 | @white: #fff; 4 | @border-color: #EBECF0; 5 | 6 | @main-content-width: 1440px; 7 | @main-content-hoz-padding: 120px; 8 | @x-lab-header-height: 65px; 9 | 10 | @border-lg: 1px solid @black; 11 | @input-background-color: #f4f5f9; 12 | @input-border-color: #D7D8DD; 13 | @select-arrow-color: #464a53; 14 | 15 | @border: 1px solid @border-color; 16 | @form-item-bg: #F4F5F9; 17 | @text-line-height: 21px; 18 | @form-input-bg: #F9F9F9; 19 | 20 | -------------------------------------------------------------------------------- /web/front-end/src/types.d.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-undef */ 2 | /* eslint-disable no-unused-vars */ 3 | declare module '*.css'; 4 | declare module '*.less'; 5 | declare module '*.png'; 6 | declare module '*.jpg'; 7 | declare module '*.jpeg'; 8 | declare module '*.svg' { 9 | export function ReactComponent( 10 | props: React.SVGProps, 11 | ): React.ReactElement; 12 | const url: string; 13 | export default url; 14 | } 15 | -------------------------------------------------------------------------------- /web/front-end/src/vite-env.d.ts: 
-------------------------------------------------------------------------------- 1 | /// 2 | 3 | // declare Google Analytics gtag.js 4 | declare interface Window {gtag: any; dataBuried: any; sealionJSONPCallback: any; } 5 | 6 | interface ImportMetaEnv { 7 | readonly VITE_NODE: string 8 | // 更多环境变量... 9 | } 10 | 11 | interface ImportMeta { 12 | readonly env: ImportMetaEnv 13 | } 14 | 15 | -------------------------------------------------------------------------------- /web/front-end/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "useDefineForClassFields": true, 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "module": "ESNext", 7 | "skipLibCheck": true, 8 | 9 | /* Bundler mode */ 10 | "moduleResolution": "node", 11 | // "allowImportingTsExtensions": true, 12 | "allowSyntheticDefaultImports":true, 13 | "resolveJsonModule": true, 14 | "isolatedModules": true, 15 | "noEmit": true, 16 | "jsx": "react-jsx", 17 | "baseUrl": ".", 18 | "paths": { 19 | "@/*": [ 20 | "src/*" 21 | ], 22 | "@components/*": [ 23 | "src/components/*" 24 | ], 25 | "@layouts/*": [ 26 | "src/layouts/*" 27 | ], 28 | "@assets/*": [ 29 | "src/assets/*" 30 | ], 31 | "@pages/*": [ 32 | "src/pages/*" 33 | ], 34 | "@services/*": [ 35 | "src/services/*" 36 | ], 37 | "@utils/*": [ 38 | "src/utils/*" 39 | ], 40 | "@styles/*": [ 41 | "src/styles/*" 42 | ], 43 | "@routes/*": [ 44 | "src/routes/*" 45 | ], 46 | "@config/*": [ 47 | "src/config/*" 48 | ], 49 | "@locales/*": [ 50 | "src/locales/*" 51 | ], 52 | "@interceptors/*": [ 53 | "src/interceptors/*" 54 | ], 55 | "@hooks/*": [ 56 | "src/hooks/*" 57 | ], 58 | "@constants/*": [ 59 | "src/constants/*" 60 | ] 61 | }, 62 | "allowJs": true, 63 | "outDir": "./dist", 64 | }, 65 | "include": [ 66 | "src/**/*", 67 | "src/**/*.ts", 68 | "src/**/*.tsx", 69 | "src/**/*.vue", 70 | "tests/**/*.ts", 71 | "tests/**/*.tsx", 72 | "src/types.d.ts" 73 | ], 74 | "references": [{ "path": "./tsconfig.node.json" }] 75 | } 76 | -------------------------------------------------------------------------------- /web/front-end/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "composite": true, 4 | "skipLibCheck": true, 5 | "module": "ESNext", 6 | "moduleResolution": "bundler", 7 | "allowSyntheticDefaultImports": true 8 | }, 9 | "include": ["vite.config.ts", "scripts/*"] 10 | } 11 | -------------------------------------------------------------------------------- /web/middleware/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/middleware/__init__.py -------------------------------------------------------------------------------- /web/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/model/__init__.py -------------------------------------------------------------------------------- /web/model/access.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class LoginBody(BaseModel): 5 | name: str 6 | password: str 7 | 8 | 9 | class AccessInfo(BaseModel): 10 | hashpass: str 11 | featureStoreId: str 12 | 
-------------------------------------------------------------------------------- /web/model/base.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class BaseBody(BaseModel): 7 | msg: str = Field(default='ok') 8 | msgCode: str = Field(default='10000') 9 | data: object = None 10 | 11 | 12 | class Image(Enum): 13 | INVALID = 'invalid' 14 | JPG = 'jpeg' 15 | PNG = 'png' 16 | BMP = 'bmp' 17 | 18 | 19 | def standard_error_response(error: dict, data=None) -> BaseBody: 20 | if not data: 21 | data = {} 22 | return BaseBody(msg=error.get('msg'), msgCode=error.get('code'), data=data) 23 | -------------------------------------------------------------------------------- /web/model/chat.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List, Optional 3 | 4 | from pydantic import BaseModel, RootModel 5 | 6 | from web.model.huixiangdou import ChatResponse, HxdTaskChatHistory 7 | 8 | 9 | class ChatRequestBody(BaseModel): 10 | content: Optional[str] = '' 11 | images: Optional[List[str]] = [] 12 | history: Optional[List[HxdTaskChatHistory]] = [] 13 | 14 | 15 | class ChatOnlineResponseBody(BaseModel): 16 | queryId: str 17 | 18 | 19 | class ChatType(Enum): 20 | LARK = 0 21 | WECHAT = 1 22 | ONLINE = 2 23 | 24 | 25 | class ChatQueryInfo(BaseModel): 26 | featureStoreId: str 27 | queryId: str 28 | type: Optional[ChatType] = ChatType.ONLINE 29 | request: ChatRequestBody 30 | response: Optional[ChatResponse] = None 31 | detail: Optional[object] = {} 32 | 33 | 34 | class ChatCaseType(Enum): 35 | GOOD_CASE = 'good' 36 | BAD_CASE = 'bad' 37 | 38 | 39 | class ChatCaseFeedbackBody(BaseModel): 40 | queryId: str 41 | type: ChatCaseType 42 | 43 | 44 | class LarkChatDetail(BaseModel): 45 | appId: Optional[str] = '' 46 | appSecret: Optional[str] = '' 47 | messageId: Optional[str] = '' 48 | 49 | 50 | class WechatType(Enum): 51 | TEXT = 'text' 52 | Image = 'image' 53 | Poll = 'poll' 54 | 55 | 56 | class WechatQuery(BaseModel): 57 | type: WechatType 58 | content: Optional[str] = '' 59 | 60 | 61 | class WechatRequest(BaseModel): 62 | query_id: Optional[str] = '' 63 | groupname: Optional[str] = '' 64 | username: Optional[str] = '' 65 | query: Optional[WechatQuery] = {} 66 | 67 | 68 | class WechatResponse(RootModel): 69 | root: Optional[object] = [] 70 | 71 | 72 | class WechatPollItem(BaseModel): 73 | req: WechatRequest 74 | rsp: ChatResponse 75 | -------------------------------------------------------------------------------- /web/model/huixiangdou.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List, Optional 3 | 4 | from pydantic import BaseModel 5 | 6 | from web.model.qalib import FilesState 7 | 8 | 9 | class HxdToken(BaseModel): 10 | exp: int 11 | iat: float 12 | jti: str 13 | qa_name: str 14 | 15 | 16 | class HxdTaskChatHistory(BaseModel): 17 | sender: int 18 | content: str 19 | 20 | 21 | class HxdTaskPayload(BaseModel): 22 | name: Optional[str] = None 23 | feature_store_id: Optional[str] = None 24 | file_list: Optional[List[str]] = [] 25 | file_abs_base: Optional[str] = None 26 | positive: Optional[List[str]] = [] 27 | negative: Optional[List[str]] = [] 28 | content: Optional[str] = None 29 | images: Optional[List[str]] = [] 30 | history: Optional[List[HxdTaskChatHistory]] = [] 31 | web_search_token: Optional[str] = None 32 | query_id: 
Optional[str] = '' 33 | 34 | 35 | class HxdTaskType(Enum): 36 | ADD_DOC = 'add_doc' 37 | UPDATE_PIPELINE = 'update_pipeline' 38 | UPDATE_SAMPLE = 'update_sample' 39 | CHAT = 'chat' 40 | 41 | 42 | class HxdTask(BaseModel): 43 | type: HxdTaskType 44 | payload: HxdTaskPayload 45 | 46 | 47 | class HxdTaskResponse(BaseModel): 48 | feature_store_id: Optional[str] = None 49 | code: Optional[int] = None 50 | status: Optional[str] = None 51 | type: Optional[str] = None 52 | files_state: Optional[List[FilesState]] = None 53 | 54 | 55 | class ChatResponse(BaseModel): 56 | code: Optional[int] = -1 57 | state: Optional[str] = '' 58 | text: Optional[str] = '' 59 | references: Optional[List[str]] = [] 60 | 61 | 62 | class HxdChatResponse(BaseModel): 63 | feature_store_id: str 64 | query_id: str 65 | response: ChatResponse 66 | -------------------------------------------------------------------------------- /web/model/integrate.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class IntegrateLarkBody(BaseModel): 7 | appId: str 8 | appSecret: str 9 | 10 | 11 | class IntegrateWebSearchBody(BaseModel): 12 | webSearchToken: str 13 | vendor: Optional[str] = '' 14 | -------------------------------------------------------------------------------- /web/model/qalib.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class Lark(BaseModel): 7 | appId: Optional[str] = '' 8 | appSecret: Optional[str] = '' 9 | encryptKey: str 10 | verificationToken: str 11 | eventUrl: str 12 | 13 | 14 | class Wechat(BaseModel): 15 | onMessageUrl: str 16 | 17 | 18 | class WebSearch(BaseModel): 19 | token: str 20 | 21 | 22 | class FilesState(BaseModel): 23 | file: str 24 | status: bool 25 | desc: str 26 | 27 | 28 | class QalibInfo(BaseModel): 29 | featureStoreId: Optional[str] = None 30 | name: Optional[str] = None 31 | docs: Optional[List[str]] = [] 32 | docBase: Optional[str] = None 33 | status: Optional[int] = None 34 | status_desc: Optional[str] = None 35 | suffix: Optional[str] = None 36 | lark: Optional[Lark] = None 37 | wechat: Optional[Wechat] = None 38 | webSearch: Optional[WebSearch] = None 39 | filesState: Optional[List[FilesState]] = None 40 | 41 | 42 | class QalibPositiveNegative(BaseModel): 43 | positives: Optional[List] = None 44 | negatives: Optional[List] = None 45 | 46 | 47 | class QalibDeleteDoc(BaseModel): 48 | filenames: List[str] 49 | 50 | 51 | class QalibSample(QalibPositiveNegative): 52 | name: str 53 | featureStoreId: str 54 | confirmed: Optional[bool] = False 55 | 56 | 57 | class Pipeline(BaseModel): 58 | webSearchToken: str 59 | featureStoreId: str 60 | confirmed: bool 61 | success: bool 62 | code: int 63 | status: str 64 | 65 | 66 | class AddDocError(BaseModel): 67 | fileName: Optional[str] 68 | reason: Optional[str] 69 | 70 | 71 | class AddDocsRes(BaseModel): 72 | docBase: Optional[str] = '' 73 | docs: Optional[List[str]] = [] 74 | errors: Optional[List[AddDocError]] = [] 75 | -------------------------------------------------------------------------------- /web/model/statistic.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class StatisticTotal(BaseModel): 7 | qalibTotal: Optional[int] = None 8 | lastMonthUsed: Optional[int] = None 9 | wechatTotal: 
Optional[int] = None 10 | feishuTotal: Optional[int] = None 11 | servedTotal: Optional[int] = None 12 | realServedTotal: Optional[int] = None 13 | -------------------------------------------------------------------------------- /web/mq/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/mq/__init__.py -------------------------------------------------------------------------------- /web/mq/hxd_task.py: -------------------------------------------------------------------------------- 1 | import web.constant.biz_constant as biz_const 2 | from web.model.huixiangdou import HxdTask, HxdTaskType 3 | from web.orm.redis import r 4 | from web.service.cache import ChatCache 5 | from web.util.log import log 6 | 7 | logger = log(__name__) 8 | 9 | 10 | class HuixiangDouTask: 11 | 12 | def __init__(self): 13 | pass 14 | 15 | def updateTask(self, task: HxdTask) -> bool: 16 | """update task into redis. 17 | 18 | :param task: HxdTask 19 | :return: bool: True or False 20 | """ 21 | if not task: 22 | logger.error("HuixiangDou's task is empty, update task aborted.") 23 | return False 24 | 25 | ChatCache.mark_monthly_active(task.payload.feature_store_id) 26 | if task.type == HxdTaskType.CHAT: 27 | ChatCache.add_inference_number() 28 | 29 | try: 30 | r.rpush(biz_const.RDS_KEY_HXD_TASK, task.model_dump_json()) 31 | except Exception as e: 32 | logger.error(f'{e}') 33 | return False 34 | return True 35 | -------------------------------------------------------------------------------- /web/orm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/orm/__init__.py -------------------------------------------------------------------------------- /web/orm/redis.py: -------------------------------------------------------------------------------- 1 | import redis 2 | 3 | from web.config.env import HuixiangDouEnv 4 | from web.util.log import log 5 | 6 | logger = log(__name__) 7 | 8 | logger.info('connecting to redis') 9 | host = HuixiangDouEnv.get_redis_host() 10 | password = HuixiangDouEnv.get_redis_password() 11 | port = HuixiangDouEnv.get_redis_port() 12 | db = HuixiangDouEnv.get_redis_db() 13 | pool = redis.ConnectionPool(host=host, port=port, db=db, password=password) 14 | r = redis.Redis(connection_pool=pool) 15 | try: 16 | r_res = r.ping() 17 | if not r_res: 18 | logger.error(f'Failed connected to redis, exit with code 1') 19 | exit(1) 20 | except Exception as e: 21 | logger.error(f'Failed connected to redis, error={e}') 22 | exit(2) 23 | logger.info('connected to redis') 24 | -------------------------------------------------------------------------------- /web/proxy/logs/work.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/proxy/logs/work.txt -------------------------------------------------------------------------------- /web/requirements.txt: -------------------------------------------------------------------------------- 1 | apscheduler==3.10.4 2 | fastapi==0.103.0 3 | flask==3.0.2 4 | lark-oapi==1.2.1 5 | passlib==1.7.4 6 | pydantic==2.4.2 7 | PyJWT==2.8.0 8 | python-multipart==0.0.9 9 | redis==4.5.5 10 | starlette==0.27.0 11 | tqdm==4.65.0 12 | uvicorn==0.27.0 
-------------------------------------------------------------------------------- /web/scheduler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/scheduler/__init__.py -------------------------------------------------------------------------------- /web/service/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/service/__init__.py -------------------------------------------------------------------------------- /web/service/message.py: -------------------------------------------------------------------------------- 1 | from fastapi import Request, Response 2 | 3 | from web.model.base import BaseBody 4 | from web.model.chat import WechatRequest 5 | from web.service.agent import LarkAgent, WechatAgent 6 | 7 | 8 | class MessageService: 9 | 10 | def __init__(self, request: Request, response: Response): 11 | self.request = request 12 | self.response = response 13 | 14 | async def on_lark_message(self): 15 | req = await LarkAgent.parse_req(self.request) 16 | rsp = LarkAgent.get_event_handler().do(req) 17 | return LarkAgent.parse_rsp(rsp) 18 | 19 | async def on_wechat_message(self, body: WechatRequest, suffix: str): 20 | rsp = WechatAgent.action(body, suffix) 21 | if isinstance(rsp, BaseBody): 22 | return rsp 23 | return BaseBody(data=rsp) 24 | -------------------------------------------------------------------------------- /web/service/statistic.py: -------------------------------------------------------------------------------- 1 | from fastapi import Request, Response 2 | 3 | import web.constant.biz_constant as biz_const 4 | from web.model.base import BaseBody 5 | from web.model.chat import ChatType 6 | from web.model.statistic import StatisticTotal 7 | from web.orm.redis import r 8 | from web.service.cache import ChatCache 9 | from web.util.log import log 10 | 11 | logger = log(__name__) 12 | 13 | 14 | class StatisticService: 15 | 16 | def __init__(self, request: Request, response: Response): 17 | self.request = request 18 | self.response = response 19 | 20 | async def info_statistic(self): 21 | qalib_total = r.hlen(biz_const.RDS_KEY_QALIB_INFO) 22 | monthly_active = ChatCache.get_monthly_active() 23 | lark_used = ChatCache.hlen_agent_used(ChatType.LARK) 24 | wechat_used = ChatCache.hlen_agent_used(ChatType.WECHAT) 25 | total_inference = ChatCache.get_inference_number() 26 | unique_user = ChatCache.get_unique_inference_user_number() 27 | 28 | data = StatisticTotal(qalibTotal=qalib_total, 29 | lastMonthUsed=monthly_active, 30 | wechatTotal=wechat_used, 31 | feishuTotal=lark_used, 32 | servedTotal=total_inference, 33 | realServedTotal=unique_user) 34 | return BaseBody(data=data) 35 | -------------------------------------------------------------------------------- /web/tools/README.md: -------------------------------------------------------------------------------- 1 | # **SFT tools have moved to [sft directory](../../sft/)** 2 | 3 | # Devops tools 4 | 5 | - dump_redis_query.py # for web version, dump all question from redis to `query.jsonl` 6 | - update_fs_max_len.py # for web version, update all users' max text length config of remote LLM 7 | - get_puyu_model_list.py # for inner API, get all puyu API model list 8 | -------------------------------------------------------------------------------- 
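The scripts listed above are configured entirely through environment variables. A minimal invocation sketch is shown below; the variable names (REDIS_HOST, REDIS_PORT, REDIS_PASSWORD, TOKEN) come from the scripts themselves, while the concrete values and the repo-relative paths are placeholder assumptions, not project defaults.

```python
# Minimal usage sketch for the devops tools above (all values are placeholders).
# dump_redis_query.py reads REDIS_HOST / REDIS_PORT / REDIS_PASSWORD from the
# environment and writes query.jsonl into the current working directory.
import os
import subprocess

env = dict(os.environ,
           REDIS_HOST='127.0.0.1',      # assumption: a locally reachable Redis
           REDIS_PORT='6379',           # optional; the script falls back to 6379
           REDIS_PASSWORD='change-me')  # required; the script raises if unset
subprocess.run(['python', 'web/tools/dump_redis_query.py'], env=env, check=True)

# get_puyu_model_list.py expects the API token in the TOKEN environment variable.
subprocess.run(['python', 'web/tools/get_puyu_model_list.py'],
               env=dict(os.environ, TOKEN='your-api-token'), check=True)
```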
/web/tools/dump_redis_query.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from loguru import logger 5 | from redis import Redis 6 | 7 | 8 | def redis_host(): 9 | host = os.getenv('REDIS_HOST') 10 | if host is None or len(host) < 1: 11 | raise Exception('REDIS_HOST not config') 12 | return host 13 | 14 | 15 | def redis_port(): 16 | port = os.getenv('REDIS_PORT') 17 | if port is None: 18 | logger.debug('REDIS_PORT not set, try 6379') 19 | port = 6379 20 | return port 21 | 22 | 23 | def redis_passwd(): 24 | passwd = os.getenv('REDIS_PASSWORD') 25 | if passwd is None or len(passwd) < 1: 26 | raise Exception('REDIS_PASSWORD not config') 27 | return passwd 28 | 29 | 30 | def feature_store_base_dir(): 31 | return 'feature_stores' 32 | 33 | 34 | db = Redis(host=redis_host(), 35 | port=redis_port(), 36 | password=redis_passwd(), 37 | charset='utf-8', 38 | decode_responses=True) 39 | keys = db.keys('HuixiangDou:query:*') 40 | 41 | with open('query.jsonl', 'w') as f: 42 | for key in keys: 43 | value = db.hgetall(key) 44 | f.write(json.dumps(value, ensure_ascii=False)) 45 | f.write('\n') 46 | -------------------------------------------------------------------------------- /web/tools/get_puyu_model_list.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import requests 5 | 6 | token = os.getenv('TOKEN') 7 | 8 | url = 'https://puyu.openxlab.org.cn/puyu/api/v1/models' 9 | header = {'Content-Type': 'application/json', 'Authorization': token} 10 | data = {} 11 | 12 | res = requests.get(url, headers=header, data=json.dumps(data)) 13 | print(res.status_code) 14 | print(res.json()) 15 | print(res.json()['data']) 16 | -------------------------------------------------------------------------------- /web/tools/update_fs_max_len.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytoml 4 | 5 | 6 | def read_config_ini_files(directory): 7 | # Walk the given directory 8 | for root, dirs, files in os.walk(directory): 9 | for file in files: 10 | # Only handle config.ini files 11 | if file == 'config.ini': 12 | # Build the full file path 13 | file_path = os.path.join(root, file) 14 | try: 15 | # Read and parse the config.ini file 16 | with open(file_path, 'r', encoding='utf-8') as f: 17 | config = pytoml.load(f) 18 | print((file_path, config['llm']['server']['remote_llm_max_text_length'])) 19 | config['llm']['server']['remote_llm_max_text_length'] = 40000 20 | with open(file_path, 'w', encoding='utf8') as f: 21 | pytoml.dump(config, f) 22 | except Exception as e: 23 | print(f'An error occurred while reading {file_path}: {e}') 24 | 25 | 26 | # Directory to traverse 27 | directory_to_crawl = '/root/HuixiangDou/feature_stores' 28 | read_config_ini_files(directory_to_crawl) 29 | -------------------------------------------------------------------------------- /web/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/util/__init__.py -------------------------------------------------------------------------------- /web/util/image.py: -------------------------------------------------------------------------------- 1 | from web.model.base import Image 2 | 3 | 4 | def detect_base64_image_suffix(base64: str) -> [Image, str]: 5 | if not base64 or len(base64) == 0: 6 | return [Image.INVALID, ''] 7 | 8 | s = base64.split('base64,') 9 | if len(s) < 2: 10 
| return [Image.INVALID, ''] 11 | 12 | base64_prefix = s[0].lower() 13 | if 'data:image/jpeg;' == base64_prefix: 14 | return [Image.JPG, s[1]] 15 | if 'data:image/png;' == base64_prefix: 16 | return [Image.PNG, s[1]] 17 | if 'data:image/bmp;' == base64_prefix: 18 | return [Image.BMP, s[1]] 19 | 20 | return [Image.INVALID, ''] 21 | -------------------------------------------------------------------------------- /web/util/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def log(name): 5 | """ 6 | @param name: python file name 7 | @return: Logger 8 | """ 9 | logger = logging.getLogger(name) 10 | logger.setLevel(logging.INFO) 11 | formatter = logging.Formatter( 12 | '%(levelname)s: %(asctime)s - %(module)s-%(funcName)s-line:%(lineno)d - %(message)s' 13 | ) 14 | ch = logging.StreamHandler() 15 | ch.setFormatter(formatter) 16 | logger.addHandler(ch) 17 | return logger 18 | 19 | 20 | def clear_other_log(): 21 | for name, item in logging.Logger.manager.loggerDict.items(): 22 | if not isinstance(item, logging.Logger): 23 | continue 24 | if 'aoe' not in name: 25 | item.setLevel(logging.CRITICAL) 26 | 27 | 28 | clear_other_log() 29 | logger = log('util') 30 | -------------------------------------------------------------------------------- /web/util/time_util.py: -------------------------------------------------------------------------------- 1 | #! python3 2 | from datetime import datetime 3 | 4 | 5 | def get_month_time_str(t: datetime) -> str: 6 | return t.strftime('%y-%m') 7 | -------------------------------------------------------------------------------- /web/web-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/web-architecture.png --------------------------------------------------------------------------------