├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── bug.md
│   │   └── others.md
│   ├── scripts
│   │   └── doc_link_checker.py
│   └── workflows
│       ├── lint.yml
│       └── release.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .pylintrc
├── .readthedocs.yaml
├── LICENSE
├── README.md
├── README_zh.md
├── android
├── .gitignore
├── .idea
│ ├── .gitignore
│ ├── assetWizardSettings.xml
│ ├── caches
│ │ └── build_file_checksums.ser
│ ├── codeStyles
│ │ ├── Project.xml
│ │ └── codeStyleConfig.xml
│ ├── copyright
│ │ └── profiles_settings.xml
│ ├── dbnavigator.xml
│ ├── deploymentTargetDropDown.xml
│ ├── dictionaries
│ │ └── caochang.xml
│ ├── encodings.xml
│ ├── gradle.xml
│ ├── inspectionProfiles
│ │ └── Project_Default.xml
│ ├── jarRepositories.xml
│ ├── kotlinCodeInsightSettings.xml
│ ├── kotlinc.xml
│ ├── markdown-navigator.xml
│ ├── markdown-navigator
│ │ └── profiles_settings.xml
│ ├── migrations.xml
│ ├── misc.xml
│ ├── modules.xml
│ └── vcs.xml
├── .travis.yml
├── LICENSE
├── README.md
├── build.gradle
├── buildsystem
│ ├── debug.keystore
│ └── default.properties
├── demo
│ ├── .gitignore
│ ├── build.gradle
│ ├── proguard-rules.pro
│ └── src
│ │ ├── androidTest
│ │ └── java
│ │ │ └── com
│ │ │ └── carlos
│ │ │ └── grabredenvelope
│ │ │ └── demo
│ │ │ └── ExampleInstrumentedTest.kt
│ │ ├── main
│ │ ├── AndroidManifest.xml
│ │ ├── java
│ │ │ └── com
│ │ │ │ └── carlos
│ │ │ │ └── grabredenvelope
│ │ │ │ └── demo
│ │ │ │ ├── MainActivity.kt
│ │ │ │ ├── SendEmojiService.kt
│ │ │ │ ├── SharedPreferenceHelper.kt
│ │ │ │ └── WechatConstants.kt
│ │ └── res
│ │ │ ├── drawable-v24
│ │ │ └── ic_launcher_foreground.xml
│ │ │ ├── drawable
│ │ │ ├── ic_launcher_background.xml
│ │ │ └── logo.xml
│ │ │ ├── layout
│ │ │ └── activity_main.xml
│ │ │ ├── mipmap-anydpi-v26
│ │ │ ├── ic_launcher.xml
│ │ │ └── ic_launcher_round.xml
│ │ │ ├── values
│ │ │ ├── colors.xml
│ │ │ ├── strings.xml
│ │ │ └── styles.xml
│ │ │ └── xml
│ │ │ ├── sendemoji_service.xml
│ │ │ └── wechat_service.xml
│ │ └── test
│ │ └── java
│ │ └── com
│ │ └── carlos
│ │ └── grabredenvelope
│ │ └── demo
│ │ └── ExampleUnitTest.kt
├── gradle.properties
├── gradle
│ └── wrapper
│ │ ├── gradle-wrapper.jar
│ │ └── gradle-wrapper.properties
├── gradlew
├── gradlew.bat
└── settings.gradle
├── app.py
├── config-cpu.ini
├── config-multimodal.ini
├── config-wkteam-example.ini
├── config.ini
├── docs
├── en
│ ├── .readthedocs.yaml
│ ├── Makefile
│ ├── _static
│ │ ├── css
│ │ │ └── readthedocs.css
│ │ ├── image
│ │ │ ├── logo.svg
│ │ │ └── logo_icon.svg
│ │ └── js
│ │ │ └── custom.js
│ ├── _templates
│ │ ├── 404.html
│ │ ├── autosummary
│ │ │ └── class.rst
│ │ └── callable.rst
│ ├── conf.py
│ ├── cp_origin_docs.sh
│ ├── doc_add_readthedocs.md
│ ├── doc_architecture.md
│ ├── doc_full_dev.md
│ ├── doc_knowledge_graph.md
│ ├── doctuils.conf
│ └── index.rst
├── figures
│ ├── convert.py
│ ├── huixiangdou.png
│ ├── lark-add-ability.png
│ ├── lark-arch.jpg
│ ├── lark-bot-add-callback.png
│ ├── lark-bot-reply.png
│ ├── lark-bot-sub.png
│ ├── lark-create-app.png
│ ├── lark-create-corp.png
│ ├── lark-switch-corp.png
│ ├── wechat-android-example.jpg
│ ├── wechat-android-homepage.jpg
│ ├── wechat-dingdong.png
│ ├── wechat-puppet-log.png
│ ├── wechat-run-state.jpg
│ └── wechat-wkteam.jpg
└── zh
│ ├── .readthedocs.yaml
│ ├── Makefile
│ ├── _static
│ ├── css
│ │ └── readthedocs.css
│ ├── image
│ │ ├── logo.svg
│ │ └── logo_icon.svg
│ └── js
│ │ └── custom.js
│ ├── _templates
│ ├── 404.html
│ ├── autosummary
│ │ └── class.rst
│ └── callable.rst
│ ├── conf.py
│ ├── cp_origin_docs.sh
│ ├── doc_add_lark_group.md
│ ├── doc_add_readthedocs.md
│ ├── doc_add_wechat_accessibility.md
│ ├── doc_add_wechat_commercial.md
│ ├── doc_add_wechat_group.md
│ ├── doc_architecture.md
│ ├── doc_full_dev.md
│ ├── doc_knowledge_graph.md
│ ├── doc_merge_wechat_group.md
│ ├── doc_rag_annotate_sft_data.md
│ ├── doc_send_only_lark_group.md
│ ├── doctuils.conf
│ └── index.rst
├── evaluation
├── README.md
├── README_zh.md
├── end2end
│ └── main.py
├── rejection
│ ├── build_fs_and_filter.py
│ ├── gt_bad.txt
│ ├── gt_good.txt
│ ├── kg_filter.py
│ ├── plot.py
│ └── plot_example.png
└── rerank
│ ├── step0_clean_queries.py
│ └── step1_create_candidates.py
├── huixiangdou-inside.md
├── huixiangdou
├── __init__.py
├── api_server.py
├── frontend
│ ├── __init__.py
│ ├── lark.py
│ ├── lark_group.py
│ └── wechat.py
├── gradio_ui.py
├── main.py
├── primitive
│ ├── __init__.py
│ ├── bm250kapi.py
│ ├── chunk.py
│ ├── embedder.py
│ ├── entity.py
│ ├── faiss.py
│ ├── file_operation.py
│ ├── limitter.py
│ ├── llm_reranker.py
│ ├── query.py
│ ├── splitter.py
│ ├── token.py
│ └── utils.py
├── services
│ ├── __init__.py
│ ├── config.py
│ ├── helper.py
│ ├── kg.py
│ ├── llm.py
│ ├── llm_client.py
│ ├── llm_server_hybrid.py
│ ├── parallel_pipeline.py
│ ├── prompt.py
│ ├── retriever.py
│ ├── serial_pipeline.py
│ ├── session.py
│ ├── sg_search.py
│ ├── store.py
│ └── web_search.py
└── version.py
├── logs
└── work.txt
├── requirements.txt
├── requirements
├── cpu.txt
├── docs.txt
├── lark-group.txt
├── multimodal.txt
└── sft.txt
├── resource
├── bad_questions.json
├── data
│ ├── baicaoyuan.md
│ ├── qa_pair.csv
│ └── tengye.md
├── figures
│ ├── inside-middleware.png
│ ├── inside-mmpose.jpg
│ ├── inside-ncnn-group.jpg
│ └── lark-example.png
├── good_questions.json
├── inner-test.ini
├── logo_black.svg
├── logo_blue.svg
├── rag_example_input.json
└── rag_example_output.json
├── setup.py
├── sft
├── README.md
├── axolotl_configs
│ ├── lora-4B.yml
│ ├── qwen2-lora-0.5B.yaml
│ ├── qwen2-lora-1.8B.yaml
│ ├── qwen2-lora-14B.yaml
│ ├── qwen2-lora-32B.yaml
│ ├── qwen2-lora-4B-loraplus-epoch4.yaml
│ ├── qwen2-lora-4B.yaml
│ ├── qwen2-lora-7B.yaml
│ ├── qwen2-moe-lora-2.7B.yaml
│ ├── qwen2-moe-lora.yaml
│ └── qwen2-moe-qlora.yaml
├── convert_to_alpaca.py
├── reconstruct_check_llm.py
├── reconstruct_filter_annotate.py
└── reconstruct_wechat_group.py
├── tests
├── __init__.py
├── cp_files.py
├── data.json
├── git-clone.sh
├── test_alles_apin.py
├── test_bce.py
├── test_benepar.py
├── test_bge_reranker.py
├── test_build_milvus_and_filter.py
├── test_clear_kimi_files.py
├── test_dataclass.py
├── test_deepseek.py
├── test_get_issue_comment_pipeline.py
├── test_hf_import_accelerate.py
├── test_intention_prompt.py
├── test_internlm2.py
├── test_kimi.py
├── test_kimi_cr.py
├── test_kimi_passkey.py
├── test_lda
│ ├── step0_preprocess.py
│ └── step1_countvec.py
├── test_llm_client.py
├── test_m3.py
├── test_milvus_hybrid_retrieval.py
├── test_neo4j.py
├── test_openai.py
├── test_openxlab_android_api.py
├── test_optimum_st.py
├── test_post_android.py
├── test_pyppeteer.py
├── test_query_gradio.py
├── test_qwen_react.py
├── test_relative.py
├── test_reranker.py
├── test_splitter.py
├── test_step1_llm.py
├── test_time.py
├── test_visual_bge.py
├── test_yi.py
└── test_yulan.py
├── unittest
├── primitive
│ ├── test_bm250api.py
│ ├── test_dataclass.py
│ ├── test_embedder.py
│ ├── test_entity.py
│ ├── test_faiss.py
│ ├── test_limitter.py
│ ├── test_reranker.py
│ └── test_splitter.py
└── service
│ ├── daily_smoke.py
│ ├── test_llm.py
│ ├── test_llm_client.py
│ ├── test_llm_server_local.py
│ ├── test_llm_server_remote.py
│ ├── test_sg_search.py
│ └── test_web_search.py
└── web
├── README.md
├── __init__.py
├── api
├── __init__.py
├── access.py
├── chat.py
├── integrate.py
├── message.py
├── qalib.py
└── statistic.py
├── config
├── __init__.py
├── env.py
└── logging.py
├── constant
├── __init__.py
└── biz_constant.py
├── front-end
├── .eslintignore
├── .eslintrc.cjs
├── .gitignore
├── .npmrc
├── dist
│ ├── assets
│ │ ├── bean1-002ba51d.png
│ │ └── logo-af340389.png
│ ├── index.html
│ └── logo.png
├── env
│ ├── .env.development
│ ├── .env.production
│ └── .env.staging
├── index.html
├── mock
│ └── db.json
├── package.json
├── public
│ └── logo.png
├── readme.md
├── scripts
│ ├── alias.ts
│ ├── import-to-cdn.ts
│ ├── index.ts
│ ├── proxy.ts
│ └── utils.ts
├── src
│ ├── app.tsx
│ ├── assets
│ │ └── imgs
│ │ │ ├── bean.png
│ │ │ ├── bean1.png
│ │ │ └── logo.png
│ ├── components
│ │ ├── button
│ │ │ ├── button.module.less
│ │ │ └── button.tsx
│ │ ├── components-portal
│ │ │ └── components-portal.tsx
│ │ ├── copy-code
│ │ │ ├── copy-code.module.less
│ │ │ └── copy-code.tsx
│ │ ├── global-lang
│ │ │ ├── global-lang-context.ts
│ │ │ ├── global-lang.tsx
│ │ │ └── index.tsx
│ │ ├── header
│ │ │ ├── header.module.less
│ │ │ └── header.tsx
│ │ ├── notification
│ │ │ ├── emoji-wrapper.tsx
│ │ │ ├── notification.module.less
│ │ │ ├── notification.tsx
│ │ │ └── use-notification.tsx
│ │ ├── upload-item
│ │ │ ├── index.tsx
│ │ │ ├── upload-item.module.less
│ │ │ └── upload-item.tsx
│ │ └── upload
│ │ │ ├── delete-btn.tsx
│ │ │ ├── index.tsx
│ │ │ ├── upload.module.less
│ │ │ └── upload.tsx
│ ├── config
│ │ ├── auth.ts
│ │ ├── base-url.ts
│ │ ├── change-page-gray.ts
│ │ ├── index.ts
│ │ └── log.ts
│ ├── hooks
│ │ └── useLocale.ts
│ ├── interceptors
│ │ ├── request.ts
│ │ └── response.ts
│ ├── layouts
│ │ └── header-container-layout
│ │ │ ├── header-container-layout.module.less
│ │ │ └── header-container-layout.tsx
│ ├── locales
│ │ ├── en-US.ts
│ │ ├── en-US
│ │ │ ├── bean-detail.ts
│ │ │ ├── components.ts
│ │ │ ├── home.ts
│ │ │ └── welcome.ts
│ │ ├── index.ts
│ │ ├── zh-CN.ts
│ │ └── zh-CN
│ │ │ ├── bean-detail.ts
│ │ │ ├── components.ts
│ │ │ ├── home.ts
│ │ │ └── welcome.ts
│ ├── main.tsx
│ ├── pages
│ │ ├── bean-detail
│ │ │ ├── bean-detail.module.less
│ │ │ ├── bean-detail.tsx
│ │ │ └── components
│ │ │ │ ├── chat
│ │ │ │ ├── chat.module.less
│ │ │ │ ├── chat.tsx
│ │ │ │ └── index.tsx
│ │ │ │ ├── example
│ │ │ │ ├── example.module.less
│ │ │ │ ├── example.tsx
│ │ │ │ └── index.tsx
│ │ │ │ ├── import-docs
│ │ │ │ ├── import-docs.module.less
│ │ │ │ ├── import-docs.tsx
│ │ │ │ └── index.tsx
│ │ │ │ ├── integrate-feishu
│ │ │ │ ├── index.tsx
│ │ │ │ ├── integrate-feishu.module.less
│ │ │ │ └── integrate-feishu.tsx
│ │ │ │ ├── integrate-wechat
│ │ │ │ ├── integrate-wechat.module.less
│ │ │ │ └── integrate-wechat.tsx
│ │ │ │ └── toggle-search
│ │ │ │ ├── index.tsx
│ │ │ │ ├── toggle-search.module.less
│ │ │ │ └── toggle-search.tsx
│ │ └── home
│ │ │ ├── home.module.less
│ │ │ └── home.tsx
│ ├── routes
│ │ └── index.tsx
│ ├── services
│ │ ├── home.ts
│ │ └── user.ts
│ ├── styles
│ │ ├── index.less
│ │ ├── mixins.less
│ │ └── variables.less
│ ├── types.d.ts
│ ├── utils
│ │ ├── ajax.ts
│ │ ├── mlog.ts
│ │ └── utils.ts
│ └── vite-env.d.ts
├── tsconfig.json
├── tsconfig.node.json
└── vite.config.ts
├── main.py
├── middleware
├── __init__.py
└── token.py
├── model
├── __init__.py
├── access.py
├── base.py
├── chat.py
├── huixiangdou.py
├── integrate.py
├── qalib.py
└── statistic.py
├── mq
├── __init__.py
└── hxd_task.py
├── orm
├── __init__.py
└── redis.py
├── proxy
├── config-template.ini
├── logs
│ └── work.txt
├── main.py
├── test.py
├── traslate.txt
└── web_worker.py
├── requirements.txt
├── scheduler
├── __init__.py
└── huixiangdou_task.py
├── service
├── __init__.py
├── access.py
├── agent.py
├── cache.py
├── chat.py
├── message.py
├── qalib.py
└── statistic.py
├── tools
├── README.md
├── dump_redis_query.py
├── get_puyu_model_list.py
└── update_fs_max_len.py
├── util
├── __init__.py
├── image.py
├── log.py
├── str.py
└── time_util.py
└── web-architecture.png
/.github/ISSUE_TEMPLATE/bug.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: 🐛 bug issue
3 | about: submit a bug report +_+
4 | ---
5 |
6 | ## error log | 日志或报错信息 | ログ
7 |
8 | ## context | 编译/运行环境 | バックグラウンド
9 |
10 | ## how to reproduce | 复现步骤 | 再現方法
11 |
12 | 1.
13 | 2.
14 | 3.
15 |
16 | ## more | 其他 | その他
17 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/others.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: 📝 others
3 | about: discussion, suggestion and question
4 | ---
5 |
6 | ## detail | 详细描述 | 詳細な説明
7 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: Check markdown local file link available
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 |
9 | jobs:
10 | lint:
11 | runs-on: ubuntu-20.04
12 | steps:
13 | - uses: actions/checkout@v2
14 | - name: Set up Python 3.9
15 | uses: actions/setup-python@v2
16 | with:
17 | python-version: 3.9
18 | - name: Check doc link
19 | run: |
20 | python .github/scripts/doc_link_checker.py --target README_zh.md
21 | python .github/scripts/doc_link_checker.py --target README.md
22 | python -m pip install pylint interrogate
23 | pylint huixiangdou || true
24 | interrogate huixiangdou -v || true
25 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Publish Python 🐍 distributions 📦 to PyPI
2 |
3 | on:
4 | push:
5 | tags:
6 | - '*'
7 |
8 | jobs:
9 | build-n-publish:
10 | name: Build and publish Python 🐍 distributions 📦 to PyPI
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/checkout@v2
14 | - name: Set up Python
15 | uses: actions/setup-python@v2
16 | with:
17 | python-version: '3.9'
18 | - name: Install pypa/build
19 | run: >-
20 | python -m
21 | pip install
22 | build
23 | --user
24 | - name: Build a binary wheel and a source tarball
25 | run: >-
26 | python -m
27 | build
28 | --sdist
29 | --wheel
30 | --outdir dist/
31 | - name: Publish distribution 📦 to PyPI
32 | if: startsWith(github.ref, 'refs/tags')
33 | uses: pypa/gh-action-pypi-publish@release/v1
34 | with:
35 | user: __token__
36 | password: ${{ secrets.pypi_password }}
37 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | workdir/
2 | write_toml.py
3 | modeling_internlm2.py
4 | config-template.ini
5 | logs/
6 | logs/work.txt
7 | server.log
8 | **/__pycache__
9 | badcase.txt
10 | config.ini
11 | resource/prompt.txt
12 | build/
13 | dist/
14 | huixiangdou.egg-info/
15 | commit.id
16 | resource/wechat_questions.json
17 | .eggs/
18 | feature_stores/
19 | web/qa
20 | redis.conf
21 | nohup.out
22 | *.pyc
23 | start-web.sh
24 | web/proxy/config-template.ini
25 | web/env.sh
26 | logs/work.txt
27 | web/tools/query.jsonl
28 | query.jsonl
29 | tests/history_recv_send.txt
30 | unittest/token.json
31 | wkteam/
32 | web.log
33 | evaluation/rejection/gt_bad.txt
34 | evaluation/rejection/gt_good.txt
35 | bm25.pkl
36 | repodir/
37 | logs/work.txt
38 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/PyCQA/flake8
3 | rev: 4.0.1
4 | hooks:
5 | - id: flake8
6 | exclude: ^(__init__.py)$
7 | args: ["--max-line-length=79", "--exclude=service/__init__.py", "--exclude=tests/*", "--exclude=android/*"]
8 | - repo: https://github.com/PyCQA/isort
9 | rev: 5.11.5
10 | hooks:
11 | - id: isort
12 | - repo: https://github.com/pre-commit/mirrors-yapf
13 | rev: v0.32.0
14 | hooks:
15 | - id: yapf
16 | name: yapf
17 | description: 'Formatter for Python code'
18 | entry: yapf
19 | language: python
20 | args: ['-i', '--style={based_on_style: pep8, column_limit: 79}']
21 |
22 | - repo: https://github.com/pre-commit/pre-commit-hooks
23 | rev: v4.2.0
24 | hooks:
25 | - id: trailing-whitespace
26 | - id: check-yaml
27 | - id: end-of-file-fixer
28 | - id: requirements-txt-fixer
29 | - id: double-quote-string-fixer
30 | - id: check-merge-conflict
31 | - id: fix-encoding-pragma
32 | args: ["--remove"]
33 | - id: mixed-line-ending
34 | args: ["--fix=lf"]
35 | - repo: https://github.com/executablebooks/mdformat
36 | rev: 0.7.9
37 | hooks:
38 | - id: mdformat
39 | args: ["--number"]
40 | additional_dependencies:
41 | - mdformat-openmmlab
42 | - mdformat_frontmatter
43 | - linkify-it-py
44 | - repo: https://github.com/codespell-project/codespell
45 | rev: v2.1.0
46 | hooks:
47 | - id: codespell
48 | args: ["--skip=third_party/*,*.ipynb,*.proto"]
49 |
50 | - repo: https://github.com/myint/docformatter
51 | rev: v1.4
52 | hooks:
53 | - id: docformatter
54 | args: ["--in-place", "--wrap-descriptions", "79"]
55 |
56 | - repo: https://github.com/open-mmlab/pre-commit-hooks
57 | rev: v0.4.1
58 | hooks:
59 | - id: check-copyright
60 | args: ["huixiangdou"]
61 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yaml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Set the OS, Python version and other tools you might need
9 | build:
10 | os: ubuntu-22.04
11 | tools:
12 | python: "3.12"
13 | # You can also specify other tool versions:
14 | # nodejs: "19"
15 | # rust: "1.64"
16 | # golang: "1.19"
17 |
18 | # Build documentation in the "docs/" directory with Sphinx
19 | sphinx:
20 | configuration: docs/conf.py
21 |
22 | # Optionally build your docs in additional formats such as PDF and ePub
23 | # formats:
24 | # - pdf
25 | # - epub
26 |
27 | # Optional but recommended, declare the Python requirements required
28 | # to build your documentation
29 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
30 | # python:
31 | # install:
32 | # - requirements: docs/requirements.txt
33 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2024, tpoisonooo
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | 3. Neither the name of the copyright holder nor the names of its
16 | contributors may be used to endorse or promote products derived from
17 | this software without specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
--------------------------------------------------------------------------------
/android/.gitignore:
--------------------------------------------------------------------------------
1 | *.iml
2 | .gradle
3 | /local.properties
4 | /.idea/workspace.xml
5 | /.idea/libraries
6 | .DS_Store
7 | /build
8 |
9 | /captures
10 | /buildsystem/keystore.properties
11 | /buildsystem/qianghongbao.jks
12 | /xbd
13 | /app/src/production
14 | app/src/main/java/com/carlos/grabredenvelope/local
15 | /apk
16 |
17 | sentry.properties
18 | /.idea/compiler.xml
19 |
--------------------------------------------------------------------------------
/android/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/android/.idea/assetWizardSettings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/android/.idea/caches/build_file_checksums.ser:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/android/.idea/caches/build_file_checksums.ser
--------------------------------------------------------------------------------
/android/.idea/codeStyles/codeStyleConfig.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/android/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/android/.idea/deploymentTargetDropDown.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/android/.idea/dictionaries/caochang.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/android/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/android/.idea/gradle.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/android/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/android/.idea/kotlinCodeInsightSettings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/android/.idea/kotlinc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/android/.idea/markdown-navigator/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/android/.idea/migrations.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/android/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/android/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/android/.travis.yml:
--------------------------------------------------------------------------------
1 | language: android
2 | sudo: false # 为了开启基于容器的 Travis CI 任务,让编译效率更高
3 | android:
4 | components:
5 | # Uncomment the lines below if you want to
6 | # use the latest revision of Android SDK Tools
7 | # - platform-tools
8 | # - tools
9 | # The BuildTools version used by your project
10 | - build-tools-28.0.3
11 | # The SDK version used to compile your project
12 | - android-28
13 | # Additional components
14 | - extra-google-google_play_services
15 | - extra-google-m2repository
16 | - extra-android-m2repository
17 | - addon-google_apis-google-19
18 | # Specify at least one system image,
19 | # if you need to run emulator(s) during your tests
20 | - sys-img-armeabi-v7a-android-19
21 | - sys-img-x86-android-17
22 |
23 | before_script:
24 | - mkdir "$ANDROID_HOME/licenses" || true
25 | - echo "24333f8a63b6825ea9c5514f83c2829b004d1fee" > "$ANDROID_HOME/licenses/android-sdk-license"
26 |
27 | script:
28 | - ./gradlew assembleDev
29 |
--------------------------------------------------------------------------------
/android/README.md:
--------------------------------------------------------------------------------
1 | # 茴香豆 Android 辅助
2 |
3 | 这是基于 [抢红包 app](https://github.com/xbdcc/GrabRedEnvelope) 软件的二次开发。
4 |
5 | * 移除抢红包功能,重新用于 LLM RAG chat
6 | * 它基于 android 系统 API 工作,原理上可以控制所有 UI(不只是即时通讯软件),风险自行承担
7 |
8 | # License
9 |
10 | 注意软件使用 [GPL 协议](LICENSE)。
11 |
--------------------------------------------------------------------------------
/android/build.gradle:
--------------------------------------------------------------------------------
1 | // Top-level build file where you can add configuration options common to all sub-projects/modules.
2 | buildscript {
3 | ext.kotlin_version = '1.7.20'
4 | repositories {
5 | google()
6 | jcenter()
7 | maven { url 'https://jitpack.io' }
8 | }
9 | dependencies {
10 | classpath 'com.android.tools.build:gradle:7.3.1'
11 | classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version"
12 | classpath 'org.greenrobot:greendao-gradle-plugin:3.3.0' // add plugin
13 | classpath "org.jetbrains.kotlin:kotlin-serialization:$kotlin_version"
14 |
15 | classpath 'io.sentry:sentry-android-gradle-plugin:3.0.1'
16 |
17 | // NOTE: Do not place your application dependencies here; they belong
18 | // in the individual module build.gradle files
19 | }
20 | }
21 | allprojects {
22 | repositories {
23 | google()
24 | jcenter()
25 | maven { url 'https://jitpack.io' }
26 |
27 | maven { url 'https://oss.sonatype.org/content/repositories/snapshots/' }
28 | maven{ url 'https://maven.aliyun.com/repository/public'}
29 |
30 | maven { url "https://kotlin.bintray.com/kotlinx" }
31 | maven { url 'https://dl.bintray.com/xbdcc/maven' }
32 | }
33 | }
34 | task clean(type: Delete) {
35 | delete rootProject.buildDir
36 | }
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/android/buildsystem/debug.keystore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/android/buildsystem/debug.keystore
--------------------------------------------------------------------------------
/android/buildsystem/default.properties:
--------------------------------------------------------------------------------
1 | # keystore
2 | keyAlias= androiddebugkey
3 | keyPassword= android
4 | storeFile= ../buildsystem/debug.keystore
5 | storePassword= android
6 |
7 | # other
8 | JPUSH_APPKEY =
9 |
10 | #测试的
11 | UMENG_APPKEY_DEV =
12 | #正式的
13 | UMENG_APPKEY =
14 |
15 | BUGLY_KEY_DEV =
16 | BUGLY_KEY =
17 |
18 | #sentry
19 | SENTRY_DSN_DEV =
20 | SENTRY_DSN =
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/android/demo/.gitignore:
--------------------------------------------------------------------------------
1 | /build
2 |
--------------------------------------------------------------------------------
/android/demo/build.gradle:
--------------------------------------------------------------------------------
1 | apply plugin: 'com.android.application'
2 | apply plugin: 'kotlin-android'
3 | android {
4 | compileSdkVersion 33
5 |
6 | defaultConfig {
7 | applicationId "com.carlos.grabredenvelope.demo"
8 | minSdkVersion 18
9 | targetSdkVersion 33
10 | versionCode 1
11 | versionName "1.0"
12 |
13 | testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
14 |
15 | }
16 |
17 | buildTypes {
18 | release {
19 | minifyEnabled false
20 | proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
21 | }
22 | }
23 | lint {
24 | disable 'GoogleAppIndexingWarning'
25 | }
26 | namespace 'com.carlos.grabredenvelope.demo'
27 |
28 | }
29 |
30 | dependencies {
31 | implementation fileTree(dir: 'libs', include: ['*.jar'])
32 | implementation 'com.github.xbdcc:cutils:0.0.18'
33 | testImplementation 'junit:junit:4.12'
34 | androidTestImplementation 'androidx.test:runner:1.1.1'
35 | androidTestImplementation 'androidx.test.espresso:espresso-core:3.1.1'
36 |
37 | implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-android:1.0.1'
38 | implementation 'com.squareup.okhttp3:okhttp:4.9.0'
39 | implementation 'com.google.code.gson:gson:2.8.9'
40 | implementation 'com.google.android.material:material:1.2.0'
41 | }
42 |
43 |
44 | task hello {
45 | doLast {
46 | println 'Hello world'
47 | }
48 | }
--------------------------------------------------------------------------------
/android/demo/proguard-rules.pro:
--------------------------------------------------------------------------------
1 | # Add project specific ProGuard rules here.
2 | # You can control the set of applied configuration files using the
3 | # proguardFiles setting in build.gradle.
4 | #
5 | # For more details, see
6 | # http://developer.android.com/guide/developing/tools/proguard.html
7 |
8 | # If your project uses WebView with JS, uncomment the following
9 | # and specify the fully qualified class name to the JavaScript interface
10 | # class:
11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview {
12 | # public *;
13 | #}
14 |
15 | # Uncomment this to preserve the line number information for
16 | # debugging stack traces.
17 | #-keepattributes SourceFile,LineNumberTable
18 |
19 | # If you keep the line number information, uncomment this to
20 | # hide the original source file name.
21 | #-renamesourcefileattribute SourceFile
22 |
--------------------------------------------------------------------------------
/android/demo/src/androidTest/java/com/carlos/grabredenvelope/demo/ExampleInstrumentedTest.kt:
--------------------------------------------------------------------------------
1 | package com.carlos.grabredenvelope.demo
2 |
3 | import androidx.test.InstrumentationRegistry
4 | import androidx.test.runner.AndroidJUnit4
5 | import org.junit.Assert.assertEquals
6 | import org.junit.Test
7 | import org.junit.runner.RunWith
8 |
9 |
10 | /**
11 | * Instrumented test, which will execute on an Android device.
12 | *
13 | * See [testing documentation](http://d.android.com/tools/testing).
14 | */
15 | @RunWith(AndroidJUnit4::class)
16 | class ExampleInstrumentedTest {
17 | @Test
18 | fun useAppContext() {
19 | // Context of the app under test.
20 | val appContext = InstrumentationRegistry.getTargetContext()
21 | assertEquals("com.carlos.grabredenvelope.demo", appContext.packageName)
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/android/demo/src/main/AndroidManifest.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
14 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
27 |
28 |
29 |
30 |
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/android/demo/src/main/java/com/carlos/grabredenvelope/demo/SharedPreferenceHelper.kt:
--------------------------------------------------------------------------------
1 | package com.carlos.grabredenvelope.demo
2 |
3 | import android.content.Context
4 | import android.content.SharedPreferences
5 |
class SharedPreferenceHelper(context: Context) {
    // Name of the app-private preference file shared by all helper instances.
    private val NAME = "huixiangdou"
    private val sharedPreferences: SharedPreferences = context.getSharedPreferences(NAME, Context.MODE_PRIVATE)

    /**
     * Persists [value] under [key].
     *
     * Uses apply() instead of commit(): the in-memory state is updated
     * immediately and the disk write is scheduled asynchronously, so the
     * calling (usually main) thread is never blocked. Callers ignored the
     * commit() result before, so this is behavior-compatible.
     */
    fun saveString(key: String, value: String) {
        sharedPreferences.edit().putString(key, value).apply()
    }

    /** Persists [value] under [key] asynchronously (see [saveString]). */
    fun saveBoolean(key: String, value: Boolean) {
        sharedPreferences.edit().putBoolean(key, value).apply()
    }

    /** Returns the boolean stored under [key], or [defaultValue] if absent. */
    fun getBoolean(key: String, defaultValue: Boolean): Boolean =
        sharedPreferences.getBoolean(key, defaultValue)

    /**
     * Returns the string stored under [key], or [defaultValue] if absent.
     * getString only returns null when the supplied default is null, so the
     * non-null assertion is safe here because [defaultValue] is non-null.
     */
    fun getString(key: String, defaultValue: String): String =
        sharedPreferences.getString(key, defaultValue)!!
}
--------------------------------------------------------------------------------
/android/demo/src/main/java/com/carlos/grabredenvelope/demo/WechatConstants.kt:
--------------------------------------------------------------------------------
1 | package com.carlos.grabredenvelope.demo
2 |
3 | import android.util.Log
4 | import com.carlos.cutils.util.LogUtils
5 |
6 | /**
7 | * Created by Carlos on 2019-05-29.
8 | */
/**
 * Created by Carlos on 2019-05-29.
 *
 * WeChat view-resource ids used by the accessibility service. The defaults
 * correspond to the tested client versions; setVersion() re-applies them per
 * version and warns when the running version has not been verified.
 */
object WechatConstants {

    // Versions whose resource ids have been verified against the client.
    private val SUPPORTED_VERSIONS = setOf("8.0.47", "8.0.48", "8.0.49")

    var RES_ID_GROUP_NAME = "com.tencent.mm:id/obn" // group chat title
    var RES_ID_USER_NAME = "com.tencent.mm:id/brc" // message sender name
    var RES_ID_USER_CONTENT = "com.tencent.mm:id/bkl" // message text content
    var RES_ID_EDIT_TEXT = "com.tencent.mm:id/bkk" // message input box
    // Since 8.0.48 the detection logic changed: the sender is located by
    // the avatar's on-screen coordinates.
    var RES_ID_USER_RL = "com.tencent.mm:id/bn1" // sender row container (RelativeLayout)
    var RES_ID_USER_HEADER = "com.tencent.mm:id/bk1" // avatar image

    /** Re-applies the resource ids for [version]; logs a warning for unknown versions. */
    fun setVersion(version: String) {
        LogUtils.d("version:$version")
        if (version in SUPPORTED_VERSIONS) {
            RES_ID_GROUP_NAME = "com.tencent.mm:id/obn"
            RES_ID_USER_NAME = "com.tencent.mm:id/brc"
            RES_ID_USER_CONTENT = "com.tencent.mm:id/bkl"
            RES_ID_EDIT_TEXT = "com.tencent.mm:id/bkk"
        } else {
            Log.w("msg", "unknown version, maybe incompatible")
        }
    }
}
--------------------------------------------------------------------------------
/android/demo/src/main/res/drawable-v24/ic_launcher_foreground.xml:
--------------------------------------------------------------------------------
1 |
7 |
12 |
13 |
19 |
22 |
25 |
26 |
27 |
28 |
34 |
35 |
--------------------------------------------------------------------------------
/android/demo/src/main/res/mipmap-anydpi-v26/ic_launcher.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/android/demo/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/android/demo/src/main/res/values/colors.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | #008577
4 | #00574B
5 | #D81B60
6 |
7 |
--------------------------------------------------------------------------------
/android/demo/src/main/res/values/strings.xml:
--------------------------------------------------------------------------------
1 |
2 | 茴香豆 Android 助手
3 | 这是 [茴香豆](https://github.com/internlm/huixiangdou) 的 Android 部分。\n开启后停留在微信对话界面,将读最新消息,调用大语言模型自动回复主题相关内容。如果是主题无关的闲聊,则不处理。\n如果对你有用,请 star 一下!
4 | 抢微信红包
5 | 茴香豆 LLM RAG 回复
6 |
7 | 如果对你有用,请给 https://github.com/internlm/huixiangdou 点个 star,这对我们真的很重要qaq
8 | 第一步:打开 openxlab.org.cn 应用中心,搜索“茴香豆”,创建知识库直接获取回调地址;或自行部署开源版茴香豆得到服务器地址。\n输入框里是个可用的地址,仅仅用于调试 app 是否正常,并不会真的回答问题。
9 | http://139.224.198.162:18443/api/v1/message/v1/wechat/Qlyq
10 | 确定
11 | 第二步:点击下方按钮进入辅助功能,找到(茴香豆)开启或关闭
12 | 第三步:直接进入微信(注意 github 文档中微信版本要求)聊天界面,请对方发个消息被动扫描屏幕、或上滑聊天框主动触发扫描。\n群聊或单聊都支持。\n注意不要关闭本应用,它默认后台运行。
13 | tips:助手只回答知识库相关话题,碰到无关闲聊会跳过。adb log 可以看到完整处理日志。
14 | 点我
15 | 调试模式,默认开启。\n确认功能正常后需关闭,否则收到啥都响应
16 |
17 |
--------------------------------------------------------------------------------
/android/demo/src/main/res/values/styles.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/android/demo/src/main/res/xml/sendemoji_service.xml:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/android/demo/src/main/res/xml/wechat_service.xml:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/android/demo/src/test/java/com/carlos/grabredenvelope/demo/ExampleUnitTest.kt:
--------------------------------------------------------------------------------
1 | package com.carlos.grabredenvelope.demo
2 |
3 | import org.junit.Assert.assertEquals
4 | import org.junit.Test
5 |
6 | /**
7 | * Example local unit test, which will execute on the development machine (host).
8 | *
9 | * See [testing documentation](http://d.android.com/tools/testing).
10 | */
class ExampleUnitTest {
    /** Sanity check that the local JVM test harness is wired up. */
    @Test
    fun addition_isCorrect() {
        val sum = 2 + 2
        assertEquals(4, sum)
    }
}
17 |
--------------------------------------------------------------------------------
/android/gradle.properties:
--------------------------------------------------------------------------------
1 | # Project-wide Gradle settings.
2 |
3 | # IDE (e.g. Android Studio) users:
4 | # Gradle settings configured through the IDE *will override*
5 | # any settings specified in this file.
6 |
7 | # For more details on how to configure your build environment visit
8 | # http://www.gradle.org/docs/current/userguide/build_environment.html
9 |
10 | # Specifies the JVM arguments used for the daemon process.
11 | # The setting is particularly useful for tweaking memory settings.
12 | # Default value: -Xmx10248m -XX:MaxPermSize=256m
13 | # org.gradle.jvmargs=-Xmx2048m -XX:MaxPermSize=512m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
14 |
15 | # When configured, Gradle will run in incubating parallel mode.
16 | # This option should only be used with decoupled projects. More details, visit
17 | # http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
18 | # org.gradle.parallel=true
19 | android.useAndroidX=true
20 | # Automatically convert third-party libraries to use AndroidX
21 | android.enableJetifier=true
22 | # Kotlin code style for this project: "official" or "obsolete":
23 | kotlin.code.style=official
24 |
25 |
--------------------------------------------------------------------------------
/android/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/android/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/android/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Wed Feb 26 12:16:26 CST 2020
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-all.zip
7 |
--------------------------------------------------------------------------------
/android/settings.gradle:
--------------------------------------------------------------------------------
1 | include ':demo'
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
"""Start-up file for deploying HuixiangDou-WEB on OpenXLab-APPs (https://openxlab.org.cn/apps).

Some environment variables need to be set before starting up:
  JWT_SECRET=
  REDIS_HOST=
  REDIS_PASSWORD=
  SERVER_PORT=7860 (when deployed on OpenXLab-APPs, SERVER_PORT must be 7860)
"""

import subprocess
import sys

# Launch HuixiangDou-WEB with the same interpreter running this script.
# subprocess.run with an argument list avoids the shell indirection (and
# shell-injection surface) of os.system, and sys.executable is more robust
# than assuming a `python` binary is on PATH.
subprocess.run([sys.executable, '-m', 'web.main'])
12 |
--------------------------------------------------------------------------------
/docs/en/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | # Set the version of Python and other tools you might need
4 | build:
5 | os: ubuntu-22.04
6 | tools:
7 | python: "3.8"
8 |
9 | formats:
10 | - epub
11 |
12 | sphinx:
13 | configuration: docs/en/conf.py
14 |
15 | python:
16 | install:
17 | - requirements: requirements/docs.txt
18 |
--------------------------------------------------------------------------------
/docs/en/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/en/_static/css/readthedocs.css:
--------------------------------------------------------------------------------
1 | .header-logo {
2 | background-image: url("../image/logo.svg");
3 | background-size: 444px 93px;
4 | height: 93px;
5 | width: 444px;
6 | }
7 |
8 | @media screen and (min-width: 1100px) {
9 | .header-logo {
10 | top: -25px;
11 | }
12 | }
13 |
14 | pre {
15 | white-space: pre;
16 | }
17 |
18 | @media screen and (min-width: 2000px) {
19 | .pytorch-content-left {
20 | width: 1200px;
21 | margin-left: 30px;
22 | }
23 | article.pytorch-article {
24 | max-width: 1200px;
25 | }
26 | .pytorch-breadcrumbs-wrapper {
27 | width: 1200px;
28 | }
29 | .pytorch-right-menu.scrolling-fixed {
30 | position: fixed;
31 | top: 45px;
32 | left: 1580px;
33 | }
34 | }
35 |
36 |
37 | article.pytorch-article section code {
38 | padding: .2em .4em;
39 | background-color: #f3f4f7;
40 | border-radius: 5px;
41 | }
42 |
43 | /* Disable the change in tables */
44 | article.pytorch-article section table code {
45 | padding: unset;
46 | background-color: unset;
47 | border-radius: unset;
48 | }
49 |
50 | table.autosummary td {
51 | width: 50%
52 | }
53 |
54 | img.align-center {
55 | display: block;
56 | margin-left: auto;
57 | margin-right: auto;
58 | }
59 |
60 | article.pytorch-article p.rubric {
61 | font-weight: bold;
62 | }
63 |
--------------------------------------------------------------------------------
/docs/en/_static/js/custom.js:
--------------------------------------------------------------------------------
// Sections to render collapsed; referenced by the documentation theme's
// navigation scripts (left empty here by default).
var collapsedSections = [];

// Once the DOM is ready, turn every ".model-summary" table into a DataTable:
// 20 rows per page, no persisted state, no page-length selector, and no
// initial sort ("order": [] keeps the document's row order).
$(document).ready(function () {
$('.model-summary').DataTable({
"stateSave": false,
"lengthChange": false,
"pageLength": 20,
"order": []
});
});
11 |
--------------------------------------------------------------------------------
/docs/en/_templates/404.html:
--------------------------------------------------------------------------------
1 | {% extends "layout.html" %}
2 |
3 | {% block body %}
4 |
5 |
10 | If you just switched documentation versions, it is likely that the page you were on is moved. You can look for it in
11 | the content table left, or go to the homepage .
12 |
13 |
17 |
18 | {% endblock %}
19 |
--------------------------------------------------------------------------------
/docs/en/_templates/autosummary/class.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 | .. currentmodule:: {{ module }}
4 |
5 |
6 | {{ name | underline}}
7 |
8 | .. autoclass:: {{ name }}
9 | :members:
10 |
11 | ..
12 | autogenerated from _templates/autosummary/class.rst
13 | note it does not have :inherited-members:
14 |
--------------------------------------------------------------------------------
/docs/en/_templates/callable.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 | .. currentmodule:: {{ module }}
4 |
5 |
6 | {{ name | underline}}
7 |
8 | .. autoclass:: {{ name }}
9 | :members:
10 | :special-members: __call__
11 |
12 | ..
13 | autogenerated from _templates/callable.rst
14 | note it does not have :inherited-members:
15 |
--------------------------------------------------------------------------------
/docs/en/cp_origin_docs.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Copy *.md files from docs/zh into docs/en when no English version exists
# yet, and snapshot the top-level READMEs as the quickstart/precision pages.
# `cp -n` never overwrites an existing destination file.

# These two copies do not depend on the loop variable; run them once
# instead of once per discovered file.
cp -n ../../README.md ./copy_quickstart.md
cp -n ../../evaluation/README.md ./copy_precision.md

for filename in $(find ../zh/ -name '*.md' -printf "%P\n");
do
    # Quote expansions so paths containing spaces survive word splitting.
    mkdir -p "$(dirname "$filename")"
    cp -n "../zh/$filename" "./$filename"
done
12 |
--------------------------------------------------------------------------------
/docs/en/doctuils.conf:
--------------------------------------------------------------------------------
1 | [html writers]
2 | table_style: colwidths-auto
3 |
--------------------------------------------------------------------------------
/docs/en/index.rst:
--------------------------------------------------------------------------------
1 | Welcome to HuixiangDou documentation!
2 | ==========================================
3 |
4 | Getting started with HuixiangDou
5 | -------------------------------
6 |
7 | To help you quickly familiarize yourself with it, we recommend that you walk through the following documents in order:
8 |
9 | 1. Run the basic version according to the README.
10 | 2. Refer to the advanced tutorial to enhance the overall effect.
11 |
12 | We warmly welcome users' PRs and Issues!
13 |
14 | .. _QuickStart:
15 | .. toctree::
16 | :maxdepth: 1
17 | :caption: Quick Start
18 |
19 | copy_quickstart.md
20 |
21 | .. _AdvanceConfiguration:
22 | .. toctree::
23 | :maxdepth: 1
24 | :caption: Advance Configuration
25 |
26 | copy_precision.md
27 | doc_full_dev.md
28 | doc_knowledge_graph.md
29 | doc_architecture.md
30 | doc_rag_annotate_sft_data.md
31 |
32 | .. _readthedocs:
33 | .. toctree::
34 | :maxdepth: 1
35 | :caption: readthedocs Integration
36 |
37 | doc_add_readthedocs.md
38 |
39 | .. _IMApplicaion:
40 | .. toctree::
41 | :maxdepth: 1
42 |    :caption: IM Application Integration
43 |
44 | doc_add_wechat_accessibility.md
45 | doc_add_wechat_commercial.md
46 | doc_add_wechat_group.md
47 | doc_add_lark_group.md
48 | doc_send_only_lark_group.md
49 |
50 | .. _Others:
51 | .. toctree::
52 | :maxdepth: 1
53 | :caption: Others
54 |
55 | Indexes & Tables
56 | ==================
57 |
58 | * :ref:`genindex`
59 | * :ref:`search`
60 |
--------------------------------------------------------------------------------
/docs/figures/convert.py:
--------------------------------------------------------------------------------
"""Convert every PNG in the current directory to a quality-90 JPEG."""
import glob
import os

import cv2

for png_file in glob.glob('*.png'):
    img = cv2.imread(png_file)
    if img is None:
        # cv2.imread returns None (rather than raising) when a file cannot
        # be decoded; skip it instead of crashing inside imwrite.
        continue
    # Same base name, .jpg extension.
    jpg_file = os.path.splitext(png_file)[0] + '.jpg'
    # Write the JPEG with quality 90 (OpenCV's default is 95).
    cv2.imwrite(jpg_file, img, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
14 |
--------------------------------------------------------------------------------
/docs/figures/huixiangdou.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/huixiangdou.png
--------------------------------------------------------------------------------
/docs/figures/lark-add-ability.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-add-ability.png
--------------------------------------------------------------------------------
/docs/figures/lark-arch.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-arch.jpg
--------------------------------------------------------------------------------
/docs/figures/lark-bot-add-callback.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-bot-add-callback.png
--------------------------------------------------------------------------------
/docs/figures/lark-bot-reply.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-bot-reply.png
--------------------------------------------------------------------------------
/docs/figures/lark-bot-sub.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-bot-sub.png
--------------------------------------------------------------------------------
/docs/figures/lark-create-app.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-create-app.png
--------------------------------------------------------------------------------
/docs/figures/lark-create-corp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-create-corp.png
--------------------------------------------------------------------------------
/docs/figures/lark-switch-corp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/lark-switch-corp.png
--------------------------------------------------------------------------------
/docs/figures/wechat-android-example.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/wechat-android-example.jpg
--------------------------------------------------------------------------------
/docs/figures/wechat-android-homepage.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/wechat-android-homepage.jpg
--------------------------------------------------------------------------------
/docs/figures/wechat-dingdong.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/wechat-dingdong.png
--------------------------------------------------------------------------------
/docs/figures/wechat-puppet-log.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/wechat-puppet-log.png
--------------------------------------------------------------------------------
/docs/figures/wechat-run-state.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/wechat-run-state.jpg
--------------------------------------------------------------------------------
/docs/figures/wechat-wkteam.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/docs/figures/wechat-wkteam.jpg
--------------------------------------------------------------------------------
/docs/zh/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | # Set the version of Python and other tools you might need
4 | build:
5 | os: ubuntu-22.04
6 | tools:
7 | python: "3.8"
8 |
9 | formats:
10 | - epub
11 |
12 | sphinx:
13 | configuration: docs/zh/conf.py
14 |
15 | python:
16 | install:
17 | - requirements: requirements/docs.txt
18 |
--------------------------------------------------------------------------------
/docs/zh/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/zh/_static/css/readthedocs.css:
--------------------------------------------------------------------------------
1 | .header-logo {
2 | background-image: url("../image/logo.svg");
3 | background-size: 444px 93px;
4 | height: 93px;
5 | width: 444px;
6 | }
7 |
8 | @media screen and (min-width: 1100px) {
9 | .header-logo {
10 | top: -25px;
11 | }
12 | }
13 |
14 | pre {
15 | white-space: pre;
16 | }
17 |
18 | @media screen and (min-width: 2000px) {
19 | .pytorch-content-left {
20 | width: 1200px;
21 | margin-left: 30px;
22 | }
23 | article.pytorch-article {
24 | max-width: 1200px;
25 | }
26 | .pytorch-breadcrumbs-wrapper {
27 | width: 1200px;
28 | }
29 | .pytorch-right-menu.scrolling-fixed {
30 | position: fixed;
31 | top: 45px;
32 | left: 1580px;
33 | }
34 | }
35 |
36 |
37 | article.pytorch-article section code {
38 | padding: .2em .4em;
39 | background-color: #f3f4f7;
40 | border-radius: 5px;
41 | }
42 |
43 | /* Disable the change in tables */
44 | article.pytorch-article section table code {
45 | padding: unset;
46 | background-color: unset;
47 | border-radius: unset;
48 | }
49 |
50 | table.autosummary td {
51 | width: 50%
52 | }
53 |
54 | img.align-center {
55 | display: block;
56 | margin-left: auto;
57 | margin-right: auto;
58 | }
59 |
60 | article.pytorch-article p.rubric {
61 | font-weight: bold;
62 | }
63 |
--------------------------------------------------------------------------------
/docs/zh/_static/js/custom.js:
--------------------------------------------------------------------------------
// Sections to render collapsed; referenced by the documentation theme's
// navigation scripts (left empty here by default).
var collapsedSections = [];

// Once the DOM is ready, turn every ".model-summary" table into a DataTable:
// 20 rows per page, no persisted state, no page-length selector, and no
// initial sort ("order": [] keeps the document's row order).
$(document).ready(function () {
$('.model-summary').DataTable({
"stateSave": false,
"lengthChange": false,
"pageLength": 20,
"order": []
});
});
11 |
--------------------------------------------------------------------------------
/docs/zh/_templates/404.html:
--------------------------------------------------------------------------------
1 | {% extends "layout.html" %}
2 |
3 | {% block body %}
4 |
5 |
10 | If you just switched documentation versions, it is likely that the page you were on is moved. You can look for it in
11 | the content table left, or go to the homepage .
12 |
13 |
17 |
18 | {% endblock %}
19 |
--------------------------------------------------------------------------------
/docs/zh/_templates/autosummary/class.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 | .. currentmodule:: {{ module }}
4 |
5 |
6 | {{ name | underline}}
7 |
8 | .. autoclass:: {{ name }}
9 | :members:
10 |
11 | ..
12 | autogenerated from _templates/autosummary/class.rst
13 | note it does not have :inherited-members:
14 |
--------------------------------------------------------------------------------
/docs/zh/_templates/callable.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 | .. currentmodule:: {{ module }}
4 |
5 |
6 | {{ name | underline}}
7 |
8 | .. autoclass:: {{ name }}
9 | :members:
10 | :special-members: __call__
11 |
12 | ..
13 | autogenerated from _templates/callable.rst
14 | note it does not have :inherited-members:
15 |
--------------------------------------------------------------------------------
/docs/zh/cp_origin_docs.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Copy *.md files from docs/en into docs/zh when no Chinese translation
# exists yet, and snapshot the top-level READMEs as the quickstart/precision
# pages. `cp -n` never overwrites an existing destination file.

# These two copies do not depend on the loop variable; run them once
# instead of once per discovered file.
cp -n ../../README_zh.md ./copy_quickstart.md
cp -n ../../evaluation/README_zh.md ./copy_precision.md

for filename in $(find ../en/ -name '*.md' -printf "%P\n");
do
    # Quote expansions so paths containing spaces survive word splitting.
    mkdir -p "$(dirname "$filename")"
    cp -n "../en/$filename" "./$filename"
done
12 |
--------------------------------------------------------------------------------
/docs/zh/doc_add_wechat_accessibility.md:
--------------------------------------------------------------------------------
1 | # 集成个人微信 android 免费版示例
2 |
3 | 在之前的 [魔改 wechaty 方案](./doc_add_wechat_group.md) 我们一共介绍了 7 种方法。这次提供第 5 种方案的具体实现,基于 Android Accessibility 读写前端,和“抢红包”软件原理相同。
4 |
5 | 由于没有 Appium/Selenium 框架做中间商,比较稳定。
6 |
7 | ## 一、演示视频
8 |
9 | 这里是 BiliBili 2 分钟演示视频 https://www.bilibili.com/video/BV1S2421N7mn/
10 |
11 | ## 二、准备工作
12 |
13 | - 一个 android 手机,对性能和系统版本都没要求
14 | - 微信版本 8.0.47 / 8.0.48 / 8.0.49,其他版本的 view id 可能变化。[代码里](https://github.com/InternLM/HuixiangDou/blob/main/android/demo/src/main/java/com/carlos/grabredenvelope/demo/WechatConstants.kt)只测了这些版本的 id
15 | - 一个测试用的微信号
16 |
17 | ## 三、运行
18 |
19 | 打开 [OpenXLab 茴香豆 web 端](https://openxlab.org.cn/apps/detail/tpoisonooo/huixiangdou-web) ,创建自己的知识库。
20 |
21 | 这里是个能直接使用的账号密码:
22 |
23 | ```bash
24 | 账号: 20230322发版测试
25 | 密码: 123
26 | ```
27 |
28 | 点击 “零开发集成微信”,会显示你的服务端回调地址和教程。例如 `http://139.224.198.162:18443/api/v1/message/v1/wechat/oZGh`
29 |
30 | 从 [这里](https://github.com/InternLM/HuixiangDou/releases) 下载编译好的 apk,填入回调地址,开启服务,跳入微信。
31 |
32 | |
34 | | | | |
12 |
13 | ## 部署说明
14 |
15 | 如果仅想转发消息,**不需要 GPU**、**不需要 redis**、**需要公网 ip**
16 |
17 | 1. 打开 [wkteam](http://121.229.29.88:6327) 注册试用版
18 |
19 | 2. 填写 [config.ini](../../config.ini) 中的 `frontend.wechat_wkteam` 部分
20 |
21 | 例如:
22 |
23 | ```text
24 | [frontend.wechat_wkteam]
25 | account = "wkteam手机号"
26 | password = "wkteam密码"
27 | proxy = 3 # 上海地区
28 | dir = "wkteam"
29 | callback_ip = "你的公网 IP"
30 | callback_port = 9528
31 |
32 | # !!! `proxy` is a very important parameter, it's your account location
33 | # 1:北京 2:天津 3:上海 4:重庆 5:河北
34 | # 6:山西 7:江苏 8:浙江 9:安徽 10:福建
35 | # 11:江西 12:山东 13:河南 14:湖北 15:湖南
36 | # 16:广东 17:海南 18:四川 20:陕西
37 | # bad proxy would cause account deactivation !!!
38 | ```
39 |
40 | 4. 运行 `wechat.py`,微信扫描二维码登录,然后注册 callback 地址。
41 |
42 | ```text
43 | python3 huixiangdou/frontend/wechat.py --login --forward
44 | ```
45 |
46 | 若运行成功,会看到以下日志,同时 `wkteam/license.json` 会记录完整的账号信息。
47 |
48 | ```bash
49 | # 设置 callback 地址日志
50 | .. set callback url http://xxx/callback
51 | .. {"code":"1000","message":"设置成功","data":null}
52 | .. login success, all license saved to wkteam/license.json
53 |
54 | # 保存账号信息
55 | cat wkteam/license.json
56 | {
57 | "auth": "xxx",
58 | "wId": "xxx",
59 | "wcId": "wxid_xxx",
60 | "qrCodeUrl": "http://wxapii.oosxxx"
61 | }
62 | ```
63 |
64 | 5. 获取 GroupID。在你想要转发的群里发条消息,查看日志或 `wkteam/wechat_message.jsonl` 里的 GroupID 字段。填入 `config.ini`,例如:
65 |
66 | ```text
67 | [frontend.wechat_wkteam.43925126702]
68 | name = "茴香豆群(大暑)"
69 | introduction = "github https://github.com/InternLM/HuixiangDou 用户体验群"
70 | ```
71 |
72 | 6. 重新运行脚本
73 | ```text
74 | python3 huixiangdou/frontend/wechat.py --login --forward
75 | ```
76 |
--------------------------------------------------------------------------------
/docs/zh/doc_send_only_lark_group.md:
--------------------------------------------------------------------------------
1 | # 单向发到飞书群
2 |
3 | 这个功能,主要是测试 pipeline 全流程畅通。单向发送的实用意义有限。
4 |
5 | 点击[创建飞书自定义机器人](https://open.feishu.cn/document/client-docs/bot-v3/add-custom-bot),获取回调 WEBHOOK_URL,填写到 config.ini
6 |
7 | ```ini
8 | # config.ini
9 | ..
10 | [frontend]
11 | type = "lark"
12 | webhook_url = "${YOUR-LARK-WEBHOOK-URL}"
13 | ```
14 |
15 | 运行。结束后,技术助手的答复将**单向**发送到飞书群。
16 |
17 | ```shell
18 | python3 -m huixiangdou.main
19 | ```
20 |
21 |
22 |
--------------------------------------------------------------------------------
/docs/zh/doctuils.conf:
--------------------------------------------------------------------------------
1 | [html writers]
2 | table_style: colwidths-auto
3 |
--------------------------------------------------------------------------------
/docs/zh/index.rst:
--------------------------------------------------------------------------------
1 | 欢迎来到 HuixiangDou 进阶说明!
2 | ==========================================
3 |
4 | HuixiangDou 上手路线
5 | -------------------------------
6 |
7 | 我们推荐以下流程:
8 |
9 | 1. 按照 README 运行基础版本
10 | 2. 参考进阶教程,提升整体效果
11 |
12 | 我们非常欢迎用户的 PR 和 Issue !
13 |
14 | .. _快速运行:
15 | .. toctree::
16 | :maxdepth: 1
17 | :caption: 基础入门
18 |
19 | copy_quickstart.md
20 |
21 | .. _进阶参考:
22 | .. toctree::
23 | :maxdepth: 1
24 | :caption: 配置说明
25 |
26 | copy_precision.md
27 | doc_full_dev.md
28 | doc_knowledge_graph.md
29 | doc_rag_annotate_sft_data.md
30 | doc_architecture.md
31 |
32 | .. _接入readthedocs:
33 | .. toctree::
34 | :maxdepth: 1
35 | :caption: 接入readthedocs
36 |
37 | doc_add_readthedocs.md
38 |
39 | .. _接入即时通讯软件:
40 | .. toctree::
41 | :maxdepth: 1
42 | :caption: 接入即时通讯软件
43 |
44 | doc_add_wechat_accessibility.md
45 | doc_add_wechat_commercial.md
46 | doc_add_wechat_group.md
47 | doc_add_lark_group.md
48 | doc_send_only_lark_group.md
49 | doc_merge_wechat_group.md
50 |
51 | 索引与表格
52 | ==================
53 |
54 | * :ref:`genindex`
55 | * :ref:`search`
56 |
--------------------------------------------------------------------------------
/evaluation/rejection/gt_bad.txt:
--------------------------------------------------------------------------------
1 | 对你课题的目标定义一下就可以了
--------------------------------------------------------------------------------
/evaluation/rejection/gt_good.txt:
--------------------------------------------------------------------------------
1 | 大佬们,请问如何安装mmcv?
--------------------------------------------------------------------------------
/evaluation/rejection/plot_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/evaluation/rejection/plot_example.png
--------------------------------------------------------------------------------
/evaluation/rerank/step0_clean_queries.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import re
4 |
5 | from loguru import logger
6 |
# Full-string alphanumeric pattern; applied to the 4-char id prefix of a log line.
pattern = re.compile(r'^[A-Za-z0-9]+$')

pwd = os.path.dirname(__file__)
# Raw chat log to clean, expected one directory above this script.
query_log = os.path.join(pwd, '..', 'query.log')
11 |
12 |
def save(_id, sentence):
    """Append *sentence* to the bucket for *_id*, creating the bucket on first use."""
    queries.setdefault(_id, []).append(sentence)
18 |
19 |
queries = dict()
with open(query_log) as f:
    _id = None
    sentence = ''
    for line in f:
        line = line.strip()
        if len(line) < 5:
            continue

        # A record header is a 4-char alphanumeric id followed by a space;
        # any other line is a continuation of the current sentence.
        if line[4] == ' ' and pattern.match(line[0:4]):
            # Flush the previous record before starting a new one.
            if _id is not None and sentence != '':
                save(_id, sentence)
            _id = line[0:4]
            sentence = line[4:]
        else:
            sentence += '\n'
            sentence += line

    # Flush the trailing record. Guard against logs with no id line at all:
    # the original called save(None, ...) and later crashed joining a None
    # path component.
    if _id is not None and sentence != '':
        save(_id, sentence)

counter = 0
for _id in queries:
    with open(os.path.join(pwd, '..', 'queries', _id) + '.txt', 'a') as f:
        # Dedupe after stripping; sort so the output files are deterministic
        # across runs (plain set iteration order is not).
        values = sorted({v.strip() for v in queries[_id]})
        counter += len(values)
        f.write(json.dumps(values, ensure_ascii=False))
        f.write('\n')

logger.info(counter)
57 |
--------------------------------------------------------------------------------
/huixiangdou-inside.md:
--------------------------------------------------------------------------------
1 | # HuixiangDou Inside
2 |
3 | | ID | Environment | IM Application | Description | Screen Shortcut |
4 | | --- | --------------------------- | -------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------- |
5 | | 1 | openmmlab user group | wechat | reply user question | |
6 | | 2 | ncnn contributor group | wechat | explain software and hardware terminologies and pretending to be human | |
7 | | 3 | inner middleware user group | lark | reply user question | |
--------------------------------------------------------------------------------
/huixiangdou/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | """import module."""
3 | # only import frontend when needed, not here
4 | from .services import ErrorCode # noqa E401
5 | from .services import FeatureStore # noqa E401
6 | from .services import WebSearch # noqa E401
7 | from .services import SerialPipeline, ParallelPipeline # noqa E401
8 | from .services import build_reply_text # noqa E401
9 | from .version import __version__
10 |
--------------------------------------------------------------------------------
/huixiangdou/frontend/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | """IM proxy."""
3 | from .lark import Lark # noqa E401
4 | from .lark_group import is_revert_command # noqa E401
5 | from .lark_group import revert_from_lark_group, send_to_lark_group # noqa E401
6 | from .wechat import WkteamManager # noqa E401
7 |
--------------------------------------------------------------------------------
/huixiangdou/primitive/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | """primitive module."""
3 | from .chunk import Chunk # noqa E401
4 | from .embedder import Embedder # noqa E401
5 | from .faiss import Faiss # noqa E401
6 | from .file_operation import FileName, FileOperation # noqa E401
7 | from .llm_reranker import LLMReranker # noqa E401
8 | from .query import Query
9 | from .splitter import (
10 | CharacterTextSplitter, # noqa E401
11 | ChineseRecursiveTextSplitter,
12 | MarkdownHeaderTextSplitter,
13 | MarkdownTextRefSplitter,
14 | RecursiveCharacterTextSplitter,
15 | nested_split_markdown, split_python_code)
16 | from .limitter import RPM, TPM
17 | from .bm250kapi import BM25Okapi
18 | from .entity import NamedEntity2Chunk
19 | from .utils import always_get_an_event_loop
20 |
--------------------------------------------------------------------------------
/huixiangdou/primitive/chunk.py:
--------------------------------------------------------------------------------
1 |
2 | from dataclasses import dataclass, field
3 |
4 |
@dataclass
class Chunk():
    """A piece of text (or a path to media) together with its metadata.

    Example:

    .. code-block:: python

        from huixiangdou.primitive import Chunk

        chunk = Chunk(
            content_or_path="Hello, world!",
            metadata={"source": "https://example.com"}
        )
    """
    content_or_path: str = ''
    metadata: dict = field(default_factory=dict)
    modal: str = 'text'

    def __post_init__(self):
        # Reject anything outside the supported modalities right after init.
        if self.modal not in ('text', 'image', 'audio', 'qa'):
            raise ValueError(
                f'Invalid modal: {self.modal}. Allowed values are: `text`, `image`, `audio`, `qa`'
            )

    def __str__(self) -> str:
        """Compact, pydantic-style rendering limited to modal, content and metadata.

        Keeping the representation stable lets user code feed chunks straight
        into prompts without being affected by future extra fields.
        """
        rendered = f"modal='{self.modal}' content_or_path='{self.content_or_path}'"
        if self.metadata:
            rendered = f"{rendered} metadata={self.metadata}"
        return rendered

    def __repr__(self) -> str:
        # Mirror __str__ so debug output matches prompt-facing output.
        return self.__str__()
--------------------------------------------------------------------------------
/huixiangdou/primitive/token.py:
--------------------------------------------------------------------------------
1 | import tiktoken
2 | import re
3 |
# Shared tiktoken encoder, built lazily on first use and reused by both
# encode_string() and decode_tokens(); the model of the first call wins.
ENCODER = None

# modified from https://github.com/HKUDS/LightRAG
def encode_string(content: str, model_name: str = "gpt-4o"):
    """Tokenize *content* with a lazily-created, cached tiktoken encoder.

    Args:
        content (str): Text to tokenize.
        model_name (str): Model whose encoding to use when building the
            cached encoder for the first time.

    Returns:
        list[int]: Token ids for *content*.
    """
    global ENCODER
    if ENCODER is None:
        # The original also called tiktoken.get_encoding("cl100k_base") here
        # and discarded the result; encoding_for_model() alone is sufficient.
        ENCODER = tiktoken.encoding_for_model(model_name)
    return ENCODER.encode(content)
14 |
15 |
def decode_tokens(tokens: list[int], model_name: str = "gpt-4o"):
    """Decode token ids back into text using the shared module-level encoder.

    Lazily builds the encoder for *model_name* on first use; later calls
    reuse whatever encoder is already cached in ENCODER.
    """
    global ENCODER
    encoder = ENCODER
    if encoder is None:
        encoder = tiktoken.encoding_for_model(model_name)
        ENCODER = encoder
    return encoder.decode(tokens)
22 |
23 |
# Lazily-compiled character class patterns shared across calls.
ZH_CN_CHAR_PATTERN = None
EN_CHAR_PATTERN = None


def judge_language(text):
    """Guess whether *text* is mainly Chinese ('zh_cn') or English ('en').

    Counts CJK unified ideographs versus ASCII letters; ties (including
    empty input) resolve to English, matching the original behavior.
    """
    global ZH_CN_CHAR_PATTERN, EN_CHAR_PATTERN
    if ZH_CN_CHAR_PATTERN is None:
        ZH_CN_CHAR_PATTERN = re.compile(r'[\u4e00-\u9fff]')
    if EN_CHAR_PATTERN is None:
        EN_CHAR_PATTERN = re.compile(r'[a-zA-Z]')

    zh_count = len(ZH_CN_CHAR_PATTERN.findall(text))
    en_count = len(EN_CHAR_PATTERN.findall(text))
    return 'zh_cn' if zh_count > en_count else 'en'
--------------------------------------------------------------------------------
/huixiangdou/primitive/utils.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from loguru import logger
3 |
def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
    """Return the currently running event loop, or create and install a new one."""
    try:
        return asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: build one and register it so
        # subsequent asyncio calls in this thread find it.
        logger.info("Creating a new event loop in a sub-thread.")
        fresh_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(fresh_loop)
        return fresh_loop
--------------------------------------------------------------------------------
/huixiangdou/services/__init__.py:
--------------------------------------------------------------------------------
1 | """LLM service module."""
2 | from .config import (feature_store_base_dir, redis_host, redis_passwd,
3 | redis_port)
4 | from .helper import (ErrorCode, QueryTracker, Queue, TaskCode,
5 | build_reply_text, check_str_useful, histogram, kimi_ocr,
6 | multimodal, parse_json_str)
7 | from .kg import KnowledgeGraph # noqa E401
8 | from .llm import LLM
9 | from .web_search import WebSearch # noqa E401
10 | from .serial_pipeline import SerialPipeline
11 | from .parallel_pipeline import ParallelPipeline
12 | # Import FeatureStore at the end to avoid circular imports
13 | from .store import FeatureStore # noqa E401
14 |
--------------------------------------------------------------------------------
/huixiangdou/services/config.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 |
4 | from loguru import logger
5 |
6 |
def redis_host():
    """Read REDIS_HOST from the environment.

    Raises:
        Exception: when the variable is missing or empty.
    """
    host = os.getenv('REDIS_HOST')
    if not host:
        raise Exception('REDIS_HOST not config')
    return host
12 |
13 |
def redis_port():
    """Read REDIS_PORT from the environment as an int, defaulting to 6379.

    The original returned the raw env *string* when set but an *int* default
    when unset; callers now always receive an int. An unset or empty variable
    falls back to 6379.

    Raises:
        ValueError: when REDIS_PORT is set to a non-integer string.
    """
    port = os.getenv('REDIS_PORT')
    if port is None or len(port) < 1:
        logger.debug('REDIS_PORT not set, try 6379')
        return 6379
    return int(port)
20 |
21 |
def redis_passwd():
    """Read REDIS_PASSWORD from the environment.

    Raises:
        Exception: when the variable is missing or empty.
    """
    passwd = os.getenv('REDIS_PASSWORD')
    if not passwd:
        raise Exception('REDIS_PASSWORD not config')
    return passwd
27 |
28 |
def feature_store_base_dir():
    """Return the root directory under which feature stores are persisted."""
    return 'feature_stores'
31 |
--------------------------------------------------------------------------------
/huixiangdou/services/session.py:
--------------------------------------------------------------------------------
1 | from huixiangdou.primitive import Query
2 | from .helper import ErrorCode
3 | import os
4 | import json
5 |
class Session:
    """For compute graph, `session` takes all parameter.

    Holds the query, history and every intermediate result produced by the
    pipeline; on destruction the collected debug info is appended to a
    JSON-lines log file.
    """

    def __init__(self,
                 query: Query,
                 history: list,
                 groupname: str = '',
                 log_path: str = 'logs/generate.jsonl',
                 groupchats: list = None):
        # NOTE: default changed from a shared mutable `[]` to None so that
        # separate sessions no longer share (and mutate) one list object.
        self.query = query
        self.history = history
        self.groupname = groupname
        self.groupchats = groupchats if groupchats is not None else []

        # init
        # Same as `chunk.choices[0].delta`
        self.delta = ''
        self.parallel_chunks = []
        self.response = ''
        self.references = []
        self.topic = ''
        self.code = ErrorCode.INIT

        # coreference resolution results
        self.cr = ''

        # text2vec results
        self.chunk = ''
        self.knowledge = ''

        # web search results
        self.web_knowledge = ''

        # source graph search results
        self.sg_knowledge = ''

        # debug logs
        self.debug = dict()
        self.log_path = log_path

    def __del__(self):
        # Best-effort persistence of the debug trace; never raise during
        # garbage collection / interpreter teardown. makedirs is now inside
        # the try (the original could raise outside it, e.g. for a bare
        # filename whose dirname is '') and uses exist_ok to avoid races.
        try:
            dirname = os.path.dirname(self.log_path)
            if dirname:
                os.makedirs(dirname, exist_ok=True)
            with open(self.log_path, 'a') as f:
                f.write(json.dumps(self.debug, indent=2, ensure_ascii=False))
                f.write('\n')
        except Exception:
            pass
--------------------------------------------------------------------------------
/huixiangdou/version.py:
--------------------------------------------------------------------------------
1 |
2 | from typing import Tuple
3 |
# Date-based version string (YYYYMMDD); it contains no '.' separators, so
# parse_version_info() yields a single-element tuple for it.
__version__ = '20240415'
short_version = __version__
6 |
7 |
8 | def parse_version_info(version_str: str) -> Tuple:
9 | """Parse version from a string.
10 |
11 | Args:
12 | version_str (str): A string represents a version info.
13 |
14 | Returns:
15 | tuple: A sequence of integer and string represents version.
16 | """
17 | _version_info = []
18 | for x in version_str.split('.'):
19 | if x.isdigit():
20 | _version_info.append(int(x))
21 | elif x.find('rc') != -1:
22 | patch_version = x.split('rc')
23 | _version_info.append(int(patch_version[0]))
24 | _version_info.append(f'rc{patch_version[1]}')
25 | return tuple(_version_info)
26 |
27 |
28 | version_info = parse_version_info(__version__)
29 |
--------------------------------------------------------------------------------
/logs/work.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aiohttp
2 | beautifulsoup4
3 | duckduckgo_search
4 | einops
5 | loguru
6 | lxml_html_clean
7 | networkx>=3.0
8 | numpy<2.0.0
9 | openai>=1.0.0
10 | openpyxl
11 | pandas
12 | pydantic>=1.10.13
13 | pymupdf
14 | python-docx
15 | pytoml
16 | readability-lxml
17 | redis
18 | requests
19 | scikit-learn
20 | db-sqlite3
21 | # See https://github.com/deanmalmgren/textract/issues/461
22 | # textract @ git+https://github.com/tpoisonooo/textract@master
23 | # textract
24 | texttable
25 | tiktoken
26 | torch>=2.0.0
27 | transformers>=4.38
28 | tenacity
29 | transformers_stream_generator
30 | unstructured
31 | sentence_transformers
32 | sse_starlette
33 | fastapi
34 | uvicorn
35 | termcolor
36 | opencv-python-headless
37 | gradio>=4.41
38 | bcembedding
39 | jieba
40 | faiss-gpu
41 |
--------------------------------------------------------------------------------
/requirements/cpu.txt:
--------------------------------------------------------------------------------
1 | --extra-index-url https://download.pytorch.org/whl/cpu
2 | aiohttp
3 | beautifulsoup4
4 | duckduckgo_search
5 | einops
6 | faiss-cpu
7 | jieba
8 | loguru
9 | lxml_html_clean
10 | nest_asyncio
11 | networkx>=3.0
12 | numpy<2.0.0
13 | openai>=1.55.3
14 | openpyxl
15 | pandas
16 | pydantic>=1.10.13
17 | pymupdf
18 | python-docx
19 | pytoml
20 | readability-lxml
21 | redis
22 | requests
23 | scikit-learn
24 | # See https://github.com/deanmalmgren/textract/issues/461
25 | # textract @ git+https://github.com/tpoisonooo/textract@master
26 | # textract
27 | texttable
28 | tiktoken
29 | torch
30 | unstructured
31 | sse_starlette
32 | fastapi
33 | uvicorn
34 | termcolor
35 | opencv-python-headless
36 | gradio
37 |
--------------------------------------------------------------------------------
/requirements/docs.txt:
--------------------------------------------------------------------------------
1 | docutils==0.18.1
2 | modelindex
3 | myst-parser
4 | -e git+https://github.com/tpoisonooo/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
5 | sphinx==6.1.3
6 | sphinx-copybutton
7 | sphinx-design
8 | sphinx-notfound-page
9 | sphinx-tabs
10 | sphinxcontrib-jquery
11 | tabulate
--------------------------------------------------------------------------------
/requirements/lark-group.txt:
--------------------------------------------------------------------------------
1 | flask
2 | lark_oapi
3 | pytoml
4 | redis
--------------------------------------------------------------------------------
/requirements/multimodal.txt:
--------------------------------------------------------------------------------
1 | einops
2 | ftfy
3 | timm
4 | torchvision
5 | FlagEmbedding
6 |
7 | # do not install xformers and apex
--------------------------------------------------------------------------------
/requirements/sft.txt:
--------------------------------------------------------------------------------
1 | accelerate>=0.26.1
2 | auto-gptq
--------------------------------------------------------------------------------
/resource/bad_questions.json:
--------------------------------------------------------------------------------
1 | [
2 | "mmpose中怎么调用mmyolo接口",
3 | "mmpose实现姿态估计后怎么实现行为识别",
4 | "mmpose执行提取关键点命令不是分为两步吗,一步是目标检测,另一步是关键点提取,我现在目标检测这部分的代码是demo/topdown_demo_with_mmdet.py demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth 现在我想把这个mmdet的checkpoints换位yolo的,那么应该怎么操作",
5 | "在mmdetection中,如何同时加载两个数据集,两个dataloader",
6 | "如何将mmdetection2.28.2的retinanet配置文件改为单尺度的呢?",
7 | "1.MMPose_Tutorial.ipynb、inferencer_demo.py、image_demo.py、bottomup_demo.py、body3d_pose_lifter_demo.py这几个文件和topdown_demo_with_mmdet.py的区别是什么,\n2.我如果要使用mmdet是不是就只能使用topdown_demo_with_mmdet.py文件,",
8 | "mmpose 测试 map 一直是 0 怎么办?",
9 | "如何使用mmpose检测人体关键点?",
10 | "我使用的数据集是labelme标注的,我想知道mmpose的数据集都是什么样式的,全都是单目标的数据集标注,还是里边也有多目标然后进行标注",
11 | "如何生成openmmpose的c++推理脚本",
12 | "mmpose",
13 | "mmpose的目标检测阶段调用的模型,一定要是demo文件夹下的文件吗,有没有其他路径下的文件",
14 | "mmpose可以实现行为识别吗,如果要实现的话应该怎么做",
15 | "我在mmyolo的v0.6.0 (15/8/2023)更新日志里看到了他新增了支持基于 MMPose 的 YOLOX-Pose,我现在是不是只需要在mmpose/project/yolox-Pose内做出一些设置就可以,换掉demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py 改用mmyolo来进行目标检测了",
16 | "mac m1从源码安装的mmpose是x86_64的",
17 | "想请教一下mmpose有没有提供可以读取外接摄像头,做3d姿态并达到实时的项目呀?",
18 | "huixiangdou 是什么?",
19 | "大佬们,如果我想在高空检测安全帽,我应该用 mmdetection 还是 mmrotate",
20 | "mmdetection如何开启多卡训练",
21 | "硬件模型库是什么",
22 | "硬件模型库是啥?",
23 | "OpenMMLab有哪些开源库",
24 | "cbam注意力机制如何改进",
25 | "轻量级的边分辨率模型有哪些?",
26 | "如何添加CBAM机制",
27 | "自定义数据集需要修改什么内容",
28 | "对人进行关键点提取的时候,如果是多个人的场景下,就会出现连线到其他人身上去的情况,这个时候是不是目标检测模型这里的问题,也就是mmdet的识别效率有点低了,所以导致这种情况的出现",
29 | "有人把mmdeploy成功部署到jetson agx orin上吗?",
30 | "那这里的mmdet的配置文件demo/topdown_demo_with_mmdet.py就不需要换吗,他里边的配置不是训练mmdet的配置吗,我觉得是不是要换一个新的py配置文件,然后调用yolo",
31 | "怎么训练llm",
32 | "哪种目标检测算法适合小目标",
33 | "OpenCompass 大模型数据集评估分数查询",
34 | "把某专业标准类知识pdf格式,如何创建成向量数据库?"
35 | ]
36 |
--------------------------------------------------------------------------------
/resource/data/baicaoyuan.md:
--------------------------------------------------------------------------------
1 | # 从百草园到三味书屋
2 | 我家的后面有一个很大的园,相传叫作百草园。现在是早已并屋子一起卖给朱文公的子孙了,连那最末次的相见也已经隔了七八年,其中似乎确凿只有一些野草;但那时却是我的乐园。
3 | 不必说碧绿的菜畦,光滑的石井栏,高大的皂荚树,紫红的桑椹;也不必说鸣蝉在树叶里长吟,肥胖的黄蜂伏在菜花上,轻捷的叫天子(云雀)忽然从草间直窜向云霄里去了。单是周围的短短的泥墙根一带,就有无限趣味。油蛉在这里低唱,蟋蟀们在这里弹琴。翻开断砖来,有时会遇见蜈蚣;还有斑蝥,倘若用手指按住它的脊梁,便会啪的一声,从后窍喷出一阵烟雾。何首乌藤和木莲藤缠络着,木莲有莲房一般的果实,何首乌有臃肿的根。有人说,何首乌根是有像人形的,吃了便可以成仙,我于是常常拔它起来,牵连不断地拔起来,也曾因此弄坏了泥墙,却从来没有见过有一块根像人样。如果不怕刺,还可以摘到覆盆子,像小珊瑚珠攒成的小球,又酸又甜,色味都比桑椹要好得远。
--------------------------------------------------------------------------------
/resource/data/qa_pair.csv:
--------------------------------------------------------------------------------
1 | "What is HuixiangDou?","HuixiangDou is an AI assistant that can answer questions based on your knowledge base."
2 | "How to use HuixiangDou?","You can use HuixiangDou by providing a knowledge base and asking questions related to it."
3 | "What features does HuixiangDou support?","HuixiangDou supports text embedding, document retrieval, and question answering."
--------------------------------------------------------------------------------
/resource/data/tengye.md:
--------------------------------------------------------------------------------
1 | # 藤野先生
2 | 东京也无非是这样。上野②的樱花烂熳的时节,望去确也像绯红的轻云,但花下也缺不了成群结队的“清国留学生”的速成班③,头顶上盘着大辫子,顶得学生制帽的顶上高高耸起,形成一座富士山④。也有解散辫子,盘得平的,除下帽来,油光可鉴⑤,宛如小姑娘的发髻一般,还要将脖子扭几扭。实在标致⑥极了。
3 | 中国留学生会馆⑦的门房里有几本书买,有时还值得去一转;倘在上午,里面的几间洋房里倒也还可以坐坐的。但到傍晚,有一间的地板便常不免要咚咚咚地响得震天,兼以满房烟尘斗乱⑧;问问精通时事⑨的人,答道,“那是在学跳舞。”
4 | 到别的地方去看看,如何呢?
--------------------------------------------------------------------------------
/resource/figures/inside-middleware.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/resource/figures/inside-middleware.png
--------------------------------------------------------------------------------
/resource/figures/inside-mmpose.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/resource/figures/inside-mmpose.jpg
--------------------------------------------------------------------------------
/resource/figures/inside-ncnn-group.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/resource/figures/inside-ncnn-group.jpg
--------------------------------------------------------------------------------
/resource/figures/lark-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/resource/figures/lark-example.png
--------------------------------------------------------------------------------
/resource/good_questions.json:
--------------------------------------------------------------------------------
1 | [
2 | "百草园是什么",
3 | "斋藤先生是谁?"
4 | ]
5 |
--------------------------------------------------------------------------------
/sft/axolotl_configs/lora-4B.yml:
--------------------------------------------------------------------------------
1 | base_model: /workspace/models/Qwen1.5-4B-Chat
2 | model_type: AutoModelForCausalLM
3 | tokenizer_type: AutoTokenizer
4 |
5 | trust_remote_code: true
6 |
7 | load_in_8bit: true
8 | load_in_4bit: false
9 | strict: false
10 |
11 | datasets:
12 | - path: /workspace/axolotl/alpaca.json
13 | type: alpaca
14 | dataset_prepared_path:
15 | val_set_size: 0.05
16 | output_dir: ./lora-out
17 |
18 | sequence_len: 2048 # supports up to 8192
19 | sample_packing: false
20 | pad_to_sequence_len:
21 |
22 | adapter: lora
23 | lora_model_dir:
24 | lora_r: 32
25 | lora_alpha: 16
26 | lora_dropout: 0.05
27 | lora_target_linear: true
28 | lora_fan_in_fan_out:
29 |
30 | wandb_mode: online
31 | wandb_project: huixiangdou-cr
32 | wandb_entity:
33 | wandb_watch:
34 | wandb_name: qwen-4
35 | wandb_log_model:
36 |
37 | gradient_accumulation_steps: 4
38 | micro_batch_size: 2
39 | num_epochs: 4
40 | optimizer: adamw_bnb_8bit
41 | lr_scheduler: cosine
42 | learning_rate: 0.0002
43 |
44 | train_on_inputs: false
45 | group_by_length: false
46 | bf16: auto
47 | fp16:
48 | tf32: false
49 |
50 | gradient_checkpointing: false
51 | early_stopping_patience:
52 | resume_from_checkpoint:
53 | local_rank:
54 | logging_steps: 1
55 | xformers_attention:
56 | flash_attention:
57 |
58 | warmup_steps: 10
59 | evals_per_epoch: 4
60 | eval_table_size:
61 | eval_max_new_tokens: 128
62 | saves_per_epoch: 1
63 | debug:
64 | deepspeed:
65 | weight_decay: 0.0
66 | fsdp:
67 | fsdp_config:
68 | special_tokens:
69 |
--------------------------------------------------------------------------------
/sft/axolotl_configs/qwen2-lora-0.5B.yaml:
--------------------------------------------------------------------------------
1 | base_model: /workspace/models/Qwen1.5-0.5B-Chat
2 | trust_remote_code: true
3 |
4 | load_in_8bit: false
5 | load_in_4bit: false
6 | strict: false
7 |
8 | datasets:
9 | - path: /workspace/axolotl/alpaca.json
10 | type: alpaca
11 | dataset_prepared_path:
12 | val_set_size: 0.05
13 | output_dir: ./out-qwen0.5
14 |
15 | sequence_len: 1400 # supports up to 32k
16 | sample_packing: false
17 | pad_to_sequence_len: false
18 |
19 | adapter: lora
20 | lora_model_dir:
21 | lora_r: 64
22 | lora_alpha: 16
23 | lora_dropout: 0.05
24 | lora_target_linear: true
25 | lora_fan_in_fan_out:
26 |
27 | wandb_mode: online
28 | wandb_project: huixiangdou-cr
29 | wandb_entity:
30 | wandb_watch:
31 | wandb_name: qwen0.5
32 | wandb_log_model:
33 |
34 | gradient_accumulation_steps: 1
35 | micro_batch_size: 16
36 | num_epochs: 1
37 | optimizer: paged_adamw_8bit
38 | lr_scheduler: cosine
39 | learning_rate: 0.0002
40 |
41 | train_on_inputs: false
42 | group_by_length: false
43 | gradient_checkpointing: true
44 | gradient_checkpointing_kwargs:
45 | use_reentrant: false
46 | early_stopping_patience:
47 | resume_from_checkpoint:
48 | local_rank:
49 | logging_steps: 1
50 | xformers_attention:
51 | flash_attention: true
52 |
53 | warmup_steps: 10
54 | evals_per_epoch: 1
55 | saves_per_epoch: 1
56 | debug:
57 | deepspeed:
58 | weight_decay: 0.0
59 | fsdp:
60 | fsdp_config:
61 | special_tokens:
62 |
--------------------------------------------------------------------------------
/sft/axolotl_configs/qwen2-lora-1.8B.yaml:
--------------------------------------------------------------------------------
1 | base_model: /workspace/models/Qwen1.5-1.8B-Chat
2 | trust_remote_code: true
3 |
4 | load_in_8bit: false
5 | load_in_4bit: false
6 | strict: false
7 |
8 | datasets:
9 | - path: /workspace/axolotl/alpaca.json
10 | type: alpaca
11 | dataset_prepared_path:
12 | val_set_size: 0.05
13 | output_dir: ./out-qwen1.8
14 |
15 | sequence_len: 1400 # supports up to 32k
16 | sample_packing: false
17 | pad_to_sequence_len: false
18 |
19 | adapter: lora
20 | lora_model_dir:
21 | lora_r: 64
22 | lora_alpha: 16
23 | lora_dropout: 0.05
24 | lora_target_linear: true
25 | lora_fan_in_fan_out:
26 |
27 | wandb_mode: online
28 | wandb_project: huixiangdou-cr
29 | wandb_entity:
30 | wandb_watch:
31 | wandb_name: qwen1.8
32 | wandb_log_model:
33 |
34 | gradient_accumulation_steps: 1
35 | micro_batch_size: 16
36 | num_epochs: 1
37 | optimizer: paged_adamw_8bit
38 | lr_scheduler: cosine
39 | learning_rate: 0.0002
40 |
41 | train_on_inputs: false
42 | group_by_length: false
43 | gradient_checkpointing: true
44 | gradient_checkpointing_kwargs:
45 | use_reentrant: false
46 | early_stopping_patience:
47 | resume_from_checkpoint:
48 | local_rank:
49 | logging_steps: 1
50 | xformers_attention:
51 | flash_attention: true
52 |
53 | warmup_steps: 10
54 | evals_per_epoch: 1
55 | saves_per_epoch: 1
56 | debug:
57 | deepspeed:
58 | weight_decay: 0.0
59 | fsdp:
60 | fsdp_config:
61 | special_tokens:
62 |
--------------------------------------------------------------------------------
/sft/axolotl_configs/qwen2-lora-14B.yaml:
--------------------------------------------------------------------------------
1 | base_model: /workspace/models/Qwen1.5-14B-Chat
2 | trust_remote_code: true
3 |
4 | load_in_8bit: false
5 | load_in_4bit: false
6 | strict: false
7 |
8 | datasets:
9 | - path: /workspace/axolotl/alpaca.json
10 | type: alpaca
11 | dataset_prepared_path:
12 | val_set_size: 0.05
13 | output_dir: ./out-qwen14
14 |
15 |
16 | sequence_len: 1400 # supports up to 32k
17 | sample_packing: false
18 | pad_to_sequence_len: false
19 |
20 | adapter: lora
21 | lora_model_dir:
22 | lora_r: 64
23 | lora_alpha: 16
24 | lora_dropout: 0.05
25 | lora_target_linear: true
26 | lora_fan_in_fan_out:
27 |
28 | wandb_mode: online
29 | wandb_project: huixiangdou-cr
30 | wandb_entity:
31 | wandb_watch:
32 | wandb_name: qwen14
33 | wandb_log_model:
34 |
35 | gradient_accumulation_steps: 1
36 | micro_batch_size: 8
37 | num_epochs: 1
38 | optimizer: paged_adamw_8bit
39 | lr_scheduler: cosine
40 | learning_rate: 0.0002
41 |
42 | train_on_inputs: false
43 | group_by_length: false
44 | gradient_checkpointing: true
45 | gradient_checkpointing_kwargs:
46 | use_reentrant: false
47 | early_stopping_patience:
48 | resume_from_checkpoint:
49 | local_rank:
50 | logging_steps: 1
51 | xformers_attention:
52 | flash_attention: true
53 |
54 | warmup_steps: 10
55 | evals_per_epoch: 1
56 | saves_per_epoch: 1
57 | debug:
58 | deepspeed:
59 | weight_decay: 0.0
60 | fsdp:
61 | fsdp_config:
62 | special_tokens:
63 |
--------------------------------------------------------------------------------
/sft/axolotl_configs/qwen2-lora-32B.yaml:
--------------------------------------------------------------------------------
1 | base_model: /workspace/models/Qwen1.5-32B-Chat
2 | trust_remote_code: true
3 |
4 | load_in_8bit: false
5 | load_in_4bit: false
6 | strict: false
7 |
8 | datasets:
9 | - path: /workspace/axolotl/alpaca.json
10 | type: alpaca
11 | dataset_prepared_path:
12 | val_set_size: 0.05
13 | output_dir: ./out-qwen32
14 |
15 |
16 | sequence_len: 1400 # supports up to 32k
17 | sample_packing: false
18 | pad_to_sequence_len: false
19 |
20 | adapter: lora
21 | lora_model_dir:
22 | lora_r: 64
23 | lora_alpha: 16
24 | lora_dropout: 0.05
25 | lora_target_linear: true
26 | lora_fan_in_fan_out:
27 |
28 | wandb_mode: online
29 | wandb_project: huixiangdou-cr
30 | wandb_entity:
31 | wandb_watch:
32 | wandb_name: qwen32
33 | wandb_log_model:
34 |
35 | gradient_accumulation_steps: 1
36 | micro_batch_size: 4
37 | num_epochs: 1
38 | optimizer: paged_adamw_8bit
39 | lr_scheduler: cosine
40 | learning_rate: 0.0002
41 |
42 | train_on_inputs: false
43 | group_by_length: false
44 | gradient_checkpointing: true
45 | gradient_checkpointing_kwargs:
46 | use_reentrant: false
47 | early_stopping_patience:
48 | resume_from_checkpoint:
49 | local_rank:
50 | logging_steps: 1
51 | xformers_attention:
52 | flash_attention: true
53 |
54 | warmup_steps: 10
55 | evals_per_epoch: 1
56 | saves_per_epoch: 1
57 | debug:
58 | deepspeed:
59 | weight_decay: 0.0
60 | fsdp:
61 | fsdp_config:
62 | special_tokens:
63 |
--------------------------------------------------------------------------------
/sft/axolotl_configs/qwen2-lora-4B-loraplus-epoch4.yaml:
--------------------------------------------------------------------------------
1 | base_model: /workspace/models/Qwen1.5-4B-Chat
2 | trust_remote_code: true
3 |
4 | load_in_8bit: false
5 | load_in_4bit: false
6 | strict: false
7 |
8 | datasets:
9 | - path: /workspace/axolotl/alpaca.json
10 | type: alpaca
11 | dataset_prepared_path:
12 | val_set_size: 0.05
13 | output_dir: ./out-qwen4-loraplus-ep4
14 |
15 | sequence_len: 1400 # supports up to 32k
16 | sample_packing: false
17 | pad_to_sequence_len: false
18 |
19 | adapter: lora
20 | lora_model_dir:
21 | lora_r: 64
22 | lora_alpha: 16
23 | lora_dropout: 0.05
24 | lora_target_linear: true
25 | lora_fan_in_fan_out:
26 | loraplus_lr_ratio: 16
27 |
28 | wandb_mode: online
29 | wandb_project: huixiangdou-cr
30 | wandb_entity:
31 | wandb_watch:
32 | wandb_name: qwen-4
33 | wandb_log_model:
34 |
35 | gradient_accumulation_steps: 1
36 | micro_batch_size: 16
37 | num_epochs: 4
38 | optimizer: paged_adamw_8bit
39 | lr_scheduler: cosine
40 | learning_rate: 0.00005
41 |
42 | train_on_inputs: false
43 | group_by_length: false
44 | gradient_checkpointing: true
45 | gradient_checkpointing_kwargs:
46 | use_reentrant: false
47 | early_stopping_patience:
48 | resume_from_checkpoint:
49 | local_rank:
50 | logging_steps: 1
51 | xformers_attention:
52 | flash_attention: true
53 |
54 | warmup_steps: 10
55 | evals_per_epoch: 1
56 | saves_per_epoch: 4
57 | debug:
58 | deepspeed:
59 | weight_decay: 0.0
60 | fsdp:
61 | fsdp_config:
62 | special_tokens:
63 |
--------------------------------------------------------------------------------
/sft/axolotl_configs/qwen2-lora-4B.yaml:
--------------------------------------------------------------------------------
1 | base_model: /workspace/models/Qwen1.5-4B-Chat
2 | trust_remote_code: true
3 |
4 | load_in_8bit: false
5 | load_in_4bit: false
6 | strict: false
7 |
8 | datasets:
9 | - path: /workspace/axolotl/alpaca.json
10 | type: alpaca
11 | dataset_prepared_path:
12 | val_set_size: 0.05
13 | output_dir: ./out-qwen4
14 |
15 | sequence_len: 1400 # supports up to 32k
16 | sample_packing: false
17 | pad_to_sequence_len: false
18 |
19 | adapter: lora
20 | lora_model_dir:
21 | lora_r: 64
22 | lora_alpha: 16
23 | lora_dropout: 0.05
24 | lora_target_linear: true
25 | lora_fan_in_fan_out:
26 |
27 | wandb_mode: online
28 | wandb_project: huixiangdou-cr
29 | wandb_entity:
30 | wandb_watch:
31 | wandb_name: qwen-4
32 | wandb_log_model:
33 |
34 | gradient_accumulation_steps: 1
35 | micro_batch_size: 32
36 | num_epochs: 1
37 | optimizer: paged_adamw_8bit
38 | lr_scheduler: cosine
39 | learning_rate: 0.0002
40 |
41 | train_on_inputs: false
42 | group_by_length: false
43 | gradient_checkpointing: true
44 | gradient_checkpointing_kwargs:
45 | use_reentrant: false
46 | early_stopping_patience:
47 | resume_from_checkpoint:
48 | local_rank:
49 | logging_steps: 1
50 | xformers_attention:
51 | flash_attention: true
52 |
53 | warmup_steps: 10
54 | evals_per_epoch: 1
55 | saves_per_epoch: 1
56 | debug:
57 | deepspeed:
58 | weight_decay: 0.0
59 | fsdp:
60 | fsdp_config:
61 | special_tokens:
62 |
--------------------------------------------------------------------------------
/sft/axolotl_configs/qwen2-lora-7B.yaml:
--------------------------------------------------------------------------------
1 | base_model: /workspace/models/Qwen1.5-7B-Chat
2 | trust_remote_code: true
3 |
4 | load_in_8bit: false
5 | load_in_4bit: false
6 | strict: false
7 |
8 | datasets:
9 | - path: /workspace/axolotl/alpaca.json
10 | type: alpaca
11 | dataset_prepared_path:
12 | val_set_size: 0.05
13 | output_dir: ./out-qwen7
14 |
15 |
16 | sequence_len: 1400 # supports up to 32k
17 | sample_packing: false
18 | pad_to_sequence_len: false
19 |
20 | adapter: lora
21 | lora_model_dir:
22 | lora_r: 16
23 | lora_alpha: 16
24 | lora_dropout: 0.05
25 | lora_target_linear: true
26 | lora_fan_in_fan_out:
27 |
28 | wandb_mode: online
29 | wandb_project: huixiangdou-cr
30 | wandb_entity:
31 | wandb_watch:
32 | wandb_name: qwen7
33 | wandb_log_model:
34 |
35 | gradient_accumulation_steps: 1
36 | micro_batch_size: 16
37 | num_epochs: 1
38 | optimizer: paged_adamw_8bit
39 | lr_scheduler: cosine
40 | learning_rate: 0.0002
41 |
42 | train_on_inputs: false
43 | group_by_length: false
44 | gradient_checkpointing: true
45 | gradient_checkpointing_kwargs:
46 | use_reentrant: false
47 | early_stopping_patience:
48 | resume_from_checkpoint:
49 | local_rank:
50 | logging_steps: 1
51 | xformers_attention:
52 | flash_attention: true
53 |
54 | warmup_steps: 10
55 | evals_per_epoch: 1
56 | saves_per_epoch: 1
57 | debug:
58 | deepspeed:
59 | weight_decay: 0.0
60 | fsdp:
61 | fsdp_config:
62 | special_tokens:
63 |
--------------------------------------------------------------------------------
/sft/axolotl_configs/qwen2-moe-lora-2.7B.yaml:
--------------------------------------------------------------------------------
1 | base_model: /workspace/models/qwen1.5-moe-2.7B-chat
2 | trust_remote_code: true
3 |
4 | load_in_8bit: false
5 | load_in_4bit: false
6 | strict: false
7 |
8 | datasets:
9 | - path: /workspace/axolotl/alpaca.json
10 | type: alpaca
11 | dataset_prepared_path:
12 | val_set_size: 0.05
13 | output_dir: ./out-moe
14 |
15 | sequence_len: 1400 # supports up to 32k
16 | sample_packing: false
17 | pad_to_sequence_len: false
18 |
19 | adapter: lora
20 | lora_model_dir:
21 | lora_r: 64
22 | lora_alpha: 16
23 | lora_dropout: 0.05
24 | lora_target_linear: true
25 | lora_fan_in_fan_out:
26 |
27 | # smooth-cloud-2
28 | wandb_mode: online
29 | wandb_project: huixiangdou-cr
30 | wandb_entity:
31 | wandb_watch:
32 | wandb_name: qwen-moe
33 | wandb_log_model:
34 |
35 | gradient_accumulation_steps: 1
36 | micro_batch_size: 16
37 | num_epochs: 1
38 | optimizer: paged_adamw_8bit
39 | lr_scheduler: cosine
40 | learning_rate: 0.0002
41 |
42 | train_on_inputs: false
43 | group_by_length: false
44 | gradient_checkpointing: true
45 | gradient_checkpointing_kwargs:
46 | use_reentrant: false
47 | early_stopping_patience:
48 | resume_from_checkpoint:
49 | local_rank:
50 | logging_steps: 1
51 | xformers_attention:
52 | flash_attention: true
53 |
54 | warmup_steps: 10
55 | evals_per_epoch: 1
56 | saves_per_epoch: 1
57 | debug:
58 | deepspeed:
59 | weight_decay: 0.0
60 | fsdp:
61 | fsdp_config:
62 | special_tokens:
63 |
--------------------------------------------------------------------------------
/sft/axolotl_configs/qwen2-moe-lora.yaml:
--------------------------------------------------------------------------------
1 | base_model: /workspace/models/qwen1.5-moe-2.7B-chat
2 | trust_remote_code: true
3 |
4 | load_in_8bit: false
5 | load_in_4bit: false
6 | strict: false
7 |
8 | datasets:
9 | - path: mhenrichsen/alpaca_2k_test
10 | type: alpaca
11 | dataset_prepared_path:
12 | val_set_size: 0.05
13 | output_dir: ./out
14 |
15 | sequence_len: 1024 # supports up to 32k
16 | sample_packing: false
17 | pad_to_sequence_len: false
18 |
19 | adapter: lora
20 | lora_model_dir:
21 | lora_r: 32
22 | lora_alpha: 16
23 | lora_dropout: 0.05
24 | lora_target_linear: true
25 | lora_fan_in_fan_out:
26 |
27 | wandb_project:
28 | wandb_entity:
29 | wandb_watch:
30 | wandb_name:
31 | wandb_log_model:
32 |
33 | gradient_accumulation_steps: 4
34 | micro_batch_size: 1
35 | num_epochs: 4
36 | optimizer: paged_adamw_8bit
37 | lr_scheduler: cosine
38 | learning_rate: 0.0002
39 |
40 | train_on_inputs: false
41 | group_by_length: false
42 | bf16: auto
43 | fp16:
44 | tf32: true
45 |
46 | gradient_checkpointing: true
47 | gradient_checkpointing_kwargs:
48 | use_reentrant: false
49 | early_stopping_patience:
50 | resume_from_checkpoint:
51 | local_rank:
52 | logging_steps: 1
53 | xformers_attention:
54 | flash_attention: true
55 |
56 | warmup_steps: 10
57 | evals_per_epoch: 4
58 | saves_per_epoch: 1
59 | debug:
60 | deepspeed:
61 | weight_decay: 0.0
62 | fsdp:
63 | fsdp_config:
64 | special_tokens:
65 |
--------------------------------------------------------------------------------
/sft/axolotl_configs/qwen2-moe-qlora.yaml:
--------------------------------------------------------------------------------
1 | base_model: Qwen/Qwen1.5-MoE-A2.7B
2 | trust_remote_code: true
3 |
4 | load_in_8bit: false
5 | load_in_4bit: true
6 | strict: false
7 |
8 | datasets:
9 | - path: mhenrichsen/alpaca_2k_test
10 | type: alpaca
11 | dataset_prepared_path:
12 | val_set_size: 0.05
13 | output_dir: ./out
14 |
15 | sequence_len: 1024 # supports up to 32k
16 | sample_packing: false
17 | pad_to_sequence_len: false
18 |
19 | adapter: qlora
20 | lora_model_dir:
21 | lora_r: 32
22 | lora_alpha: 16
23 | lora_dropout: 0.05
24 | lora_target_linear: true
25 | lora_fan_in_fan_out:
26 |
27 | wandb_project:
28 | wandb_entity:
29 | wandb_watch:
30 | wandb_name:
31 | wandb_log_model:
32 |
33 | gradient_accumulation_steps: 4
34 | micro_batch_size: 1
35 | num_epochs: 4
36 | optimizer: paged_adamw_8bit
37 | lr_scheduler: cosine
38 | learning_rate: 0.0002
39 |
40 | train_on_inputs: false
41 | group_by_length: false
42 | bf16: auto
43 | fp16:
44 | tf32: true
45 |
46 | gradient_checkpointing: true
47 | gradient_checkpointing_kwargs:
48 | use_reentrant: false
49 | early_stopping_patience:
50 | resume_from_checkpoint:
51 | local_rank:
52 | logging_steps: 1
53 | xformers_attention:
54 | flash_attention: true
55 |
56 | warmup_steps: 10
57 | evals_per_epoch: 4
58 | saves_per_epoch: 1
59 | debug:
60 | deepspeed:
61 | weight_decay: 0.0
62 | fsdp:
63 | fsdp_config:
64 | special_tokens:
65 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/tests/__init__.py
--------------------------------------------------------------------------------
/tests/cp_files.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 |
4 | def copy_files(src_dir, dest_dir):
5 | # 遍历源目录
6 | for root, dirs, files in os.walk(src_dir):
7 | for file in files:
8 | # 构建源文件的完整路径
9 | src_file = os.path.join(root, file)
10 | # 构建目标文件的完整路径
11 | dest_file = os.path.join(dest_dir, file)
12 | # 复制文件
13 |
14 | shutil.copy(src_file, dest_file)
15 | print(f"Copied '{src_file}' to '{dest_file}'")
16 |
17 | # 指定源目录和目标目录
18 | source_directory = '/home/khj/CNKI_pure_text'
19 | destination_directory = '/home/khj/hxd-ci/repodir'
20 |
21 | # 调用函数
22 | copy_files(source_directory, destination_directory)
--------------------------------------------------------------------------------
/tests/git-clone.sh:
--------------------------------------------------------------------------------
1 | git clone https://ghproxy.org/https://github.com/open-compass/opencompass --depth=1
2 | git clone https://ghproxy.org/https://github.com/open-mmlab/mmpose --depth=1
3 | git clone https://ghproxy.org/https://github.com/open-mmlab/mmdeploy --depth=1
4 | git clone https://ghproxy.org/https://github.com/open-mmlab/mmdetection --depth=1
5 | git clone https://ghproxy.org/https://github.com/internlm/lmdeploy --depth=1
6 | git clone https://ghproxy.org/https://github.com/internlm/xtuner --depth=1
7 | git clone https://ghproxy.org/https://github.com/open-mmlab/mmyolo --depth=1
8 | git clone https://ghproxy.org/https://github.com/open-mmlab/mmcv --depth=1
9 | git clone https://ghproxy.org/https://github.com/internlm/huixiangdou --depth=1
10 |
11 | git clone https://github.com/open-compass/opencompass --depth=1
12 | git clone https://github.com/open-mmlab/mmpose --depth=1
13 | git clone https://github.com/open-mmlab/mmdeploy --depth=1
14 | git clone https://github.com/open-mmlab/mmdetection --depth=1
15 | git clone https://github.com/internlm/lmdeploy --depth=1
16 | git clone https://github.com/internlm/xtuner --depth=1
17 | git clone https://github.com/open-mmlab/mmyolo --depth=1
18 | git clone https://github.com/open-mmlab/mmcv --depth=1
19 | git clone https://github.com/internlm/huixiangdou --depth=1
20 |
21 | git clone https://github.com/open-mmlab/Amphion --depth=1
22 | git clone https://github.com/open-mmlab/labelbee --depth=1
23 |
--------------------------------------------------------------------------------
/tests/test_alles_apin.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 | import requests
5 |
6 | url = 'https://openxlab.org.cn/gw/alles-apin-hub/v1/openai/v2/text/chat'
7 | api_token = os.getenv('ALLES_APIN_TOKEN')
8 | headers = {'content-type': 'application/json', 'alles-apin-token': api_token}
9 |
10 | payload = {
11 | 'model':
12 | 'gpt-4-1106-preview',
13 | 'messages': [{
14 | 'role':
15 | 'user',
16 | 'content':
17 | '帮我写个 python 代码,用 time.time() 和 datetime 获取当前时间。把当前时间的秒数设成 0,毫秒数也设成 0, 分钟数加 1,输出新时间对应的毫秒数,格式和 time.time() 相同'
18 | }]
19 | }
20 |
21 | response = requests.post(url, headers=headers, data=json.dumps(payload))
22 | resp_json = response.json()
23 | if resp_json['msgCode'] == '10000':
24 | data = resp_json['data']
25 | if len(data['choices']) > 0:
26 | text = data['choices'][0]['message']['content']
27 | print(text)
28 |
--------------------------------------------------------------------------------
/tests/test_benepar.py:
--------------------------------------------------------------------------------
1 | import benepar
2 | import nltk
3 |
4 | benepar.download('benepar_en3_large')
5 |
6 | nltk.download('punkt')
7 | # 创建解析器
8 | parser = benepar.Parser('benepar_en3_large')
9 |
10 | # 解析句子
11 | tree = parser.parse('The quick brown fox jumps over the lazy dog.')
12 |
--------------------------------------------------------------------------------
/tests/test_bge_reranker.py:
--------------------------------------------------------------------------------
1 | from FlagEmbedding import FlagReranker
2 |
3 | reranker = FlagReranker(
4 | '/data2/khj/bge-reranker-v2-m3/', use_fp16=True
5 | ) # Setting use_fp16 to True speeds up computation with a slight performance degradation
6 |
7 | score = reranker.compute_score(['query', 'passage'])
8 | print(score) # -5.65234375
9 |
10 | # You can map the scores into 0-1 by set "normalize=True", which will apply sigmoid function to the score
11 | score = reranker.compute_score(['query', 'passage'], normalize=True)
12 | print(score) # 0.003497010252573502
13 |
14 | scores = reranker.compute_score([
15 | ['what is panda?', 'hi'],
16 | [
17 | 'what is panda?',
18 | 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.'
19 | ]
20 | ])
21 | print(scores) # [-8.1875, 5.26171875]
22 | import pdb
23 |
24 | # You can map the scores into 0-1 by set "normalize=True", which will apply sigmoid function to the score
25 | scores = reranker.compute_score([
26 | ['what is panda?', 'hi'],
27 | [
28 | 'what is panda?',
29 | 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.'
30 | ]
31 | ],
32 | normalize=True)
33 | print(scores) # [0.00027803096387751553, 0.9948403768236574]
34 |
--------------------------------------------------------------------------------
/tests/test_clear_kimi_files.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pdb
3 |
4 | from openai import OpenAI
5 | from tqdm import tqdm
6 |
7 | client = OpenAI(api_key=os.getenv('MOONSHOT_API_KEY'),
8 | base_url='https://api.moonshot.cn/v1')
9 | file_list = client.files.list()
10 | for file in tqdm(file_list.data):
11 | client.files.delete(file_id=file.id)
12 | print(file)
13 |
--------------------------------------------------------------------------------
/tests/test_dataclass.py:
--------------------------------------------------------------------------------
1 | import pdb
2 | from enum import Enum, unique
3 |
4 |
5 | @unique
6 | class KGType(Enum):
7 | MARKDOWN = 'markdown'
8 | CHUNK = 'chunk'
9 | KEYWORD = 'keyword'
10 | IMAGE = 'image'
11 |
12 |
13 | x = KGType.IMAGE
14 | print(x)
15 |
--------------------------------------------------------------------------------
/tests/test_deepseek.py:
--------------------------------------------------------------------------------
1 | # python3
2 | import os
3 | from openai import OpenAI
4 | client = OpenAI(api_key=os.getenv('DEEPSEEK_API_KEY'),
5 | base_url='https://api.deepseek.com/v1')
6 | queries = [
7 | '已知 ncnn 中 cnn 是卷积神经网络,n 是 ncnn 的作者 nihui。所以 ncnn 的全称是?',
8 | '"请问如何安装 mmdeploy ?"\n请仔细阅读以上内容,判断句子是否是个有主题的疑问句,结果用 0~10 表示。直接提供得分不要解释。\n判断标准:有主语谓语宾语并且是疑问句得 10 分;缺少主谓宾扣分;陈述句直接得 0 分;不是疑问句直接得 0 分。直接提供得分不要解释。',
9 | '"豆哥少水点键证群"\n请仔细阅读以上内容,判断句子是否是个有主题的疑问句,结果用 0~10 表示。直接提供得分不要解释。\n判断标准:有主语谓语宾语并且是疑问句得 10 分;缺少主谓宾扣分;陈述句直接得 0 分;不是疑问句直接得 0 分。直接提供得分不要解释。'
10 | ]
11 |
12 | for query in queries:
13 | response = client.chat.completions.create(
14 | model='deepseek-chat',
15 | messages=[
16 | {
17 | 'role': 'system',
18 | 'content': 'You are a helpful assistant'
19 | },
20 | {
21 | 'role': 'user',
22 | 'content': query
23 | },
24 | ],
25 | temperature=0.1)
26 |
27 | print(response.choices[0].message.content)
28 |
--------------------------------------------------------------------------------
/tests/test_hf_import_accelerate.py:
--------------------------------------------------------------------------------
1 | from accelerate import (dispatch_model, infer_auto_device_map,
2 | init_empty_weights)
3 | from accelerate.hooks import add_hook_to_module
4 | from accelerate.utils import (check_tied_parameters_on_same_device,
5 | find_tied_parameters, get_balanced_memory,
6 | get_max_memory, load_offloaded_weights,
7 | offload_weight, save_offload_index,
8 | set_module_tensor_to_device)
9 |
--------------------------------------------------------------------------------
/tests/test_intention_prompt.py:
--------------------------------------------------------------------------------
1 |
2 | import json
3 |
4 | import torch
5 | from transformers.generation import GenerationConfig
6 |
7 | # Note: The default behavior now has injection attack prevention off.
8 | DIR = '/internlm/ampere_7b_v1_7_0'
9 | from transformers import AutoModelForCausalLM, AutoTokenizer
10 |
11 | tokenizer = AutoTokenizer.from_pretrained(DIR, trust_remote_code=True)
12 | model = AutoModelForCausalLM.from_pretrained(DIR,
13 | trust_remote_code=True,
14 | device_map='auto').eval()
15 |
16 |
17 | def task1_intention():
18 | """Test prompt."""
19 | ret = []
20 | with open('data.json', encoding='utf8') as f:
21 | items = json.load(f)
22 | for idx, item in enumerate(items):
23 | question = item['question']
24 |
25 | prompt = '“{}”\n请仔细阅读以上内容,判断句子是否是个有主题的疑问句,结果用 0~10 表示。直接提供得分不要解释。\n判断标准:有主语谓语宾语并且是疑问句得 10 分;缺少主谓宾扣分;陈述句直接得 0 分;不是疑问句直接得 0 分。直接提供得分不要解释。'.format(
26 | question)
27 | answer, _ = model.chat(tokenizer, prompt, history=[], top_k=1)
28 | print((answer, prompt))
29 |
30 | ret.append({'question': prompt, 'answer': answer})
31 |
32 | with open('task1_intention_internlm_prompt.json', 'w',
33 | encoding='utf8') as f:
34 | json.dump(list(ret), f, ensure_ascii=False, indent=2)
35 | print('{}/{}'.format(idx, len(items)))
36 |
37 |
38 | if __name__ == '__main__':
39 | task1_intention()
40 |
--------------------------------------------------------------------------------
/tests/test_internlm2.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from transformers import AutoModelForCausalLM, AutoTokenizer
3 | import asyncio
4 |
5 | # wrap to async generator
6 | async def chat_stream():
7 | model_path = "/data2/khj/internlm2_5-7b-chat"
8 | model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda()
9 | tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
10 |
11 | model = model.eval()
12 | length = 0
13 | for response, history in model.stream_chat(tokenizer, "Hello", history=[]):
14 | part = response[length:]
15 | length = len(response)
16 | yield part
17 | yield '\n'
18 |
19 | # coroutine
20 | async def main():
21 | async for part in chat_stream():
22 | print(part, flush=True, end="")
23 |
24 | loop = asyncio.get_event_loop()
25 | loop.run_until_complete(main())
--------------------------------------------------------------------------------
/tests/test_kimi.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from openai import OpenAI
4 |
5 | client = OpenAI(
6 | api_key=os.getenv('MOONSHOT_API_KEY'),
7 | base_url='https://api.moonshot.cn/v1',
8 | )
9 |
10 | prompt = '“huixiangdou 是什么?”\n请仔细阅读以上内容,判断句子是否是个有主题的疑问句,结果用 0~10 表示。直接提供得分不要解释。\n判断标准:有主语谓语宾语并且是疑问句得 10 分;缺少主谓宾扣分;陈述句直接得 0 分;不是疑问句直接得 0 分。直接提供得分不要解释。'
11 |
12 |
13 | def generate():
14 | """Test generate."""
15 | messages = [
16 | {
17 | 'role': 'system',
18 | 'content': '你是一个语文专家,擅长对句子的结构进行分析'
19 | # '你是 Kimi,由 Moonshot AI 提供的人工智能助手,你更擅长中文和英文的对话。你会为用户提供安全,有帮助,准确的回答。
20 | # 同时,你会拒绝一些涉及恐怖主义,种族歧视,黄色暴力等问题的回答。Moonshot AI 为专有名词,不可翻译成其他语言。'
21 | },
22 | {
23 | 'role': 'user',
24 | 'content': prompt
25 | }
26 | ]
27 |
28 | whole_input = str(messages)
29 | print(whole_input)
30 | # print('input_length {}'.format(len(whole_input)))
31 |
32 | try:
33 | completion = client.chat.completions.create(model='moonshot-v1-8k',
34 | messages=messages,
35 | temperature=0.1,
36 | n=10)
37 | except Exception as e:
38 | return prompt, str(e)
39 |
40 | results = []
41 | for choice in completion.choices:
42 | results.append(choice.message.content)
43 |
44 | return prompt, results
45 |
46 |
47 | if __name__ == '__main__':
48 | print(generate())
49 |
--------------------------------------------------------------------------------
/tests/test_m3.py:
--------------------------------------------------------------------------------
1 | from FlagEmbedding import BGEM3FlagModel
2 |
3 | model = BGEM3FlagModel(
4 | '/data2/khj/bge-m3', use_fp16=True
5 | ) # Setting use_fp16 to True speeds up computation with a slight performance degradation
6 |
7 | sentences_1 = ['What is BGE M3?', 'Defination of BM25']
8 | sentences_2 = [
9 | 'BGE M3 is an embedding model supporting dense retrieval, lexical matching and multi-vector interaction.',
10 | 'BM25 is a bag-of-words retrieval function that ranks a set of documents based on the query terms appearing in each document'
11 | ]
12 |
13 | import pdb
14 |
15 | embeddings_1 = model.encode(sentences_1, max_length=512)['dense_vecs']
16 | embeddings_2 = model.encode(sentences_2)['dense_vecs']
17 | similarity = embeddings_1 @ embeddings_2.T
18 | print(similarity)
19 | # [[0.6265, 0.3477], [0.3499, 0.678 ]]
20 |
--------------------------------------------------------------------------------
/tests/test_neo4j.py:
--------------------------------------------------------------------------------
1 | import nxneo4j as nx
2 | from neo4j import GraphDatabase
3 |
4 | # Neo4j Desktop 版
5 | # 1. 关掉 auth
6 | # 2. server.default_listen_address=0.0.0.0
7 | # 浏览器打开 http://10.1.52.85:7474/browser/,无密码模式应该能登录
8 |
9 | # 配置 Neo4j 连接参数
10 | uri = 'bolt://10.1.52.85:7687' # 默认的 bolt 协议地址和端口
11 | user = 'neo4j' # Neo4j 用户名
12 | password = 'neo4j' # Neo4j 密码
13 |
14 | # 创建驱动实例
15 | driver = GraphDatabase.driver(uri, auth=(user, password))
16 |
17 | G = nx.Graph(driver)
18 | G.delete_all()
19 |
20 | #Add a node
21 | G.add_node('Yusuf')
22 | #Add node with features
23 | G.add_node('Nurgul', gender='F')
24 | #Add multiple properties at once
25 | G.add_node('Betul', age=4, gender='F')
26 | #Check nodes
27 | for node in G.nodes(): #Unlike networkX, nxneo4j returns a generator
28 | print(node)
29 |
--------------------------------------------------------------------------------
/tests/test_openai.py:
--------------------------------------------------------------------------------
1 | import openai
2 | from openai import OpenAI
3 |
4 |
5 | def call_openai(model_name, prompt, history):
6 |
7 | messages = [{
8 | 'role': 'system',
9 | 'content': 'You are a helpful assistant.' # noqa E501
10 | }]
11 | for item in history:
12 | messages.append({'role': 'user', 'content': item[0]})
13 | messages.append({'role': 'system', 'content': item[1]})
14 | messages.append({'role': 'user', 'content': prompt})
15 |
16 | client = OpenAI(
17 | api_key='EMPTY',
18 | base_url='http://10.140.24.142:29500/v1',
19 | )
20 |
21 | completion = client.chat.completions.create(model=model_name,
22 | messages=messages)
23 | return completion.choices[0].message.content
24 |
25 |
26 | def call2():
27 | from openai import OpenAI
28 |
29 | # Set OpenAI's API key and API base to use vLLM's API server.
30 | openai_api_key = 'EMPTY'
31 | openai_api_base = 'http://10.140.24.142:29500/v1'
32 |
33 | client = OpenAI(
34 | api_key=openai_api_key,
35 | base_url=openai_api_base,
36 | )
37 |
38 | chat_response = client.chat.completions.create(
39 | model='../models/Qwen1.5-14B-Chat/',
40 | messages=[
41 | {
42 | 'role': 'system',
43 | 'content': 'You are a helpful assistant.'
44 | },
45 | {
46 | 'role': 'user',
47 | 'content': 'Tell me a joke.'
48 | },
49 | ])
50 | print('Chat response:', chat_response)
51 |
52 |
53 | call2()
54 | # call_openai("../models/Qwen1.5-14B-Chat/", '如何安装 mmdeploy', [])
55 |
56 | # curl http://10.140.24.142:29500/v1/chat/completions \
57 | # -H "Content-Type: application/json" \
58 | # -d '{
59 | # "model": "../models/Qwen1.5-14B-Chat/",
60 | # "messages": [
61 | # {"role": "system", "content": "You are a helpful assistant."},
62 | # {"role": "user", "content": "Tell me something about large language models."}
63 | # ]
64 | # }'
65 |
--------------------------------------------------------------------------------
/tests/test_post_android.py:
--------------------------------------------------------------------------------
1 | import json
2 | import time
3 |
4 | import requests
5 |
6 | # base_url = 'https://p-172_dot_31_dot_0_dot_170_colon_18443.openxlab.space/api/v1/message/v1/wechat/fRHK'
7 | base_url = 'http://139.224.198.162:18443/api/v1/message/v1/wechat/fRHK'
8 |
9 | headers = {'Content-Type': 'application/json; charset=utf-8'}
10 |
11 |
12 | def send():
13 | data_send = {
14 | 'query_id': 'abb',
15 | 'groupname': '茴香豆测试群', # 完整的微信群名
16 | 'username': '豆哥 123', # 发送者的在这个群的微信昵称, 注意一个人可能在多个群里
17 | 'query': {
18 | 'type': 'text', # 发的类型, text or image, poll
19 | 'content':
20 | '请问如何申请公寓?' # 如果 type 是 text 就是文本; 如果是 image,就是个可公开访问的 oss_url
21 | }
22 | }
23 | resp = requests.post(base_url,
24 | headers=headers,
25 | data=json.dumps(data_send),
26 | timeout=10)
27 |
28 | resp_json = resp.json()
29 | print(resp_json)
30 |
31 |
32 | def get():
33 | data_wait = {
34 | 'query_id': 'abb', # 微信给的随机值,用于事后日志分析
35 | 'groupname': '茴香豆测试群', # 完整的微信群名
36 | 'username': '豆哥 123', # 发送者的在这个群的微信昵称, 注意一个人可能在多个群里
37 | 'query': {
38 | 'type': 'poll' # 发的类型, text or image, poll
39 | }
40 | }
41 | resp = requests.post(base_url,
42 | headers=headers,
43 | data=json.dumps(data_wait),
44 | timeout=20)
45 | print(resp.text)
46 |
47 |
48 | send()
49 | send()
50 |
51 | time.sleep(40)
52 | get()
53 |
--------------------------------------------------------------------------------
/tests/test_pyppeteer.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import time
3 |
4 | from pyppeteer import launch
5 |
6 |
7 | async def main(url):
8 | browser = await launch(headless=True,
9 | args=[
10 | '--no-sandbox', '--disable-dev-shm-usage',
11 | '--disable-gpu',
12 | '--disable-software-rasterizer',
13 | '--disable-setuid-sandbox'
14 | ])
15 | page = await browser.newPage()
16 | await page.goto(url)
17 | content = await page.evaluate(
18 | 'document.getElementsByClassName("Post-Main")[0].innerText',
19 | force_expr=True)
20 | # print(content)
21 | await browser.close()
22 | return content
23 |
24 |
25 | result = asyncio.get_event_loop().run_until_complete(
26 | main(url='https://zhuanlan.zhihu.com/p/699164101'))
27 | print(result)
28 |
--------------------------------------------------------------------------------
/tests/test_query_gradio.py:
--------------------------------------------------------------------------------
1 | from loguru import logger
2 |
3 | if __name__ == '__main__':
4 | logger.warning('This file moved to `huixiangdou.gradio_ui`')
5 |
--------------------------------------------------------------------------------
/tests/test_relative.py:
--------------------------------------------------------------------------------
1 | def test_reject(retriever: Retriever, sample: str = None):
2 | """Simple test reject pipeline."""
3 | if sample is None:
4 | real_questions = [
5 | 'SAM 10个T 的训练集,怎么比比较公平呢~?速度上还有缺陷吧?',
6 | '想问下,如果只是推理的话,amp的fp16是不会省显存么,我看parameter仍然是float32,开和不开推理的显存占用都是一样的。能不能直接用把数据和model都 .half() 代替呢,相比之下amp好在哪里', # noqa E501
7 | 'mmdeploy支持ncnn vulkan部署么,我只找到了ncnn cpu 版本',
8 | '大佬们,如果我想在高空检测安全帽,我应该用 mmdetection 还是 mmrotate',
9 | '请问 ncnn 全称是什么',
10 | '有啥中文的 text to speech 模型吗?',
11 | '今天中午吃什么?',
12 | 'huixiangdou 是什么?',
13 | 'mmpose 如何安装?',
14 | '使用科研仪器需要注意什么?'
15 | ]
16 | else:
17 | with open(sample) as f:
18 | real_questions = json.load(f)
19 |
20 | for example in real_questions:
21 | relative, _ = retriever.is_relative(example)
22 |
23 | if relative:
24 | logger.warning(f'process query: {example}')
25 | else:
26 | logger.error(f'reject query: {example}')
27 |
28 | if sample is not None:
29 | if relative:
30 | with open('workdir/positive.txt', 'a+') as f:
31 | f.write(example)
32 | f.write('\n')
33 | else:
34 | with open('workdir/negative.txt', 'a+') as f:
35 | f.write(example)
36 | f.write('\n')
37 |
38 | empty_cache()
39 |
--------------------------------------------------------------------------------
/tests/test_time.py:
--------------------------------------------------------------------------------
1 | import time
2 | from datetime import datetime
3 |
4 | current_time = time.time() # 获取当前时间戳
5 | dt_object = datetime.fromtimestamp(current_time) # 将时间戳转换为datetime对象
6 |
7 | # 获取当天自午夜以来的总分钟数
8 | total_minutes_since_midnight = dt_object.hour * 60 + dt_object.minute
9 |
10 | print(total_minutes_since_midnight)
11 |
--------------------------------------------------------------------------------
/tests/test_visual_bge.py:
--------------------------------------------------------------------------------
1 | ##### Use M3 doing Multilingual Multi-Modal Retrieval
2 | import torch
3 | from FlagEmbedding.visual.modeling import Visualized_BGE
4 |
5 | model = Visualized_BGE(
6 | model_name_bge='/data2/khj/bge-m3',
7 | model_weight='/data2/khj/bge-visualized/Visualized_m3.pth')
8 | model.eval()
9 | with torch.no_grad():
10 | query_emb = model.encode(image='./imgs/cir_query.png', text='一匹马牵着这辆车')
11 | candi_emb_1 = model.encode(image='./imgs/cir_candi_1.png')
12 | candi_emb_2 = model.encode(image='./imgs/cir_candi_2.png')
13 |
14 | sim_1 = query_emb @ candi_emb_1.T
15 | sim_2 = query_emb @ candi_emb_2.T
16 | print(sim_1, sim_2) # tensor([[0.7026]]) tensor([[0.8075]])
17 |
--------------------------------------------------------------------------------
/tests/test_yi.py:
--------------------------------------------------------------------------------
1 | from transformers import AutoModelForCausalLM, AutoTokenizer
2 |
3 | model = AutoModelForCausalLM.from_pretrained('/models/Yi-6B-200K',
4 | device_map='auto',
5 | torch_dtype='auto',
6 | trust_remote_code=True)
7 | tokenizer = AutoTokenizer.from_pretrained('/models/Yi-6B-200K',
8 | trust_remote_code=True)
9 | inputs = tokenizer('', return_tensors='pt')
10 | max_length = 512
11 | outputs = model.generate(
12 | inputs.input_ids.cuda(),
13 | max_length=max_length,
14 | eos_token_id=tokenizer.eos_token_id,
15 | do_sample=True,
16 | repetition_penalty=1.3,
17 | no_repeat_ngram_size=5,
18 | temperature=0.7,
19 | top_k=1,
20 | top_p=0.8,
21 | )
22 | print(tokenizer.decode(outputs[0], skip_special_tokens=True))
23 |
--------------------------------------------------------------------------------
/tests/test_yulan.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from transformers import LlamaForCausalLM, LlamaTokenizer
3 |
4 | model_path = '/models/YuLan-Chat-2-13b-fp16'
5 | tokenizer = LlamaTokenizer.from_pretrained(model_path)
6 | model = LlamaForCausalLM.from_pretrained(model_path,
7 | torch_dtype=torch.float16,
8 | device_map='auto')
9 | model = model.eval()
10 |
11 |
12 | def run(input_text: str):
13 | prompt = "The following is a conversation between a human and an AI assistant namely YuLan, developed by GSAI, Renmin University of China. The AI assistant gives helpful, detailed, and polite answers to the user's questions.\n[|Human|]:{}\n[|AI|]:".format(
14 | input_text)
15 | inputs = tokenizer(prompt,
16 | return_tensors='pt',
17 | padding='longest',
18 | max_length=8192,
19 | truncation=True,
20 | return_attention_mask=True,
21 | add_special_tokens=True)
22 | print(inputs)
23 | kwargs = {
24 | 'temperature': 0.8,
25 | 'top_p': 0.95,
26 | 'top_k': 50,
27 | 'repetition_penalty': 1.1,
28 | 'no_repeat_ngram_size': 64,
29 | 'max_length': 8192,
30 | 'pad_token_id': tokenizer.bos_token_id,
31 | 'eos_token_id': tokenizer.eos_token_id
32 | }
33 | outputs = model.generate(inputs['input_ids'].to(model.device),
34 | attention_mask=inputs['attention_mask'].to(
35 | model.device),
36 | do_sample=True,
37 | **kwargs)
38 | print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
39 |
40 |
41 | texts = [
42 | 'mmdeploy extract如何使用', 'OpenMMLab与上海AI lab 的关系是什么?', 'MMEngine 和MMCV的区别',
43 | 'openmmlab 是什么?', 'mmdet3.0 是否依赖 mmcv0.7', 'mmdet3.0对应的mmcv最低版本是多少'
44 | ]
45 | for input_text in texts:
46 | run(input_text)
47 |
--------------------------------------------------------------------------------
/unittest/primitive/test_bm250api.py:
--------------------------------------------------------------------------------
1 | from huixiangdou.primitive import BM25Okapi, Chunk
2 | import pdb
3 |
4 | def test_bm25_dump():
5 | corpus = [
6 | "Hello there good man!",
7 | "It is quite windy in London",
8 | "How is the weather today?"
9 | ]
10 | chunks = []
11 | for content in corpus:
12 | c = Chunk(content_or_path=content)
13 | chunks.append(c)
14 |
15 | bm25 = BM25Okapi()
16 | bm25.save(chunks, './')
17 |
18 | def test_bm25_load():
19 | bm25 = BM25Okapi()
20 | bm25.load('./')
21 | query_text = 'what is the weather'
22 |
23 | res = bm25.get_top_n(query=query_text.split(' '))
24 | print(res)
25 |
26 | res = bm25.get_top_n(query=query_text)
27 | print(res)
28 |
29 | if __name__ == '__main__':
30 | test_bm25_dump()
31 | test_bm25_load()
32 |
--------------------------------------------------------------------------------
/unittest/primitive/test_dataclass.py:
--------------------------------------------------------------------------------
1 | from huixiangdou.primitive import Chunk, Query
2 |
3 |
4 | def test_chunk():
5 | c = Chunk()
6 | c_str = '{}'.format(c)
7 | assert 'content_or_path=' in c_str
8 |
9 |
10 | def test_query():
11 | q = Query(text='hello', image='test.jpg')
12 | q_str = '{}'.format(q)
13 | assert 'hello' in q_str
14 | assert 'image=' in q_str
15 |
16 | p = Query('hello')
17 | p_str = '{}'.format(p)
18 | assert 'text=' in p_str
19 |
20 |
if __name__ == '__main__':
    # Allow running this module directly without pytest.
    test_chunk()
    test_query()
24 |
--------------------------------------------------------------------------------
/unittest/primitive/test_embedder.py:
--------------------------------------------------------------------------------
1 | import pdb
2 |
3 | from huixiangdou.primitive import Embedder
4 |
5 |
def test_embedder():
    """Text/image embeddings of a joint query should be similar to both
    unimodal embeddings.

    Fix: the embedding model location was hard-coded to one machine's
    path; it can now be overridden with the ``BGE_MODEL_PATH`` environment
    variable while defaulting to the original path, so existing setups
    keep working.
    """
    import os
    model_path = os.environ.get('BGE_MODEL_PATH', '/data2/khj/bge-m3')
    emb = Embedder({'embedding_model_path': model_path})
    sentence = 'hello world '
    # Repeat the sentence up to ~16k characters to exercise long input.
    sentence_16k = sentence * (16384 // len(sentence))
    image_path = 'resource/figures/wechat.jpg'

    text_feature = emb.embed_query(text=sentence_16k)
    image_feature = emb.embed_query(path=image_path)

    # Joint text+image query embedding.
    query_feature = emb.embed_query(text=sentence_16k, path=image_path)

    # Dot-product similarity; the joint embedding should stay reasonably
    # close to both unimodal embeddings.
    sim1 = query_feature @ text_feature.T
    sim2 = query_feature @ image_feature.T

    assert sim1.item() >= 0.4
    assert sim2.item() >= 0.4
22 |
23 |
24 | if __name__ == '__main__':
25 | test_embedder()
26 |
--------------------------------------------------------------------------------
/unittest/primitive/test_entity.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pdb
3 |
4 | from huixiangdou.primitive import NamedEntity2Chunk, Chunk
5 |
6 |
def test_entity_build_and_query():
    """Build an entity-to-chunk inverted index, then query it back.

    Steps: register entities, parse each text chunk for entity mentions,
    persist the entity->chunk relations, then check that a query
    mentioning 'wechat' retrieves chunk 0 (which contains 'wechaty').
    """
    entities = ['HuixiangDou', 'WeChat']

    indexer = NamedEntity2Chunk('/tmp')
    indexer.clean()
    indexer.set_entity(entities=entities)

    c0 = Chunk(content_or_path='How to deploy HuixiangDou on wechaty ?')
    c1 = Chunk(content_or_path='do you know what huixiangdou means ?')
    chunks = [c0, c1]
    # Build inverted index: entity_id -> list of chunk ids mentioning it.
    map_entity2chunks = dict()
    for chunk_id, chunk in enumerate(chunks):
        if chunk.modal != 'text':
            continue
        for entity_id in indexer.parse(text=chunk.content_or_path):
            # setdefault replaces the manual "not in dict" branch.
            map_entity2chunks.setdefault(entity_id, []).append(chunk_id)

    for entity_id, chunk_indexes in map_entity2chunks.items():
        indexer.insert_relation(eid=entity_id, chunk_ids=chunk_indexes)
    del indexer

    query_text = 'how to install wechat ?'
    retriever = NamedEntity2Chunk('/tmp')
    entity_ids = retriever.parse(query_text)
    # chunk_id match counter
    chunk_id_list = retriever.get_chunk_ids(entity_ids=entity_ids)
    print(chunk_id_list)
    assert chunk_id_list[0][0] == 0
40 |
41 |
42 | if __name__ == '__main__':
43 | test_entity_build_and_query()
44 |
--------------------------------------------------------------------------------
/unittest/primitive/test_faiss.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pdb
3 |
4 | from huixiangdou.primitive import Chunk, Embedder, Faiss, Query
5 |
6 |
def test_faiss():
    """Round-trip chunks through a Faiss store, then query by image."""
    chunks = [
        Chunk('hello world', {'source': 'unittest'}),
        Chunk('resource/figures/inside-mmpose.jpg', {'source': 'unittest'},
              'image'),
        Chunk('resource/figures/wechat.jpg', {'source': 'test image'},
              'image'),
    ]

    save_path = '/tmp/faiss'
    embedder = Embedder({
        'embedding_model_path': '/data2/khj/bge-m3'
    })

    Faiss.save_local(folder_path=save_path, chunks=chunks, embedder=embedder)
    assert os.path.exists(os.path.join(save_path, 'embedding.faiss'))

    # Loaded chunks must round-trip to the same string form.
    store = Faiss.load_local(save_path)
    for idx, loaded in enumerate(store.chunks):
        assert str(chunks[idx]) == str(loaded)

    # Searching with the exact stored image should return it at rank 0
    # with a near-perfect score.
    target = 'resource/figures/inside-mmpose.jpg'
    pairs = store.similarity_search_with_query(query=Query(image=target),
                                               embedder=embedder)
    best_chunk, best_score = pairs[0]
    assert best_chunk.content_or_path == target
    assert best_score >= 0.9999
34 |
35 |
36 | if __name__ == '__main__':
37 | test_faiss()
38 |
--------------------------------------------------------------------------------
/unittest/primitive/test_limitter.py:
--------------------------------------------------------------------------------
1 | import time
2 | from huixiangdou.services.llm_server_hybrid import RPM, TPM
3 |
def test_rpm():
    """Push two bursts of 40 waits through a 30-requests-per-minute
    limiter, pausing 5 seconds between the bursts."""
    limiter = RPM(30)

    for burst in range(2):
        for idx in range(40):
            limiter.wait()
            print(idx)
        # Sleep only between the two bursts, not after the last one.
        if burst == 0:
            time.sleep(5)
16 |
def test_tpm():
    """20 verbose waits of 150 tokens each against a
    2000-tokens-per-minute limiter."""
    limiter = TPM(2000)

    for idx in range(20):
        limiter.wait(silent=False, token_count=150)
        print(idx)
23 |
if __name__ == '__main__':
    # NOTE(review): only the TPM test runs here; test_rpm is defined but
    # never invoked — presumably skipped because rate limiting makes it
    # slow. Confirm whether that is intentional.
    test_tpm()
--------------------------------------------------------------------------------
/unittest/primitive/test_reranker.py:
--------------------------------------------------------------------------------
1 | import pdb
2 |
3 | from huixiangdou.primitive import LLMReranker
4 |
5 |
def test_reranker():
    """The reranker must rank 'apple pie' highest for the query 'apple'."""
    reranker = LLMReranker(
        {'reranker_model_path': '/data2/khj/bce-reranker-base_v1'})

    candidates = ['roast banana', 'ice juice', 'red orange', 'apple pie']
    order = reranker._sort(texts=candidates, query='apple')

    # _sort's first entry should point at the last candidate ('apple pie').
    assert order[0] == len(candidates) - 1
14 |
15 |
if __name__ == '__main__':
    # Allow running this module directly without pytest.
    test_reranker()
18 |
--------------------------------------------------------------------------------
/unittest/service/test_llm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/unittest/service/test_llm.py
--------------------------------------------------------------------------------
/unittest/service/test_llm_client.py:
--------------------------------------------------------------------------------
1 | from huixiangdou.services.llm_client import ChatClient
2 |
def test_auto_fix():
    """Test auto choose backend based on config.

    Fix: the original decorated this function with ``@DeprecationWarning``,
    which rebinds the name to a DeprecationWarning *instance*; calling it
    (as the ``__main__`` guard does) then raises ``TypeError:
    'DeprecationWarning' object is not callable``. A real deprecation
    warning is emitted instead and the function stays callable.
    """
    import warnings
    warnings.warn('test_auto_fix is deprecated', DeprecationWarning,
                  stacklevel=2)

    remote_only_config = 'config-2G.ini'
    local_only_config = 'config.ini'
    full_config = 'config-advanced.ini'

    # Remote-only config: a 'local' request must be redirected to a
    # remote backend with a large context window.
    client = ChatClient(config_path=remote_only_config)
    real_backend, max_len = client.auto_fix(backend='local')
    assert real_backend != 'local'
    assert max_len >= 32000

    # Local-only config: a remote ('kimi') request falls back to 'local'.
    client = ChatClient(config_path=local_only_config)
    real_backend, max_len = client.auto_fix(backend='kimi')
    assert real_backend == 'local'

    # Full config: both requested backends are honored.
    client = ChatClient(config_path=full_config)
    real_backend, max_len = client.auto_fix(backend='local')
    assert real_backend == 'local'
    real_backend, max_len = client.auto_fix(backend='kimi')
    assert real_backend != 'local'
24 |
if __name__ == '__main__':
    # Entry point for direct execution (no pytest).
    test_auto_fix()
27 |
--------------------------------------------------------------------------------
/unittest/service/test_sg_search.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import tempfile
4 | import time
5 |
6 | import pytoml
7 | from loguru import logger
8 |
9 | from huixiangdou.services.llm import LLM
10 | from huixiangdou.services.sg_search import SourceGraphProxy
11 |
12 |
def load_secret():
    """Read the kimi/serper/sourcegraph API tokens from unittest/token.json.

    Returns:
        tuple[str, str, str]: (kimi_token, serper_token, sg_token).
    """
    with open('unittest/token.json') as f:
        secrets = json.load(f)
    return secrets['kimi'], secrets['serper'], secrets['sg']
22 |
23 |
def build_config_path():
    """Clone config-2G.ini with real tokens injected; return the temp path.

    Loads the base config, fills in secrets from load_secret(), points the
    feature store at local model paths, enables SourceGraph search, and
    writes the result to a persistent temporary TOML file.
    """
    kimi_token, serper_token, sg_token = load_secret()
    with open('config-2G.ini') as f:
        config = pytoml.load(f)

    config['web_search']['engine'] = 'serper'
    config['web_search']['serper_x_api_key'] = serper_token
    feature_store = config['feature_store']
    feature_store['embedding_model_path'] = '/data2/khj/bce-embedding-base_v1/'
    feature_store['reranker_model_path'] = '/data2/khj/bce-embedding-base_v1/'
    config['llm']['server']['remote_api_key'] = kimi_token
    config['worker']['enable_sg_search'] = 1
    config['sg_search']['src_access_token'] = sg_token

    # delete=False keeps the file alive for the caller to consume.
    with tempfile.NamedTemporaryFile(delete=False, mode='w+b') as temp_file:
        temp_file.write(pytoml.dumps(config).encode('utf8'))
        return temp_file.name
47 |
48 |
def test_sg():
    """End-to-end SourceGraph search for an mmpose question must return
    non-empty content."""
    config_path = build_config_path()

    llm = LLM(config_path=config_path)
    proxy = SourceGraphProxy(config_path=config_path)
    answer = proxy.search(llm_client=llm,
                          question='mmpose installation',
                          groupname='mmpose dev group')
    assert len(answer) > 0
58 |
--------------------------------------------------------------------------------
/web/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/__init__.py
--------------------------------------------------------------------------------
/web/api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/api/__init__.py
--------------------------------------------------------------------------------
/web/api/access.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Request, Response
2 |
3 | from web.model.access import LoginBody
4 | from web.service.access import LoginService
5 |
# Router exposing the authentication endpoint.
access_api = APIRouter()


@access_api.post('/v1/login')
async def login(body: LoginBody, request: Request, response: Response):
    """Delegate the login flow to LoginService."""
    service = LoginService(body, request, response)
    return await service.login()
12 |
--------------------------------------------------------------------------------
/web/api/chat.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Depends, Request, Response
2 |
3 | from web.middleware.token import check_hxd_token
4 | from web.model.chat import (ChatCaseFeedbackBody, ChatOnlineResponseBody,
5 | ChatRequestBody)
6 | from web.model.qalib import QalibInfo
7 | from web.service.chat import ChatService
8 |
# Router for chat endpoints; each handler resolves the QA-lib identity
# from the HuixiangDou token dependency, then delegates to ChatService.
chat_api = APIRouter()


@chat_api.post('/v1/online')
async def chat_online(request: Request,
                      response: Response,
                      body: ChatRequestBody,
                      hxd_info: QalibInfo = Depends(check_hxd_token)):
    """Delegate to ChatService.chat_online."""
    service = ChatService(request, response, hxd_info)
    return await service.chat_online(body)


@chat_api.post('/v1/onlineResponse')
async def chat_online_response(request: Request,
                               response: Response,
                               body: ChatOnlineResponseBody,
                               hxd_info: QalibInfo = Depends(check_hxd_token)):
    """Delegate to ChatService.fetch_response."""
    service = ChatService(request, response, hxd_info)
    return await service.fetch_response(body)


@chat_api.post('/v1/caseFeedback')
async def case_feedback(request: Request,
                        response: Response,
                        body: ChatCaseFeedbackBody,
                        hxd_info: QalibInfo = Depends(check_hxd_token)):
    """Delegate to ChatService.case_feedback."""
    service = ChatService(request, response, hxd_info)
    return await service.case_feedback(body)
34 |
--------------------------------------------------------------------------------
/web/api/integrate.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Depends, Request, Response
2 |
3 | from web.middleware.token import check_hxd_token
4 | from web.model.integrate import IntegrateLarkBody, IntegrateWebSearchBody
5 | from web.model.qalib import QalibInfo
6 | from web.service.qalib import QaLibService
7 |
# Router for third-party integration endpoints (Lark, web search).
integrate_api = APIRouter()


@integrate_api.post('/v1/integrateLark')
async def integrate_lark(request: Request,
                         response: Response,
                         body: IntegrateLarkBody,
                         hxd_info: QalibInfo = Depends(check_hxd_token)):
    """Delegate to QaLibService.integrate_lark."""
    service = QaLibService(request, response, hxd_info)
    return await service.integrate_lark(body)


@integrate_api.post('/v1/integrateWebSearch')
async def integrate_web_search(request: Request,
                               response: Response,
                               body: IntegrateWebSearchBody,
                               hxd_info: QalibInfo = Depends(check_hxd_token)):
    """Delegate to QaLibService.integrate_web_search."""
    service = QaLibService(request, response, hxd_info)
    return await service.integrate_web_search(body)
26 |
--------------------------------------------------------------------------------
/web/api/message.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Request, Response
2 |
3 | from web.model.chat import WechatRequest
4 | from web.service.message import MessageService
5 |
# Router for inbound IM callbacks (Lark / WeChat).
message_api = APIRouter()


@message_api.post('/v1/lark')
async def on_lark_message(request: Request, response: Response):
    """Delegate a Lark callback to MessageService."""
    service = MessageService(request, response)
    return await service.on_lark_message()


@message_api.post('/v1/wechat/{suffix}')
async def on_wechat_message(request: Request, response: Response, suffix: str,
                            body: WechatRequest):
    """Delegate a WeChat callback to MessageService, forwarding the path
    suffix."""
    service = MessageService(request, response)
    return await service.on_wechat_message(body, suffix)
19 |
--------------------------------------------------------------------------------
/web/api/qalib.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from fastapi import APIRouter, Depends, File, Request, Response, UploadFile
4 |
5 | from web.middleware.token import check_hxd_token
6 | from web.model.qalib import QalibInfo, QalibPositiveNegative, QalibDeleteDoc
7 | from web.service.qalib import QaLibService
8 |
# Router for QA-library management; each handler resolves the QA-lib
# identity via the HuixiangDou token dependency, then delegates to
# QaLibService.
qalib_api = APIRouter()


@qalib_api.post('/v1/getInfo')
async def qalib_info(request: Request,
                     response: Response,
                     hxd_info: QalibInfo = Depends(check_hxd_token)):
    """Delegate to QaLibService.info."""
    service = QaLibService(request, response, hxd_info)
    return await service.info()


@qalib_api.post('/v1/addDocs')
async def qalib_add_docs(request: Request,
                         response: Response,
                         files: List[UploadFile] = File(...),
                         hxd_info: QalibInfo = Depends(check_hxd_token)):
    """Delegate uploaded files to QaLibService.add_docs."""
    service = QaLibService(request, response, hxd_info)
    return await service.add_docs(files)


@qalib_api.post('/v1/getSampleInfo')
async def qalib_get_sample_info(
        request: Request,
        response: Response,
        hxd_info: QalibInfo = Depends(check_hxd_token)):
    """Delegate to QaLibService.get_sample_info."""
    service = QaLibService(request, response, hxd_info)
    return await service.get_sample_info()


@qalib_api.post('/v1/updateSampleInfo')
async def qalib_update_sample_info(
        request: Request,
        response: Response,
        body: QalibPositiveNegative,
        hxd_info: QalibInfo = Depends(check_hxd_token)):
    """Delegate to QaLibService.update_sample_info."""
    service = QaLibService(request, response, hxd_info)
    return await service.update_sample_info(body)
43 |
44 |
@qalib_api.post('/v1/deleteDocs')
async def qalib_delete_docs(request: Request,
                            response: Response,
                            body: QalibDeleteDoc,
                            hxd_info: QalibInfo = Depends(check_hxd_token)):
    """Delegate to QaLibService.delete_docs.

    Fix: this handler was named ``qalib_add_docs``, silently shadowing the
    upload handler of the same name at module level (flake8 F811). The
    route itself still worked because FastAPI registers it at decoration
    time, but the duplicate name breaks introspection and linting, so the
    function is renamed to match its path.
    """
    return await QaLibService(request, response, hxd_info).delete_docs(body)
51 |
--------------------------------------------------------------------------------
/web/api/statistic.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Request, Response
2 |
3 | from web.service.statistic import StatisticService
4 |
# Router for public statistics endpoints (no auth dependency).
statistic_api = APIRouter()


@statistic_api.get('/v1/total')
async def qalib_info_statistic(request: Request, response: Response):
    """Delegate to StatisticService.info_statistic."""
    service = StatisticService(request, response)
    return await service.info_statistic()
11 |
--------------------------------------------------------------------------------
/web/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/config/__init__.py
--------------------------------------------------------------------------------
/web/config/logging.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
# Dict-based logging configuration for uvicorn, in the
# logging.config.dictConfig schema (version 1): routes the uvicorn and
# uvicorn.access loggers to stderr with a timestamped format.
LOGGING_CONFIG = {
    'version': 1,
    # Keep loggers configured elsewhere alive.
    'disable_existing_loggers': False,
    'formatters': {
        'default': {
            # '()' selects a custom formatter factory (uvicorn's own).
            '()': 'uvicorn.logging.DefaultFormatter',
            'fmt': '%(levelprefix)s %(asctime)s - %(message)s',
            'datefmt': '%Y-%m-%d %H:%M:%S',
        },
    },
    'handlers': {
        'default': {
            'formatter': 'default',
            'class': 'logging.StreamHandler',
            'stream': 'ext://sys.stderr',
        },
    },
    'loggers': {
        'uvicorn': {
            'handlers': ['default'],
            'level': 'INFO'
        },
        'uvicorn.error': {
            'level': 'INFO'
        },
        'uvicorn.access': {
            'handlers': ['default'],
            'level': 'INFO',
            # Prevent double emission through the parent 'uvicorn' logger.
            'propagate': False
        },
    },
}
35 |
--------------------------------------------------------------------------------
/web/constant/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/constant/__init__.py
--------------------------------------------------------------------------------
/web/front-end/.eslintignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | dist
3 | build
4 | .umi
5 | *.d.ts
6 | lib
--------------------------------------------------------------------------------
/web/front-end/.gitignore:
--------------------------------------------------------------------------------
1 | # dependencies
2 | /node_modules
3 | /npm-debug.log*
4 | /yarn-error.log
5 | /yarn.lock
6 | package-lock.json
7 |
8 | # production
9 | /dist
10 | /build
11 |
12 | # misc
13 | .DS_Store
14 | .idea
15 |
16 | # umi
17 | /src/.umi
18 | /src/.umi-production
19 | /src/.umi-test
20 | /.env.local
21 |
22 | /maps
23 | .husky
--------------------------------------------------------------------------------
/web/front-end/.npmrc:
--------------------------------------------------------------------------------
1 | # 改变远程仓库地址
2 | # registry=https://registry.npmjs.org/
3 |
--------------------------------------------------------------------------------
/web/front-end/dist/assets/bean1-002ba51d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/dist/assets/bean1-002ba51d.png
--------------------------------------------------------------------------------
/web/front-end/dist/assets/logo-af340389.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/dist/assets/logo-af340389.png
--------------------------------------------------------------------------------
/web/front-end/dist/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/dist/logo.png
--------------------------------------------------------------------------------
/web/front-end/env/.env.development:
--------------------------------------------------------------------------------
1 | # use in js: console.log(import.meta.env.MODE);
2 | VITE_NODE=development
3 |
--------------------------------------------------------------------------------
/web/front-end/env/.env.production:
--------------------------------------------------------------------------------
1 | VITE_NODE=production
2 |
--------------------------------------------------------------------------------
/web/front-end/env/.env.staging:
--------------------------------------------------------------------------------
1 | VITE_NODE=staging
2 |
--------------------------------------------------------------------------------
/web/front-end/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
HuixiangDou
9 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/web/front-end/mock/db.json:
--------------------------------------------------------------------------------
1 | {
2 | "posts": [
3 | {
4 | "id": 1,
5 | "title": "json-server",
6 | "author": "typicode"
7 | }
8 | ],
9 | "comments": [
10 | {
11 | "id": 1,
12 | "body": "some comment",
13 | "postId": 1
14 | }
15 | ],
16 | "profile": {
17 | "name": "typicode"
18 | },
19 | "userinfo": {
20 | "code": 0,
21 | "data": {
22 | "id": 1,
23 | "name": "李剑阁",
24 | "job": "scientist"
25 | }
26 | }
27 | }
--------------------------------------------------------------------------------
/web/front-end/public/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/public/logo.png
--------------------------------------------------------------------------------
/web/front-end/readme.md:
--------------------------------------------------------------------------------
1 | # 1. 命令
2 | ## 安装依赖
3 | npm install
4 |
5 | ## 开发
6 | npm run dev
7 |
8 | ## build
9 | npm run build
10 |
针对不同的环境打包命令不同,比如线上环境的命令npm run build:aliyun-prod
11 |
12 | ## preview
13 | npm run preview
14 |
这是vite项目特有的命令,因为vite的serve和build出的代码不一致,上线前需要用preview检测打包结果是否和serve一致
15 |
16 | ## mock
17 | npm run mock
18 |
19 | # 2. Ability config
20 |
当前模板支持动态配置能力
21 |
src/config/auth.ts : 支持是否开启该功能(default false)clientId, 接口白名单与网页白名单
22 |
src/config/log.ts : 支持是否开启该功能(default false)ga4 measurement id
23 |
src/config/base-url.ts : 各个环境接口访问host和api prefix
24 |
25 |
更多细节请查看配置文件注释
26 |
27 |
--------------------------------------------------------------------------------
/web/front-end/scripts/alias.ts:
--------------------------------------------------------------------------------
1 | import { resolvePath } from './utils';
2 |
// Path aliases shared by the bundler: each '@'-prefixed specifier maps to
// an absolute path under ./src (resolved via scripts/utils.resolvePath).
const alias = {
    '@': resolvePath('./src'),
    '@components': resolvePath('./src/components'),
    '@layouts': resolvePath('./src/layouts'),
    '@assets': resolvePath('./src/assets'),
    '@pages': resolvePath('./src/pages'),
    '@services': resolvePath('./src/services'),
    '@utils': resolvePath('./src/utils'),
    '@styles': resolvePath('./src/styles'),
    '@routes': resolvePath('./src/routes'),
    '@config': resolvePath('./src/config'),
    '@locales': resolvePath('./src/locales'),
    '@constants': resolvePath('./src/constants'),
    '@interceptors': resolvePath('./src/interceptors'),
    '@hooks': resolvePath('./src/hooks')
};

export default alias;
21 |
--------------------------------------------------------------------------------
/web/front-end/scripts/import-to-cdn.ts:
--------------------------------------------------------------------------------
// External modules served from a CDN instead of being bundled; `var` is
// the global each script exposes at runtime.
export default [
    {
        name: 'react',
        var: 'React',
        path: 'https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/asserts/react@18.2.0/react.production.min.js'
    },
    {
        name: 'react-dom',
        var: 'ReactDOM',
        path: 'https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/asserts/react@18.2.0/react-dom.production.min.js'
    }
];
13 |
--------------------------------------------------------------------------------
/web/front-end/scripts/index.ts:
--------------------------------------------------------------------------------
// Barrel re-exports for the build scripts.
export { default as ProxyConfig } from './proxy';
export { default as ImportToCDNList } from './import-to-cdn';
export { default as alias } from './alias';
4 |
--------------------------------------------------------------------------------
/web/front-end/scripts/proxy.ts:
--------------------------------------------------------------------------------
// https://github.com/http-party/node-http-proxy#options
// Dev-server proxy: forward /api requests to the local backend.
const ProxyConfig = {
    '/api': {
        target: 'http://localhost:8080',
        changeOrigin: true,
        secure: false,
        rewrite: path => {
            // NOTE(review): this replaces a literal '^' character, which
            // never occurs in a URL path, so the rewrite is a no-op —
            // presumably a regex such as /^\/api/ was intended. Confirm
            // with the backend's expected path before changing.
            return path.replace('^', '');
        },
    }
};

export default ProxyConfig;
14 |
--------------------------------------------------------------------------------
/web/front-end/scripts/utils.ts:
--------------------------------------------------------------------------------
import path from 'path';

// Resolve a path relative to the front-end project root (one level up
// from this scripts directory).
export const resolvePath = p => path.resolve(__dirname, '..', p);
4 |
--------------------------------------------------------------------------------
/web/front-end/src/app.tsx:
--------------------------------------------------------------------------------
1 | import { GlobalLang } from '@components/global-lang';
2 | import RouterRoot from './routes';
3 | import './styles/index.less';
4 | import 'sea-lion-ui/dist/index.css';
5 |
6 | console.log(import.meta.env.VITE_NODE);
7 |
8 | const App = () => {
9 | return (
10 |
11 |
12 |
13 | );
14 | };
15 |
16 | export default App;
17 |
--------------------------------------------------------------------------------
/web/front-end/src/assets/imgs/bean.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/src/assets/imgs/bean.png
--------------------------------------------------------------------------------
/web/front-end/src/assets/imgs/bean1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/src/assets/imgs/bean1.png
--------------------------------------------------------------------------------
/web/front-end/src/assets/imgs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/src/assets/imgs/logo.png
--------------------------------------------------------------------------------
/web/front-end/src/components/button/button.module.less:
--------------------------------------------------------------------------------
1 | .btn {
2 | padding: 8px 12px;
3 | background: #c7eaba;
4 | color: #286500;
5 | border-radius: 6px;
6 | font-size: 14px;
7 | line-height: 16px;
8 | display: inline-flex;
9 | align-items: center;
10 | gap: 4px;
11 | cursor: pointer;
12 | word-break: keep-all;
13 | &[aria-disabled="true"] {
14 | background: #dcdcdc;
15 | color: #9d9d9d;
16 | cursor: not-allowed;
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/web/front-end/src/components/button/button.tsx:
--------------------------------------------------------------------------------
1 | import { FC, ReactNode, HTMLAttributes } from 'react';
2 | import classNames from 'classnames';
3 | import styles from './button.module.less';
4 |
5 | export interface ExampleProps extends HTMLAttributes
{
6 | disabled?: boolean;
7 | onClick?: () => void;
8 | children?: ReactNode;
9 | className?: string;
10 | }
11 |
12 | const Button: FC = ({
13 | disabled = false,
14 | onClick, children,
15 | className,
16 | }) => {
17 | const handleClick = () => {
18 | if (disabled) {
19 | return;
20 | }
21 | onClick();
22 | };
23 | return (
24 |
29 | {children}
30 |
31 | );
32 | };
33 |
34 | export default Button;
35 |
--------------------------------------------------------------------------------
/web/front-end/src/components/components-portal/components-portal.tsx:
--------------------------------------------------------------------------------
import { createPortal } from 'react-dom';

// Render children into the DOM element with id `wrapperId`, falling back
// to document.body when the id is empty or no such element exists.
const ComponentPortal = ({ children, wrapperId = '' }) => {
    return createPortal(children, document.getElementById(wrapperId) || document.body);
};

export default ComponentPortal;
8 |
--------------------------------------------------------------------------------
/web/front-end/src/components/copy-code/copy-code.module.less:
--------------------------------------------------------------------------------
1 | .copy-code {
2 | display: flex;
3 | gap: 4px;
4 | align-items: center;
5 | width: 100%;
6 | .code {
7 | font-size: 14px;
8 | line-height: 16px;
9 | padding: 8px 0;
10 | cursor: pointer;
11 | white-space: nowrap;
12 | overflow: hidden;
13 | text-overflow: ellipsis;
14 | max-width: calc(100% - 36px);
15 | color: #047600;
16 | }
17 | .copy {
18 | cursor: pointer;
19 | color: #9D9D9D;
20 | margin-left: 4px;
21 | }
22 | }
--------------------------------------------------------------------------------
/web/front-end/src/components/copy-code/copy-code.tsx:
--------------------------------------------------------------------------------
1 | import { IconFont, message } from 'sea-lion-ui';
2 | import styles from './copy-code.module.less';
3 |
4 | export interface CopyCodeProps {
5 | code: string;
6 | }
7 |
8 | const CopyCode = (props: CopyCodeProps) => {
9 | const { code } = props;
10 | const copy = () => {
11 | const input = document.createElement('input');
12 | input.value = code;
13 | document.body.appendChild(input);
14 | input.select();
15 | document.execCommand('copy');
16 | message.success('复制成功');
17 | document.body.removeChild(input);
18 | };
19 | return (
20 |
21 |
{code}
22 |
23 |
24 |
25 |
26 | );
27 | };
28 |
29 | export default CopyCode;
30 |
--------------------------------------------------------------------------------
/web/front-end/src/components/global-lang/global-lang-context.ts:
--------------------------------------------------------------------------------
import { createContext } from 'react';
import { Language } from '@utils/utils';

// Placeholder setter used until a provider supplies the real one.
const noop = (l: Language) => undefined;

export const LangDefault = {
    locale: '',
    setLocale: noop
};

// Context giving descendants the current locale plus a setter to switch it.
export const GlobalLangeContext = createContext(LangDefault);
12 |
--------------------------------------------------------------------------------
/web/front-end/src/components/global-lang/global-lang.tsx:
--------------------------------------------------------------------------------
1 | import {
2 | FC, useCallback, useState, useMemo
3 | } from 'react';
4 | import { IntlProvider } from 'react-intl';
5 | import {
6 | getLang, Language, setLang
7 | } from '@utils/utils';
8 | import locales from '@/locales';
9 | import { GlobalLangeContext } from './global-lang-context';
10 |
11 | const GlobalLang: FC = ({ children }) => {
12 | const [locale, setLocale] = useState(getLang());
13 |
14 | const setCurrentLocale = useCallback((lang: Language) => {
15 | setLocale(lang);
16 | setLang(lang);
17 | }, []);
18 |
19 | // 子孙组件通过context获取setLocale可以更改中英文
20 | const value = useMemo(() => ({ locale, setLocale: setCurrentLocale }), [locale, setCurrentLocale]);
21 |
22 | return (
23 |
24 |
25 | {children}
26 |
27 |
28 | );
29 | };
30 |
31 | export default GlobalLang;
32 |
--------------------------------------------------------------------------------
/web/front-end/src/components/global-lang/index.tsx:
--------------------------------------------------------------------------------
1 | export { default as GlobalLang } from './global-lang';
2 | export { GlobalLangeContext } from './global-lang-context';
3 |
--------------------------------------------------------------------------------
/web/front-end/src/components/header/header.module.less:
--------------------------------------------------------------------------------
1 | .header {
2 | padding: 0 50px;
3 | height: 64px;
4 | display: flex;
5 | align-items: center;
6 | justify-content: flex-end;
7 | gap: 24px;
8 |
9 | .feedback {
10 | cursor: pointer;
11 | }
12 |
13 | .language {
14 | cursor: pointer;
15 | .chosen {
16 | font-weight: bold;
17 | }
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/web/front-end/src/components/header/header.tsx:
--------------------------------------------------------------------------------
1 | import { GlobalLangeContext } from '@components/global-lang';
2 | import { useContext } from 'react';
3 | import { useLocale } from '@hooks/useLocale';
4 | import styles from './header.module.less';
5 |
6 | const Header = () => {
7 | const { locale, setLocale } = useContext(GlobalLangeContext);
8 | const locales = useLocale('home');
9 | return (
10 |
11 | window.open('https://github.com/InternLM/HuixiangDou/issues')}
14 | >
15 | {locales.feedback}
16 |
17 |
18 | setLocale('zh-CN')}
20 | className={locale === 'zh-CN' && styles.chosen}
21 | >
22 | 中
23 | {' '}
24 |
25 | /
26 | setLocale('en-US')}
28 | className={locale === 'en-US' && styles.chosen}
29 | >
30 | {' '}
31 | EN
32 |
33 |
34 |
35 | );
36 | };
37 |
38 | export default Header;
39 |
--------------------------------------------------------------------------------
/web/front-end/src/components/notification/emoji-wrapper.tsx:
--------------------------------------------------------------------------------
1 | import { FC, ReactNode } from 'react';
2 | import styles from './notification.module.less';
3 |
4 | interface EmojiWrapperProps {
5 | emoji?: string;
6 | children?: ReactNode;
7 | }
8 |
9 | const heart = 'https://oss.openmmlab.com/www/home/heart_3d.png';
10 | const EmojiWrapper: FC = ({ emoji = heart, children }) => {
11 | return (
12 |
13 | {children}
14 |
15 |
16 |
17 |
18 | );
19 | };
20 |
21 | export default EmojiWrapper;
22 |
--------------------------------------------------------------------------------
/web/front-end/src/components/notification/notification.tsx:
--------------------------------------------------------------------------------
1 | import { FC, ReactNode } from 'react';
2 | import { notification } from '@components/notification/use-notification';
3 | import EmojiWrapper from '@components/notification/emoji-wrapper';
4 | import { useLocale } from '@hooks/useLocale';
5 | import styles from './notification.module.less';
6 |
7 | export interface NotificationProps {
8 | title: string;
9 | content: string;
10 | notificationKey: string;
11 | children?: ReactNode;
12 | }
13 |
14 | const Notification: FC = ({
15 | title,
16 | content,
17 | notificationKey,
18 | }) => {
19 | const locales = useLocale('components');
20 |
21 | return (
22 |
23 |
{title}
24 |
{content}
25 |
26 |
notification.unmountNotification(notificationKey)}
29 | >
30 | {locales.hide4ever}
31 |
32 |
33 | {
36 | window.open('https://github.com/InternLM/HuixiangDou/');
37 | }}
38 | >
39 | {locales.goStar}
40 |
41 |
42 |
43 |
44 | );
45 | };
46 |
47 | export default Notification;
48 |
--------------------------------------------------------------------------------
/web/front-end/src/components/notification/use-notification.tsx:
--------------------------------------------------------------------------------
1 | import Notification, { NotificationProps } from '@components/notification/notification';
2 | import { useLocale } from '@hooks/useLocale';
3 | import ComponentPortal from '@components/components-portal/components-portal';
4 |
5 | const notificationWrapper = 'global-notification';
6 |
7 | export const notification = {
8 | notificationContainer: null,
9 |
10 | showNotification(params: NotificationProps) {
11 | if (document.getElementById(notificationWrapper)) {
12 | document.body.removeChild(document.getElementById(notificationWrapper));
13 | this.notificationContainer = null;
14 | }
15 | if (localStorage.getItem(params.notificationKey)) {
16 | return null;
17 | }
18 | this.notificationContainer = document.createElement('div');
19 | this.notificationContainer.id = notificationWrapper;
20 | document.body.appendChild(this.notificationContainer);
21 | return (
22 |
23 |
24 |
25 | );
26 | },
27 | unmountNotification(key) {
28 | if (this.notificationContainer) {
29 | localStorage.setItem(key, 'true');
30 | document.body.removeChild(this.notificationContainer);
31 | this.notificationContainer = null;
32 | }
33 | },
34 | };
35 | const useNotification = () => {
36 | const locales = useLocale('components');
37 |
38 | return notification.showNotification({
39 | title: '',
40 | content: locales.notificationContent,
41 | notificationKey: '__HuiXiangDou__',
42 | });
43 | };
44 |
45 | export default useNotification;
46 |
--------------------------------------------------------------------------------
/web/front-end/src/components/upload-item/index.tsx:
--------------------------------------------------------------------------------
// Barrel file re-exporting UploadItem and its named exports.
import UploadItem from './upload-item';

export * from './upload-item';
export default UploadItem;
5 |
--------------------------------------------------------------------------------
/web/front-end/src/components/upload-item/upload-item.module.less:
--------------------------------------------------------------------------------
// Single uploaded-file row: status icon, truncated file name, progress bar.
.upload-item {
    display: flex;
    align-items: flex-start;
    gap: 4px;
    border-radius: 4px;
    padding: 2px 4px;
    margin-bottom: 4px;
    // Ellipsize long file names.
    .name {
        max-width: 320px;
        text-overflow: ellipsis;
        white-space: nowrap;
        overflow: hidden;
    }
    .progress {
        width: 100%;
        margin-top: 4px;
    }
}
19 |
--------------------------------------------------------------------------------
/web/front-end/src/components/upload-item/upload-item.tsx:
--------------------------------------------------------------------------------
1 | import { FC } from 'react';
2 | import { IconFont } from 'sea-lion-ui';
3 | import { LoadingOutlined } from '@ant-design/icons';
4 | import styles from './upload-item.module.less';
5 |
6 | export const enum UploadStatus {
7 | init = 'init',
8 | done = 'done',
9 | uploading = 'uploading',
10 | error = 'error',
11 | removed = 'removed',
12 | }
13 |
14 | export interface UploadItemProps {
15 | uid: string;
16 | name: string;
17 | status: UploadStatus;
18 | progress: number;
19 | }
20 |
21 | const StatusColor = {
22 | init: 'lightgrey',
23 | done: 'green',
24 | uploading: 'blue',
25 | error: 'red',
26 | removed: 'darkgrey'
27 | };
28 |
29 | const StatusIcon = {
30 | init: 'icon-DocOutlined',
31 | done: 'icon-CheckCircleFilled',
32 | uploading: 'icon-HorizontalMoreOutlined',
33 | error: 'icon-CloseCircleFilled',
34 | removed: 'icon-DocOutlined'
35 | };
36 | const UploadItem: FC = ({
37 | uid, name, status, progress
38 | }) => {
39 | return (
40 |
47 |
48 | {status === UploadStatus.uploading ? : (
49 |
50 | )}
51 |
52 |
61 |
62 | );
63 | };
64 |
65 | export default UploadItem;
66 |
--------------------------------------------------------------------------------
/web/front-end/src/components/upload/delete-btn.tsx:
--------------------------------------------------------------------------------
1 | import { IconFont, Modal } from 'sea-lion-ui';
2 | import { useLocale } from '@hooks/useLocale';
3 | import { useState } from 'react';
4 | import styles from './upload.module.less';
5 |
6 | const DeleteBtn = ({ onClick }) => {
7 | const locales = useLocale('components');
8 |
9 | const [openModal, setOpenModal] = useState(false);
10 |
11 | const handleClick = () => {
12 | setOpenModal(true);
13 | };
14 |
15 | const confirm = () => {
16 | setOpenModal(false);
17 | onClick();
18 | };
19 |
20 | const cancel = () => {
21 | setOpenModal(false);
22 | };
23 |
24 | return (
25 | <>
26 |
27 |
28 | {locales.deleteSelected}
29 |
30 | )}
35 | icon={ }
36 | onClose={() => setOpenModal(false)}
37 | >
38 | {locales.deleteDesc}
39 |
40 |
{locales.confirm}
41 |
{locales.cancel}
42 |
43 |
44 | >
45 |
46 | );
47 | };
48 |
49 | export default DeleteBtn;
50 |
--------------------------------------------------------------------------------
/web/front-end/src/components/upload/index.tsx:
--------------------------------------------------------------------------------
// Barrel file re-exporting the Upload component and its named exports.
import Upload from './upload';

export * from './upload';
export default Upload;
5 |
--------------------------------------------------------------------------------
/web/front-end/src/config/auth.ts:
--------------------------------------------------------------------------------
// Login-related configuration.

export const VITE_NODE = import.meta.env.VITE_NODE;

// Toggle for single sign-on.
export const openOSS = false;

// OAuth client id per environment (currently unset for all).
export const ClientIdMap = {
    development: '',
    staging: '',
    production: ''
};

// Login redirect URL per environment.
export const LogURLMap = {
    development: '',
    staging: '',
    production: ''
};

// NOTE: the development domain must start with '.', because the local and
// deployed development environments use different hostnames. If the page
// keeps redirecting in a loop, inspect the domain of the token cookie the
// backend returns.
export const TokenCookieDomainMap = {
    development: '',
    staging: '',
    production: ''
};

export const clientId = ClientIdMap[VITE_NODE];
export const logURL = LogURLMap[VITE_NODE];
export const TokenCookieDomain = TokenCookieDomainMap[VITE_NODE];

// Finer-grained permission configuration.

// Pathnames that require authentication.
export const AuthPages: string[] = [
    ''
];

// API paths that are called without a token.
export const NoTokenApiPaths: string[] = [
    '/account/oauth',
    '/api/v1/access/v1/login',
    '/api/v1/statistic/v1/total'
];
46 |
--------------------------------------------------------------------------------
/web/front-end/src/config/base-url.ts:
--------------------------------------------------------------------------------
// Configuration for API requests.

// API request domain per environment.
export const ApiBaseUrlMap = {
    development: '',
    staging: '',
    production: ''
};

// API path prefix per environment.
export const ApiPrefixMap = {
    mock: '',
    development: '',
    staging: '',
    production: ''
};

export const Env = import.meta.env.VITE_NODE;

export const BaseURL = ApiBaseUrlMap[Env];

export const ApiPrefix = ApiPrefixMap[Env];
23 |
--------------------------------------------------------------------------------
/web/front-end/src/config/index.ts:
--------------------------------------------------------------------------------
// Re-export all configuration modules from a single entry point.
export * from './auth';
export * from './base-url';
export * from './log';
4 |
--------------------------------------------------------------------------------
/web/front-end/src/config/log.ts:
--------------------------------------------------------------------------------
// Logging / analytics configuration.

const VITE_NODE = import.meta.env.VITE_NODE;

// Master switch for analytics logging.
export const openLog = false;

// Measurement id per environment — presumably Google Analytics
// (vite-env.d.ts declares window.gtag); confirm before wiring up.
export const MeasurementIdMap = {
    development: '',
    staging: '',
    production: ''
};

export const MeasurementId = MeasurementIdMap[VITE_NODE];
14 |
--------------------------------------------------------------------------------
/web/front-end/src/hooks/useLocale.ts:
--------------------------------------------------------------------------------
1 | import { useContext, useState, useEffect } from 'react';
2 | import { GlobalLangeContext } from '@components/global-lang';
3 | import Locale from '@/locales';
4 |
5 | export const useLocale = (propertyName: string) => {
6 | const [locales, setLocales] = useState({});
7 | const { locale: lang } = useContext(GlobalLangeContext);
8 |
9 | useEffect(() => {
10 | if (lang && Locale[lang] && Locale[lang][propertyName]) {
11 | setLocales(Locale[lang][propertyName]);
12 | }
13 | }, [lang, propertyName]);
14 |
15 | return locales;
16 | };
17 |
--------------------------------------------------------------------------------
/web/front-end/src/interceptors/request.ts:
--------------------------------------------------------------------------------
1 | import { NoTokenApiPaths, openOSS } from '@config/auth';
2 | import { getLang, Token } from '@utils/utils';
3 | import { AxiosRequestHeaders } from 'axios';
4 |
// *Interceptor functions transform the request config before it is sent; the config is supplied by axios's request interceptor.
// They run in order from left to right, and each must return the config for the next interceptor to process.
// Benefit: clearer structure — each function does one focused transformation and returns the config, keeping the logic decoupled.
8 |
// Request headers plus an optional Bearer authorization header.
interface IAuth extends AxiosRequestHeaders{
    Authorization?: string;
}
12 |
13 | const validateAuthInterceptor = config => {
14 | const token = Token.get();
15 | const headers: IAuth = {
16 | lang: getLang(),
17 | ...config.headers
18 | };
19 |
20 | if (
21 | !NoTokenApiPaths.find(p => (config.url || '').endsWith(p))
22 | && openOSS
23 | ) {
24 | headers.Authorization = `Bearer ${token}`;
25 | }
26 |
27 | return {
28 | ...config,
29 | headers
30 | };
31 | };
32 |
// Stamp every outgoing request with the client identification headers.
const customConfigInterceptor = config => {
    const headers = {
        ...config.headers,
        'Client-Type': 'app',
        type: 0
    };
    return { ...config, headers };
};
43 |
44 | export const requestInterceptors = [validateAuthInterceptor, customConfigInterceptor];
45 |
--------------------------------------------------------------------------------
/web/front-end/src/layouts/header-container-layout/header-container-layout.module.less:
--------------------------------------------------------------------------------
// Layout shell: header on top, routed page body below (both currently
// unstyled placeholders).
.wrapper {
    .header {}
    .body {}
}
--------------------------------------------------------------------------------
/web/front-end/src/layouts/header-container-layout/header-container-layout.tsx:
--------------------------------------------------------------------------------
1 | import Header from '@components/header/header';
2 | import { Outlet } from 'react-router-dom';
3 | import useNotification from '@components/notification/use-notification';
4 | import styles from './header-container-layout.module.less';
5 |
6 | const HeaderContainerLayout = () => {
7 | return (
8 |
9 |
10 |
11 |
12 |
13 | {useNotification()}
14 |
15 | );
16 | };
17 |
18 | export default HeaderContainerLayout;
19 |
--------------------------------------------------------------------------------
/web/front-end/src/locales/en-US.ts:
--------------------------------------------------------------------------------
// Aggregated English (en-US) locale bundle, merged by page/section.
import home from '@locales/en-US/home';
import beanDetail from '@locales/en-US/bean-detail';
import components from '@locales/en-US/components';
import welcome from './en-US/welcome';

export default {
    ...welcome,
    ...home,
    ...beanDetail,
    ...components,
};
12 |
--------------------------------------------------------------------------------
/web/front-end/src/locales/en-US/components.ts:
--------------------------------------------------------------------------------
// English strings for shared components (consumed via useLocale('components')).
export default {
    components: {
        notificationContent: '🎉 HuixiangDou is open source now. If this helps you, please give it a star! 🌟 🥺',
        hide4ever: 'Hide forever',
        goStar: 'Star',
        fileSize: 'Single file size should not exceed 35MB',
        nameSize: 'File name is too long',
        fileCount: 'Up to 200 files can be uploaded at a time',
        pendingFiles: 'Uploading documents',
        confirmUpload: 'Upload',
        uploading: 'Uploading',
        uploadedFiles: 'Uploaded documents',
        uploadFailed: 'Failed',
        processing: 'Processing',
        total: 'Total',
        failed: 'Failed',
        searchDesc: 'Enter the document name to search',
        search: 'Search',
        selectAll: 'Select all',
        noSelected: 'No document selected',
        deleteSelected: 'Delete selected',
        deleteConfirm: 'Are you sure you want to delete the selected documents?',
        deleteDesc: 'The delete operation will rebuild the bean',
        confirm: 'Delete',
        cancel: 'Cancel',
    }
};
28 |
--------------------------------------------------------------------------------
/web/front-end/src/locales/en-US/home.ts:
--------------------------------------------------------------------------------
// English strings for the home page (consumed via useLocale('home')).
export default {
    home: {
        slogan: 'Knowledge Assistant, Zero-coding with Lark and WeChat.',
        beanName: 'Knowledge base name. Auto create if not exists',
        validateMsg: 'At least 8 characters required',
        createBean: 'Create Knowledge Base',
        beanPwd: 'Knowledge Base Password',
        create: 'Create',
        cancel: 'Cancel',
        go: 'Go',
        bean: 'Knowledge Base',
        activeBean: 'Active Base Monthly',
        WeChat: 'WeChat',
        feishu: 'Lark',
        users: 'Chat Count',
        uniqueUsers: 'Unique Chat',
        pwdError: 'Password Error',
        feedback: 'Feedback',
        // NOTE(review): the four keys below duplicate welcome.ts — confirm
        // which copy the UI actually reads before removing either.
        welcome: 'Welcome, grateful',
        hello: 'Hi',
        hi: 'Hello',
        loading: 'Loading',
    }
};
25 |
--------------------------------------------------------------------------------
/web/front-end/src/locales/en-US/welcome.ts:
--------------------------------------------------------------------------------
// Top-level greeting strings (en-US).
export default {
    welcome: 'Welcome, grateful',
    hello: 'Hi',
    hi: 'Hello',
    loading: 'Loading'
};
7 |
--------------------------------------------------------------------------------
/web/front-end/src/locales/index.ts:
--------------------------------------------------------------------------------
// Locale table keyed by language tag; keys must match the Language values
// used by the GlobalLang context.
import zhCN from './zh-CN';
import enUS from './en-US';

export default {
    'zh-CN': zhCN,
    'en-US': enUS
};
8 |
--------------------------------------------------------------------------------
/web/front-end/src/locales/zh-CN.ts:
--------------------------------------------------------------------------------
// Aggregated Chinese (zh-CN) locale bundle, merged by page/section.
import home from '@locales/zh-CN/home';
import beanDetail from '@locales/zh-CN/bean-detail';
import components from '@locales/zh-CN/components';
import welcome from './zh-CN/welcome';

export default {
    ...welcome,
    ...home,
    ...beanDetail,
    ...components
};
12 |
--------------------------------------------------------------------------------
/web/front-end/src/locales/zh-CN/components.ts:
--------------------------------------------------------------------------------
// Chinese strings for shared components (consumed via useLocale('components')).
// Values are runtime UI text and must stay in Chinese.
export default {
    components: {
        notificationContent: `🎉HuixiangDou开源啦,快来给我们 star 吧!
小时候,我想当开源人,朋友给我鼓励和我最爱的小星星🌟 🥺`,
        hide4ever: '不再显示',
        goStar: '前往鼓励',
        fileSize: '单个文件大小不能超过 35M',
        nameSize: '文件名太长',
        fileCount: '单次最多上传 200 个文件',
        pendingFiles: '待上传文档',
        confirmUpload: '确认上传',
        uploading: '上传中',
        uploadedFiles: '已上传文档',
        uploadFailed: '上传失败',
        processing: '处理中',
        total: '共计',
        failed: '失败',
        searchDesc: '输入文档名称进行搜索',
        search: '搜索',
        selectAll: '全选',
        noSelected: '您还未选中任何文档',
        deleteSelected: '删除',
        deleteConfirm: '确定删除选中的文档吗?',
        deleteDesc: '删除操作会重建知识库',
        confirm: '删除',
        cancel: '取消',
    }
};
29 |
--------------------------------------------------------------------------------
/web/front-end/src/locales/zh-CN/home.ts:
--------------------------------------------------------------------------------
// Chinese strings for the home page (consumed via useLocale('home')).
export default {
    home: {
        slogan: '行业知识助手,零开发接入飞书个微群',
        beanName: '请输入知识库名称,不存在则自动创建。不少于 8 个字符',
        validateMsg: '知识库名称至少需要 8 个字符',
        createBean: '创建知识库',
        beanPwd: '知识库密码',
        create: '创建',
        cancel: '取消',
        go: '前往',
        bean: '知识库',
        activeBean: '月活知识库',
        WeChat: '微信',
        feishu: '飞书',
        users: '回答次数',
        uniqueUsers: '去重次数',
        pwdError: '密码错误',
        feedback: '问题反馈',
        // NOTE(review): the four values below are English in the zh-CN
        // bundle — confirm whether they were left untranslated on purpose.
        welcome: 'Welcome, grateful',
        hello: 'Hi',
        hi: 'Hello',
        loading: 'Loading'
    }
};
25 |
--------------------------------------------------------------------------------
/web/front-end/src/locales/zh-CN/welcome.ts:
--------------------------------------------------------------------------------
// Top-level greeting strings (zh-CN).
export default {
    welcome: '欢迎,感恩',
    hello: '嗨',
    hi: '你好',
    // NOTE(review): left in English in the zh-CN bundle — confirm intent.
    loading: 'Loading',
};
7 |
--------------------------------------------------------------------------------
/web/front-end/src/main.tsx:
--------------------------------------------------------------------------------
1 | import * as React from 'react';
2 | import * as ReactDOM from 'react-dom/client';
3 | import Mlog from '@utils/mlog';
4 | import '@config/change-page-gray';
5 | import App from './app';
6 |
7 | Mlog.init();
8 |
9 | ReactDOM.createRoot(document.getElementById('root') as HTMLElement).render(
10 |
11 |
12 |
13 | );
14 |
--------------------------------------------------------------------------------
/web/front-end/src/pages/bean-detail/bean-detail.module.less:
--------------------------------------------------------------------------------
// Pill-style action button shared across the bean-detail page.
.btn {
    padding: 8px 12px;
    background: #c7eaba;
    color: #286500;
    border-radius: 6px;
    font-size: 14px;
    line-height: 16px;
    display: inline-flex;
    align-items: center;
    gap: 4px;
    cursor: pointer;
    // Disabled look keyed off aria-disabled so it stays accessible.
    &[aria-disabled="true"] {
        background: #dcdcdc;
        color: #9d9d9d;
        cursor: not-allowed;
    }
}
// Knowledge-base ("bean") detail page, horizontally centered.
.bean-detail {
    min-height: 700px;
    min-width: 860px;
    margin: auto;
    text-align: center;
    position: absolute;
    top: 200px;
    left: 50%;
    transform: translateX(-50%);
    .logo {
        width: 800px;
        margin: 0 auto 72px;
        img {
            width: 100%;
        }
    }
    // Status pill; .fail-state overrides the background for failures.
    .bean-state {
        background-color: #e3f9dd;
        border-radius: 8px;
        padding: 4px 8px;
        margin-left: 4px;
    }
    .fail-state {
        background-color: #f1bcbc;
    }
    .name-wrapper {
        display: flex;
        align-items: center;
        gap: 4px;
    }
    // Five-column statistics grid.
    .statistics-wrapper {
        display: grid;
        grid-template-columns: repeat(5, 1fr);
        grid-gap: 20px;
        margin: 24px auto;
        text-align: center;
    }
    .statistics-item {
        text-align: left;
        .title-img {
            height: 16px;
        }
        .statistics-item-title {
            color: #9D9D9D;
            font-size: 16px;
            margin-bottom: 12px;
            line-height: 20px;
            display: flex;
            align-items: center;
            gap: 4px;
        }
    }
    // Pushes itself to the right edge of its flex row.
    .refresh {
        margin-left: auto;
        cursor: pointer;
        color: #286500;
    }
    .logout {
        //margin-left: auto;
        cursor: pointer;
        color: #9D9D9D;
    }
}
81 |
--------------------------------------------------------------------------------
/web/front-end/src/pages/bean-detail/components/chat/index.tsx:
--------------------------------------------------------------------------------
// Barrel file re-exporting Chat and its named exports.
import Chat from './chat';

export * from './chat';
export default Chat;
5 |
--------------------------------------------------------------------------------
/web/front-end/src/pages/bean-detail/components/example/example.module.less:
--------------------------------------------------------------------------------
.example {

}
// Editor inside the example modal; overrides the sea-lion input focus border.
.editor {
    margin-bottom: 12px;
    :global {
        .seal-input-container .seal-input-wrapper:focus-within {
            border: 1px solid #59a041;
        }
    }
}
12 |
--------------------------------------------------------------------------------
/web/front-end/src/pages/bean-detail/components/example/index.tsx:
--------------------------------------------------------------------------------
// Barrel file re-exporting Example and its named exports.
import Example from './example';

export * from './example';
export default Example;
5 |
--------------------------------------------------------------------------------
/web/front-end/src/pages/bean-detail/components/import-docs/import-docs.module.less:
--------------------------------------------------------------------------------
// Placeholder for the import-docs modal (no styles yet).
.import-docs {

}
4 |
--------------------------------------------------------------------------------
/web/front-end/src/pages/bean-detail/components/import-docs/import-docs.tsx:
--------------------------------------------------------------------------------
1 | import {
2 | FC, ReactNode, useState
3 | } from 'react';
4 | import { IconFont, Modal } from 'sea-lion-ui';
5 | import Button from '@components/button/button';
6 | import { useLocale } from '@hooks/useLocale';
7 | import Upload from '@components/upload';
8 | import { FileState } from '@services/home';
9 | import styles from './import-docs.module.less';
10 |
11 | export interface ImportDocsProps {
12 | filesState: FileState[];
13 | refresh: () => void;
14 | docs?: string[];
15 | children?: ReactNode;
16 | }
17 |
18 | const ImportDocs: FC = ({ refresh, docs, filesState }) => {
19 | const locales = useLocale('beanDetail');
20 | const [openModal, setOpenModal] = useState(false);
21 |
22 | const afterUpload = () => {
23 | refresh();
24 | };
25 | const closeModal = () => {
26 | setOpenModal(false);
27 | refresh();
28 | };
29 | return (
30 |
31 |
setOpenModal(true)}>
32 | {locales.docs}
33 |
34 |
35 |
)}
40 | onClose={closeModal}
41 | >
42 |
48 |
49 | {locales.upload}
50 | {locales.supportFiles}
51 |
52 |
53 |
54 | );
55 | };
56 |
57 | export default ImportDocs;
58 |
--------------------------------------------------------------------------------
/web/front-end/src/pages/bean-detail/components/import-docs/index.tsx:
--------------------------------------------------------------------------------
// Barrel file re-exporting ImportDocs and its named exports.
import ImportDocs from './import-docs';

export * from './import-docs';
export default ImportDocs;
5 |
--------------------------------------------------------------------------------
/web/front-end/src/pages/bean-detail/components/integrate-feishu/index.tsx:
--------------------------------------------------------------------------------
// Barrel file re-exporting IntegrateFeishu and its named exports.
import IntegrateFeishu from './integrate-feishu';

export * from './integrate-feishu';
export default IntegrateFeishu;
5 |
--------------------------------------------------------------------------------
/web/front-end/src/pages/bean-detail/components/integrate-feishu/integrate-feishu.module.less:
--------------------------------------------------------------------------------
// Feishu (Lark) integration modal styles.
.integrate-feishu {
    // Truncated, clickable webhook URL.
    .webhook-url {
        font-size: 14px;
        line-height: 16px;
        padding: 8px 0;
        cursor: pointer;
        white-space: nowrap;
        overflow: hidden;
        text-overflow: ellipsis;
        max-width: 200px;
        color: #9D9D9D;
    }
}
.eventurl {
    font-weight: bold;
}

.title {
    font-weight: bold;
    margin-top: 12px;
}

.cancel {
    color: #9D9D9D;
    background: #F4F5F9;
    cursor: pointer;
    &:hover {
        background: #EBECF0;
    }
}

// Horizontal label/value row; keep labels on one line.
.flex {
    display: flex;
    align-items: center;
    gap: 4px;
    span {
        word-break: keep-all;
        white-space: nowrap;
        color: rgba(0, 0, 0, 0.88);
    }
}
42 |
--------------------------------------------------------------------------------
/web/front-end/src/pages/bean-detail/components/integrate-wechat/integrate-wechat.module.less:
--------------------------------------------------------------------------------
// WeChat integration modal: bold item labels with green highlighted values.
.item-title {
    font-weight: bold;
    margin-top: 8px;
}
.item-content {
    margin-bottom: 24px;
    color: #047600;
}
--------------------------------------------------------------------------------
/web/front-end/src/pages/bean-detail/components/integrate-wechat/integrate-wechat.tsx:
--------------------------------------------------------------------------------
1 | import { useState } from 'react';
2 | import { IconFont, Modal } from 'sea-lion-ui';
3 | import Button from '@components/button/button';
4 | import { useLocale } from '@hooks/useLocale';
5 | import CopyCode from '@components/copy-code/copy-code';
6 | import styles from './integrate-wechat.module.less';
7 |
8 | export interface IntegrateWechatProps {
9 | messageUrl: string;
10 | }
11 |
12 | const IntegrateWechat = (props: IntegrateWechatProps) => {
13 | const locales = useLocale('beanDetail');
14 |
15 | const [openModal, setOpenModal] = useState(false);
16 |
17 | const handleOpen = () => {
18 | setOpenModal(true);
19 | };
20 | return (
21 |
22 |
23 | {locales.viewDetail}
24 |
25 |
26 |
)}
30 | onClose={() => setOpenModal(false)}
31 | >
32 |
33 | {locales.WeChatCallback}
34 |
35 |
36 |
37 |
38 |
39 | {locales.wechatGuidance}
40 |
41 |
window.open('https://zhuanlan.zhihu.com/p/686579577')}
43 | >
44 | {locales.viewGuide}
45 |
46 |
47 |
48 |
49 | );
50 | };
51 |
52 | export default IntegrateWechat;
53 |
--------------------------------------------------------------------------------
/web/front-end/src/pages/bean-detail/components/toggle-search/index.tsx:
--------------------------------------------------------------------------------
// Barrel file re-exporting ToggleSearch and its named exports.
import ToggleSearch from './toggle-search';

export * from './toggle-search';
export default ToggleSearch;
5 |
--------------------------------------------------------------------------------
/web/front-end/src/pages/bean-detail/components/toggle-search/toggle-search.module.less:
--------------------------------------------------------------------------------
// Search toggle widget styles.
.toggle-search {
    // Truncated, clickable token display.
    .token {
        font-size: 14px;
        line-height: 16px;
        padding: 8px 0;
        cursor: pointer;
        white-space: nowrap;
        overflow: hidden;
        text-overflow: ellipsis;
        max-width: 200px;
        color: #9D9D9D;
    }
}

.input-wrapper {
    display: flex;
    align-items: center;
    gap: 8px;
    padding: 12px 0;
    :global {
        .seal-input-group .seal-input-inner-container:focus-within {
            outline: 1px solid #59a041;
        }
    }
}

// NOTE(review): bare element selector — CSS modules do not scope element
// selectors, so this colors every <a> wherever the sheet loads; confirm
// it is intentional.
a {
    color: #286500
}
30 |
--------------------------------------------------------------------------------
/web/front-end/src/routes/index.tsx:
--------------------------------------------------------------------------------
1 | // router component
2 | import {
3 | BrowserRouter, Routes, Route, Navigate
4 | } from 'react-router-dom';
5 | import HeaderContainerLayout from '@layouts/header-container-layout/header-container-layout';
6 | import Home from '@pages/home/home';
7 | import BeanDetail from '@pages/bean-detail/bean-detail';
8 |
9 | const RouterRoot = () => {
10 | return (
11 | // react-router-dom v6 123
12 | // https://reactrouter.com/docs/en/v6/getting-started/overview
13 |
14 |
15 | }>
16 | }
19 | />
20 | } />
21 | } />
22 |
23 |
27 | There is nothing here!
28 |
29 | )}
30 | />
31 |
32 |
33 | );
34 | };
35 |
36 | export default RouterRoot;
37 |
--------------------------------------------------------------------------------
/web/front-end/src/services/user.ts:
--------------------------------------------------------------------------------
// User/account service calls.
import { request } from '@utils/ajax';

// Gateway prefixes for the user and UAA (auth) back-end services.
const userServicePrefix = '/gw/user-service';
const uaaServicePrefix = '/gw/uaa-be';

// Shape of the current-user payload.
export interface fetchCurrentUserReqDto {
    avatar?: string;
    email?: string;
    expiration?: string;
    roleIds?: string[];
    nickname?: string;
    jwt?: string;
    ssoUid: string;
    username?: string;
    wechat?: string;
    wechatName?: string;
    [key: string]: any;
}

// Fetch the logged-in user's profile using an explicit bearer token.
export async function fetchCurrentUser(
    token: string,
) {
    return request('/api/v1/login/getUserInfo', {
        method: 'POST',
        headers: {
            Authorization: `Bearer ${token}`
        },
    }, uaaServicePrefix);
}

// Log out of all sessions.
export async function logout() {
    return request('/api/v1/logout/all', {
        method: 'POST',
        meta: {
            isAllResponseBody: true
        },
    }, uaaServicePrefix);
}

export interface fetchOauthCodeReqDto {
    token: string;
}

// After third-party SSO validation, exchange the auth code for user info.
export const fetchOauthCode = (code: string | string[], redirect: string) => {
    return request('/api/v1/account/oauth', {
        method: 'POST',
        data: {
            code,
            redirect
        }
    }, userServicePrefix);
};
55 |
--------------------------------------------------------------------------------
/web/front-end/src/styles/index.less:
--------------------------------------------------------------------------------
// @import './normalize.css'; — normalize.css is loaded via CDN instead.
@import "mixins.less";
3 |
--------------------------------------------------------------------------------
/web/front-end/src/styles/mixins.less:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/front-end/src/styles/mixins.less
--------------------------------------------------------------------------------
/web/front-end/src/styles/variables.less:
--------------------------------------------------------------------------------
// Global Less variables: palette, layout metrics, and form tokens.
@red: red;
@black: #000;
@white: #fff;
@border-color: #EBECF0;

// Layout metrics.
@main-content-width: 1440px;
@main-content-hoz-padding: 120px;
@x-lab-header-height: 65px;

// Form and input tokens.
@border-lg: 1px solid @black;
@input-background-color: #f4f5f9;
@input-border-color: #D7D8DD;
@select-arrow-color: #464a53;

@border: 1px solid @border-color;
@form-item-bg: #F4F5F9;
@text-line-height: 21px;
@form-input-bg: #F9F9F9;
20 |
--------------------------------------------------------------------------------
/web/front-end/src/types.d.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable no-undef */
2 | /* eslint-disable no-unused-vars */
3 | declare module '*.css';
4 | declare module '*.less';
5 | declare module '*.png';
6 | declare module '*.jpg';
7 | declare module '*.jpeg';
8 | declare module '*.svg' {
9 | export function ReactComponent(
10 | props: React.SVGProps,
11 | ): React.ReactElement;
12 | const url: string;
13 | export default url;
14 | }
15 |
--------------------------------------------------------------------------------
/web/front-end/src/vite-env.d.ts:
--------------------------------------------------------------------------------
/// <reference types="vite/client" />
2 |
3 | // declare Google Analytics gtag.js
4 | declare interface Window {gtag: any; dataBuried: any; sealionJSONPCallback: any; }
5 |
6 | interface ImportMetaEnv {
7 | readonly VITE_NODE: string
8 | // 更多环境变量...
9 | }
10 |
11 | interface ImportMeta {
12 | readonly env: ImportMetaEnv
13 | }
14 |
15 |
--------------------------------------------------------------------------------
/web/front-end/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2020",
4 | "useDefineForClassFields": true,
5 | "lib": ["ES2020", "DOM", "DOM.Iterable"],
6 | "module": "ESNext",
7 | "skipLibCheck": true,
8 |
9 | /* Bundler mode */
10 | "moduleResolution": "node",
11 | // "allowImportingTsExtensions": true,
12 | "allowSyntheticDefaultImports":true,
13 | "resolveJsonModule": true,
14 | "isolatedModules": true,
15 | "noEmit": true,
16 | "jsx": "react-jsx",
17 | "baseUrl": ".",
18 | "paths": {
19 | "@/*": [
20 | "src/*"
21 | ],
22 | "@components/*": [
23 | "src/components/*"
24 | ],
25 | "@layouts/*": [
26 | "src/layouts/*"
27 | ],
28 | "@assets/*": [
29 | "src/assets/*"
30 | ],
31 | "@pages/*": [
32 | "src/pages/*"
33 | ],
34 | "@services/*": [
35 | "src/services/*"
36 | ],
37 | "@utils/*": [
38 | "src/utils/*"
39 | ],
40 | "@styles/*": [
41 | "src/styles/*"
42 | ],
43 | "@routes/*": [
44 | "src/routes/*"
45 | ],
46 | "@config/*": [
47 | "src/config/*"
48 | ],
49 | "@locales/*": [
50 | "src/locales/*"
51 | ],
52 | "@interceptors/*": [
53 | "src/interceptors/*"
54 | ],
55 | "@hooks/*": [
56 | "src/hooks/*"
57 | ],
58 | "@constants/*": [
59 | "src/constants/*"
60 | ]
61 | },
62 | "allowJs": true,
63 | "outDir": "./dist",
64 | },
65 | "include": [
66 | "src/**/*",
67 | "src/**/*.ts",
68 | "src/**/*.tsx",
69 | "src/**/*.vue",
70 | "tests/**/*.ts",
71 | "tests/**/*.tsx",
72 | "src/types.d.ts"
73 | ],
74 | "references": [{ "path": "./tsconfig.node.json" }]
75 | }
76 |
--------------------------------------------------------------------------------
/web/front-end/tsconfig.node.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "composite": true,
4 | "skipLibCheck": true,
5 | "module": "ESNext",
6 | "moduleResolution": "bundler",
7 | "allowSyntheticDefaultImports": true
8 | },
9 | "include": ["vite.config.ts", "scripts/*"]
10 | }
11 |
--------------------------------------------------------------------------------
/web/middleware/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/middleware/__init__.py
--------------------------------------------------------------------------------
/web/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/model/__init__.py
--------------------------------------------------------------------------------
/web/model/access.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel
2 |
3 |
class LoginBody(BaseModel):
    """Credentials submitted by a client when logging in."""
    name: str
    password: str


class AccessInfo(BaseModel):
    """Stored access record: a password hash plus the knowledge-base id."""
    hashpass: str
    featureStoreId: str
12 |
--------------------------------------------------------------------------------
/web/model/base.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 | from pydantic import BaseModel, Field
4 |
5 |
class BaseBody(BaseModel):
    """Standard API response envelope: message, business code and payload."""
    msg: str = Field(default='ok')
    msgCode: str = Field(default='10000')  # '10000' is the success code
    data: object = None
10 |
11 |
class Image(Enum):
    """Image kinds recognized by the base64 data-URI detector."""
    INVALID = 'invalid'
    JPG = 'jpeg'
    PNG = 'png'
    BMP = 'bmp'
17 |
18 |
def standard_error_response(error: dict, data=None) -> BaseBody:
    """Build an error response envelope from an ``error`` descriptor.

    :param error: dict carrying ``msg`` and ``code`` keys.
    :param data: optional payload; defaults to an empty dict when omitted.
    :return: BaseBody with the error message/code and the payload.
    """
    # Substitute the default only when data was actually omitted; the previous
    # truthiness check (`if not data`) silently replaced legitimate falsy
    # payloads such as [] or 0 with {}.
    if data is None:
        data = {}
    return BaseBody(msg=error.get('msg'), msgCode=error.get('code'), data=data)
23 |
--------------------------------------------------------------------------------
/web/model/chat.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from typing import List, Optional
3 |
4 | from pydantic import BaseModel, RootModel
5 |
6 | from web.model.huixiangdou import ChatResponse, HxdTaskChatHistory
7 |
8 |
class ChatRequestBody(BaseModel):
    """Payload of a chat request: text, images and prior chat history."""
    content: Optional[str] = ''
    images: Optional[List[str]] = []
    history: Optional[List[HxdTaskChatHistory]] = []


class ChatOnlineResponseBody(BaseModel):
    """Ack for an online chat request; the answer is fetched later by queryId."""
    queryId: str


class ChatType(Enum):
    """Channel a chat query arrived from."""
    LARK = 0
    WECHAT = 1
    ONLINE = 2


class ChatQueryInfo(BaseModel):
    """Full record of one chat query, keyed by feature store and query id."""
    featureStoreId: str
    queryId: str
    type: Optional[ChatType] = ChatType.ONLINE
    request: ChatRequestBody
    response: Optional[ChatResponse] = None
    detail: Optional[object] = {}  # channel-specific extras — schema varies


class ChatCaseType(Enum):
    """Feedback label for an answered query."""
    GOOD_CASE = 'good'
    BAD_CASE = 'bad'


class ChatCaseFeedbackBody(BaseModel):
    """Feedback submission: which query and whether it was good or bad."""
    queryId: str
    type: ChatCaseType


class LarkChatDetail(BaseModel):
    """Lark (Feishu) credentials and the message being replied to."""
    appId: Optional[str] = ''
    appSecret: Optional[str] = ''
    messageId: Optional[str] = ''


class WechatType(Enum):
    """Kind of payload carried by a WeChat request."""
    TEXT = 'text'
    Image = 'image'
    Poll = 'poll'


class WechatQuery(BaseModel):
    """One WeChat message: its type plus optional text content."""
    type: WechatType
    content: Optional[str] = ''


class WechatRequest(BaseModel):
    """Inbound WeChat request relayed by the on-message callback."""
    query_id: Optional[str] = ''
    groupname: Optional[str] = ''
    username: Optional[str] = ''
    # NOTE(review): the default {} is a plain dict, not a WechatQuery —
    # confirm callers always supply `query` for non-poll requests.
    query: Optional[WechatQuery] = {}


class WechatResponse(RootModel):
    """Root-model wrapper so a bare JSON value/list can be returned."""
    root: Optional[object] = []


class WechatPollItem(BaseModel):
    """Pairs a pending WeChat request with its generated answer."""
    req: WechatRequest
    rsp: ChatResponse
75 |
--------------------------------------------------------------------------------
/web/model/huixiangdou.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from typing import List, Optional
3 |
4 | from pydantic import BaseModel
5 |
6 | from web.model.qalib import FilesState
7 |
8 |
class HxdToken(BaseModel):
    """Decoded JWT claims (expiry, issued-at, id, qalib name)."""
    exp: int
    iat: float
    jti: str
    qa_name: str


class HxdTaskChatHistory(BaseModel):
    """One prior conversation turn; `sender` identifies the speaker."""
    sender: int
    content: str


class HxdTaskPayload(BaseModel):
    """Union payload for every task type; unused fields stay at defaults."""
    name: Optional[str] = None
    feature_store_id: Optional[str] = None
    file_list: Optional[List[str]] = []
    file_abs_base: Optional[str] = None
    positive: Optional[List[str]] = []
    negative: Optional[List[str]] = []
    content: Optional[str] = None
    images: Optional[List[str]] = []
    history: Optional[List[HxdTaskChatHistory]] = []
    web_search_token: Optional[str] = None
    query_id: Optional[str] = ''


class HxdTaskType(Enum):
    """Kinds of work pushed onto the HuixiangDou task queue."""
    ADD_DOC = 'add_doc'
    UPDATE_PIPELINE = 'update_pipeline'
    UPDATE_SAMPLE = 'update_sample'
    CHAT = 'chat'


class HxdTask(BaseModel):
    """A queued task: its type plus the matching payload."""
    type: HxdTaskType
    payload: HxdTaskPayload


class HxdTaskResponse(BaseModel):
    """Result reported back for a finished task."""
    feature_store_id: Optional[str] = None
    code: Optional[int] = None
    status: Optional[str] = None
    type: Optional[str] = None
    files_state: Optional[List[FilesState]] = None


class ChatResponse(BaseModel):
    """Answer to a chat query."""
    code: Optional[int] = -1  # stays at -1 until a real code is set
    state: Optional[str] = ''
    text: Optional[str] = ''
    references: Optional[List[str]] = []


class HxdChatResponse(BaseModel):
    """Envelope pairing a chat answer with its store and query ids."""
    feature_store_id: str
    query_id: str
    response: ChatResponse
--------------------------------------------------------------------------------
/web/model/integrate.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from pydantic import BaseModel
4 |
5 |
class IntegrateLarkBody(BaseModel):
    """Lark (Feishu) app credentials submitted to enable the integration."""
    appId: str
    appSecret: str


class IntegrateWebSearchBody(BaseModel):
    """Web-search token (and optional vendor) submitted for integration."""
    webSearchToken: str
    vendor: Optional[str] = ''
14 |
--------------------------------------------------------------------------------
/web/model/qalib.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 |
3 | from pydantic import BaseModel
4 |
5 |
class Lark(BaseModel):
    """Lark (Feishu) app credentials and the callback event URL."""
    appId: Optional[str] = ''
    appSecret: Optional[str] = ''
    encryptKey: str
    verificationToken: str
    eventUrl: str


class Wechat(BaseModel):
    """WeChat integration info: the message callback URL."""
    onMessageUrl: str


class WebSearch(BaseModel):
    """Web-search credential."""
    token: str


class FilesState(BaseModel):
    """Per-file ingestion result: name, success flag and description."""
    file: str
    status: bool
    desc: str


class QalibInfo(BaseModel):
    """Everything stored about one knowledge base (qalib)."""
    featureStoreId: Optional[str] = None
    name: Optional[str] = None
    docs: Optional[List[str]] = []
    docBase: Optional[str] = None
    status: Optional[int] = None
    status_desc: Optional[str] = None
    suffix: Optional[str] = None
    lark: Optional[Lark] = None
    wechat: Optional[Wechat] = None
    webSearch: Optional[WebSearch] = None
    filesState: Optional[List[FilesState]] = None


class QalibPositiveNegative(BaseModel):
    """Optional lists of positive and negative example entries."""
    positives: Optional[List] = None
    negatives: Optional[List] = None


class QalibDeleteDoc(BaseModel):
    """Request body listing document filenames to delete."""
    filenames: List[str]


class QalibSample(QalibPositiveNegative):
    """Samples bound to a named qalib; `confirmed` marks user approval."""
    name: str
    featureStoreId: str
    confirmed: Optional[bool] = False


class Pipeline(BaseModel):
    """Pipeline update state for a qalib."""
    webSearchToken: str
    featureStoreId: str
    confirmed: bool
    success: bool
    code: int
    status: str


class AddDocError(BaseModel):
    """One failed upload: the filename and the failure reason."""
    fileName: Optional[str]
    reason: Optional[str]


class AddDocsRes(BaseModel):
    """Result of an add-docs call: stored docs plus per-file errors."""
    docBase: Optional[str] = ''
    docs: Optional[List[str]] = []
    errors: Optional[List[AddDocError]] = []
75 |
--------------------------------------------------------------------------------
/web/model/statistic.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from pydantic import BaseModel
4 |
5 |
class StatisticTotal(BaseModel):
    """Global usage counters returned by the statistics endpoint."""
    qalibTotal: Optional[int] = None
    lastMonthUsed: Optional[int] = None
    wechatTotal: Optional[int] = None
    feishuTotal: Optional[int] = None
    servedTotal: Optional[int] = None
    realServedTotal: Optional[int] = None
13 |
--------------------------------------------------------------------------------
/web/mq/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/mq/__init__.py
--------------------------------------------------------------------------------
/web/mq/hxd_task.py:
--------------------------------------------------------------------------------
1 | import web.constant.biz_constant as biz_const
2 | from web.model.huixiangdou import HxdTask, HxdTaskType
3 | from web.orm.redis import r
4 | from web.service.cache import ChatCache
5 | from web.util.log import log
6 |
7 | logger = log(__name__)
8 |
9 |
class HuixiangDouTask:
    """Producer side of the HuixiangDou task queue backed by redis."""

    def __init__(self):
        pass

    def updateTask(self, task: HxdTask) -> bool:
        """Push a task onto the redis task list.

        :param task: HxdTask to enqueue.
        :return: True on success; False when the task is empty or the
            redis push fails.
        """
        if not task:
            logger.error("HuixiangDou's task is empty, update task aborted.")
            return False

        # Bookkeeping consumed by the statistics endpoints.
        ChatCache.mark_monthly_active(task.payload.feature_store_id)
        if task.type == HxdTaskType.CHAT:
            ChatCache.add_inference_number()

        try:
            r.rpush(biz_const.RDS_KEY_HXD_TASK, task.model_dump_json())
        except Exception as e:
            # Lazy %-formatting with context instead of a bare f'{e}'.
            logger.error('failed to push task to redis: %s', e)
            return False
        return True
35 |
--------------------------------------------------------------------------------
/web/orm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/orm/__init__.py
--------------------------------------------------------------------------------
/web/orm/redis.py:
--------------------------------------------------------------------------------
1 | import redis
2 |
3 | from web.config.env import HuixiangDouEnv
4 | from web.util.log import log
5 |
6 | logger = log(__name__)
7 |
# Module-level bootstrap: build the shared connection pool and fail fast
# (exit code 1/2) when redis is unreachable. `r` is imported across the
# web package.
logger.info('connecting to redis')
host = HuixiangDouEnv.get_redis_host()
password = HuixiangDouEnv.get_redis_password()
port = HuixiangDouEnv.get_redis_port()
db = HuixiangDouEnv.get_redis_db()
pool = redis.ConnectionPool(host=host, port=port, db=db, password=password)
r = redis.Redis(connection_pool=pool)
try:
    # Dropped the pointless temp and the placeholder-free f-string;
    # the emitted message is unchanged.
    if not r.ping():
        logger.error('Failed connected to redis, exit with code 1')
        exit(1)
except Exception as e:
    logger.error(f'Failed connected to redis, error={e}')
    exit(2)
logger.info('connected to redis')
24 |
--------------------------------------------------------------------------------
/web/proxy/logs/work.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/proxy/logs/work.txt
--------------------------------------------------------------------------------
/web/requirements.txt:
--------------------------------------------------------------------------------
1 | apscheduler==3.10.4
2 | fastapi==0.103.0
3 | flask==3.0.2
4 | lark-oapi==1.2.1
5 | passlib==1.7.4
6 | pydantic==2.4.2
7 | PyJWT==2.8.0
8 | python-multipart==0.0.9
9 | redis==4.5.5
10 | starlette==0.27.0
11 | tqdm==4.65.0
12 | uvicorn==0.27.0
--------------------------------------------------------------------------------
/web/scheduler/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/scheduler/__init__.py
--------------------------------------------------------------------------------
/web/service/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/service/__init__.py
--------------------------------------------------------------------------------
/web/service/message.py:
--------------------------------------------------------------------------------
1 | from fastapi import Request, Response
2 |
3 | from web.model.base import BaseBody
4 | from web.model.chat import WechatRequest
5 | from web.service.agent import LarkAgent, WechatAgent
6 |
7 |
class MessageService:
    """Dispatches inbound IM callbacks (Lark / WeChat) to the agents."""

    def __init__(self, request: Request, response: Response):
        # Raw FastAPI request/response for the current call.
        self.request = request
        self.response = response

    async def on_lark_message(self):
        """Parse a Lark event callback and run the registered event handler."""
        req = await LarkAgent.parse_req(self.request)
        rsp = LarkAgent.get_event_handler().do(req)
        return LarkAgent.parse_rsp(rsp)

    async def on_wechat_message(self, body: WechatRequest, suffix: str):
        """Handle a WeChat callback; wrap non-envelope results in a BaseBody."""
        rsp = WechatAgent.action(body, suffix)
        if isinstance(rsp, BaseBody):
            return rsp
        return BaseBody(data=rsp)
24 |
--------------------------------------------------------------------------------
/web/service/statistic.py:
--------------------------------------------------------------------------------
1 | from fastapi import Request, Response
2 |
3 | import web.constant.biz_constant as biz_const
4 | from web.model.base import BaseBody
5 | from web.model.chat import ChatType
6 | from web.model.statistic import StatisticTotal
7 | from web.orm.redis import r
8 | from web.service.cache import ChatCache
9 | from web.util.log import log
10 |
11 | logger = log(__name__)
12 |
13 |
class StatisticService:
    """Aggregates usage counters from redis for the statistics endpoint."""

    def __init__(self, request: Request, response: Response):
        self.request = request
        self.response = response

    async def info_statistic(self):
        """Collect global usage totals and return them wrapped in a BaseBody."""
        qalib_total = r.hlen(biz_const.RDS_KEY_QALIB_INFO)
        monthly_active = ChatCache.get_monthly_active()
        lark_used = ChatCache.hlen_agent_used(ChatType.LARK)
        wechat_used = ChatCache.hlen_agent_used(ChatType.WECHAT)
        total_inference = ChatCache.get_inference_number()
        unique_user = ChatCache.get_unique_inference_user_number()

        data = StatisticTotal(qalibTotal=qalib_total,
                              lastMonthUsed=monthly_active,
                              wechatTotal=wechat_used,
                              feishuTotal=lark_used,
                              servedTotal=total_inference,
                              realServedTotal=unique_user)
        return BaseBody(data=data)
35 |
--------------------------------------------------------------------------------
/web/tools/README.md:
--------------------------------------------------------------------------------
1 | # **SFT tools have moved to [sft directory](../../sft/)**
2 |
3 | # Devops tools
4 |
5 | - dump_redis_query.py # for web version, dump all question from redis to `query.jsonl`
6 | - update_fs_max_len.py # for web version, update all users' max text length config of remote LLM
7 | - get_puyu_model_list.py # for inner API, get all puyu API model list
8 |
--------------------------------------------------------------------------------
/web/tools/dump_redis_query.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 | from loguru import logger
5 | from redis import Redis
6 |
7 |
def redis_host():
    """Read the redis server address from the environment.

    :raises Exception: when REDIS_HOST is unset or empty.
    """
    configured = os.getenv('REDIS_HOST')
    if not configured:
        raise Exception('REDIS_HOST not config')
    return configured
13 |
14 |
def redis_port():
    """Return the redis port as an int, defaulting to 6379.

    The original returned the raw env string when REDIS_PORT was set but an
    int otherwise; converting once here gives callers one consistent type.
    """
    port = os.getenv('REDIS_PORT')
    if port is None:
        logger.debug('REDIS_PORT not set, try 6379')
        port = 6379
    return int(port)
21 |
22 |
def redis_passwd():
    """Read the redis password from the environment.

    :raises Exception: when REDIS_PASSWORD is unset or empty.
    """
    secret = os.getenv('REDIS_PASSWORD')
    if not secret:
        raise Exception('REDIS_PASSWORD not config')
    return secret
28 |
29 |
def feature_store_base_dir():
    """Root directory holding the per-qalib feature stores."""
    return 'feature_stores'
32 |
33 |
# Connect with decoded (str) responses so hgetall returns str keys/values.
# `charset` is a deprecated redis-py alias for `encoding`; use the real name.
db = Redis(host=redis_host(),
           port=redis_port(),
           password=redis_passwd(),
           encoding='utf-8',
           decode_responses=True)
keys = db.keys('HuixiangDou:query:*')

# Dump every stored query hash as one JSON object per line.
with open('query.jsonl', 'w') as f:
    for key in keys:
        value = db.hgetall(key)
        f.write(json.dumps(value, ensure_ascii=False))
        f.write('\n')
46 |
--------------------------------------------------------------------------------
/web/tools/get_puyu_model_list.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 | import requests
5 |
# Bearer token for the puyu API, taken from the TOKEN env var.
token = os.getenv('TOKEN')

url = 'https://puyu.openxlab.org.cn/puyu/api/v1/models'
header = {'Content-Type': 'application/json', 'Authorization': token}
data = {}

# List all available puyu models and print the raw response.
res = requests.get(url, headers=header, data=json.dumps(data))
print(res.status_code)
print(res.json())
print(res.json()['data'])
16 |
--------------------------------------------------------------------------------
/web/tools/update_fs_max_len.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytoml
4 |
5 |
def read_config_ini_files(directory, max_len=40000):
    """Walk ``directory`` and update every config.ini's remote LLM max length.

    :param directory: root directory to scan recursively.
    :param max_len: value written to
        ``llm.server.remote_llm_max_text_length`` (default 40000, the value
        previously hard-coded here).
    """
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file != 'config.ini':
                continue
            file_path = os.path.join(root, file)
            try:
                # The file is TOML despite its .ini extension.
                with open(file_path, 'r', encoding='utf-8') as f:
                    config = pytoml.load(f)
                print((file_path,
                       config['llm']['server']['remote_llm_max_text_length']))
                config['llm']['server']['remote_llm_max_text_length'] = max_len
                with open(file_path, 'w', encoding='utf-8') as f:
                    pytoml.dump(config, f)
            except Exception as e:
                # Best-effort tool: report and keep scanning.
                print(f'An error occurred while reading {file_path}: {e}')
24 |
25 |
# Directory to crawl for per-qalib config.ini files.
directory_to_crawl = '/root/HuixiangDou/feature_stores'
read_config_ini_files(directory_to_crawl)
29 |
--------------------------------------------------------------------------------
/web/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/util/__init__.py
--------------------------------------------------------------------------------
/web/util/image.py:
--------------------------------------------------------------------------------
1 | from web.model.base import Image
2 |
3 |
def detect_base64_image_suffix(base64: str) -> [Image, str]:
    """Split a data-URI string into its image kind and raw base64 payload.

    Returns ``[Image.INVALID, '']`` for empty input or anything that is not
    a jpeg/png/bmp data URI.
    """
    if not base64:
        return [Image.INVALID, '']

    parts = base64.split('base64,')
    if len(parts) < 2:
        return [Image.INVALID, '']

    # Map the lowercased data-URI prefix to its image kind.
    known_prefixes = {
        'data:image/jpeg;': Image.JPG,
        'data:image/png;': Image.PNG,
        'data:image/bmp;': Image.BMP,
    }
    kind = known_prefixes.get(parts[0].lower())
    if kind is None:
        return [Image.INVALID, '']
    return [kind, parts[1]]
21 |
--------------------------------------------------------------------------------
/web/util/log.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 |
def log(name):
    """Return a configured Logger for ``name``.

    @param name: python file name
    @return: Logger
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    # Only attach a handler the first time this name is configured; the
    # original added a fresh StreamHandler on every call, so repeated
    # log() calls emitted duplicate lines.
    if not logger.handlers:
        formatter = logging.Formatter(
            '%(levelname)s: %(asctime)s - %(module)s-%(funcName)s-line:%(lineno)d - %(message)s'
        )
        ch = logging.StreamHandler()
        ch.setFormatter(formatter)
        logger.addHandler(ch)
    return logger
18 |
19 |
def clear_other_log():
    """Silence every registered logger whose name does not contain 'aoe'."""
    registry = logging.Logger.manager.loggerDict
    for name, entry in registry.items():
        # Placeholders in the registry are not real Logger objects; skip them.
        if isinstance(entry, logging.Logger) and 'aoe' not in name:
            entry.setLevel(logging.CRITICAL)
26 |
27 |
# Quiet third-party loggers at import time, then expose a module logger.
clear_other_log()
logger = log('util')
30 |
--------------------------------------------------------------------------------
/web/util/time_util.py:
--------------------------------------------------------------------------------
1 | #! python3
2 | from datetime import datetime
3 |
4 |
def get_month_time_str(t: datetime) -> str:
    """Render ``t`` as a two-digit year/month key, e.g. '24-03'."""
    # format() with a non-empty spec delegates to datetime.strftime.
    return format(t, '%y-%m')
7 |
--------------------------------------------------------------------------------
/web/web-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/HuixiangDou/4abe626b1ab607e8070785cc9522f6cc083fd3ac/web/web-architecture.png
--------------------------------------------------------------------------------