├── .flake8 ├── .github └── workflows │ ├── deploy_sphinx_docs.yml │ ├── docker-image-arm.yml │ └── docker-image.yml ├── .gitignore ├── .pre-commit-config.yaml ├── Dockerfile ├── DockerfileArm ├── LICENSE ├── README.md ├── README_JP.md ├── README_ZH.md ├── clear-vector-store.py ├── docker-compose.yml ├── docs ├── README.md ├── contribution.md ├── contribution_zh.md ├── images │ ├── framework.png │ └── logo.png ├── installation.md ├── installation_zh.md └── sphinx_doc │ ├── Makefile │ ├── assets │ └── redirect.html │ ├── build_sphinx_doc.sh │ ├── en │ └── source │ │ ├── _static │ │ └── custom.css │ │ ├── _templates │ │ ├── language_selector.html │ │ └── layout.html │ │ ├── conf.py │ │ ├── docs │ │ └── api.rst │ │ ├── index.rst │ │ └── modules.rst │ ├── ja │ └── source │ │ └── index.rst │ ├── requirements.txt │ ├── template │ ├── module.rst_t │ └── package.rst_t │ └── zh │ └── source │ ├── _static │ └── custom.css │ ├── _templates │ ├── language_selector.html │ └── layout.html │ ├── conf.py │ ├── docs │ └── api.rst │ ├── index.rst │ └── modules.rst ├── examples ├── advance │ ├── custom_operator.md │ ├── custom_operator_zh.md │ └── replacement.yaml ├── api │ ├── agentscope_example.md │ ├── agentscope_example.py │ ├── autogen_example.md │ ├── autogen_example.py │ ├── chat_example.py │ ├── simple_usages.ipynb │ └── simple_usages_zh.ipynb ├── cli │ ├── CLI_README.md │ └── CLI_README_ZH.md └── docker │ ├── entrypoint.sh │ └── run_elastic_search.sh ├── memoryscope ├── __init__.py ├── constants │ ├── __init__.py │ ├── common_constants.py │ └── language_constants.py ├── contrib │ ├── example_query_worker.py │ └── example_query_worker.yaml ├── core │ ├── __init__.py │ ├── chat │ │ ├── __init__.py │ │ ├── api_memory_chat.py │ │ ├── base_memory_chat.py │ │ ├── cli_memory_chat.py │ │ └── memory_chat_prompt.yaml │ ├── config │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── config_manager.py │ │ ├── demo_config.yaml │ │ └── demo_config_zh.yaml │ ├── memoryscope.py │ ├── 
memoryscope_context.py │ ├── models │ │ ├── __init__.py │ │ ├── base_model.py │ │ ├── dummy_generation_model.py │ │ ├── llama_index_embedding_model.py │ │ ├── llama_index_generation_model.py │ │ └── llama_index_rank_model.py │ ├── operation │ │ ├── __init__.py │ │ ├── backend_operation.py │ │ ├── base_operation.py │ │ ├── base_workflow.py │ │ ├── consolidate_memory_op.py │ │ └── frontend_operation.py │ ├── service │ │ ├── __init__.py │ │ ├── base_memory_service.py │ │ └── memory_scope_service.py │ ├── storage │ │ ├── __init__.py │ │ ├── base_memory_store.py │ │ ├── base_monitor.py │ │ ├── dummy_memory_store.py │ │ ├── dummy_monitor.py │ │ ├── llama_index_es_memory_store.py │ │ └── llama_index_sync_elasticsearch.py │ ├── utils │ │ ├── __init__.py │ │ ├── datetime_handler.py │ │ ├── logger.py │ │ ├── prompt_handler.py │ │ ├── registry.py │ │ ├── response_text_parser.py │ │ ├── singleton.py │ │ ├── timer.py │ │ └── tool_functions.py │ └── worker │ │ ├── __init__.py │ │ ├── backend │ │ ├── __init__.py │ │ ├── contra_repeat_worker.py │ │ ├── contra_repeat_worker.yaml │ │ ├── get_observation_with_time_worker.py │ │ ├── get_observation_with_time_worker.yaml │ │ ├── get_observation_worker.py │ │ ├── get_observation_worker.yaml │ │ ├── get_reflection_subject_worker.py │ │ ├── get_reflection_subject_worker.yaml │ │ ├── info_filter_worker.py │ │ ├── info_filter_worker.yaml │ │ ├── load_memory_worker.py │ │ ├── long_contra_repeat_worker.py │ │ ├── long_contra_repeat_worker.yaml │ │ ├── update_insight_worker.py │ │ ├── update_insight_worker.yaml │ │ └── update_memory_worker.py │ │ ├── base_worker.py │ │ ├── dummy_worker.py │ │ ├── frontend │ │ ├── __init__.py │ │ ├── extract_time_worker.py │ │ ├── extract_time_worker.yaml │ │ ├── fuse_rerank_worker.py │ │ ├── print_memory_worker.py │ │ ├── print_memory_worker.yaml │ │ ├── read_message_worker.py │ │ ├── retrieve_memory_worker.py │ │ ├── semantic_rank_worker.py │ │ └── set_query_worker.py │ │ ├── memory_base_worker.py │ │ └── 
memory_manager.py ├── enumeration │ ├── __init__.py │ ├── action_status_enum.py │ ├── language_enum.py │ ├── memory_type_enum.py │ ├── message_role_enum.py │ ├── model_enum.py │ └── store_status_enum.py └── scheme │ ├── __init__.py │ ├── memory_node.py │ ├── message.py │ └── model_response.py ├── quick-start-demo.py ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── models ├── test_models_lli_embedding.py ├── test_models_lli_generation.py └── test_models_lli_rank.py ├── other ├── init_test.py ├── read_prompt.yaml ├── read_yaml.py ├── test_attr.py └── test_cli.py ├── storages ├── test_storages_lli_es.py └── test_storages_lli_synces.py └── worker ├── test_workers_cn.py └── test_workers_en.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = tests/*,examples/*,memoryscope/core/storage/llama_index_sync_elasticsearch.py 3 | max-line-length = 120 4 | inline-quotes = " 5 | avoid-escape = no 6 | ignore = -------------------------------------------------------------------------------- /.github/workflows/deploy_sphinx_docs.yml: -------------------------------------------------------------------------------- 1 | name: deploy-sphinx-documentation-to-pages 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize] 6 | paths: 7 | - 'docs/sphinx_doc/**/*' 8 | push: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | pages: 14 | runs-on: ubuntu-20.04 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v4 18 | - name: Setup Python 19 | uses: actions/setup-python@master 20 | with: 21 | python-version: '3.10' 22 | - name: Choose Pandoc 23 | shell: bash 24 | run: | 25 | case $RUNNER_OS in 26 | "Linux") 27 | printf 'INSTALLER_SUFFIX=1-amd64.deb' >> $GITHUB_ENV 28 | ;; 29 | "macOS") 30 | printf 'INSTALLER_SUFFIX=macOS.pkg' >> $GITHUB_ENV 31 | ;; 32 | *) 33 | printf 'Do not know how to install pandoc on %s\n' "$RUNNER_OS" 34 | exit 1 35 | ;; 36 | esac 37 | - name: Download Pandoc 38 | shell: bash 39 
| env: 40 | GITHUB_TOKEN: ${{ github.token }} 41 | REPO: jgm/pandoc 42 | DOWNLOAD_URL: 'https://github.com/jgm/pandoc/releases/download/' 43 | run: | 44 | gh release download ${{ inputs.version }} \ 45 | --repo "$REPO" \ 46 | --pattern '*'${{ env.INSTALLER_SUFFIX }} 47 | printf 'INSTALLER_VERSION=%s' \ 48 | "$(ls pandoc-*-${{ env.INSTALLER_SUFFIX }} | \ 49 | sed 's/pandoc-\([0-9.]*\)-.*/\1/')" \ 50 | >> $GITHUB_ENV 51 | - name: Install Pandoc 52 | shell: bash 53 | env: 54 | INSTALLER: pandoc-${{ env.INSTALLER_VERSION }}-${{ env.INSTALLER_SUFFIX }} 55 | run: | 56 | case $RUNNER_OS in 57 | "Linux") 58 | sudo apt install ./$INSTALLER 59 | ;; 60 | "macOS") 61 | sudo installer -pkg ./$INSTALLER -target '/' 62 | ;; 63 | *) 64 | echo "$RUNNER_OS not supported" 65 | exit 1 66 | ;; 67 | esac 68 | rm $INSTALLER 69 | - name: Install Sphinx Dependencies 70 | run: | 71 | python -m pip install --upgrade pip 72 | pip install -r requirements.txt 73 | pip install -r docs/sphinx_doc/requirements.txt 74 | - name: Build Documentation 75 | run: | 76 | cd docs/sphinx_doc 77 | bash build_sphinx_doc.sh 78 | - name: Upload Documentation 79 | uses: actions/upload-artifact@v3 80 | with: 81 | name: SphinxDoc 82 | path: 'docs/sphinx_doc/build/html' 83 | - name: Push Pages 84 | uses: peaceiris/actions-gh-pages@v3 85 | if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} 86 | with: 87 | github_token: ${{ secrets.GITHUB_TOKEN }} 88 | publish_dir: 'docs/sphinx_doc/build/html' 89 | -------------------------------------------------------------------------------- /.github/workflows/docker-image-arm.yml: -------------------------------------------------------------------------------- 1 | name: build-and-upload-docker-image-arm 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'main' 7 | 8 | env: 9 | REGISTRY: ghcr.io 10 | IMAGE_NAME: ${{ github.repository }}_arm 11 | 12 | 13 | jobs: 14 | build-and-push-image: 15 | runs-on: ubuntu-latest 16 | permissions: 17 | contents: read 18 | packages: 
write 19 | 20 | steps: 21 | - name: Set up QEMU 22 | uses: docker/setup-qemu-action@v3 23 | 24 | - name: Set up Docker Buildx 25 | uses: docker/setup-buildx-action@v3 26 | 27 | - name: Checkout repository 28 | uses: actions/checkout@v4 29 | 30 | - name: Log in to the Container registry 31 | uses: docker/login-action@v3 32 | with: 33 | registry: ${{ env.REGISTRY }} 34 | username: ${{ github.actor }} 35 | password: ${{ secrets.GITHUB_TOKEN }} 36 | 37 | - name: Extract metadata (tags, labels) for Docker 38 | id: meta 39 | uses: docker/metadata-action@v4 40 | with: 41 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 42 | 43 | - name: Build and push Docker image 44 | uses: docker/build-push-action@v6 45 | with: 46 | context: . 47 | push: true 48 | platforms: linux/arm64 49 | file: DockerfileArm 50 | tags: ${{ steps.meta.outputs.tags }} 51 | labels: ${{ steps.meta.outputs.labels }} -------------------------------------------------------------------------------- /.github/workflows/docker-image.yml: -------------------------------------------------------------------------------- 1 | # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages 2 | name: build-and-upload-docker-image 3 | 4 | on: 5 | push: 6 | branches: 7 | - 'main' 8 | 9 | env: 10 | REGISTRY: ghcr.io 11 | IMAGE_NAME: ${{ github.repository }} 12 | 13 | jobs: 14 | build-and-push-image: 15 | runs-on: ubuntu-latest 16 | permissions: 17 | contents: read 18 | packages: write 19 | 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v3 23 | 24 | - name: Log in to the Container registry 25 | uses: docker/login-action@v2 26 | with: 27 | registry: ${{ env.REGISTRY }} 28 | username: ${{ github.actor }} 29 | password: ${{ secrets.GITHUB_TOKEN }} 30 | 31 | - name: Extract metadata (tags, labels) for Docker 32 | id: meta 33 | uses: docker/metadata-action@v4 34 | with: 35 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 36 | 37 | - name: 
Build and push Docker image 38 | uses: docker/build-push-action@v4 39 | with: 40 | context: . 41 | push: true 42 | file: Dockerfile 43 | tags: ${{ steps.meta.outputs.tags }} 44 | labels: ${{ steps.meta.outputs.labels }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .idea/ 132 | 133 | # macOS 134 | .DS_Store 135 | 136 | # vscode 137 | .vscode 138 | 139 | # docs 140 | docs/sphinx_doc/build/ 141 | 142 | # Used to save loggings and files 143 | *runs/ 144 | memoryscope.db 145 | tmp*.json 146 | tmp*.py 147 | cradle* 148 | 149 | # sphinx docs 150 | 151 | memoryscope*.rst -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.3.0 4 | hooks: 5 | - id: check-ast 6 | - id: check-yaml 7 | - id: check-xml 8 | - id: check-toml 9 | - id: check-docstring-first 10 | - id: check-json 11 | - id: detect-private-key 12 | - id: trailing-whitespace 13 | exclude: (README\.md|README_ZH\.md|README_JP\.md)$ 14 | - id: end-of-file-fixer 15 | files: \.py$ 16 | - id: check-merge-conflict 17 | - id: check-symlinks 18 | - id: mixed-line-ending 19 | - repo: https://github.com/PyCQA/flake8 20 | rev: 6.1.0 21 | hooks: 22 | - id: flake8 23 | - repo: https://github.com/pappasam/toml-sort 24 | rev: v0.23.1 25 | hooks: 26 | - 
id: toml-sort-fix 27 | - repo: https://github.com/srstevenson/nb-clean 28 | rev: 3.1.0 29 | hooks: 30 | - id: nb-clean 31 | args: [ --preserve-cell-outputs, --remove-empty-cells ] 32 | - repo: https://github.com/codespell-project/codespell 33 | rev: v2.2.6 34 | hooks: 35 | - id: codespell 36 | additional_dependencies: [ tomli ] 37 | exclude: | 38 | poetry.lock| 39 | (\/.*?\.[\w:]+)/pyproject.toml| 40 | (\/.*?\.[\w:]+)/poetry.lock 41 | args: 42 | [ 43 | "--ignore-words-list", 44 | "astroid,gallary,momento,narl,ot,rouge,nin,gere,asend,ans,thur", 45 | ] 46 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # __ __ ____ 2 | # | \/ | ___ _ __ ___ ___ _ __ _ _/ ___| ___ ___ _ __ ___ 3 | # | |\/| |/ _ \ '_ ` _ \ / _ \| '__| | | \___ \ / __/ _ \| '_ \ / _ \ 4 | # | | | | __/ | | | | | (_) | | | |_| |___) | (_| (_) | |_) | __/ 5 | # |_| |_|\___|_| |_| |_|\___/|_| \__, |____/ \___\___/| .__/ \___| 6 | # |___/ |_| 7 | 8 | # Instruction 9 | 10 | # To construct docker image: 11 | # sudo docker build --network=host -t memoryscope . 
12 | 13 | # To run docker image: 14 | # sudo docker run -it --rm --memory=4G --net=host memoryscope 15 | # To run docker image with arguments (refer to memoryscope/core/config/arguments.py): 16 | # sudo docker run -it --rm --memory=4G --net=host -e "OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" -e "language=en" -e "human_name=superman" -e "generation_backend=openai_generation" -e "generation_model=gpt-4o" -e "embedding_backend=openai_embedding" -e "embedding_model=text-embedding-3-small" -e "enable_ranker=False" memoryscope 17 | 18 | FROM python:3.11 19 | 20 | # (Not necessary) Change pip source 21 | RUN echo '[global]' > /etc/pip.conf && \ 22 | echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \ 23 | echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf 24 | 25 | # Install Elastic Search 26 | RUN useradd -m elastic_search_user 27 | USER elastic_search_user 28 | WORKDIR /home/elastic_search_user/elastic_search 29 | # COPY elasticsearch-8.15.0-linux-x86_64.tar.gz ./elasticsearch-8.15.0-linux-x86_64.tar.gz 30 | RUN wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.15.0-linux-x86_64.tar.gz 31 | RUN tar -xzf elasticsearch-8.15.0-linux-x86_64.tar.gz 32 | WORKDIR /home/elastic_search_user/elastic_search/elasticsearch-8.15.0 33 | ENV DISCOVERY_TYPE=single-node \ 34 | XPACK_SECURITY_ENABLED=false \ 35 | XPACK_LICENSE_SELF_GENERATED_TYPE=trial 36 | 37 | # Change user back to root and fix ownership 38 | USER root 39 | RUN chown -R elastic_search_user:elastic_search_user /home/elastic_search_user/ 40 | WORKDIR /memory_scope_project 41 | 42 | # (Not necessary) Install the majority of deps, using docker build cache to accelerate future building 43 | COPY requirements.txt ./ 44 | RUN pip3 install -r requirements.txt 45 | 46 | # Enter working dir 47 | WORKDIR /memory_scope_project 48 | COPY . . 
49 | # RUN pip3 install poetry 50 | # RUN poetry install 51 | RUN pip3 install -r requirements.txt 52 | 53 | # Launch! 54 | # CMD ["bash"] 55 | CMD ["bash", "examples/docker/entrypoint.sh"] -------------------------------------------------------------------------------- /DockerfileArm: -------------------------------------------------------------------------------- 1 | # __ __ ____ 2 | # | \/ | ___ _ __ ___ ___ _ __ _ _/ ___| ___ ___ _ __ ___ 3 | # | |\/| |/ _ \ '_ ` _ \ / _ \| '__| | | \___ \ / __/ _ \| '_ \ / _ \ 4 | # | | | | __/ | | | | | (_) | | | |_| |___) | (_| (_) | |_) | __/ 5 | # |_| |_|\___|_| |_| |_|\___/|_| \__, |____/ \___\___/| .__/ \___| 6 | # |___/ |_| 7 | 8 | # Instruction 9 | 10 | # To construct docker image: 11 | # sudo docker build --network=host -t memoryscope . 12 | 13 | # To run docker image: 14 | # sudo docker run -it --rm --memory=4G --net=host memoryscope 15 | # To run docker image with arguments (refer to memoryscope/core/config/arguments.py): 16 | # sudo docker run -it --rm --memory=4G --net=host -e "OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" -e "language=en" -e "human_name=superman" -e "generation_backend=openai_generation" -e "generation_model=gpt-4o" -e "embedding_backend=openai_embedding" -e "embedding_model=text-embedding-3-small" -e "enable_ranker=False" memoryscope 17 | #docker run -it --rm ghcr.io/modelscope/memoryscope_arm /bin/bash 18 | FROM python:3.11 19 | 20 | # (Not necessary) Change pip source 21 | RUN echo '[global]' > /etc/pip.conf && \ 22 | echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \ 23 | echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf 24 | 25 | # Install Elastic Search 26 | RUN useradd -m elastic_search_user 27 | USER elastic_search_user 28 | WORKDIR /home/elastic_search_user/elastic_search 29 | RUN wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.15.2-linux-aarch64.tar.gz 30 | RUN tar -xzf 
elasticsearch-8.15.2-linux-aarch64.tar.gz 31 | RUN mv /home/elastic_search_user/elastic_search/elasticsearch-8.15.2 /home/elastic_search_user/elastic_search/elasticsearch-8.15.0 32 | WORKDIR /home/elastic_search_user/elastic_search/elasticsearch-8.15.0 33 | ENV DISCOVERY_TYPE=single-node \ 34 | XPACK_SECURITY_ENABLED=false \ 35 | XPACK_LICENSE_SELF_GENERATED_TYPE=trial 36 | 37 | # Change user back to root and fix ownership 38 | USER root 39 | RUN chown -R elastic_search_user:elastic_search_user /home/elastic_search_user/ 40 | WORKDIR /memory_scope_project 41 | 42 | # (Not necessary) Install the majority of deps, using docker build cache to accelerate future building 43 | COPY requirements.txt ./ 44 | RUN pip3 install -r requirements.txt 45 | 46 | # Enter working dir 47 | WORKDIR /memory_scope_project 48 | COPY . . 49 | # RUN pip3 install poetry 50 | # RUN poetry install 51 | RUN pip3 install -r requirements.txt 52 | 53 | # Launch! 54 | # CMD ["bash"] 55 | CMD ["bash", "examples/docker/entrypoint.sh"] 56 | 57 | -------------------------------------------------------------------------------- /README_JP.md: -------------------------------------------------------------------------------- 1 | [**English**](./README.md) | [**中文**](./README_ZH.md) | 日本語 2 | 3 | # MemoryScope 4 |

5 | MemoryScopeLogo 6 |

7 | あなたのLLMチャットボットに強力で柔軟な長期記憶システムを装備しましょう。 8 | 9 | [![](https://img.shields.io/badge/python-3.10+-blue)](https://pypi.org/project/memoryscope/) 10 | [![](https://img.shields.io/badge/pypi-v0.1.1.0-blue?logo=pypi)](https://pypi.org/project/memoryscope/) 11 | [![](https://img.shields.io/badge/license-Apache--2.0-black)](./LICENSE) 12 | [![](https://img.shields.io/badge/Docs-English%7C%E4%B8%AD%E6%96%87-blue?logo=markdown)](https://modelscope.github.io/MemoryScope/en/index.html#welcome-to-memoryscope-tutorial) 13 | [![](https://img.shields.io/badge/Docs-API_Reference-blue?logo=markdown)](https://modelscope.github.io/MemoryScope/en/docs/api.html) 14 | [![](https://img.shields.io/badge/Contribute-Welcome-green)](https://modelscope.github.io/MemoryScope/en/docs/contribution.html) 15 | 16 | ---- 17 | ## 📰 ニュース 18 | 19 | - **[2024-09-10]** MemoryScope v0.1.1.0をリリースしました。 [PyPI](https://pypi.org/simple/memoryscope/)でも入手可能です! 20 | ---- 21 | ## 🌟 MemoryScopeとは? 22 | MemoryScopeは、LLMチャットボットに強力で柔軟な長期記憶能力を提供し、その能力を構築するためのフレームワークを提供します。 23 | 個人アシスタントや感情的な伴侶などのシナリオに適用でき、長期記憶を通じてユーザーの基本情報やさまざまな習慣や好みを覚え続けることができます。 24 | これにより、ユーザーはLLMを使用する際に徐々に「理解されている」感覚を体験することができます。 25 | 26 | ### デモ 27 |

28 | en_demo 29 |

30 | 31 | ### フレームワーク 32 |

33 | Framework 34 |

35 | 36 | 💾 メモリデータベース: MemoryScopeは、システム内に記録されたすべての記憶片を保存するためのベクトルデータベース(デフォルトは*ElasticSearch*)を備えています。 37 | 38 | 🔧 ワーカーライブラリ: MemoryScopeは、長期記憶の能力を個々のワーカーに原子化し、クエリ情報のフィルタリング、観察の抽出、洞察の更新など、20以上のワーカーを含みます。 39 | 40 | 🛠️ オペレーションライブラリ: ワーカーパイプラインに基づいて、メモリサービスのオペレーションを構築し、メモリの取得やメモリの統合などの主要な機能を実現します。 41 | 42 | - メモリの取得: ユーザークエリが到着すると、この操作は意味的に関連する記憶片を返します。 43 | クエリが時間に言及している場合は、対応する時間の記憶片も返します。 44 | - メモリの統合: この操作は、一連のユーザークエリを受け取り、クエリから抽出された重要なユーザー情報を統合された*観察*としてメモリデータベースに保存します。 45 | - 反映と再統合: 定期的に、この操作は新たに記録された*観察*を反映し、*洞察*を形成および更新します。 46 | その後、メモリの再統合を実行して、記憶片間の矛盾や重複が適切に処理されるようにします。 47 | 48 | ⚙️ ベストプラクティス: 49 | 50 | - MemoryScopeは、長期記憶のコア機能に基づいて、長期記憶を持つ対話インターフェース(API)と長期記憶を持つコマンドライン対話の実践(CLI)を実装しています。 51 | - MemoryScopeは、現在人気のあるエージェントフレームワーク(AutoGen、AgentScope)を組み合わせて、ベストプラクティスを提供します。 52 | 53 | ### 主な特徴 54 | 55 | ⚡ 低い応答時間(RT): 56 | - システム内のバックエンド操作(メモリの統合、反映と再統合)は、フロントエンド操作(メモリの取得)と分離されています。 57 | - バックエンド操作は通常(および推奨される)キューに入れられるか、定期的に実行されるため、システムのユーザー応答時間(RT)はフロントエンド操作のみに依存し、約500ミリ秒です。 58 | 59 | 🌲 階層的で一貫性のある記憶: 60 | - システムに保存される記憶片は階層構造になっており、*洞察*は同様のテーマの*観察*の集約から得られる高レベルの情報です。 61 | - 記憶片間の矛盾や重複は定期的に処理され、一貫性が保たれます。 62 | - ユーザーの虚偽の内容はフィルタリングされ、LLMの幻覚を避けることができます。 63 | 64 | ⏰ 時間感覚: 65 | - メモリの取得とメモリの統合を実行する際に時間感覚があり、クエリが時間に言及している場合に正確な関連情報を取得できます。 66 | 67 | ---- 68 | 69 | ## 💼 サポートされているモデルAPI 70 | 71 | | バックエンド | タスク | サポートされているモデルの一部 | 72 | |-------------------|------------|------------------------------------------------------------------------| 73 | | openai_backend | Generation | gpt-4o, gpt-4o-mini, gpt-4, gpt-3.5-turbo | 74 | | | Embedding | text-embedding-ada-002, text-embedding-3-large, text-embedding-3-small | 75 | | dashscope_backend | Generation | qwen-max, qwen-plus, qwen-plus, qwen2-72b-instruct | 76 | | | Embedding | text-embedding-v1, text-embedding-v2 | 77 | | | Reranker | gte-rerank | 78 | 79 | 将来的には、より多くのモデルインターフェースとローカルデプロイメントのLLMおよび埋め込みサービスをサポートする予定です。 80 | 81 | ## 🚀 インストール 82 | インストール方法については、[Installation.md](docs/installation.md)を参照してください。 83 | 84 | 
## 🍕 クイックスタート 85 | - [簡単な使用法(クイックスタート)](./examples/api/simple_usages.ipynb) 86 | - [AutoGenとの連携](./examples/api/autogen_example.md) 87 | - [MemoryScopeチャットボットとのCLI](./examples/cli/README.md) 88 | - [高度なカスタマイズ](./examples/advance/custom_operator.md) 89 | 90 | ## 💡 貢献 91 | 92 | 貢献は常に奨励されています! 93 | 94 | プルリクエストをコミットする前に、このリポジトリにpre-commitフックをインストールすることを強くお勧めします。 95 | これらのフックは、gitコミットを行うたびに実行される小さなハウスキーピングスクリプトであり、フォーマットとリンティングを自動的に処理します。 96 | ```shell 97 | pip install -e . 98 | pre-commit install 99 | ``` 100 | 101 | 詳細については、[貢献ガイド](./docs/contribution.md)を参照してください。 102 | 103 | ## 📖 引用 104 | 105 | MemoryScopeを論文で使用する場合は、以下の引用を追加してください: 106 | 107 | ``` 108 | @software{MemoryScope, 109 | author = {Li Yu and 110 | Tiancheng Qin and 111 | Qingxu Fu and 112 | Sen Huang and 113 | Xianzhe Xu and 114 | Zhaoyang Liu and 115 | Boyin Liu}, 116 | month = {09}, 117 | title = {{MemoryScope}}, 118 | url = {https://github.com/modelscope/MemoryScope}, 119 | year = {2024} 120 | } 121 | ``` 122 | -------------------------------------------------------------------------------- /clear-vector-store.py: -------------------------------------------------------------------------------- 1 | """ 2 | Warning! 3 | 4 | This script purges the entire vector store ! 
5 | 6 | """ 7 | 8 | from memoryscope import MemoryScope, Arguments 9 | 10 | arguments = Arguments( 11 | language="en", 12 | human_name="user", 13 | assistant_name="AI", 14 | memory_chat_class="api_memory_chat", 15 | generation_backend="openai_generation", 16 | generation_model="gpt-4o", 17 | embedding_backend="openai_embedding", 18 | embedding_model="text-embedding-3-small", 19 | enable_ranker=False, 20 | ) 21 | 22 | ms = MemoryScope(arguments=arguments) 23 | es_store = ms.context.memory_store.es_store 24 | es_store.sync_delete_all() 25 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | memory_scope_main: 3 | image: ghcr.io/modelscope/memoryscope:main 4 | # image: ghcr.io/modelscope/memoryscope_arm:main # For ARM architecture 5 | environment: 6 | DASHSCOPE_API_KEY: "sk-0000000000" 7 | # OPENAI_API_KEY: "sk-0000000000" 8 | volumes: 9 | - ./memoryscope/core/config:/memory_scope_project/memoryscope/memoryscope/core/config 10 | deploy: 11 | resources: 12 | limits: 13 | memory: 4G 14 | stdin_open: true 15 | tty: true 16 | # Please execute `docker compose run memory_scope_main` instead of `docker compose up` 17 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # MemoryScope Documentation 2 | 3 | ## Build Documentation 4 | 5 | Please use the following commands to build sphinx doc of MemoryScope. 
6 | 7 | ```shell 8 | # step 1: Install dependencies 9 | pip install sphinx sphinx-autobuild sphinx_rtd_theme myst-parser sphinxcontrib-mermaid 10 | 11 | # step 2: go into the sphinx_doc dir 12 | cd docs/sphinx_doc 13 | 14 | # step 3: build the sphinx doc 15 | ./build_sphinx_doc.sh 16 | 17 | # step 4: view sphinx_doc/build/html/index.html using your browser 18 | cd docs/sphinx_doc/build/html && python -m http.server 8899 19 | ``` 20 | -------------------------------------------------------------------------------- /docs/contribution.md: -------------------------------------------------------------------------------- 1 | # Contribute to MemoryScope 2 | Our community thrives on the diverse ideas and contributions of its members. Whether you're fixing a bug, adding a new feature, improving the documentation, or adding examples, your help is welcome. Here's how you can contribute: 3 | ## Report Bugs and Ask For New Features? 4 | Did you find a bug or have a feature request? Please first check the issue tracker to see if it has already been reported. If not, feel free to open a new issue. Include as much detail as possible: 5 | - A descriptive title 6 | - Clear description of the issue 7 | - Steps to reproduce the problem 8 | - Version of the MemoryScope you are using 9 | - Any relevant code snippets or error messages 10 | ## Contribute to Codebase 11 | ### Fork and Clone the Repository 12 | To work on an issue or a new feature, start by forking the MemoryScope repository and then cloning your fork locally. 13 | ```bash 14 | git clone https://github.com/your-username/memoryscope.git 15 | cd memoryscope 16 | ``` 17 | ### Create a New Branch 18 | Create a new branch for your work. This helps keep proposed changes organized and separate from the `main` branch. 19 | ```bash 20 | git checkout -b your-feature-branch-name 21 | ``` 22 | ### Making Changes 23 | With your new branch checked out, you can now make your changes to the code. 
Remember to keep your changes as focused as possible. If you're addressing multiple issues or features, it's better to create separate branches and pull requests for each. 24 | We provide a developer version with additional `pre-commit` hooks to perform format checks compared to the official version: 25 | ```bash 26 | # Install the developer version 27 | pip install -e . 28 | # Install pre-commit hooks 29 | pre-commit install 30 | ``` 31 | ### Commit Your Changes 32 | Once you've made your changes, it's time to commit them. Write clear and concise commit messages that explain your changes. 33 | ```bash 34 | git add -A 35 | git commit -m "A brief description of the changes" 36 | ``` 37 | You might get some error messages raised by `pre-commit`. Please resolve them according to the error code and commit again. 38 | ### Submit a Pull Request 39 | When you're ready for feedback, submit a pull request to the MemoryScope `main` branch. In your pull request description, explain the changes you've made and any other relevant context. 40 | We will review your pull request. This process might involve some discussion, additional changes on your part, or both. 41 | ### Code Review 42 | Wait for us to review your pull request. We may suggest some changes or improvements. Keep an eye on your GitHub notifications and be responsive to any feedback. 
43 | -------------------------------------------------------------------------------- /docs/contribution_zh.md: -------------------------------------------------------------------------------- 1 | # 贡献到MemoryScope 2 | 我们的社区因其成员的多样化思想和贡献而兴旺发展。无论是修复一个错误,添加一个新功能,改进文档,还是添加示例,我们都欢迎您的帮助。以下是您做出贡献的方法: 3 | ## 报告错误和提出新功能 4 | 当您发现一个错误或者有一个功能请求,请首先检查问题跟踪器,查看它是否已经被报告。如果没有,随时可以开设一个新的问题。请包含尽可能多的细节: 5 | - 简明扼要的标题 6 | - 清晰地描述问题 7 | - 提供重现问题的步骤 8 | - 提供所使用的MemoryScope版本 9 | - 提供所有相关代码片段或错误信息 10 | ## 对代码库做出贡献 11 | ### Fork和Clone仓库 12 | 要处理一个问题或新功能,首先要Fork仓库,然后将你的Fork克隆到本地。 13 | ```bash 14 | git clone git@github.com:modelscope/MemoryScope.git 15 | cd MemoryScope 16 | ``` 17 | ### 创建一个新分支 18 | 为您的工作创建一个新分支。这有助于保持拟议更改的组织性,并与`main`分支分离。 19 | ```bash 20 | git checkout -b your-feature-branch-name 21 | ``` 22 | ### 做出修改 23 | 我们非常推荐每一个贡献者在代码提交前,安装`pre-commit`钩子工具, 24 | 能够帮助在每一次git提交的时候,进行自动化的代码格式校验。 25 | ```bash 26 | # 安装开发者版本 27 | pip install -e . 28 | # 安装 pre-commit 钩子 29 | pre-commit install 30 | ``` 31 | 32 | ### 提交您的修改 33 | 34 | 修改完成之后就是提交它们的时候了。请提供清晰而简洁的提交信息,以解释您的修改内容。 35 | 36 | ```bash 37 | git add -A 38 | git commit -m "修改内容的简要描述" 39 | ``` 40 | 41 | 运行时您可能会收到 `pre-commit` 给出的错误信息。请根据错误信息修改您的代码然后再次提交。 42 | 43 | ### 提交 Pull Request 44 | 45 | 当您准备好您的修改分支后,向MemoryScope的 `main` 分支提交一个Pull Request。在您的Pull Request描述中,解释您所做的修改以及其他相关的信息。 46 | 47 | 我们将审查您的Pull Request。这个过程可能涉及一些讨论以及额外的代码修改。 48 | 49 | ### 代码审查 50 | 51 | 等待我们审核您的Pull Request。我们可能会提供一些更改或改进建议。请留意您的GitHub通知,并对反馈做出响应。 -------------------------------------------------------------------------------- /docs/images/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modelscope/MemoryScope/715b75e520b1029885647afc6b37419182ca94ce/docs/images/framework.png -------------------------------------------------------------------------------- /docs/images/logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/modelscope/MemoryScope/715b75e520b1029885647afc6b37419182ca94ce/docs/images/logo.png -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installing MemoryScope 2 | 3 | ## I. Install with docker [Recommended] [x86_64] 4 | 5 | 1. Clone the repository and edit settings 6 | ```bash 7 | # clone project 8 | git clone https://github.com/modelscope/memoryscope 9 | cd memoryscope 10 | # edit configuration, e.g. add api keys 11 | vim memoryscope/core/config/demo_config.yaml 12 | ``` 13 | 14 | 2. Build Docker image 15 | ```bash 16 | sudo docker build --network=host -t memoryscope . 17 | ``` 18 | If you are using arm-based computers, modify command above into: `sudo docker build -f DockerfileArm --network=host -t memoryscope .` 19 | 20 | 3. Launch Docker container 21 | ```bash 22 | sudo docker run -it --rm --net=host memoryscope 23 | ``` 24 | 25 | > [!Important] 26 | > To inspect memory shift during the conversation, modify command in step 3 to `sudo docker run -it --name=memoryscope_container --rm --net=host memoryscope`;
27 | > Then start a new terminal window and execute `sudo docker exec -it memoryscope_container python quick-start-demo.py --config_path=memoryscope/core/config/demo_config.yaml`;
28 | > In the second window, input `/list_memory refresh_time=5` to inspect memory 29 | 30 | ## II. Install with docker compose [Recommended] [x86_64] 31 | 32 | 1. Clone the repository and edit settings 33 | ```bash 34 | # clone project 35 | git clone https://github.com/modelscope/memoryscope 36 | cd memoryscope 37 | # edit configuration, e.g. add api keys 38 | vim memoryscope/core/config/demo_config.yaml 39 | ``` 40 | 41 | 2. Edit `docker-compose.yml` to change environment variable. 42 | ``` 43 | OPENAI_API_KEY: "sk-0000000000" 44 | ``` 45 | 46 | 3. Run `docker-compose run memory_scope_main` to build and launch the memory-scope cli interface. (For ARM architecture, you should edit `docker-compose.yml`, changing `image: ghcr.io/modelscope/memoryscope:main` to `image: ghcr.io/modelscope/memoryscope_arm:main`) 47 | 48 | 49 | ## III. Install from PyPI 50 | 51 | 1. Install from PyPI 52 | ```bash 53 | pip install memoryscope 54 | ``` 55 | 56 | 2. Run Elasticsearch service, refer to [elasticsearch documents](https://www.elastic.co/guide/en/elasticsearch/reference/current/getting-started.html). 57 | The docker method is recommended: 58 | ``` 59 | sudo docker run -p 9200:9200 \ 60 | -e "discovery.type=single-node" \ 61 | -e "xpack.security.enabled=false" \ 62 | -e "xpack.license.self_generated.type=trial" \ 63 | docker.elastic.co/elasticsearch/elasticsearch:8.13.2 64 | ``` 65 | 66 | 3. Test Chinese / Dashscope Configuration 67 | ```bash 68 | export DASHSCOPE_API_KEY="sk-0000000000" 69 | memoryscope --language="cn" \ 70 | --memory_chat_class="cli_memory_chat" \ 71 | --human_name="用户" \ 72 | --assistant_name="AI" \ 73 | --generation_backend="dashscope_generation" \ 74 | --generation_model="qwen-max" \ 75 | --embedding_backend="dashscope_embedding" \ 76 | --embedding_model="text-embedding-v2" \ 77 | --enable_ranker=True \ 78 | --rank_backend="dashscope_rank" \ 79 | --rank_model="gte-rerank" 80 | ``` 81 | 82 | 4. 
Test English / OpenAI Configuration 83 | ```bash 84 | export OPENAI_API_KEY="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" 85 | memoryscope --language="en" \ 86 | --memory_chat_class="cli_memory_chat" \ 87 | --human_name="User" \ 88 | --assistant_name="AI" \ 89 | --generation_backend="openai_generation" \ 90 | --generation_model="gpt-4o" \ 91 | --embedding_backend="openai_embedding" \ 92 | --embedding_model="text-embedding-3-small" \ 93 | --enable_ranker=False 94 | ``` 95 | 96 | ## IV. Install from source 97 | 98 | 1. Clone the repository and edit settings 99 | ```bash 100 | # clone project 101 | git clone https://github.com/modelscope/memoryscope 102 | cd memoryscope 103 | # edit configuration, e.g. add api keys 104 | vim memoryscope/core/config/demo_config.yaml 105 | ``` 106 | 107 | 2. Install 108 | ```bash 109 | pip install -e . 110 | ``` 111 | 112 | 3. Run Elasticsearch service, refer to [elasticsearch documents](https://www.elastic.co/guide/en/elasticsearch/reference/current/getting-started.html). 113 | The docker method is recommended: 114 | ``` 115 | sudo docker run -p 9200:9200 \ 116 | -e "discovery.type=single-node" \ 117 | -e "xpack.security.enabled=false" \ 118 | -e "xpack.license.self_generated.type=trial" \ 119 | docker.elastic.co/elasticsearch/elasticsearch:8.13.2 120 | ``` 121 | 122 | 4. Launch memoryscope, also refer to [cli documents](../examples/cli/CLI_README.md) 123 | ```bash 124 | export OPENAI_API_KEY="sk-0000000000" 125 | python quick-start-demo.py --config_path=memoryscope/core/config/demo_config_zh.yaml 126 | ``` 127 | -------------------------------------------------------------------------------- /docs/installation_zh.md: -------------------------------------------------------------------------------- 1 | # MemoryScope 安装指南 2 | 3 | ## 一、使用 Docker 安装 [推荐] 4 | 5 | 1. 
克隆仓库并编辑配置 6 | ```bash 7 | # 克隆项目 8 | git clone https://github.com/modelscope/memoryscope 9 | cd memoryscope 10 | # 编辑配置,例如添加 API 密钥 11 | vim memoryscope/core/config/demo_config_zh.yaml 12 | ``` 13 | 14 | 2. 构建 Docker 镜像 15 | ```bash 16 | sudo docker build --network=host -t memoryscope . 17 | ``` 18 | 备注:如果是arm架构的电脑,则必须使用另一个命令:`sudo docker build -f DockerfileArm --network=host -t memoryscope .` 19 | 20 | 3. 启动 Docker 容器 21 | ```bash 22 | sudo docker run -it --rm --net=host memoryscope 23 | ``` 24 | 25 | 26 | > [!Important] 27 | > 如果需要观察Memory的变化请调整第3步的运行命令。首先执行 `sudo docker run -it --name=memoryscope_container --rm --net=host memoryscope`启动memoryscope;
28 | > 然后新建命令行窗口,运行`sudo docker exec -it memoryscope_container python quick-start-demo.py --config_path=memoryscope/core/config/demo_config_zh.yaml`;
29 | > 在第二个窗口,继续输入`/list_memory refresh_time=5`来检查实时的memory 30 | 31 | ## 二、使用 Docker Compose 安装 [推荐] [x86_64] 32 | 33 | 1. 克隆仓库并编辑配置 34 | ```bash 35 | # 克隆项目 36 | git clone https://github.com/modelscope/memoryscope 37 | cd memoryscope 38 | # 编辑配置,例如添加 API 密钥 39 | vim memoryscope/core/config/demo_config_zh.yaml 40 | ``` 41 | 42 | 2. 编辑 `docker-compose.yml` 文件以更改环境变量。 43 | ``` 44 | DASHSCOPE_API_KEY: "sk-0000000000" 45 | ``` 46 | 47 | 3. 运行 `docker-compose run memory_scope_main` 命令来构建并启动 MemoryScope CLI 界面。(备注:如果是arm架构,还需要手动将docker-compose.yml中的`ghcr.io/modelscope/memoryscope:main`修改成`ghcr.io/modelscope/memoryscope_arm:main`) 48 | 49 | 50 | ## 三、通过 PYPI 安装 51 | 52 | 1. 从 PyPI 安装: 53 | ```bash 54 | pip install memoryscope 55 | ``` 56 | 57 | 2. 运行 Elasticsearch 服务,参照 [Elasticsearch 文档](https://www.elastic.co/guide/cn/elasticsearch/reference/current/getting-started.html)。 58 | 推荐使用 Docker 方法: 59 | ``` 60 | sudo docker run -p 9200:9200 \ 61 | -e "discovery.type=single-node" \ 62 | -e "xpack.security.enabled=false" \ 63 | -e "xpack.license.self_generated.type=trial" \ 64 | docker.elastic.co/elasticsearch/elasticsearch:8.13.2 65 | ``` 66 | 67 | 3. 测试中文 / Dashscope 对话配置: 68 | ```bash 69 | export DASHSCOPE_API_KEY="sk-0000000000" 70 | memoryscope --language="cn" \ 71 | --memory_chat_class="cli_memory_chat" \ 72 | --human_name="用户" \ 73 | --assistant_name="AI" \ 74 | --generation_backend="dashscope_generation" \ 75 | --generation_model="qwen-max" \ 76 | --embedding_backend="dashscope_embedding" \ 77 | --embedding_model="text-embedding-v2" \ 78 | --enable_ranker=True \ 79 | --rank_backend="dashscope_rank" \ 80 | --rank_model="gte-rerank" 81 | ``` 82 | 83 | 4. 
测试英文 / OpenAI 对话配置: 84 | ```bash 85 | export OPENAI_API_KEY="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" 86 | memoryscope --language="en" \ 87 | --memory_chat_class="cli_memory_chat" \ 88 | --human_name="User" \ 89 | --assistant_name="AI" \ 90 | --generation_backend="openai_generation" \ 91 | --generation_model="gpt-4o" \ 92 | --embedding_backend="openai_embedding" \ 93 | --embedding_model="text-embedding-3-small" \ 94 | --enable_ranker=False 95 | ``` 96 | 97 | 98 | ## 四、从源码安装 99 | 100 | 1. 克隆仓库并编辑设置 101 | ```bash 102 | # 克隆项目 103 | git clone https://github.com/modelscope/memoryscope 104 | cd memoryscope 105 | # 编辑配置,例如添加 API 密钥 106 | vim memoryscope/core/config/demo_config_zh.yaml 107 | ``` 108 | 109 | 2. 安装依赖 110 | ```bash 111 | pip install -e . 112 | ``` 113 | 114 | 3. 运行 Elasticsearch 服务,参照 [Elasticsearch 文档](https://www.elastic.co/guide/cn/elasticsearch/reference/current/getting-started.html)。 115 | 推荐使用 Docker 方法: 116 | ``` 117 | sudo docker run -p 9200:9200 \ 118 | -e "discovery.type=single-node" \ 119 | -e "xpack.security.enabled=false" \ 120 | -e "xpack.license.self_generated.type=trial" \ 121 | docker.elastic.co/elasticsearch/elasticsearch:8.13.2 122 | ``` 123 | 124 | 4. 
启动 MemoryScope,同时参考 [CLI 文档](../examples/cli/CLI_README_ZH.md) 125 | ```bash 126 | export DASHSCOPE_API_KEY="sk-0000000000" 127 | python quick-start-demo.py --config_path=memoryscope/core/config/demo_config_zh.yaml 128 | ``` 129 | 130 | -------------------------------------------------------------------------------- /docs/sphinx_doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | SPHINXBUILD = sphinx-build 4 | SPHINXPROJ = MemoryScope-Doc 5 | ASSETSDIR = assets 6 | BUILDDIR = build/html 7 | SOURCEDIR_EN = en/source 8 | BUILDDIR_EN = build/html/en 9 | SOURCEDIR_ZH = zh/source 10 | BUILDDIR_ZH = build/html/zh 11 | 12 | # English document 13 | en: 14 | @$(SPHINXBUILD) -b html "$(SOURCEDIR_EN)" "$(BUILDDIR_EN)" 15 | @echo 16 | @echo "Build finished. The HTML pages are in $(BUILDDIR_EN)" 17 | 18 | # Chinese document 19 | zh: 20 | @$(SPHINXBUILD) -b html "$(SOURCEDIR_ZH)" "$(BUILDDIR_ZH)" 21 | @echo 22 | @echo "Build finished. The HTML pages are in $(BUILDDIR_ZH)" 23 | 24 | index: 25 | @cp "$(ASSETSDIR)/redirect.html" "$(BUILDDIR)/index.html" 26 | 27 | %: Makefile 28 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR_EN)" "$(BUILDDIR_EN)" $(O) 29 | 30 | all: en zh index 31 | 32 | .PHONY: all en zh index -------------------------------------------------------------------------------- /docs/sphinx_doc/assets/redirect.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | MemoryScope Documentation 7 | 8 | 9 |

Redirecting to English documentation...

10 |

If you are not redirected, click here.

11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/sphinx_doc/build_sphinx_doc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # remove build 4 | rm -rf build/html/* 5 | rm -rf en/source/memoryscope*.rst 6 | rm -rf zh/source/memoryscope*.rst 7 | rm -rf ja/source/memoryscope*.rst 8 | 9 | # copy related files 10 | cd ../../ 11 | 12 | cp README.md docs/sphinx_doc/en/source/README.md 13 | cp docs/installation.md docs/sphinx_doc/en/source/docs/installation.md 14 | cp docs/contribution.md docs/sphinx_doc/en/source/docs/contribution.md 15 | cp -r docs/images docs/sphinx_doc/en/source/docs/images 16 | cp -r examples docs/sphinx_doc/en/source/examples 17 | 18 | cp README_ZH.md docs/sphinx_doc/zh/source/README.md 19 | cp docs/installation_zh.md docs/sphinx_doc/zh/source/docs/installation.md 20 | cp docs/contribution_zh.md docs/sphinx_doc/zh/source/docs/contribution.md 21 | cp -r docs/images docs/sphinx_doc/zh/source/docs/images 22 | cp -r examples docs/sphinx_doc/zh/source/examples 23 | 24 | cp README_JP.md docs/sphinx_doc/ja/source/README.md 25 | cp docs/installation_jp.md docs/sphinx_doc/ja/source/docs/installation.md 26 | cp docs/contribution_jp.md docs/sphinx_doc/ja/source/docs/contribution.md 27 | cp -r docs/images docs/sphinx_doc/ja/source/docs/images 28 | cp -r examples docs/sphinx_doc/ja/source/examples 29 | 30 | # build 31 | cd docs/sphinx_doc 32 | sphinx-apidoc -f -o en/source ../../memoryscope -t template -e 33 | sphinx-apidoc -f -o zh/source ../../memoryscope -t template -e 34 | sphinx-apidoc -f -o ja/source ../../memoryscope -t template -e 35 | 36 | # clear redundant files 37 | make clean all 38 | 39 | rm en/source/README.md 40 | rm en/source/docs/installation.md 41 | rm en/source/docs/contribution.md 42 | rm -rf en/source/docs/images 43 | rm -rf en/source/examples 44 | 45 | rm zh/source/README.md 46 | rm zh/source/docs/installation.md 47 | rm 
zh/source/docs/contribution.md 48 | rm -rf zh/source/docs/images 49 | rm -rf zh/source/examples 50 | 51 | rm ja/source/README.md 52 | rm ja/source/docs/installation.md 53 | rm ja/source/docs/contribution.md 54 | rm -rf ja/source/docs/images 55 | rm -rf ja/source/examples 56 | -------------------------------------------------------------------------------- /docs/sphinx_doc/en/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | .language-selector a { 2 | color: white; 3 | width: 20px; 4 | } -------------------------------------------------------------------------------- /docs/sphinx_doc/en/source/_templates/language_selector.html: -------------------------------------------------------------------------------- 1 | 2 |
3 | English | 4 | 中文 5 |
6 | -------------------------------------------------------------------------------- /docs/sphinx_doc/en/source/_templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | {% extends "!layout.html" %} {% block sidebartitle %} {{ super() }} {% include 3 | "language_selector.html" %} {% endblock %} 4 | -------------------------------------------------------------------------------- /docs/sphinx_doc/en/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Configuration file for the Sphinx documentation builder. 3 | # 4 | # This file only contains a selection of the most common options. For a full 5 | # list see the documentation: 6 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 7 | 8 | # -- Path setup -------------------------------------------------------------- 9 | 10 | # If extensions (or modules to document with autodoc) are in another directory, 11 | # add these directories to sys.path here. If the directory is relative to the 12 | # documentation root, use os.path.abspath to make it absolute, like shown here. 13 | # 14 | import os 15 | import sys 16 | 17 | sys.path.insert(0, os.path.abspath("../../../../../MemoryScope")) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | language = "en" 23 | 24 | project = "MemoryScope" 25 | copyright = "2024, Alibaba Tongyi Lab" 26 | author = "EcoML team of Alibaba Tongyi Lab" 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 
34 | extensions = [ 35 | "sphinx.ext.autodoc", 36 | "sphinx.ext.autosummary", 37 | "sphinx.ext.viewcode", 38 | "sphinx.ext.napoleon", 39 | "sphinxcontrib.mermaid", 40 | "myst_parser", 41 | "sphinx.ext.autosectionlabel", 42 | "sphinxcontrib.autodoc_pydantic", 43 | "nbsphinx" 44 | ] 45 | 46 | autodoc_pydantic_model_show_json = True 47 | autodoc_pydantic_settings_show_json = True 48 | 49 | # Prefix document path to section labels, otherwise autogenerated labels would 50 | # look like 'heading' rather than 'path/to/file:heading' 51 | autosectionlabel_prefix_document = True 52 | autosummary_generate = True 53 | autosummary_ignore_module_all = False 54 | 55 | autodoc_member_order = "bysource" 56 | 57 | # If true, '()' will be appended to :func: etc. cross-reference text. 58 | add_function_parentheses = False 59 | 60 | # If true, the current module name will be prepended to all description 61 | # unit titles (such as .. function::). 62 | add_module_names = True 63 | 64 | autodoc_default_flags = ["members"] 65 | 66 | autodoc_default_options = { 67 | "members": True, 68 | "member-order": "bysource", 69 | "special-members": "__init__", 70 | } 71 | # Add any paths that contain templates here, relative to this directory. 72 | templates_path = ["_templates"] 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | # This pattern also affects html_static_path and html_extra_path. 77 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 78 | 79 | # -- Options for HTML output ------------------------------------------------- 80 | 81 | # The theme to use for HTML and HTML Help pages. See the documentation for 82 | # a list of builtin themes. 83 | # 84 | html_theme = "sphinx_rtd_theme" 85 | 86 | # html_logo = "_static/logo.png" 87 | 88 | # Add any paths that contain custom static files (such as style sheets) here, 89 | # relative to this directory. 
They are copied after the builtin static files, 90 | # so a file named "default.css" will overwrite the builtin "default.css". 91 | html_static_path = ["_static"] 92 | 93 | html_theme_options = { 94 | # "logo_only": True, 95 | "navigation_depth": 4, 96 | } 97 | 98 | source_suffix = { 99 | ".rst": "restructuredtext", 100 | ".md": "markdown", 101 | } 102 | 103 | html_css_files = [ 104 | "custom.css", 105 | ] 106 | -------------------------------------------------------------------------------- /docs/sphinx_doc/en/source/docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | 4 | MemoryScope API Documentation 5 | 6 | 7 | Enumeration 8 | =========== 9 | 10 | .. automodule:: memoryscope.enumeration 11 | :members: 12 | 13 | Scheme 14 | ====== 15 | .. automodule:: memoryscope.scheme 16 | :members: 17 | 18 | Config 19 | ====== 20 | .. automodule:: memoryscope.core.config 21 | :members: 22 | 23 | 24 | Models 25 | ====== 26 | .. automodule:: memoryscope.core.models 27 | :members: 28 | 29 | 30 | 31 | Storage 32 | ======= 33 | .. automodule:: memoryscope.core.storage 34 | :members: 35 | 36 | 37 | Worker 38 | ====== 39 | Base 40 | ---- 41 | 42 | .. automodule:: memoryscope.core.worker 43 | :members: 44 | 45 | Frontend 46 | -------- 47 | .. automodule:: memoryscope.core.worker.frontend 48 | :members: 49 | 50 | Backend 51 | -------- 52 | .. automodule:: memoryscope.core.worker.backend 53 | :members: 54 | 55 | Operation 56 | ========= 57 | .. automodule:: memoryscope.core.operation 58 | :members: 59 | 60 | Service 61 | ======= 62 | .. automodule:: memoryscope.core.service 63 | :members: 64 | 65 | Chat 66 | ==== 67 | .. automodule:: memoryscope.core.chat 68 | :members: 69 | -------------------------------------------------------------------------------- /docs/sphinx_doc/en/source/index.rst: -------------------------------------------------------------------------------- 1 | .. 
MemoryScope documentation master file, created by 2 | sphinx-quickstart on Fri Jan 5 17:53:54 2024. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | :github_url: https://github.com/modelscope/memoryscope 7 | 8 | MemoryScope Documentation 9 | ========================= 10 | 11 | Welcome to MemoryScope Tutorial 12 | ------------------------------- 13 | 14 | .. image:: docs/images/logo.png 15 | :align: center 16 | 17 | MemoryScope provides LLM chatbots with powerful and flexible long-term memory capabilities, offering a framework for building such abilities. 18 | It can be applied to scenarios like personal assistants and emotional companions, continuously learning through long-term memory to remember users' basic information as well as various habits and preferences. 19 | This allows users to gradually experience a sense of "understanding" when using the LLM. 20 | 21 | .. image:: docs/images/framework.png 22 | :align: center 23 | 24 | Framework 25 | ^^^^^^^^^^^^^^^^^^^^ 26 | 27 | 💾 Memory Database: MemoryScope is equipped with a vector database (default is *ElasticSearch*) to store all memory fragments recorded in the system. 28 | 29 | 🔧 Worker Library: MemoryScope atomizes the capabilities of long-term memory into individual workers, including over 20 workers for tasks such as query information filtering, observation extraction, and insight updating. 30 | 31 | 🛠️ Operation Library: Based on the worker pipeline, it constructs the operations for memory services, realizing key capabilities such as memory retrieval and memory consolidation. 32 | 33 | - Memory Retrieval: Upon arrival of a user query, this operation returns the semantically related memory pieces 34 | and/or those from the corresponding time if the query involves reference to time. 
35 | - Memory Consolidation: This operation takes in a batch of user queries and returns important user information 36 | extracted from the queries as consolidated *observations* to be stored in the memory database. 37 | - Reflection and Re-consolidation: At regular intervals, this operation performs reflection upon newly recorded *observations* 38 | to form and update *insights*. Then, memory re-consolidation is performed to ensure contradictions and repetitions 39 | among memory pieces are properly handled. 40 | 41 | .. toctree:: 42 | :maxdepth: 2 43 | :caption: MemoryScope Tutorial 44 | 45 | About MemoryScope 46 | Installation 47 | Cli Client 48 | Simple Usages 49 | Advanced usage 50 | Contribution 51 | 52 | 53 | .. toctree:: 54 | :maxdepth: 6 55 | :caption: MemoryScope API Reference 56 | 57 | API 58 | -------------------------------------------------------------------------------- /docs/sphinx_doc/en/source/modules.rst: -------------------------------------------------------------------------------- 1 | memoryscope 2 | =========== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | memoryscope 8 | -------------------------------------------------------------------------------- /docs/sphinx_doc/ja/source/index.rst: -------------------------------------------------------------------------------- 1 | .. MemoryScope documentation master file, created by 2 | sphinx-quickstart on Fri Jan 5 17:53:54 2024. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | :github_url: https://github.com/modelscope/memoryscope 7 | 8 | MemoryScope ドキュメント 9 | ========================= 10 | 11 | MemoryScopeに関するドキュメントへようこそ 12 | ------------------------------- 13 | 14 | .. 
image:: ./docs/images/logo.png 15 | :align: center 16 | 17 | MemoryScopeは、LLMチャットボットに強力で柔軟な長期記憶能力を提供し、長期記憶能力を構築するためのフレームワークを提供します。 18 | MemoryScopeは、個人アシスタントや感情的な伴侶などの記憶シナリオに使用でき、長期記憶能力を通じてユーザーの基本情報やさまざまな習慣や好みを覚え続けることができます。 19 | これにより、ユーザーはLLMを使用する際に徐々に「理解されている」感覚を体験することができます。 20 | 21 | .. image:: docs/images/framework.png 22 | :align: center 23 | 24 | フレームワーク 25 | ^^^^^^^^^^^^^^^^^^^^ 26 | 27 | 💾 メモリデータベース: MemoryScopeは、システム内に記録されたすべての記憶片を保存するためのベクトルデータベース(デフォルトは*ElasticSearch*)を備えています。 28 | 29 | 🔧 ワーカーライブラリ: MemoryScopeは、長期記憶の能力を個々のワーカーに原子化し、クエリ情報のフィルタリング、観察の抽出、洞察の更新など、20以上のワーカーを含みます。 30 | 31 | 🛠️ オペレーションライブラリ: ワーカーパイプラインに基づいて、メモリサービスのオペレーションを構築し、メモリの取得やメモリの統合などの主要な機能を実現します。 32 | 33 | - メモリの取得: ユーザークエリが到着すると、この操作は意味的に関連する記憶片を返します。 34 | クエリが時間に言及している場合は、対応する時間の記憶片も返します。 35 | - メモリの統合: この操作は、一連のユーザークエリを受け取り、クエリから抽出された重要なユーザー情報を統合された*観察*としてメモリデータベースに保存します。 36 | - 反映と再統合: 定期的に、この操作は新たに記録された*観察*を反映し、*洞察*を形成および更新します。 37 | その後、メモリの再統合を実行して、記憶片間の矛盾や重複が適切に処理されるようにします。 38 | 39 | .. toctree:: 40 | :maxdepth: 2 41 | :caption: MemoryScope チュートリアル 42 | 43 | MemoryScopeについて 44 | インストール 45 | CLIクライアント 46 | 簡単な使用法 47 | 高度な使用法 48 | 貢献 49 | 50 | 51 | .. toctree:: 52 | :maxdepth: 6 53 | :caption: MemoryScope APIリファレンス 54 | 55 | API 56 | -------------------------------------------------------------------------------- /docs/sphinx_doc/requirements.txt: -------------------------------------------------------------------------------- 1 | loguru 2 | tiktoken 3 | pillow 4 | requests 5 | openai 6 | numpy 7 | sphinx 8 | sphinx-autobuild 9 | sphinx_rtd_theme 10 | sphinxcontrib-mermaid 11 | myst-parser 12 | autodoc_pydantic 13 | nbsphinx 14 | -------------------------------------------------------------------------------- /docs/sphinx_doc/template/module.rst_t: -------------------------------------------------------------------------------- 1 | {{ basename | heading }} 2 | .. 
automodule:: {{ qualname }} 3 | {%- for option in automodule_options %} 4 | :{{ option }}: 5 | {%- endfor %} -------------------------------------------------------------------------------- /docs/sphinx_doc/template/package.rst_t: -------------------------------------------------------------------------------- 1 | {%- macro automodule(modname, options) -%} 2 | .. automodule:: {{ modname }} 3 | {%- for option in options %} 4 | :{{ option }}: 5 | {%- endfor %} 6 | {%- endmacro %} 7 | 8 | {{- pkgname | heading }} 9 | 10 | {{ automodule(pkgname, automodule_options) }} 11 | -------------------------------------------------------------------------------- /docs/sphinx_doc/zh/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | .language-selector a { 2 | color: white; 3 | width: 20px; 4 | } -------------------------------------------------------------------------------- /docs/sphinx_doc/zh/source/_templates/language_selector.html: -------------------------------------------------------------------------------- 1 | 2 |
3 | English | 4 | 中文 5 |
6 | -------------------------------------------------------------------------------- /docs/sphinx_doc/zh/source/_templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | {% extends "!layout.html" %} {% block sidebartitle %} {{ super() }} {% include 3 | "language_selector.html" %} {% endblock %} 4 | -------------------------------------------------------------------------------- /docs/sphinx_doc/zh/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Configuration file for the Sphinx documentation builder. 3 | # 4 | # This file only contains a selection of the most common options. For a full 5 | # list see the documentation: 6 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 7 | 8 | # -- Path setup -------------------------------------------------------------- 9 | 10 | # If extensions (or modules to document with autodoc) are in another directory, 11 | # add these directories to sys.path here. If the directory is relative to the 12 | # documentation root, use os.path.abspath to make it absolute, like shown here. 13 | # 14 | import os 15 | import sys 16 | 17 | sys.path.insert(0, os.path.abspath("../../../../../MemoryScope")) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | language = "zh" 23 | 24 | project = "MemoryScope" 25 | copyright = "2024, Alibaba Tongyi Lab" 26 | author = "EcoML team of Alibaba Tongyi Lab" 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 
34 | extensions = [ 35 | "sphinx.ext.autodoc", 36 | "sphinx.ext.autosummary", 37 | "sphinx.ext.viewcode", 38 | "sphinx.ext.napoleon", 39 | "sphinxcontrib.mermaid", 40 | "myst_parser", 41 | "sphinx.ext.autosectionlabel", 42 | "sphinxcontrib.autodoc_pydantic", 43 | "nbsphinx" 44 | ] 45 | 46 | autodoc_pydantic_model_show_json = True 47 | autodoc_pydantic_settings_show_json = True 48 | 49 | # Prefix document path to section labels, otherwise autogenerated labels would 50 | # look like 'heading' rather than 'path/to/file:heading' 51 | autosectionlabel_prefix_document = True 52 | autosummary_generate = True 53 | autosummary_ignore_module_all = False 54 | 55 | autodoc_member_order = "bysource" 56 | 57 | # If true, '()' will be appended to :func: etc. cross-reference text. 58 | add_function_parentheses = False 59 | 60 | # If true, the current module name will be prepended to all description 61 | # unit titles (such as .. function::). 62 | add_module_names = True 63 | 64 | autodoc_default_flags = ["members"] 65 | 66 | autodoc_default_options = { 67 | "members": True, 68 | "member-order": "bysource", 69 | "special-members": "__init__", 70 | } 71 | # Add any paths that contain templates here, relative to this directory. 72 | templates_path = ["_templates"] 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | # This pattern also affects html_static_path and html_extra_path. 77 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 78 | 79 | # -- Options for HTML output ------------------------------------------------- 80 | 81 | # The theme to use for HTML and HTML Help pages. See the documentation for 82 | # a list of builtin themes. 83 | # 84 | html_theme = "sphinx_rtd_theme" 85 | 86 | # html_logo = "_static/logo.png" 87 | 88 | # Add any paths that contain custom static files (such as style sheets) here, 89 | # relative to this directory. 
They are copied after the builtin static files, 90 | # so a file named "default.css" will overwrite the builtin "default.css". 91 | html_static_path = ["_static"] 92 | 93 | html_theme_options = { 94 | # "logo_only": True, 95 | "navigation_depth": 4, 96 | } 97 | 98 | source_suffix = { 99 | ".rst": "restructuredtext", 100 | ".md": "markdown", 101 | } 102 | 103 | html_css_files = [ 104 | "custom.css", 105 | ] 106 | -------------------------------------------------------------------------------- /docs/sphinx_doc/zh/source/docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | 4 | MemoryScope API 接口文档 5 | 6 | 7 | Enumeration 8 | =========== 9 | 10 | .. automodule:: memoryscope.enumeration 11 | :members: 12 | 13 | Scheme 14 | ====== 15 | .. automodule:: memoryscope.scheme 16 | :members: 17 | 18 | Config 19 | ====== 20 | .. automodule:: memoryscope.core.config 21 | :members: 22 | 23 | 24 | Models 25 | ====== 26 | .. automodule:: memoryscope.core.models 27 | :members: 28 | 29 | 30 | 31 | Storage 32 | ======= 33 | .. automodule:: memoryscope.core.storage 34 | :members: 35 | 36 | 37 | Worker 38 | ====== 39 | Base 40 | ---- 41 | 42 | .. automodule:: memoryscope.core.worker 43 | :members: 44 | 45 | Frontend 46 | -------- 47 | .. automodule:: memoryscope.core.worker.frontend 48 | :members: 49 | 50 | Backend 51 | -------- 52 | .. automodule:: memoryscope.core.worker.backend 53 | :members: 54 | 55 | Operation 56 | ========= 57 | .. automodule:: memoryscope.core.operation 58 | :members: 59 | 60 | Service 61 | ======= 62 | .. automodule:: memoryscope.core.service 63 | :members: 64 | 65 | Chat 66 | ==== 67 | .. automodule:: memoryscope.core.chat 68 | :members: 69 | -------------------------------------------------------------------------------- /docs/sphinx_doc/zh/source/index.rst: -------------------------------------------------------------------------------- 1 | .. 
MemoryScope documentation master file, created by 2 | sphinx-quickstart on Fri Jan 5 17:53:54 2024. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | :github_url: https://github.com/modelscope/memoryscope 7 | 8 | MemoryScope 文档 9 | ========================= 10 | 11 | 欢迎浏览MemoryScope相关文档 12 | ------------------------------- 13 | 14 | .. image:: ./docs/images/logo.png 15 | :align: center 16 | 17 | MemoryScope可以为LLM聊天机器人提供强大且灵活的长期记忆能力,并提供了构建长期记忆能力的框架。 18 | MemoryScope可以用于个人助理、情感陪伴等记忆场景,通过长期记忆能力来不断学习,记得用户的基础信息以及各种习惯和喜好,使得用户在使用LLM时逐渐感受到一种“默契”。 19 | 20 | .. image:: docs/images/framework.png 21 | :align: center 22 | 23 | 核心框架 24 | ^^^^^^^^^^^^^^^^^^^^ 25 | 26 | 💾 记忆数据库: MemoryScope配备了向量数据库(默认是*ElasticSearch*),用于存储系统中记录的所有记忆片段。 27 | 28 | 🔧 核心worker库: MemoryScope将长期记忆的能力原子化,抽象成单独的worker,包括query信息过滤,observation抽取,insight更新等20+worker。 29 | 30 | 🛠️ 核心Op库: 并基于worker的pipeline构建了memory服务的核心operation,实现了记忆检索,记忆巩固等核心能力。 31 | 32 | - 记忆检索:当用户输入对话,此操作返回语义相关的记忆片段。如果输入对话包含对时间的指涉,则同时返回相应时间中的记忆片段。 33 | - 记忆巩固:此操作接收一批用户的输入对话,并从对话中提取重要的用户信息,将其作为 *observation* 形式的记忆片段存储在记忆数据库中。 34 | - 反思与再巩固:每隔一段时间,此操作对新记录的 *observations* 进行反思,以形成和更新 *insight* 35 | 形式的记忆片段。然后执行记忆再巩固,以确保记忆片段之间的矛盾和重复得到妥善处理。 36 | 37 | .. toctree:: 38 | :maxdepth: 2 39 | :caption: MemoryScope 教程 40 | 41 | 关于 MemoryScope 42 | 安装 43 | 命令行终端 44 | 简单案例 45 | 高级用法 46 | 贡献 47 | 48 | .. toctree:: 49 | :maxdepth: 6 50 | :caption: MemoryScope 接口 51 | 52 | API 53 | 54 | -------------------------------------------------------------------------------- /docs/sphinx_doc/zh/source/modules.rst: -------------------------------------------------------------------------------- 1 | memoryscope 2 | =========== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | memoryscope 8 | -------------------------------------------------------------------------------- /examples/advance/custom_operator.md: -------------------------------------------------------------------------------- 1 | # Custom Operator and Worker 2 | 3 | 1. Create a new worker named `example_query_worker.py` in the `contrib` directory: 4 | ```bash 5 | vim memoryscope/contrib/example_query_worker.py 6 | ``` 7 | 8 | 2. Write the program for the new custom worker. Note that the class name must match the filename, which is `ExampleQueryWorker`: 9 | ```python 10 | import datetime 11 | from memoryscope.constants.common_constants import QUERY_WITH_TS 12 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker 13 | 14 | class ExampleQueryWorker(MemoryBaseWorker): 15 | def _run(self): 16 | timestamp = int(datetime.datetime.now().timestamp()) # Current timestamp as default 17 | assert "query" in self.chat_kwargs 18 | query = self.chat_kwargs["query"] 19 | if not query: 20 | query = "" 21 | else: 22 | query = query.strip() + "\n You must add a `meow~` at the end of each of your answers." 23 | # Store the determined query and its timestamp in the context 24 | self.set_workflow_context(QUERY_WITH_TS, (query, timestamp)) 25 | ``` 26 | 27 | 3. Create a YAML startup file (copying `demo_config.yaml`): 28 | ``` 29 | cp memoryscope/core/config/demo_config.yaml examples/advance/replacement.yaml 30 | vim examples/advance/replacement.yaml 31 | ``` 32 | 33 | 4. 
At the bottom, insert the definition for the new worker and replace the previous default `set_query` worker, and update the operation's workflow: 34 | ``` 35 | rewrite_query: 36 | class: contrib.example_query_worker 37 | generation_model: generation_model 38 | ``` 39 | ``` 40 | retrieve_memory: 41 | class: core.operation.frontend_operation 42 | workflow: rewrite_query,[extract_time|retrieve_obs_ins,semantic_rank],fuse_rerank 43 | description: "retrieve long-term memory" 44 | ``` 45 | 46 | 5. Verify: 47 | ``` 48 | python quick-start-demo.py --config examples/advance/replacement.yaml 49 | ``` -------------------------------------------------------------------------------- /examples/advance/custom_operator_zh.md: -------------------------------------------------------------------------------- 1 | # 自定义 Operator 和 Worker 2 | 3 | 1. 在 `contrib` 路径下创建新worker,命名为 `example_query_worker.py`: 4 | ```bash 5 | vim memoryscope/contrib/example_query_worker.py 6 | ``` 7 | 8 | 2. 写入新的自定义worker的程序,注意`class`的命名需要与文件名保持一致,为`ExampleQueryWorker`: 9 | ```python 10 | import datetime 11 | from memoryscope.constants.common_constants import QUERY_WITH_TS 12 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker 13 | 14 | class ExampleQueryWorker(MemoryBaseWorker): 15 | 16 | def _run(self): 17 | 18 | timestamp = int(datetime.datetime.now().timestamp()) # Current timestamp as default 19 | 20 | assert "query" in self.chat_kwargs 21 | query = self.chat_kwargs["query"] 22 | if not query: 23 | query = "" 24 | else: 25 | query = query.strip() + "\n You must add a `meow~` at the end of each of your answer." 26 | 27 | # Store the determined query and its timestamp in the context 28 | self.set_workflow_context(QUERY_WITH_TS, (query, timestamp)) 29 | ``` 30 | 31 | 3. 创建yaml启动文件(复制demo_config_zh.yaml) 32 | ``` 33 | cp memoryscope/core/config/demo_config_zh.yaml examples/advance/replacement.yaml 34 | vim examples/advance/replacement.yaml 35 | ``` 36 | 37 | 4. 
在最下面插入新worker的定义,并且取代之前的默认`set_query`worker,并替换operation的workflow 38 | ``` 39 | rewrite_query: 40 | class: contrib.example_query_worker 41 | generation_model: generation_model 42 | ``` 43 | ``` 44 | retrieve_memory: 45 | class: core.operation.frontend_operation 46 | workflow: rewrite_query,[extract_time|retrieve_obs_ins,semantic_rank],fuse_rerank 47 | description: "retrieve long-term memory" 48 | ``` 49 | 50 | 5. 验证: 51 | ``` 52 | python quick-start-demo.py --config examples/advance/replacement.yaml 53 | ``` 54 | -------------------------------------------------------------------------------- /examples/api/agentscope_example.md: -------------------------------------------------------------------------------- 1 | # Working with AgentScope 2 | 3 | 1. First, make sure that you have installed AutoGen as well as memoryscope. 4 | ``` 5 | pip install agentscope memoryscope 6 | ``` 7 | 8 | 9 | 2. Then, ensure that es is up and running. [elasticsearch documents](https://www.elastic.co/guide/en/elasticsearch/reference/current/getting-started.html). 10 | The docker method is recommended: 11 | ``` 12 | sudo docker run -p 9200:9200 \ 13 | -e "discovery.type=single-node" \ 14 | -e "xpack.security.enabled=false" \ 15 | -e "xpack.license.self_generated.type=trial" \ 16 | docker.elastic.co/elasticsearch/elasticsearch:8.13.2 17 | ``` 18 | 19 | 3. Finally, we can start the autogen demo. 
20 | ``` 21 | python examples/api/agentscope_example.py 22 | ``` -------------------------------------------------------------------------------- /examples/api/agentscope_example.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Union, Sequence 2 | 3 | import agentscope 4 | from agentscope.agents import AgentBase, UserAgent 5 | from agentscope.message import Msg 6 | 7 | from memoryscope import MemoryScope, Arguments 8 | 9 | 10 | class MemoryScopeAgent(AgentBase): 11 | def __init__(self, name: str, arguments: Arguments, **kwargs) -> None: 12 | # Disable AgentScope memory and use MemoryScope memory instead 13 | super().__init__(name, use_memory=False, **kwargs) 14 | 15 | # Create a memory client in MemoryScope 16 | self.memory_scope = MemoryScope(arguments=arguments) 17 | self.memory_chat = self.memory_scope.default_memory_chat 18 | 19 | def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: 20 | # Generate response 21 | response = self.memory_chat.chat_with_memory(query=x.content) 22 | 23 | # Wrap the response in a message object in AgentScope 24 | msg = Msg(name=self.name, content=response.message.content, role="assistant") 25 | 26 | # Print/speak the message in this agent's voice 27 | self.speak(msg) 28 | 29 | return msg 30 | 31 | def close(self): 32 | # Close the backend service of MemoryScope 33 | self.memory_scope.close() 34 | 35 | 36 | def main(): 37 | # Setting of MemoryScope 38 | arguments = Arguments( 39 | language="cn", 40 | human_name="用户", 41 | assistant_name="AI", 42 | memory_chat_class="api_memory_chat", 43 | generation_backend="dashscope_generation", 44 | generation_model="qwen-max", 45 | embedding_backend="dashscope_embedding", 46 | embedding_model="text-embedding-v2", 47 | rank_backend="dashscope_rank", 48 | rank_model="gte-rerank") 49 | 50 | # Initialize AgentScope 51 | agentscope.init(project="MemoryScope") 52 | 53 | memoryscope_agent = 
MemoryScopeAgent(name="Assistant", arguments=arguments) 54 | 55 | user_agent = UserAgent() 56 | 57 | # Dialog 58 | msg = None 59 | while True: 60 | # User input 61 | msg = user_agent(msg) 62 | if msg.content == "exit": 63 | break 64 | # Agent speaks 65 | msg = memoryscope_agent(msg) 66 | 67 | # End memory 68 | memoryscope_agent.close() 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /examples/api/autogen_example.md: -------------------------------------------------------------------------------- 1 | # Working with AutoGen 2 | 3 | 1. First, make sure that you have installed AutoGen as well as memoryscope. 4 | ``` 5 | pip install pyautogen memoryscope 6 | ``` 7 | 8 | 9 | 2. Then, ensure that es is up and running. [elasticsearch documents](https://www.elastic.co/guide/en/elasticsearch/reference/current/getting-started.html). 10 | The docker method is recommended: 11 | ``` 12 | sudo docker run -p 9200:9200 \ 13 | -e "discovery.type=single-node" \ 14 | -e "xpack.security.enabled=false" \ 15 | -e "xpack.license.self_generated.type=trial" \ 16 | docker.elastic.co/elasticsearch/elasticsearch:8.13.2 17 | ``` 18 | 19 | 3. Finally, we can start the autogen demo. 
20 | ``` 21 | python examples/api/autogen_example.py 22 | ``` -------------------------------------------------------------------------------- /examples/api/autogen_example.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Union, Literal, Dict, List, Any, Tuple 2 | 3 | from autogen import Agent, ConversableAgent, UserProxyAgent 4 | 5 | from memoryscope import MemoryScope, Arguments 6 | 7 | 8 | class MemoryScopeAgent(ConversableAgent): 9 | def __init__( 10 | self, 11 | name: str = "assistant", 12 | system_message: Optional[str] = "", 13 | human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER", 14 | llm_config: Optional[Union[Dict, bool]] = None, 15 | arguments: Arguments = None, 16 | **kwargs, 17 | ): 18 | super().__init__( 19 | name=name, 20 | system_message=system_message, 21 | human_input_mode=human_input_mode, 22 | llm_config=llm_config, 23 | **kwargs, 24 | ) 25 | 26 | # Create a memory client in MemoryScope 27 | self.memory_scope = MemoryScope(arguments=arguments) 28 | self.memory_chat = self.memory_scope.default_memory_chat 29 | 30 | self.register_reply([Agent, None], MemoryScopeAgent.generate_reply_with_memory, remove_other_reply_funcs=True) 31 | 32 | def generate_reply_with_memory( 33 | self, 34 | messages: Optional[List[Dict]] = None, 35 | sender: Optional[Agent] = None, 36 | config: Optional[Any] = None, 37 | ) -> Tuple[bool, Union[str, Dict, None]]: 38 | # Generate response 39 | 40 | contents = [] 41 | for message in messages: 42 | if message.get("role") != self.name: 43 | contents.append(message.get("content", "")) 44 | 45 | query = contents[-1] 46 | response = self.memory_chat.chat_with_memory(query=query) 47 | return True, response.message.content 48 | 49 | def close(self): 50 | self.memory_scope.close() 51 | 52 | 53 | def main(): 54 | # Create the agent of MemoryScope 55 | arguments = Arguments( 56 | language="cn", 57 | human_name="用户", 58 | assistant_name="AI", 59 | 
memory_chat_class="api_memory_chat", 60 | generation_backend="dashscope_generation", 61 | generation_model="qwen-max", 62 | embedding_backend="dashscope_embedding", 63 | embedding_model="text-embedding-v2", 64 | rank_backend="dashscope_rank", 65 | rank_model="gte-rerank" 66 | ) 67 | 68 | assistant = MemoryScopeAgent("assistant", arguments=arguments) 69 | 70 | # Create the agent that represents the user in the conversation. 71 | user_proxy = UserProxyAgent("user", code_execution_config=False) 72 | 73 | # Let the assistant start the conversation. It will end when the user types exit. 74 | assistant.initiate_chat(user_proxy, message="有什么需要帮忙的吗?") 75 | assistant.close() 76 | 77 | 78 | if __name__ == "__main__": 79 | main() 80 | -------------------------------------------------------------------------------- /examples/api/chat_example.py: -------------------------------------------------------------------------------- 1 | from memoryscope import MemoryScope, Arguments 2 | 3 | arguments = Arguments( 4 | language="cn", 5 | human_name="用户", 6 | assistant_name="AI", 7 | memory_chat_class="api_memory_chat", 8 | generation_backend="dashscope_generation", 9 | generation_model="qwen-max", 10 | embedding_backend="dashscope_embedding", 11 | embedding_model="text-embedding-v2", 12 | rank_backend="dashscope_rank", 13 | rank_model="gte-rerank", 14 | enable_ranker=True) 15 | 16 | 17 | def chat_example1(): 18 | with MemoryScope(config_path="memoryscope/core/config/demo_config_zh.yaml") as ms: 19 | memory_chat = ms.default_memory_chat 20 | response = memory_chat.chat_with_memory(query="我的爱好是弹琴。") 21 | print("回答:\n" + response.message.content) 22 | print("记忆:\n" + response.meta_data["memories"]) 23 | 24 | 25 | def chat_example2(): 26 | with MemoryScope(arguments=arguments) as ms: 27 | memory_chat = ms.default_memory_chat 28 | response = memory_chat.chat_with_memory(query="我的爱好是弹琴。") 29 | print("回答:\n" + response.message.content) 30 | print("记忆:\n" + response.meta_data["memories"]) 31 | 
32 | 33 | def chat_example3(): 34 | with MemoryScope(arguments=arguments) as ms: 35 | memory_chat = ms.default_memory_chat 36 | response = memory_chat.chat_with_memory( 37 | query="你知道我消灭了几个星系吗?", 38 | temporary_memories="张三是一只哥斯拉,这是真是的,不是虚构的。\n前天张三消灭了星系0x4be?\n大前天张三消灭了星系0x5be?") 39 | print("回答:\n" + response.message.content) 40 | 41 | 42 | def chat_example4(): 43 | with MemoryScope(arguments=arguments) as ms: 44 | memory_chat = ms.default_memory_chat 45 | memory_chat.run_service_operation("delete_all") 46 | 47 | response = memory_chat.chat_with_memory(query="我的爱好是弹琴。") 48 | print("回答1:\n" + response.message.content) 49 | result = memory_chat.run_service_operation("consolidate_memory") 50 | print(result) 51 | 52 | response = memory_chat.chat_with_memory(query="你知道我的乐器爱好是什么?", history_message_strategy=None) 53 | print("回答2:\n" + response.message.content) 54 | print("记忆2:\n" + response.meta_data["memories"]) 55 | 56 | 57 | def chat_example5(): 58 | with MemoryScope(arguments=arguments) as ms: 59 | memory_service = ms.default_memory_service 60 | memory_service.init_service() 61 | 62 | result = memory_service.list_memory() 63 | print(f"list_memory result={result}") 64 | 65 | result = memory_service.retrieve_memory() 66 | print(f"retrieve_memory result={result}") 67 | 68 | result = memory_service.consolidate_memory() 69 | print(f"consolidate_memory result={result}") 70 | 71 | 72 | def chat_example6(): 73 | with MemoryScope(arguments=arguments) as ms: 74 | memory_chat = ms.default_memory_chat 75 | memory_chat.run_service_operation("delete_all", "张三") 76 | memory_chat.run_service_operation("delete_all", "李四") 77 | 78 | print("李四=========================") 79 | response = memory_chat.chat_with_memory(query="我的爱好是弹琴。", role_name="李四") 80 | print("回答1:\n" + response.message.content) 81 | result = memory_chat.run_service_operation("consolidate_memory", role_name="李四") 82 | print(result) 83 | response = memory_chat.chat_with_memory(query="你知道我的乐器爱好是什么?", role_name="李四", 84 | 
history_message_strategy=None) 85 | print("回答2:\n" + response.message.content) 86 | print("记忆2:\n" + response.meta_data["memories"]) 87 | 88 | print("张三=========================") 89 | response = memory_chat.chat_with_memory(query="我的爱好是打羽毛球。", role_name="张三") 90 | print("回答1:\n" + response.message.content) 91 | result = memory_chat.run_service_operation("consolidate_memory", role_name="张三") 92 | print(result) 93 | response = memory_chat.chat_with_memory(query="你知道我的运动爱好是什么?", role_name="张三", 94 | history_message_strategy=None) 95 | print("回答2:\n" + response.message.content) 96 | print("记忆2:\n" + response.meta_data["memories"]) 97 | 98 | 99 | if __name__ == "__main__": 100 | # chat_example1() 101 | # chat_example2() 102 | # chat_example3() 103 | chat_example4() 104 | # chat_example5() 105 | # chat_example6() 106 | -------------------------------------------------------------------------------- /examples/cli/CLI_README.md: -------------------------------------------------------------------------------- 1 | # The Cli Interface of MemoryScope 2 | 3 | ## Usage 4 | Before running, follow the [**Installation**](../../docs/installation.md#iii-install-from-pypi) guidelines in Readme, and start the Docker image first. 5 | MemoryScope can be launched in two different ways: 6 | 7 | ### 1. Using YAML Configuration File 8 | 9 | If you prefer to configure your settings via a YAML file, you can do so by providing the path to the configuration file as follows: 10 | ```bash 11 | memoryscope --config_path=memoryscope/core/config/demo_config.yaml 12 | ``` 13 | 14 | ### 2. 
Using Command Line Arguments 15 | 16 | Alternatively, you can specify all the parameters directly on the command line: 17 | 18 | ```bash 19 | # Chinese / Dashscope 20 | memoryscope --language="cn" \ 21 | --memory_chat_class="cli_memory_chat" \ 22 | --human_name="用户" \ 23 | --assistant_name="AI" \ 24 | --generation_backend="dashscope_generation" \ 25 | --generation_model="qwen-max" \ 26 | --embedding_backend="dashscope_embedding" \ 27 | --embedding_model="text-embedding-v2" \ 28 | --enable_ranker=True \ 29 | --rank_backend="dashscope_rank" \ 30 | --rank_model="gte-rerank" 31 | # English / OpenAI 32 | memoryscope --language="en" \ 33 | --memory_chat_class="cli_memory_chat" \ 34 | --human_name="user" \ 35 | --assistant_name="AI" \ 36 | --generation_backend="openai_generation" \ 37 | --generation_model="gpt-4o" \ 38 | --embedding_backend="openai_embedding" \ 39 | --embedding_model="text-embedding-3-small" \ 40 | --enable_ranker=False 41 | ``` 42 | 43 | Here are the available options that can be set through either method: 44 | 45 | - `--language`: The language used for the conversation. 46 | - `--memory_chat_class`: The class name for managing the chat history. 47 | - `--human_name`: The name of the human user. 48 | - `--assistant_name`: The name of the AI assistant. 49 | - `--generation_backend`: The backend used for generating responses. 50 | - `--generation_model`: The model used for generating responses. 51 | - `--embedding_backend`: The backend used for text embeddings. 52 | - `--embedding_model`: The model used for creating text embeddings. 53 | - `--enable_ranker`: A boolean indicating whether to use a dummy ranker (default is `False`). 54 | - `--rank_backend`: The backend used for ranking responses. 55 | - `--rank_model`: The model used for ranking responses. 56 | 57 | ### 3. View Memory 58 | You can open two command line windows following the method in the second step. 
59 | In one command line window, you can have a conversation with the AI, while in the other, you can check the AI's long-term memory about the user. 60 | Use /help to open the command line help, and find the command /list_memory along with the corresponding auto-refresh instruction. 61 | ``` 62 | /list_memory refresh_time=5 63 | ``` 64 | Then you can enjoy a pleasant conversation with the AI! -------------------------------------------------------------------------------- /examples/cli/CLI_README_ZH.md: -------------------------------------------------------------------------------- 1 | # MemoryScope 的命令行接口 2 | 3 | ## 使用方法 4 | 在运行之前,请先按照 Readme 中的 [**Installation**](../../docs/installation_zh.md#三通过-pypi-安装) 指南进行安装,并启动 Docker 镜像。 5 | MemoryScope 可以通过两种不同的方式启动: 6 | 7 | ### 1. 使用 YAML 配置文件 8 | 9 | 如果您更喜欢通过 YAML 文件配置设置,可以通过提供配置文件的路径来实现: 10 | ```bash 11 | memoryscope --config_path=memoryscope/core/config/demo_config_zh.yaml 12 | ``` 13 | 14 | ### 2. 使用命令行参数 15 | 16 | 或者,您可以直接在命令行上指定所有参数: 17 | 18 | ``` 19 | # 中文 20 | memoryscope --language="cn" \ 21 | --memory_chat_class="cli_memory_chat" \ 22 | --human_name="用户" \ 23 | --assistant_name="AI" \ 24 | --generation_backend="dashscope_generation" \ 25 | --generation_model="qwen-max" \ 26 | --embedding_backend="dashscope_embedding" \ 27 | --embedding_model="text-embedding-v2" \ 28 | --enable_ranker=True \ 29 | --rank_backend="dashscope_rank" \ 30 | --rank_model="gte-rerank" 31 | # 英文 32 | memoryscope --language="en" \ 33 | --memory_chat_class="cli_memory_chat" \ 34 | --human_name="User" \ 35 | --assistant_name="AI" \ 36 | --generation_backend="openai_generation" \ 37 | --generation_model="gpt-4o" \ 38 | --embedding_backend="openai_embedding" \ 39 | --embedding_model="text-embedding-3-small" \ 40 | --enable_ranker=False 41 | ``` 42 | 43 | 以下是可以通过任一方法设置的可用选项: 44 | 45 | - `--language`: 对话中使用的语言。 46 | - `--memory_chat_class`: 管理聊天记录的类名。 47 | - `--human_name`: 人类用户的名字。 48 | - `--assistant_name`: AI 助手的名字。 49 | - 
`--generation_backend`: 用于生成回复的后端。 50 | - `--generation_model`: 用于生成回复的模型。 51 | - `--embedding_backend`: 用于文本嵌入的后端。 52 | - `--embedding_model`: 用于创建文本嵌入的模型。 53 | - `--enable_ranker`: 一个布尔值,指示是否使用排名器(默认为 False)。 54 | - `--rank_backend`: 用于排名回复的后端。 55 | - `--rank_model`: 用于排名回复的模型。 56 | 57 | ### 3. 查看记忆 58 | 按照第二步的方式可以打开两个命令行的窗口。 59 | 其中一个命令行窗口可以和AI进行对话,另一个命令行窗口可以查看AI关于用户的长期记忆 60 | 使用/help打开命令行帮助,找到/list_memory的命令和对应自动刷新的指令。 61 | ``` 62 | /list_memory refresh_time=5 63 | ``` 64 | 接下来就可以和AI进行愉快地交流啦。 -------------------------------------------------------------------------------- /examples/docker/entrypoint.sh: -------------------------------------------------------------------------------- 1 | sh examples/docker/run_elastic_search.sh 2 | python quick-start-demo.py --config_path=memoryscope/core/config/demo_config_zh.yaml -------------------------------------------------------------------------------- /examples/docker/run_elastic_search.sh: -------------------------------------------------------------------------------- 1 | su - elastic_search_user -c "/home/elastic_search_user/elastic_search/elasticsearch-8.15.0/bin/elasticsearch -E xpack.security.enabled=false -E discovery.type=single-node -E xpack.license.self_generated.type=trial -d" 2 | -------------------------------------------------------------------------------- /memoryscope/__init__.py: -------------------------------------------------------------------------------- 1 | """ Version of MemoryScope.""" 2 | __version__ = "0.1.1.0" 3 | import fire 4 | 5 | from memoryscope.core.config.arguments import Arguments # noqa: F401 6 | from memoryscope.core.memoryscope import MemoryScope # noqa: F401 7 | 8 | 9 | def cli(): 10 | fire.Fire(MemoryScope.cli_memory_chat) 11 | -------------------------------------------------------------------------------- /memoryscope/constants/__init__.py: -------------------------------------------------------------------------------- 1 | from . import common_constants 2 | from . 
import language_constants 3 | 4 | 5 | __all__ = [ 6 | "common_constants", 7 | "language_constants" 8 | ] 9 | -------------------------------------------------------------------------------- /memoryscope/constants/common_constants.py: -------------------------------------------------------------------------------- 1 | # common_constants.py 2 | # This module defines constants used as keys throughout the application to maintain a consistent reference 3 | # for data structures related to workflow management, chat interactions, context storage, memory operations, 4 | # node processing, and temporal inference functionalities. 5 | 6 | WORKFLOW_NAME = "workflow_name" 7 | 8 | MEMORYSCOPE_CONTEXT = "memoryscope_context" 9 | 10 | RESULT = "result" 11 | 12 | MEMORIES = "memories" 13 | 14 | CHAT_MESSAGES = "chat_messages" 15 | 16 | CHAT_MESSAGES_SCATTER = "chat_messages_scatter" 17 | 18 | CHAT_KWARGS = "chat_kwargs" 19 | 20 | USER_NAME = "user_name" 21 | 22 | TARGET_NAME = "target_name" 23 | 24 | MEMORY_MANAGER = "memory_manager" 25 | 26 | QUERY_WITH_TS = "query_with_ts" 27 | 28 | RETRIEVE_MEMORY_NODES = "retrieve_memory_nodes" 29 | 30 | RANKED_MEMORY_NODES = "ranked_memory_nodes" 31 | 32 | NOT_REFLECTED_NODES = "not_reflected_nodes" 33 | 34 | NOT_UPDATED_NODES = "not_updated_nodes" 35 | 36 | EXTRACT_TIME_DICT = "extract_time_dict" 37 | 38 | NEW_OBS_NODES = "new_obs_nodes" 39 | 40 | NEW_OBS_WITH_TIME_NODES = "new_obs_with_time_nodes" 41 | 42 | INSIGHT_NODES = "insight_nodes" 43 | 44 | TODAY_NODES = "today_nodes" 45 | 46 | MERGE_OBS_NODES = "merge_obs_nodes" 47 | 48 | TIME_INFER = "time_infer" 49 | -------------------------------------------------------------------------------- /memoryscope/contrib/example_query_worker.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from memoryscope.constants.common_constants import QUERY_WITH_TS 4 | from memoryscope.constants.language_constants import NONE_WORD 5 | from 
memoryscope.core.worker.memory_base_worker import MemoryBaseWorker 6 | from memoryscope.enumeration.message_role_enum import MessageRoleEnum 7 | 8 | 9 | class ExampleQueryWorker(MemoryBaseWorker): 10 | # NOTE: If you want to utilize the capabilities of the prompt handler, please be sure to include this sentence. 11 | FILE_PATH: str = __file__ 12 | 13 | def _parse_params(self, **kwargs): 14 | self.rewrite_history_count: int = kwargs.get("rewrite_history_count", 2) 15 | self.generation_model_kwargs: dict = kwargs.get("generation_model_kwargs", {}) 16 | 17 | def rewrite_query(self, query: str) -> str: 18 | chat_messages = self.chat_messages_scatter 19 | if len(chat_messages) <= 1: 20 | return query 21 | 22 | if chat_messages[-1].role == MessageRoleEnum.USER: 23 | chat_messages = chat_messages[:-1] 24 | chat_messages = chat_messages[-self.rewrite_history_count:] 25 | 26 | # get context 27 | context_list = [] 28 | for message in chat_messages: 29 | context = message.content 30 | if len(context) > 200: 31 | context = context[:100] + context[-100:] 32 | if message.role == MessageRoleEnum.USER: 33 | context_list.append(f"{self.target_name}: {context}") 34 | elif message.role == MessageRoleEnum.ASSISTANT: 35 | context_list.append(f"Assistant: {context}") 36 | 37 | if not context_list: 38 | return query 39 | 40 | system_prompt = self.prompt_handler.rewrite_query_system 41 | user_query = self.prompt_handler.rewrite_query_query.format(query=query, 42 | context="\n".join(context_list)) 43 | rewrite_query_message = self.prompt_to_msg(system_prompt=system_prompt, 44 | few_shot="", 45 | user_query=user_query) 46 | self.logger.info(f"rewrite_query_message={rewrite_query_message}") 47 | 48 | # Invoke the LLM to generate a response 49 | response = self.generation_model.call(messages=rewrite_query_message, 50 | **self.generation_model_kwargs) 51 | 52 | # Handle empty or unsuccessful responses 53 | if not response.status or not response.message.content: 54 | return query 55 | 56 | 
response_text = response.message.content 57 | self.logger.info(f"rewrite_query.response_text={response_text}") 58 | 59 | if not response_text or response_text.lower() == self.get_language_value(NONE_WORD): 60 | return query 61 | 62 | return response_text 63 | 64 | def _run(self): 65 | query = "" # Default query value 66 | timestamp = int(datetime.datetime.now().timestamp()) # Current timestamp as default 67 | 68 | if "query" in self.chat_kwargs: 69 | # set query if exists 70 | query = self.chat_kwargs["query"] 71 | if not query: 72 | query = "" 73 | query = query.strip() 74 | 75 | # set ts if exists 76 | _timestamp = self.chat_kwargs.get("timestamp") 77 | if _timestamp and isinstance(_timestamp, int): 78 | timestamp = _timestamp 79 | 80 | if self.rewrite_history_count > 0: 81 | t_query = self.rewrite_query(query=query) 82 | if t_query: 83 | query = t_query 84 | 85 | # Store the determined query and its timestamp in the context 86 | self.set_workflow_context(QUERY_WITH_TS, (query, timestamp)) 87 | -------------------------------------------------------------------------------- /memoryscope/contrib/example_query_worker.yaml: -------------------------------------------------------------------------------- 1 | rewrite_query_system: 2 | cn: | 3 | 任务: 消除指代问题并重写 4 | 要求: 检查提供的问题是否存在指代。如果存在指代,通过上下文信息重写问题,使其信息充足,能够单独回答。如果没有指代问题,则回答“无”。 5 | en: | 6 | Task: Eliminate referencing issues and rewrite 7 | Requirements: Check the provided questions for any references. If references exist, rewrite the questions using contextual information to make them sufficiently informative so they can be answered independently. If there are no referencing issues, respond with "None". 
8 | 9 | rewrite_query_query: 10 | cn: | 11 | 上下文: 12 | {context} 13 | 问题:{query} 14 | 重写: 15 | 16 | en: | 17 | Context: 18 | {context} 19 | Question: {query} 20 | Rewrite: 21 | 22 | -------------------------------------------------------------------------------- /memoryscope/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .memoryscope import MemoryScope 2 | from .memoryscope_context import MemoryscopeContext 3 | 4 | __all__ = [ 5 | "MemoryScope", 6 | "MemoryscopeContext" 7 | ] 8 | -------------------------------------------------------------------------------- /memoryscope/core/chat/__init__.py: -------------------------------------------------------------------------------- 1 | from .api_memory_chat import ApiMemoryChat 2 | from .base_memory_chat import BaseMemoryChat 3 | from .cli_memory_chat import CliMemoryChat 4 | 5 | __all__ = [ 6 | "ApiMemoryChat", 7 | "BaseMemoryChat", 8 | "CliMemoryChat" 9 | ] 10 | -------------------------------------------------------------------------------- /memoryscope/core/chat/base_memory_chat.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from typing import Optional, Literal 3 | 4 | from memoryscope.core.service.base_memory_service import BaseMemoryService 5 | from memoryscope.core.utils.logger import Logger 6 | 7 | 8 | class BaseMemoryChat(metaclass=ABCMeta): 9 | """ 10 | An abstract base class representing a chat system integrated with memory services. 11 | It outlines the method to initiate a chat session leveraging memory data, which concrete subclasses must implement. 12 | """ 13 | 14 | def __init__(self, **kwargs): 15 | self.kwargs: dict = kwargs 16 | self.logger = Logger.get_logger() 17 | 18 | @property 19 | def memory_service(self) -> BaseMemoryService: 20 | """ 21 | Abstract property to access the memory service. 
22 | 23 | Raises: 24 | NotImplementedError: This method should be implemented in a subclass. 25 | """ 26 | raise NotImplementedError 27 | 28 | @abstractmethod 29 | def chat_with_memory(self, 30 | query: str, 31 | role_name: Optional[str] = None, 32 | system_prompt: Optional[str] = None, 33 | memory_prompt: Optional[str] = None, 34 | temporary_memories: Optional[str] = None, 35 | history_message_strategy: Literal["auto", None] | int = "auto", 36 | remember_response: bool = True, 37 | **kwargs): 38 | """ 39 | The core function that carries out conversation with memory accepts user queries through query and returns the 40 | conversation results through model_response. The retrieved memories are stored in the memories within meta_data. 41 | Args: 42 | query (str): User's query, includes the user's question. 43 | role_name (str, optional): User's role name. 44 | system_prompt (str, optional): System prompt. Defaults to the system_prompt in "memory_chat_prompt.yaml". 45 | memory_prompt (str, optional): Memory prompt, It takes effect when there is a memory and will be placed in 46 | front of the retrieved memory. Defaults to the memory_prompt in "memory_chat_prompt.yaml". 47 | temporary_memories (str, optional): Manually added user memory in this function. 48 | history_message_strategy ("auto", None, int): 49 | - If it is set to "auto", the history messages in the conversation will retain those that have not 50 | yet been summarized. Default to "auto". 51 | - If it is set to None, no conversation history will be saved. 52 | - If it is set to an integer value "n", recent "n" message-pair[user, assistant] will be retained. 53 | remember_response (bool, optional): Flag indicating whether to save the AI's response to memory. 54 | Defaults to False. 55 | Returns: 56 | - ModelResponse: In non-streaming mode, returns a complete AI response. 57 | - ModelResponseGen: In streaming mode, returns a generator yielding AI response parts. 
58 | - Memories: To obtain the memory by invoking the method of model_response.meta_data[MEMORIES] 59 | """ 60 | raise NotImplementedError 61 | 62 | def start_backend_service(self, **kwargs): 63 | self.memory_service.start_backend_service(**kwargs) 64 | 65 | def run_service_operation(self, name: str, role_name: Optional[str] = None, **kwargs): 66 | return self.memory_service.run_operation(name, role_name=role_name, **kwargs) 67 | 68 | def run(self): 69 | """ 70 | Abstract method to run the chat system. 71 | 72 | This method should contain the logic to initiate and manage the chat process, 73 | utilizing the memory service as needed. It must be implemented by subclasses. 74 | """ 75 | pass 76 | -------------------------------------------------------------------------------- /memoryscope/core/chat/memory_chat_prompt.yaml: -------------------------------------------------------------------------------- 1 | system_prompt: 2 | cn: | 3 | 你是一个名为MemoryScope的智能助理,请用中文简洁地回答用户问题。当前时间是{date_time}。 4 | en: | 5 | You are a helpful assistant named MemoryScope, please answer questions concisely in English. The current time is {date_time}. 6 | 7 | memory_prompt: 8 | cn: | 9 | 在回答用户问题时,请尽量忘记大部分不相关的信息。只有当信息与用户问题或对话内容非常相关时,才记住这些信息并加以使用。请确保你的回答简洁、准确,并聚焦于用户问题或对话主题。信息: 10 | en: | 11 | When responding to user questions, please try to forget most of the irrelevant information. Only remember and use the information if it is highly relevant to the current question or conversation. Ensure that your answers are concise, accurate, and focused on the user's current question or the topic of discussion. 
Information: -------------------------------------------------------------------------------- /memoryscope/core/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .arguments import Arguments 2 | from .config_manager import ConfigManager 3 | 4 | __all__ = [ 5 | "Arguments", 6 | "ConfigManager", 7 | ] 8 | -------------------------------------------------------------------------------- /memoryscope/core/config/arguments.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Literal, Dict 3 | 4 | 5 | @dataclass 6 | class Arguments(object): 7 | language: Literal["cn", "en"] = field(default="cn", metadata={"help": "support en & cn now"}) 8 | 9 | thread_pool_max_workers: int = field(default=5, metadata={"help": "thread pool max workers"}) 10 | 11 | memory_chat_class: str = field(default="cli_memory_chat", metadata={ 12 | "help": "cli_memory_chat(Command-line interaction), api_memory_chat(API interface interaction), etc."}) 13 | 14 | chat_stream: bool | None = field(default=None, metadata={ 15 | "help": "In the case of cli_memory_chat, stream mode is recommended. For api_memory_chat mode, " 16 | "please use non-stream. If set to None, the value will be automatically determined."}) 17 | 18 | human_name: str = field(default="user", metadata={"help": "Human user's name"}) 19 | 20 | assistant_name: str = field(default="AI", metadata={"help": "assistant' name"}) 21 | 22 | consolidate_memory_interval_time: int | None = field(default=1, metadata={ 23 | "help": "Memory backend service: If you feel that the token consumption is relatively high, " 24 | "please increase the time interval. 
When set to None, the value will not be updated."}) 25 | 26 | reflect_and_reconsolidate_interval_time: int | None = field(default=15, metadata={ 27 | "help": "Memory backend service: If you feel that the token consumption is relatively high, " 28 | "please increase the time interval. When set to None, the value will not be updated."}) 29 | 30 | worker_params: Dict[str, dict] = field(default_factory=lambda: {}, metadata={ 31 | "help": "dict format: worker_name -> param_key -> param_value"}) 32 | 33 | generation_backend: str = field(default="dashscope_generation", metadata={ 34 | "help": "global generation backend: openai_generation, dashscope_generation, etc."}) 35 | 36 | generation_model: str = field(default="qwen-max", metadata={ 37 | "help": "global generation model: gpt-4o, gpt-4o-mini, gpt-4-turbo, qwen-max, etc."}) 38 | 39 | generation_params: dict = field(default_factory=lambda: {}, metadata={ 40 | "help": "global generation params: max_tokens, top_p, temperature, etc."}) 41 | 42 | embedding_backend: str = field(default="dashscope_generation", metadata={ 43 | "help": "global embedding backend: openai_embedding, dashscope_embedding, etc."}) 44 | 45 | embedding_model: str = field(default="text-embedding-v2", metadata={ 46 | "help": "global embedding model: text-embedding-3-large, text-embedding-3-small, text-embedding-ada-002, " 47 | "text-embedding-v2, etc."}) 48 | 49 | embedding_params: dict = field(default_factory=lambda: {}) 50 | 51 | rank_backend: str = field(default="dashscope_rank", metadata={"help": "global rank backend: dashscope_rank, etc."}) 52 | 53 | rank_model: str = field(default="gte-rerank", metadata={"help": "global rank model: gte-rerank, etc."}) 54 | 55 | rank_params: dict = field(default_factory=lambda: {}) 56 | 57 | es_index_name: str = field(default="memory_index") 58 | 59 | es_url: str = field(default="http://localhost:9200") 60 | 61 | retrieve_mode: str = field(default="dense", metadata={ 62 | "help": "retrieve_mode: dense, sparse(not 
implemented), hybrid(not implemented)"}) 63 | 64 | enable_ranker: bool = field(default=False, metadata={ 65 | "help": "If a semantic ranking model is not available, MemoryScope will use cosine similarity scoring as a " 66 | "substitute. However, the ranking effectiveness will be somewhat compromised.", 67 | "map_yaml": "global->enable_ranker"}) 68 | 69 | enable_today_contra_repeat: bool = field(default=True, metadata={ 70 | "help": "Whether enable conflict resolution and deduplication for the day? " 71 | "Note that enabling this will increase token consumption.", 72 | "map_yaml": "global->enable_today_contra_repeat"}) 73 | 74 | enable_long_contra_repeat: bool = field(default=False, metadata={ 75 | "help": "Whether to enable long-term conflict resolution and deduplication. " 76 | "Note that enabling this will increase token consumption.", 77 | "map_yaml": "global->enable_long_contra_repeat"}) 78 | 79 | output_memory_max_count: int = field(default=20, metadata={ 80 | "help": "The maximum number of memories retrieved during memory recall.", 81 | "map_yaml": "global->output_memory_max_count"}) 82 | -------------------------------------------------------------------------------- /memoryscope/core/memoryscope.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor 2 | from datetime import datetime 3 | 4 | from memoryscope.core.chat.base_memory_chat import BaseMemoryChat 5 | from memoryscope.core.config.config_manager import ConfigManager 6 | from memoryscope.core.memoryscope_context import MemoryscopeContext 7 | from memoryscope.core.service.base_memory_service import BaseMemoryService 8 | from memoryscope.core.utils.tool_functions import init_instance_by_config 9 | from memoryscope.enumeration.language_enum import LanguageEnum 10 | from memoryscope.enumeration.model_enum import ModelEnum 11 | 12 | 13 | class MemoryScope(ConfigManager): 14 | 15 | def __init__(self, **kwargs): 16 | 
        self._context: MemoryscopeContext = MemoryscopeContext()
        # Timestamp doubles as a per-run identifier, consumed by logging/monitoring helpers.
        self._context.memory_scope_uuid = datetime.now().strftime(r"%Y%m%d_%H%M%S")
        super().__init__(**kwargs)
        self._init_context_by_config()

    def _init_context_by_config(self):
        """Populate the shared MemoryscopeContext from the parsed config.

        Builds, in order: global settings, memory chats (optional), memory
        services (required), models (required), the memory store (required),
        the monitor (optional), and finally the raw worker configs.
        """
        # set global config
        global_conf = self.config["global"]
        self._context.language = LanguageEnum(global_conf["language"])
        self._context.thread_pool = ThreadPoolExecutor(max_workers=global_conf["thread_pool_max_workers"])
        self._context.meta_data.update({
            "enable_ranker": global_conf["enable_ranker"],
            "enable_today_contra_repeat": global_conf["enable_today_contra_repeat"],
            "enable_long_contra_repeat": global_conf["enable_long_contra_repeat"],
            "output_memory_max_count": global_conf["output_memory_max_count"],
        })

        if not global_conf["enable_ranker"]:
            self.logger.warning("If a semantic ranking model is not available, MemoryScope will use cosine similarity "
                                "scoring as a substitute. However, the ranking effectiveness will be somewhat "
                                "compromised.")

        # init memory_chat (optional section: may be empty/absent)
        memory_chat_conf_dict = self.config["memory_chat"]
        if memory_chat_conf_dict:
            for name, conf in memory_chat_conf_dict.items():
                self._context.memory_chat_dict[name] = init_instance_by_config(conf, name=name, context=self._context)

        # set memory_service (required section)
        memory_service_conf_dict = self.config["memory_service"]
        assert memory_service_conf_dict
        for name, conf in memory_service_conf_dict.items():
            self._context.memory_service_dict[name] = init_instance_by_config(conf, name=name, context=self._context)

        # init model (required section; must run before the memory store, which looks models up by name)
        model_conf_dict = self.config["model"]
        assert model_conf_dict
        for name, conf in model_conf_dict.items():
            self._context.model_dict[name] = init_instance_by_config(conf, name=name)

        # init memory_store (required): resolves its embedding model by name from model_dict
        memory_store_conf = self.config["memory_store"]
        assert memory_store_conf
        emb_model_name: str = memory_store_conf[ModelEnum.EMBEDDING_MODEL.value]
        embedding_model = self._context.model_dict[emb_model_name]
        self._context.memory_store = init_instance_by_config(memory_store_conf, embedding_model=embedding_model)

        # init monitor (optional)
        monitor_conf = self.config["monitor"]
        if monitor_conf:
            self._context.monitor = init_instance_by_config(monitor_conf)

        # set worker config (raw configs only; workers are built later by the workflows)
        self._context.worker_conf_dict = self.config["worker"]

    def close(self):
        """Release all resources: services, thread pool, store, monitor, logger — in that order."""
        # wait service to stop
        for _, service in self._context.memory_service_dict.items():
            service.stop_backend_service(wait_service=True)

        self._context.thread_pool.shutdown()

        self._context.memory_store.close()

        if self._context.monitor:
            self._context.monitor.close()

        self.logger.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Log (but do not suppress) any in-context exception, then release resources.
        if exc_type is not None:
            self.logger.warning(f"An exception occurred: {exc_type.__name__}: {exc_val}\n{exc_tb}")
        self.close()

    @property
    def context(self):
        # Shared context holding every initialized component.
        return self._context

    @property
    def memory_chat_dict(self):
        return self._context.memory_chat_dict

    @property
    def memory_service_dict(self):
        return self._context.memory_service_dict

    @property
    def default_memory_chat(self) -> BaseMemoryChat:
        # First configured chat wins (dicts preserve insertion order).
        return list(self.memory_chat_dict.values())[0]

    @property
    def default_memory_service(self) -> BaseMemoryService:
        # First configured service wins (dicts preserve insertion order).
        return list(self.memory_service_dict.values())[0]

    @classmethod
    def cli_memory_chat(cls, **kwargs):
        """Convenience entry point: build a MemoryScope and run its default chat (CLI), closing on exit."""
        with cls(**kwargs) as ms:
            memory_chat = ms.default_memory_chat
            memory_chat.run()
def get_memoryscope_uuid():
    """
    Return the uuid of the current MemoryScope run.

    Returns:
        str: The uuid recorded when a MemoryScope instance was created, or the
        sentinel string "memory_scope_uuid_not_registered" when none exists
        yet. The sentinel is kept (rather than raising) so callers that only
        need a log tag keep working before initialization.
    """
    ms_context = MemoryscopeContext()
    if ms_context.memory_scope_uuid:
        return ms_context.memory_scope_uuid
    else:
        return "memory_scope_uuid_not_registered"


def get_memoryscope_context():
    """
    Return the global (singleton) MemoryscopeContext.

    Bug fix: this function used to return the *string*
    "memory_scope_uuid_not_registered" when the context was not yet
    initialized, which breaks every caller that expects a MemoryscopeContext
    (attribute access on a str raises AttributeError). The singleton instance
    is always a valid MemoryscopeContext, so return it unconditionally;
    callers can inspect `memory_scope_uuid` to detect an uninitialized run.

    Returns:
        MemoryscopeContext: The singleton context instance.
    """
    return MemoryscopeContext()
    """
    m_type: ModelEnum = ModelEnum.GENERATION_MODEL

    # `object` is registered as a stand-in: the dummy model never builds a real client.
    MODEL_REGISTRY.register("dummy_generation", object)

    def before_call(self, model_response: ModelResponse, **kwargs):
        """
        Prepares the input data before making a call to the language model.
        It accepts either a 'prompt' directly or a list of 'messages'.
        If 'prompt' is provided, it sets the data accordingly.
        If 'messages' are provided, it constructs a list of ChatMessage objects from the list.
        Raises an error if neither 'prompt' nor 'messages' are supplied.

        Args:
            model_response (ModelResponse): Response object whose meta_data["data"]
                receives the prepared payload.
            **kwargs: Arbitrary keyword arguments including 'prompt' and 'messages'.

        Raises:
            RuntimeError: When both 'prompt' and 'messages' inputs are not provided.
        """
        prompt: str = kwargs.pop("prompt", "")
        messages: List[Message] | List[dict] = kwargs.pop("messages", [])

        if prompt:
            data = {"prompt": prompt}
        elif messages:
            # Accept both plain dicts and Message objects; normalize to ChatMessage.
            if isinstance(messages[0], dict):
                data = {"messages": [ChatMessage(role=msg["role"], content=msg["content"]) for msg in messages]}
            else:
                data = {"messages": [ChatMessage(role=msg.role, content=msg.content) for msg in messages]}
        else:
            raise RuntimeError("prompt and messages are both empty!")
        # Any remaining kwargs ride along with the payload.
        data.update(**kwargs)
        model_response.meta_data["data"] = data

    def after_call(self,
                   model_response: ModelResponse,
                   stream: bool = False,
                   **kwargs) -> ModelResponse | ModelResponseGen:
        """
        Processes the model's response post-call, optionally streaming the output or returning it as a whole.

        This method modifies the input `model_response` by resetting its message content and, based on the `stream`
        parameter, either yields the response in a generated stream or returns the complete response directly.

        Args:
            model_response (ModelResponse): The initial response object to be processed.
            stream (bool, optional): Flag indicating whether to stream the response. Defaults to False.
            **kwargs: Additional keyword arguments (not used in this implementation).

        Returns:
            ModelResponse | ModelResponseGen: If `stream` is True, a generator yielding updated `ModelResponse` objects;
            otherwise, a modified `ModelResponse` object with the complete content.
        """
        model_response.message = Message(role=MessageRoleEnum.ASSISTANT, content="")

        # Fixed dummy payload: ten dashes, emitted one at a time in stream mode.
        call_result = ["-" for _ in range(10)]
        if stream:
            # The generator closes over model_response and mutates it in place:
            # each yielded object is the SAME response with content appended.
            def gen() -> ModelResponseGen:
                for delta in call_result:
                    model_response.message.content += delta
                    model_response.delta = delta
                    time.sleep(0.1)  # simulate per-chunk network latency
                    yield model_response

            return gen()
        else:
            model_response.message.content = "".join(call_result)
            return model_response

    def _call(self, model_response: ModelResponse, stream: bool = False, **kwargs):
        """No-op: the dummy output is produced entirely in after_call."""
        return model_response

    async def _async_call(self, model_response: ModelResponse, **kwargs):
        """Async no-op counterpart of _call."""
        return model_response
    """
    m_type: ModelEnum = ModelEnum.EMBEDDING_MODEL

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.logger = Logger.get_logger("llama_index_embedding_model")

    @classmethod
    def register_model(cls, model_name: str, model_class: type):
        """
        Registers a new embedding model class with the model registry.

        Args:
            model_name (str): The name to register the model under.
            model_class (type): The class of the model to register.
        """
        MODEL_REGISTRY.register(model_name, model_class)

    # Built-in backends registered at class-definition time.
    MODEL_REGISTRY.register("dashscope_embedding", DashScopeEmbedding)
    MODEL_REGISTRY.register("openai_embedding", OpenAIEmbedding)

    def before_call(self, model_response: ModelResponse, **kwargs):
        """Normalize the 'text' kwarg (str or list[str]) into the batch payload."""
        text: str | List[str] = kwargs.pop("text", "")
        if isinstance(text, str):
            text = [text]
        model_response.meta_data["data"] = dict(texts=text)
        # NOTE(review): logs only the first item; an explicitly-passed empty list
        # would raise IndexError here — confirm callers never pass [].
        self.logger.info("Embedding Model:\n" + text[0])

    def after_call(self, model_response: ModelResponse, **kwargs) -> ModelResponse:
        """
        Move raw embeddings onto the response, flagging failure on empty results.

        A single-item batch is unwrapped so embedding_results is a bare
        list[float] instead of a one-element list of lists.
        """
        embeddings = model_response.raw
        if not embeddings:
            model_response.details = "empty embeddings"
            model_response.status = False
            return model_response
        if len(embeddings) == 1:
            # return list[float]
            embeddings = embeddings[0]

        model_response.embedding_results = embeddings
        return model_response

    def _call(self, model_response: ModelResponse, **kwargs):
        """
        Synchronously embed the prepared batch.

        Calls the wrapped model's `get_text_embedding_batch` with the payload
        stored in `model_response.meta_data["data"]` and writes the raw result
        to `model_response.raw` (after_call converts it to embedding_results).
        Returns nothing.

        Args:
            model_response (ModelResponse): Carries the prepared payload and
                receives the raw embeddings.
            **kwargs: Unused here.
        """
        model_response.raw = self.model.get_text_embedding_batch(**model_response.meta_data["data"])

    async def _async_call(self, model_response: ModelResponse, **kwargs):
        """
        Asynchronously embed the prepared batch.

        Mirrors `_call`, using the model's `aget_text_embedding_batch`; the raw
        result is written to `model_response.raw`.

        Args:
            model_response (ModelResponse): Carries the prepared payload and
                receives the raw embeddings.
            **kwargs: Unused here.
        """
        model_response.raw = await self.model.aget_text_embedding_batch(**model_response.meta_data["data"])
19 | """ 20 | m_type: ModelEnum = ModelEnum.RANK_MODEL 21 | 22 | MODEL_REGISTRY.register("dashscope_rank", DashScopeRerank) 23 | 24 | def __init__(self, *args, **kwargs): 25 | super().__init__(*args, **kwargs) 26 | self.logger = Logger.get_logger("llama_index_rank_model") 27 | 28 | def before_call(self, model_response: ModelResponse, **kwargs): 29 | """ 30 | Prepares necessary data before the ranking call by extracting the query and documents, 31 | ensuring they are valid, and initializing nodes with dummy scores. 32 | 33 | Args: 34 | model_response: model response 35 | **kwargs: Keyword arguments containing 'query' and 'documents'. 36 | """ 37 | query: str = kwargs.pop("query", "") 38 | documents: List[str] = kwargs.pop("documents", []) 39 | if isinstance(documents, str): 40 | documents = [documents] 41 | assert query and documents and all(documents), \ 42 | f"query or documents is empty! query={query}, documents={len(documents)}" 43 | assert len(documents) < 500, \ 44 | "The input documents of Dashscope rerank model should not larger than 500!" 45 | # Using -1.0 as dummy scores 46 | nodes = [NodeWithScore(node=Node(text=doc), score=-1.0) for doc in documents] 47 | 48 | model_response.meta_data.update({ 49 | "data": {"nodes": nodes, "query_str": query, "top_n": len(documents)}, 50 | "documents_map": {doc: idx for idx, doc in enumerate(documents)}, 51 | }) 52 | 53 | def after_call(self, model_response: ModelResponse, **kwargs) -> ModelResponse: 54 | """ 55 | Processes the model response post-ranking, assigning calculated rank scores to each document 56 | based on their index in the original document list. 57 | 58 | Args: 59 | model_response (ModelResponse): The initial response from the ranking model. 60 | **kwargs: Additional keyword arguments (unused). 61 | 62 | Returns: 63 | ModelResponse: Updated response with rank scores assigned to documents. 
64 | """ 65 | if not model_response.rank_scores: 66 | model_response.rank_scores = {} 67 | 68 | documents_map = model_response.meta_data["documents_map"] 69 | for node in model_response.raw: 70 | text = node.node.text 71 | idx = documents_map[text] 72 | model_response.rank_scores[idx] = node.score 73 | 74 | self.logger.info(self.logger.format_rank_message(model_response)) 75 | return model_response 76 | 77 | def _call(self, model_response: ModelResponse, **kwargs): 78 | """ 79 | Executes the ranking process by passing prepared data to the model's postprocessing method. 80 | 81 | Args: 82 | **kwargs: Keyword arguments (unused). 83 | 84 | Returns: 85 | ModelResponse: A response object encapsulating the ranked nodes. 86 | """ 87 | self.model.top_n = model_response.meta_data["data"]["top_n"] 88 | model_response.meta_data["data"].pop("top_n") 89 | model_response.raw = self.model.postprocess_nodes(**model_response.meta_data["data"]) 90 | 91 | async def _async_call(self, **kwargs) -> ModelResponse: 92 | """ 93 | Asynchronous wrapper for the `_call` method, maintaining the same functionality. 94 | 95 | Args: 96 | **kwargs: Keyword arguments (unused). 97 | 98 | Returns: 99 | ModelResponse: A response object encapsulating the ranked nodes. 
100 | """ 101 | raise NotImplementedError 102 | -------------------------------------------------------------------------------- /memoryscope/core/operation/__init__.py: -------------------------------------------------------------------------------- 1 | from .backend_operation import BackendOperation 2 | from .base_operation import BaseOperation 3 | from .base_workflow import BaseWorkflow 4 | from .consolidate_memory_op import ConsolidateMemoryOp 5 | from .frontend_operation import FrontendOperation 6 | 7 | __all__ = [ 8 | "BackendOperation", 9 | "BaseOperation", 10 | "BaseWorkflow", 11 | "ConsolidateMemoryOp", 12 | "FrontendOperation" 13 | ] 14 | -------------------------------------------------------------------------------- /memoryscope/core/operation/backend_operation.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | 4 | from memoryscope.core.operation.base_operation import OPERATION_TYPE 5 | from memoryscope.core.operation.frontend_operation import FrontendOperation 6 | 7 | 8 | class BackendOperation(FrontendOperation): 9 | """ 10 | BaseBackendOperation serves as an abstract base class for defining backend operations. 11 | It manages operation status, loop control, and integrates with a global context for thread management. 12 | """ 13 | operation_type: OPERATION_TYPE = "backend" 14 | 15 | def __init__(self, interval_time: int, **kwargs): 16 | super().__init__(**kwargs) 17 | 18 | self._interval_time: int = interval_time 19 | 20 | self._operation_status_run: bool = False 21 | self._loop_switch: bool = False 22 | self._backend_task = None 23 | 24 | def init_workflow(self, **kwargs): 25 | """ 26 | Initializes the workflow by setting up workers with provided keyword arguments. 27 | 28 | Args: 29 | **kwargs: Arbitrary keyword arguments to be passed during worker initialization. 
        """
        self.init_workers(is_backend=True, **kwargs)

    def _loop_operation(self, **kwargs):
        """
        Background loop: sleep `_interval_time` seconds (re-checking the stop
        switch once per second so shutdown is prompt), then run the operation
        once per target, guarded against overlapping runs.
        """
        while self._loop_switch:
            # Interruptible sleep: bail out early if the switch flips mid-interval.
            for _ in range(self._interval_time):
                if self._loop_switch:
                    time.sleep(1)
                else:
                    break

            if self._loop_switch:
                # Skip this tick if the previous run is still in flight.
                if self._operation_status_run:
                    continue

                self._operation_status_run = True

                if len(self.target_names) > 1:
                    self.logger.warning("current version is not stable under target_names.size > 1!")

                for target_name in self.target_names:
                    try:
                        self.run_operation(target_name=target_name, **kwargs)
                    except Exception as e:
                        # Keep the loop alive: log the failure and continue with the next target.
                        self.logger.exception(f"op_name={self.name} target_name={target_name} encounter exception. "
                                              f"args={e.args}")

                self._operation_status_run = False

    def start_operation_backend(self, **kwargs):
        """
        Initiates the background operation loop if it's not already running.
        Sets the _loop_switch to True and submits the _loop_operation to a thread from the global thread pool.
        """
        if not self._loop_switch:
            self._loop_switch = True
            self._backend_task = self.thread_pool.submit(self._loop_operation, **kwargs)
            self.logger.info(f"start operation={self.name}...")

    def stop_operation_backend(self, wait_operation: bool = False):
        """
        Stops the background loop by clearing _loop_switch.

        Args:
            wait_operation (bool): When True, block until the background task
                finishes; otherwise just send the stop signal and return.
        """
        self._loop_switch = False
        if self._backend_task:
            if wait_operation:
                self._backend_task.result()
                self.logger.info(f"stop operation={self.name}...")
            else:
                self.logger.info(f"send stop signal to operation={self.name}...")
class ConsolidateMemoryOp(BackendOperation):
    """
    Backend operation that folds not-yet-memorized chat messages into
    long-term memory by running its workflow, then marks them as memorized.
    """

    def __init__(self,
                 message_lock,
                 contextual_msg_min_count: int = 0,
                 **kwargs):
        super().__init__(**kwargs)
        # Lock shared with the chat frontend; guards the `memorized` flags.
        self.message_lock = message_lock
        # Minimum number of pending message groups required before consolidating.
        self.contextual_msg_min_count: int = contextual_msg_min_count

    def run_operation(self, target_name: str, **kwargs):
        """
        Consolidate pending chat messages for one target.

        Collects message groups that are non-empty, not yet memorized, and
        mention `target_name`; skips the run entirely when there are none or
        fewer than `contextual_msg_min_count`. Otherwise executes the workflow
        over them, flags every processed message as memorized (under the
        shared lock), and returns the workflow's result.

        Args:
            target_name (str): target_name(human name).
            **kwargs: Keyword arguments for chat operation configuration.

        Returns:
            Any: The result obtained from running the workflow.
        """
        # Keep only non-empty, unmemorized groups that involve the target.
        pending: List[List[Message]] = [
            group for group in self.chat_messages
            if group and not group[0].memorized and any(msg.role_name == target_name for msg in group)
        ]

        if not pending:
            self.logger.info(f"empty not_memorized chat_messages for target_name={target_name}.")
            return

        if len(pending) < self.contextual_msg_min_count:
            self.logger.info(f"not_memorized_size={len(pending)} < {self.contextual_msg_min_count}, skip.")
            return

        # prepare kwargs
        workflow_kwargs = {
            CHAT_MESSAGES: pending,
            CHAT_KWARGS: {**kwargs, **self.kwargs},
            TARGET_NAME: target_name,
            USER_NAME: self.user_name,
        }

        # Execute the workflow with the prepared context
        self.run_workflow(**workflow_kwargs)

        # Retrieve the result from the context after workflow execution
        result = self.workflow_context.get(RESULT)

        # set message memorized
        with self.message_lock:
            for group in pending:
                for msg in group:
                    msg.memorized = True

        return result
class FrontendOperation(BaseWorkflow, BaseOperation):
    """
    Synchronous, workflow-backed operation executed in the frontend path:
    it feeds the current chat messages through its workers and returns the
    workflow's result.
    """

    operation_type: OPERATION_TYPE = "frontend"

    def __init__(self,
                 name: str,
                 user_name: str,
                 target_names: List[str],
                 chat_messages: List[List[Message]],
                 description: str,
                 **kwargs):
        # Initialize the workflow side via the MRO first, then the operation
        # side explicitly — both base classes need their own setup.
        super().__init__(name=name, **kwargs)
        BaseOperation.__init__(self,
                               name=name,
                               user_name=user_name,
                               target_names=target_names,
                               chat_messages=chat_messages,
                               description=description)

    def init_workflow(self, **kwargs):
        """
        Initialize the workflow by setting up its workers.

        Args:
            **kwargs: Arbitrary keyword arguments forwarded to worker initialization.
        """
        self.init_workers(**kwargs)

    def run_operation(self, target_name: str, **kwargs):
        """
        Run the workflow over the current chat messages and return its result.

        Args:
            target_name (str): target_name(human name).
            **kwargs: Additional keyword arguments merged into the chat kwargs.

        Returns:
            Any: The value stored under RESULT in the workflow context.
        """
        # Assemble the workflow context.
        workflow_kwargs = {
            CHAT_MESSAGES: self.chat_messages,
            CHAT_KWARGS: {**kwargs, **self.kwargs},
            TARGET_NAME: target_name,
            USER_NAME: self.user_name,
        }

        # Execute the workflow, then read back its result.
        self.run_workflow(**workflow_kwargs)
        return self.workflow_context.get(RESULT)
class BaseMemoryService(metaclass=ABCMeta):
    """
    Abstract base for memory services. Holds operation configurations, the
    runtime context, chat-message history and naming defaults, and declares
    the interface subclasses implement to register and run memory operations.
    """

    def __init__(self,
                 memory_operations: Dict[str, dict],
                 context: MemoryscopeContext,
                 assistant_name: str = None,
                 human_name: str = None,
                 **kwargs):
        """
        Args:
            memory_operations (Dict[str, dict]): A dictionary defining available memory operations.
            context (MemoryscopeContext): runtime context.
            assistant_name (str): assistant name; defaults to "AI" when empty.
            human_name (str): human name; defaults to the language-specific default when empty.
            **kwargs: Additional parameters to customize service behavior.
        """
        self._operations_conf: Dict[str, dict] = memory_operations
        self._context: MemoryscopeContext = context
        # Empty or None names fall back to sensible defaults.
        self._human_name: str = human_name or DEFAULT_HUMAN_NAME[self._context.language]
        self._assistant_name: str = assistant_name or "AI"
        self._kwargs = kwargs

        # Registered operations, chat history (grouped per exchange), and known role names.
        self._operation_dict: Dict[str, BaseOperation] = {}
        self._chat_messages: List[List[Message]] = []
        self._role_names: List[str] = []

        self.logger = Logger.get_logger()

    @property
    def human_name(self) -> str:
        return self._human_name

    @property
    def assistant_name(self) -> str:
        return self._assistant_name

    def get_chat_messages_scatter(self, recent_n_pair: int) -> List[Message]:
        """Flatten the most recent `recent_n_pair` message groups into one list."""
        return [msg
                for pair in self._chat_messages[-recent_n_pair:]
                for msg in pair]

    @property
    def op_description_dict(self) -> Dict[str, str]:
        """
        Map each registered operation's identifier to its description.

        Returns:
            Dict[str, str]: operation name -> operation description.
        """
        return {key: op.description for key, op in self._operation_dict.items()}

    @abstractmethod
    def add_messages_pair(self, messages: List[Message]):
        raise NotImplementedError

    @abstractmethod
    def register_operation(self, name: str, operation_config: dict, **kwargs):
        raise NotImplementedError

    @abstractmethod
    def init_service(self, **kwargs):
        raise NotImplementedError

    def start_backend_service(self, name: str = None, **kwargs):
        # Optional hook; subclasses with backend operations override this.
        pass

    def stop_backend_service(self, wait_service: bool = False):
        # Optional hook; subclasses with backend operations override this.
        pass

    @abstractmethod
    def run_operation(self, name: str, role_name: str = "", **kwargs):
        raise NotImplementedError

    def __getattr__(self, name: str):
        # Unknown attribute access resolves to a registered operation,
        # returned as a lazily-invoked callable.
        assert name in self._operation_dict, f"operation={name} is not registered!"
        return lambda **kwargs: self.run_operation(name=name, **kwargs)
class BaseMemoryStore(metaclass=ABCMeta):
    """
    Abstract interface for a memory-node store: retrieval (sync and async),
    batch insert/update/delete, plus flush and close lifecycle hooks.
    """

    @abstractmethod
    def retrieve_memories(self,
                          query: str = "",
                          top_k: int = 3,
                          filter_dict: Dict[str, List[str]] = None) -> List[MemoryNode]:
        """
        Return at most `top_k` MemoryNode objects relevant to `query`, after
        applying the constraints in `filter_dict`.

        Args:
            query (str): The query string used to find relevant memories.
            top_k (int): Upper bound on the number of nodes returned.
            filter_dict (Dict[str, List[str]]): Field name -> accepted values,
                used to restrict candidate nodes.

        Returns:
            List[MemoryNode]: Up to `top_k` nodes ordered by relevance.
        """
        pass

    @abstractmethod
    async def a_retrieve_memories(self,
                                  query: str = "",
                                  top_k: int = 3,
                                  filter_dict: Dict[str, List[str]] = None) -> List[MemoryNode]:
        """
        Asynchronous counterpart of `retrieve_memories`.

        Args:
            query (str): The text to search for in memory nodes.
            top_k (int): Maximum number of nodes to return.
            filter_dict (Dict[str, List[str]]): Filters to apply on memory nodes.

        Returns:
            List[MemoryNode]: Up to `top_k` matching nodes.
        """
        pass

    @abstractmethod
    def batch_insert(self, nodes: List[MemoryNode]):
        pass

    @abstractmethod
    def batch_update(self, nodes: List[MemoryNode], update_embedding: bool = True):
        pass

    @abstractmethod
    def batch_delete(self, nodes: List[MemoryNode]):
        pass

    def flush(self):
        """
        Persist any pending writes. Default is a no-op; stores with buffered
        state override this to provide their flushing mechanism.
        """
        pass

    @abstractmethod
    def close(self):
        """
        Release the store's resources. Subclasses must define how the
        underlying storage is properly shut down.
        """
        pass
class DummyMemoryStore(BaseMemoryStore):
    """
    No-op implementation of the memory-store interface. Keeps an embedding
    model for potential semantic retrieval, but performs no actual storage
    operations — useful as a placeholder or for testing.
    """

    def __init__(self, embedding_model: BaseModel, **kwargs):
        """
        Args:
            embedding_model (BaseModel): Model used to embed data for potential
                similarity-based retrieval.
            **kwargs: Additional keyword arguments kept for configuration or
                future expansion.
        """
        self.embedding_model: BaseModel = embedding_model
        self.kwargs = kwargs

    def retrieve_memories(self,
                          query: str = "",
                          top_k: int = 3,
                          filter_dict: Dict[str, List[str]] = None) -> List[MemoryNode]:
        # No-op: nothing is stored, nothing to retrieve.
        pass

    async def a_retrieve_memories(self,
                                  query: str = "",
                                  top_k: int = 3,
                                  filter_dict: Dict[str, List[str]] = None) -> List[MemoryNode]:
        # No-op async retrieval.
        pass

    def batch_insert(self, nodes: List[MemoryNode]):
        pass

    def batch_update(self, nodes: List[MemoryNode], update_embedding: bool = True):
        pass

    def batch_delete(self, nodes: List[MemoryNode]):
        pass

    def close(self):
        pass
29 | """ 30 | pass 31 | -------------------------------------------------------------------------------- /memoryscope/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .datetime_handler import DatetimeHandler 2 | from .logger import Logger 3 | from .prompt_handler import PromptHandler 4 | from .registry import Registry 5 | from .response_text_parser import ResponseTextParser 6 | from .timer import Timer 7 | from .tool_functions import ( 8 | underscore_to_camelcase, 9 | camelcase_to_underscore, 10 | init_instance_by_config, 11 | prompt_to_msg, 12 | char_logo, 13 | md5_hash, 14 | contains_keyword, 15 | cosine_similarity 16 | ) 17 | 18 | __all__ = [ 19 | "DatetimeHandler", 20 | "Logger", 21 | "PromptHandler", 22 | "Registry", 23 | "ResponseTextParser", 24 | "Timer", 25 | "underscore_to_camelcase", 26 | "camelcase_to_underscore", 27 | "init_instance_by_config", 28 | "prompt_to_msg", 29 | "char_logo", 30 | "md5_hash", 31 | "contains_keyword", 32 | "cosine_similarity" 33 | ] 34 | -------------------------------------------------------------------------------- /memoryscope/core/utils/registry.py: -------------------------------------------------------------------------------- 1 | """ 2 | Registry for different modules. 3 | Init class according to the class name and verify the input parameters. 4 | """ 5 | from typing import Dict, Any, List 6 | 7 | 8 | class Registry(object): 9 | """ 10 | A registry to manage and instantiate various modules by their names, ensuring the uniqueness of registered entries. 11 | It supports both individual and bulk registration of modules, as well as retrieval of modules by name. 12 | 13 | Attributes: 14 | name (str): The name of the registry. 15 | module_dict (Dict[str, Any]): A dictionary holding registered modules where keys are module names and values are 16 | the modules themselves. 
17 | """ 18 | 19 | def __init__(self, name: str): 20 | """ 21 | Initializes the Registry with a given name. 22 | 23 | Args: 24 | name (str): The name to identify this registry. 25 | """ 26 | self.name: str = name 27 | self.module_dict: Dict[str, Any] = {} 28 | 29 | def register(self, module_name: str = None, module: Any = None): 30 | """ 31 | Registers module in the registry in a single call. 32 | 33 | Args: 34 | module_name (str): The name of module to be registered. 35 | module (List[Any] | Dict[str, Any]): The module to be registered. 36 | 37 | Raises: 38 | NotImplementedError: If the input is already registered. 39 | """ 40 | assert module is not None 41 | if module_name is None: 42 | module_name = module.__name__ 43 | 44 | if module_name in self.module_dict: 45 | raise KeyError(f'{module_name} is already registered in {self.name}') 46 | self.module_dict[module_name] = module 47 | 48 | def batch_register(self, modules: List[Any] | Dict[str, Any]): 49 | """ 50 | Registers multiple modules in the registry in a single call. Accepts either a list of modules or a dictionary 51 | mapping names to modules. 52 | 53 | Args: 54 | modules (List[Any] | Dict[str, Any]): A list of modules or a dictionary mapping module names to the modules. 55 | 56 | Raises: 57 | NotImplementedError: If the input is neither a list nor a dictionary. 58 | """ 59 | if isinstance(modules, list): 60 | module_name_dict = {m.__name__: m for m in modules} 61 | elif isinstance(modules, dict): 62 | module_name_dict = modules 63 | else: 64 | raise NotImplementedError("Input must be a list or a dictionary.") 65 | self.module_dict.update(module_name_dict) 66 | 67 | def __getitem__(self, module_name: str): 68 | """ 69 | Retrieves a registered module by its name using index notation. 70 | 71 | Args: 72 | module_name (str): The name of the module to retrieve. 73 | 74 | Returns: 75 | A registered module corresponding to the given name. 
76 | 77 | Raises: 78 | AssertionError: If the specified module is not found in the registry. 79 | """ 80 | assert module_name in self.module_dict, f"{module_name} not found in {self.name}" 81 | return self.module_dict[module_name] 82 | -------------------------------------------------------------------------------- /memoryscope/core/utils/response_text_parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import List 3 | 4 | from memoryscope.constants.language_constants import NONE_WORD 5 | from memoryscope.core.utils.logger import Logger 6 | from memoryscope.enumeration.language_enum import LanguageEnum 7 | 8 | 9 | class ResponseTextParser(object): 10 | """ 11 | The `ResponseTextParser` class is designed to parse and process response texts. It provides methods to extract 12 | patterns from the text and filter out unnecessary information, while also logging the processing steps and outcomes. 13 | """ 14 | 15 | PATTERN_V1 = re.compile(r"<(.*?)>") # Regular expression pattern to match content within angle brackets 16 | 17 | def __init__(self, response_text: str, language: LanguageEnum, logger_prefix: str = ""): 18 | # Strips leading and trailing whitespace from the response text 19 | self.response_text: str = response_text.strip() 20 | self.language: LanguageEnum = language 21 | 22 | # The prefix of log. Defaults to "". 23 | self.logger_prefix: str = logger_prefix 24 | 25 | # Initializes a logger instance for logging parsing activities 26 | self.logger: Logger = Logger.get_logger() 27 | 28 | def parse_v1(self) -> List[List[str]]: 29 | """ 30 | Extract specific patterns from the text which match content within angle brackets. 31 | 32 | Returns: 33 | Contents match the specific patterns. 
TIME_LOG_TYPE = Literal["end", "wrap", "none"]


class Timer(object):
    """
    Context manager that measures the wall-clock time of a code block and
    logs it, either at exit only ("end"), wrapped with begin/end markers
    ("wrap"), or not at all ("none"). Time is reported in ms or s.
    """

    def __init__(self,
                 name: str,
                 time_log_type: TIME_LOG_TYPE = "end",
                 use_ms: bool = True,
                 stack_level: int = 2,
                 float_precision: int = 4,
                 **kwargs):

        """
        Initialize the timer and acquire a logger.

        Args:
            name (str): The log name.
            time_log_type (str): The log type. Defaults to 'end'.
            use_ms (bool): Report in milliseconds when True. Defaults to True.
            stack_level (int): The stack level of log. Defaults to 2.
            float_precision (int): Precision used for float extras. Defaults to 4.
        """

        self.name: str = name
        self.time_log_type: TIME_LOG_TYPE = time_log_type
        self.use_ms: bool = use_ms
        self.stack_level: int = stack_level
        self.float_precision: int = float_precision
        # Extra key=value pairs appended to the exit log line.
        self.kwargs: dict = kwargs

        # Timing state: start, end, and elapsed cost.
        self.t_start = 0
        self.t_end = 0
        self.cost = 0

        self.logger = Logger.get_logger()

    def _set_cost(self):
        """Record the end time and recompute the elapsed cost."""
        self.t_end = time.time()
        elapsed = self.t_end - self.t_start
        self.cost = elapsed * 1000 if self.use_ms else elapsed

    @property
    def cost_str(self):
        """Elapsed time formatted as 'cost=<value><unit>'."""
        self._set_cost()
        unit = "ms" if self.use_ms else "s"
        return f"cost={self.cost:.4f}{unit}"

    def __enter__(self, *args, **kwargs):
        """Start timing; in 'wrap' mode also log a begin marker."""
        self.t_start = time.time()
        if self.time_log_type == "wrap":
            self.logger.info(f"----- {self.name}.begin -----")
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        """Stop timing and emit the formatted log line (unless type is 'none')."""
        if self.time_log_type == "none":
            return

        header = f"----- {self.name}.end -----" if self.time_log_type == "wrap" else self.name
        parts = [header, self.cost_str]

        # Append extra key=value pairs, formatting floats at the configured precision.
        for key, value in self.kwargs.items():
            if isinstance(value, float):
                parts.append(f"{key}={value:.{self.float_precision}f}")
            else:
                parts.append(f"{key}={value}")

        self.logger.info(" ".join(parts), stacklevel=self.stack_level)
class GetObservationWithTimeWorker(GetObservationWorker):
    """
    Variant of GetObservationWorker that only considers chat messages which
    mention time, and builds its extraction prompt with each message's
    formatted timestamp attached.
    """
    FILE_PATH: str = __file__
    OBS_STORE_KEY: str = NEW_OBS_WITH_TIME_NODES

    def filter_messages(self) -> List[Message]:
        """
        Keep only the chat messages whose content contains time-related words.

        Returns:
            List[Message]: Messages that mention time.
        """
        return [msg for msg in self.chat_messages_scatter
                if DatetimeHandler.has_time_word(query=msg.content, language=self.language)]

    def build_message(self, filter_messages: List[Message]) -> List[Message]:
        """
        Build the LLM prompt for timestamped-observation extraction.

        Each filtered message is rendered as "<index> <timestamp> <name>: <content>",
        then combined with the system prompt and few-shot examples into the
        final message list.

        Args:
            filter_messages (List[Message]): Messages already filtered for time words.

        Returns:
            List[Message]: The assembled prompt messages for the LLM call.
        """
        # Render each message with its formatted creation timestamp.
        user_query_list = []
        for idx, msg in enumerate(filter_messages, start=1):
            stamp = DatetimeHandler(dt=msg.time_created).string_format(
                string_format=self.prompt_handler.time_string_format, language=self.language)
            user_query_list.append(
                f"{idx} {stamp} {self.target_name}{self.get_language_value(COLON_WORD)}{msg.content}")

        # System prompt parameterized by the observation count and user name.
        system_prompt = self.prompt_handler.get_observation_with_time_system.format(num_obs=len(user_query_list),
                                                                                    user_name=self.target_name)

        # Few-shot examples for the prompt.
        few_shot = self.prompt_handler.get_observation_with_time_few_shot.format(user_name=self.target_name)

        # Concatenate all timestamped lines into the user-query section.
        user_query = self.prompt_handler.get_observation_with_time_user_query.format(
            user_query="\n".join(user_query_list),
            user_name=self.target_name)

        # Assemble and log the final prompt message.
        get_observation_message_wt = self.prompt_to_msg(system_prompt=system_prompt,
                                                        few_shot=few_shot,
                                                        user_query=user_query)
        self.logger.info(f"get_observation_message_wt={get_observation_message_wt}")
        return get_observation_message_wt
    def _parse_params(self, **kwargs):
        """
        Read this worker's tunable settings from keyword arguments.

        Args:
            **kwargs: May contain:
                preserved_scores (str): Comma-separated information scores whose
                    messages are kept. Defaults to "2,3".
                info_filter_msg_max_size (int): Length threshold above which a
                    message's content is truncated. Defaults to 200.
                generation_model_kwargs (dict): Extra kwargs forwarded to the
                    generation model call. Defaults to {}.
        """
        self.preserved_scores: str = kwargs.get("preserved_scores", "2,3")
        self.info_filter_msg_max_size: int = kwargs.get("info_filter_msg_max_size", 200)
        self.generation_model_kwargs: dict = kwargs.get("generation_model_kwargs", {})
35 | """ 36 | # filter user msg 37 | info_messages: List[Message] = [] 38 | for msg in self.chat_messages_scatter: 39 | if msg.memorized: 40 | continue 41 | 42 | # TODO: add memory for all messages 43 | if msg.role_name != self.target_name: 44 | continue 45 | 46 | if len(msg.content) >= self.info_filter_msg_max_size: 47 | half_size = int(self.info_filter_msg_max_size * 0.5 + 0.5) 48 | msg.content = msg.content[: half_size] + msg.content[-half_size:] 49 | info_messages.append(msg) 50 | 51 | if not info_messages: 52 | self.logger.warning("info_messages is empty!") 53 | self.continue_run = False 54 | return 55 | 56 | # generate prompt 57 | user_query_list = [] 58 | for i, msg in enumerate(info_messages): 59 | user_query_list.append(f"{i + 1} {self.target_name}{self.get_language_value(COLON_WORD)} {msg.content}") 60 | system_prompt = self.prompt_handler.info_filter_system.format(batch_size=len(info_messages), 61 | user_name=self.target_name) 62 | few_shot = self.prompt_handler.info_filter_few_shot.format(user_name=self.target_name) 63 | user_query = self.prompt_handler.info_filter_user_query.format(user_query="\n".join(user_query_list)) 64 | info_filter_message = self.prompt_to_msg(system_prompt=system_prompt, few_shot=few_shot, user_query=user_query) 65 | self.logger.info(f"info_filter_message={info_filter_message}") 66 | 67 | # call llm 68 | response = self.generation_model.call(messages=info_filter_message, **self.generation_model_kwargs) 69 | 70 | # return if empty 71 | if not response.status or not response.message.content: 72 | self.continue_run = False 73 | return 74 | response_text = response.message.content 75 | 76 | # parse text 77 | info_score_list = ResponseTextParser(response_text, self.language, self.__class__.__name__).parse_v1() 78 | if len(info_score_list) != len(info_messages): 79 | self.logger.warning(f"score_size != messages_size, {len(info_score_list)} vs {len(info_messages)}") 80 | 81 | # filter messages 82 | filtered_messages: List[Message] = [] 
83 | for info_score in info_score_list: 84 | if not info_score: 85 | continue 86 | 87 | if len(info_score) != 2: 88 | self.logger.warning(f"info_score={info_score} is invalid!") 89 | continue 90 | 91 | idx, score = info_score 92 | 93 | idx = int(idx) - 1 94 | if idx >= len(info_messages): 95 | self.logger.warning(f"idx={idx} is invalid! info_messages.size={len(info_messages)}") 96 | continue 97 | message = info_messages[idx] 98 | 99 | if score in self.preserved_scores: 100 | message.meta_data["info_score"] = score 101 | filtered_messages.append(message) 102 | self.logger.info(f"info filter stage: keep {message.content}") 103 | 104 | if not filtered_messages: 105 | self.logger.warning("filtered_messages is empty!") 106 | self.continue_run = False 107 | return 108 | 109 | self.chat_messages_scatter = filtered_messages 110 | -------------------------------------------------------------------------------- /memoryscope/core/worker/backend/load_memory_worker.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from memoryscope.constants.common_constants import NOT_REFLECTED_NODES, NOT_UPDATED_NODES, INSIGHT_NODES, TODAY_NODES 4 | from memoryscope.core.utils.datetime_handler import DatetimeHandler 5 | from memoryscope.core.utils.timer import timer 6 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker 7 | from memoryscope.enumeration.memory_type_enum import MemoryTypeEnum 8 | from memoryscope.enumeration.store_status_enum import StoreStatusEnum 9 | from memoryscope.scheme.memory_node import MemoryNode 10 | 11 | 12 | class LoadMemoryWorker(MemoryBaseWorker): 13 | def _parse_params(self, **kwargs): 14 | self.retrieve_not_reflected_top_k: int = kwargs.get("retrieve_not_reflected_top_k", 0) 15 | self.retrieve_not_updated_top_k: int = kwargs.get("retrieve_not_updated_top_k", 0) 16 | self.retrieve_insight_top_k: int = kwargs.get("retrieve_insight_top_k", 0) 17 | self.retrieve_today_top_k: 
class LoadMemoryWorker(MemoryBaseWorker):
    """
    Loads several categories of long-term memories (not-reflected, not-updated,
    insight, today's observations) concurrently and stores each batch in the
    memory manager under its dedicated key.
    """

    def _parse_params(self, **kwargs):
        # top-k limits per category; 0 disables the corresponding retrieval
        self.retrieve_not_reflected_top_k: int = kwargs.get("retrieve_not_reflected_top_k", 0)
        self.retrieve_not_updated_top_k: int = kwargs.get("retrieve_not_updated_top_k", 0)
        self.retrieve_insight_top_k: int = kwargs.get("retrieve_insight_top_k", 0)
        self.retrieve_today_top_k: int = kwargs.get("retrieve_today_top_k", 0)

    def _base_filter(self) -> dict:
        """Filter fields common to every retrieval query of this worker."""
        return {
            "user_name": self.user_name,
            "target_name": self.target_name,
            "store_status": StoreStatusEnum.VALID.value,
        }

    @timer
    def retrieve_not_reflected_memory(self):
        """
        Fetches top-K observation memories not yet reflected and stores them
        under NOT_REFLECTED_NODES.
        """
        if not self.retrieve_not_reflected_top_k:
            return

        filter_dict = self._base_filter()
        filter_dict["memory_type"] = [MemoryTypeEnum.OBSERVATION.value, MemoryTypeEnum.OBS_CUSTOMIZED.value]
        filter_dict["obs_reflected"] = 0
        nodes: List[MemoryNode] = self.memory_store.retrieve_memories(top_k=self.retrieve_not_reflected_top_k,
                                                                      filter_dict=filter_dict)
        self.memory_manager.set_memories(NOT_REFLECTED_NODES, nodes)

    @timer
    def retrieve_not_updated_memory(self):
        """
        Fetches top-K observation memories not yet updated and stores them
        under NOT_UPDATED_NODES.
        """
        if not self.retrieve_not_updated_top_k:
            return

        filter_dict = self._base_filter()
        filter_dict["memory_type"] = [MemoryTypeEnum.OBSERVATION.value, MemoryTypeEnum.OBS_CUSTOMIZED.value]
        filter_dict["obs_updated"] = 0
        nodes: List[MemoryNode] = self.memory_store.retrieve_memories(top_k=self.retrieve_not_updated_top_k,
                                                                      filter_dict=filter_dict)
        self.memory_manager.set_memories(NOT_UPDATED_NODES, nodes)

    @timer
    def retrieve_insight_memory(self):
        """
        Fetches top-K insight memories and stores them under INSIGHT_NODES.
        """
        if not self.retrieve_insight_top_k:
            return

        filter_dict = self._base_filter()
        filter_dict["memory_type"] = MemoryTypeEnum.INSIGHT.value
        nodes: List[MemoryNode] = self.memory_store.retrieve_memories(top_k=self.retrieve_insight_top_k,
                                                                      filter_dict=filter_dict)
        self.memory_manager.set_memories(INSIGHT_NODES, nodes)

    @timer
    def retrieve_today_memory(self, dt: str):
        """
        Fetches top-K observation memories dated `dt` and stores them under
        TODAY_NODES.

        Args:
            dt (str): The date string used to select today's memories.
        """
        if not self.retrieve_today_top_k:
            return

        filter_dict = self._base_filter()
        filter_dict["memory_type"] = [MemoryTypeEnum.OBSERVATION.value, MemoryTypeEnum.OBS_CUSTOMIZED.value]
        filter_dict["dt"] = dt
        nodes: List[MemoryNode] = self.memory_store.retrieve_memories(top_k=self.retrieve_today_top_k,
                                                                      filter_dict=filter_dict)

        self.memory_manager.set_memories(TODAY_NODES, nodes)

    def _run(self):
        """
        Submits the four retrieval tasks to the worker thread pool and blocks
        until all of them have completed, so the memory manager holds a
        consistent snapshot before downstream workers run.
        """

        # today's date string used by retrieve_today_memory
        dt = DatetimeHandler().datetime_format()
        self.submit_thread_task(self.retrieve_not_reflected_memory)
        self.submit_thread_task(self.retrieve_not_updated_memory)
        self.submit_thread_task(self.retrieve_insight_memory)
        self.submit_thread_task(self.retrieve_today_memory, dt=dt)

        # drain the results so every task has finished before returning
        for _ in self.gather_thread_result():
            pass
23 | """ 24 | if "query" not in self.chat_kwargs: 25 | return 26 | 27 | query = self.chat_kwargs["query"].strip() 28 | if not query: 29 | return 30 | 31 | dt_handler = DatetimeHandler() 32 | node = MemoryNode(user_name=self.user_name, 33 | target_name=self.target_name, 34 | content=query, 35 | memory_type=MemoryTypeEnum.OBS_CUSTOMIZED.value, 36 | action_status=ActionStatusEnum.NEW.value, 37 | timestamp=dt_handler.timestamp) 38 | return [node] 39 | 40 | def from_memory_key(self): 41 | """ 42 | Retrieves memories based on the memory key if it exists. 43 | 44 | Returns: 45 | List[MemoryNode]: A list of MemoryNode objects retrieved using the memory key. 46 | """ 47 | if not self.memory_key: 48 | return 49 | 50 | return self.memory_manager.get_memories(keys=self.memory_key) 51 | 52 | def delete_all(self): 53 | """ 54 | Marks all memories for deletion by setting their action_status to 'DELETE'. 55 | 56 | Returns: 57 | List[MemoryNode]: A list of all MemoryNode objects marked for deletion. 58 | """ 59 | nodes: List[MemoryNode] = self.memory_manager.get_memories(keys="all") 60 | for node in nodes: 61 | node.action_status = ActionStatusEnum.DELETE.value 62 | self.logger.info(f"delete_all.size={len(nodes)}") 63 | return nodes 64 | 65 | def delete_memory(self): 66 | """ 67 | Marks specific memories for deletion based on query or memory_id present in chat_kwargs. 68 | 69 | Returns: 70 | List[MemoryNode]: A list of MemoryNode objects marked for deletion based on the query or memory_id. 
71 | """ 72 | if "query" in self.chat_kwargs: 73 | query = self.chat_kwargs["query"].strip() 74 | if not query: 75 | return 76 | 77 | i = 0 78 | nodes: List[MemoryNode] = self.memory_manager.get_memories(keys="all") 79 | for node in nodes: 80 | if node.content == query: 81 | i += 1 82 | node.action_status = ActionStatusEnum.DELETE.value 83 | self.logger.info(f"delete_memory.query.size={len(nodes)}") 84 | return nodes 85 | 86 | elif "memory_id" in self.chat_kwargs: 87 | memory_id = self.chat_kwargs["memory_id"].strip() 88 | if not memory_id: 89 | return 90 | 91 | i = 0 92 | nodes: List[MemoryNode] = self.memory_manager.get_memories(keys="all") 93 | for node in nodes: 94 | if node.memory_id == memory_id: 95 | i += 1 96 | node.action_status = ActionStatusEnum.DELETE.value 97 | self.logger.info(f"delete_memory.memory_id.size={len(nodes)}") 98 | return nodes 99 | 100 | return [] 101 | 102 | def _run(self): 103 | """ 104 | Executes a memory update method provided via the 'method' attribute. 105 | 106 | The method specified by the 'method' attribute is invoked, 107 | which updates memories accordingly. 
108 | """ 109 | method = self.method.strip() 110 | if not hasattr(self, method): 111 | self.logger.info(f"method={method} is missing!") 112 | return 113 | 114 | updated_nodes: Dict[str, List[MemoryNode]] = self.memory_manager.update_memories(nodes=getattr(self, method)()) 115 | line = ["[MEMORY ACTIONS]:"] 116 | for action, nodes in updated_nodes.items(): 117 | for node in nodes: 118 | line.append(f"{action} {node.memory_type}: {node.content} ({node.store_status})") 119 | self.set_workflow_context(RESULT, "\n".join(line)) 120 | -------------------------------------------------------------------------------- /memoryscope/core/worker/dummy_worker.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from memoryscope.constants.common_constants import RESULT, WORKFLOW_NAME, CHAT_KWARGS 4 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker 5 | 6 | 7 | class DummyWorker(MemoryBaseWorker): 8 | def _run(self): 9 | """ 10 | Executes the dummy worker's run logic by logging workflow entry, capturing the current timestamp, 11 | file path, and setting the result context with details about the workflow execution. 12 | 13 | This method utilizes the BaseWorker's capabilities to interact with the workflow context. 
14 | """ 15 | workflow_name = self.get_workflow_context(WORKFLOW_NAME) 16 | chat_kwargs = self.get_workflow_context(CHAT_KWARGS) 17 | self.logger.info(f"Entering workflow={workflow_name}.dummy_worker!") 18 | # Records the current timestamp as an integer 19 | ts = int(datetime.datetime.now().timestamp()) 20 | # Retrieves the current file's path 21 | file_path = __file__ 22 | self.set_workflow_context(RESULT, f"test {workflow_name} kwargs={chat_kwargs} file_path={file_path} \nts={ts}") 23 | -------------------------------------------------------------------------------- /memoryscope/core/worker/frontend/__init__.py: -------------------------------------------------------------------------------- 1 | from .extract_time_worker import ExtractTimeWorker 2 | from .fuse_rerank_worker import FuseRerankWorker 3 | from .print_memory_worker import PrintMemoryWorker 4 | from .read_message_worker import ReadMessageWorker 5 | from .retrieve_memory_worker import RetrieveMemoryWorker 6 | from .semantic_rank_worker import SemanticRankWorker 7 | from .set_query_worker import SetQueryWorker 8 | 9 | __all__ = [ 10 | "ExtractTimeWorker", 11 | "FuseRerankWorker", 12 | "PrintMemoryWorker", 13 | "ReadMessageWorker", 14 | "RetrieveMemoryWorker", 15 | "SemanticRankWorker", 16 | "SetQueryWorker" 17 | ] 18 | -------------------------------------------------------------------------------- /memoryscope/core/worker/frontend/extract_time_worker.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Dict 3 | 4 | from memoryscope.constants.common_constants import QUERY_WITH_TS, EXTRACT_TIME_DICT 5 | from memoryscope.constants.language_constants import DATATIME_KEY_MAP 6 | from memoryscope.core.utils.datetime_handler import DatetimeHandler 7 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker 8 | 9 | 10 | class ExtractTimeWorker(MemoryBaseWorker): 11 | """ 12 | A specialized worker class designed to identify and 
class ExtractTimeWorker(MemoryBaseWorker):
    """
    Identifies and extracts time-related information from an LLM response:
    translates the date-time keywords according to the configured language and
    stores the extracted mapping in the shared workflow context.
    """

    EXTRACT_TIME_PATTERN = r"-\s*(\S+)[::]\s*(\S+)"
    FILE_PATH: str = __file__

    def _parse_params(self, **kwargs):
        # extra kwargs forwarded to the generation model call
        self.generation_model_kwargs: dict = kwargs.get("generation_model_kwargs", {})

    def _run(self):
        """
        Extracts time data for the current query.

        Skips early when the query contains no datetime keywords. Otherwise
        builds a prompt that includes the formatted query timestamp, calls the
        LLM, parses "key: value" pairs from the response with a regex,
        translates the keys through the language-specific map, and stores the
        resulting dict under EXTRACT_TIME_DICT.
        """
        query, query_timestamp = self.get_workflow_context(QUERY_WITH_TS)

        # bail out when the query has no time-related words at all
        contain_datetime = DatetimeHandler.has_time_word(query, self.language)
        if not contain_datetime:
            self.logger.info(f"contain_datetime={contain_datetime}")
            return

        # assemble the prompt, embedding the formatted query timestamp
        query_time_str = DatetimeHandler(dt=query_timestamp).string_format(self.prompt_handler.time_string_format,
                                                                           self.language)
        system_prompt = self.prompt_handler.extract_time_system
        few_shot = self.prompt_handler.extract_time_few_shot
        user_query = self.prompt_handler.extract_time_user_query.format(query=query, query_time_str=query_time_str)
        extract_time_message = self.prompt_to_msg(system_prompt=system_prompt, few_shot=few_shot, user_query=user_query)
        self.logger.info(f"extract_time_message={extract_time_message}")

        # ask the LLM for the time analysis
        response = self.generation_model.call(messages=extract_time_message, **self.generation_model_kwargs)

        # nothing to do on a failed or empty response
        if not response.status or not response.message.content:
            return
        response_text = response.message.content

        # pull "key: value" pairs out of the response and translate the keys
        extract_time_dict: Dict[str, str] = {}
        matches = re.findall(self.EXTRACT_TIME_PATTERN, response_text)
        key_map: dict = self.get_language_value(DATATIME_KEY_MAP)
        for raw_key, raw_value in matches:
            mapped_key = key_map.get(raw_key)
            if mapped_key is not None:
                extract_time_dict[mapped_key] = raw_value
        self.logger.info(f"response_text={response_text} matches={matches} filters={extract_time_dict}")
        self.set_workflow_context(EXTRACT_TIME_DICT, extract_time_dict)
25 | 时间:1992年8月20日,1992年第34周,周一,18时。 26 | 回答: 27 | - 年:1990 - 月:4月 28 | 29 | 示例2: 30 | 句子:后天下午三点的会议记得参加。我在日历上仔细标注了这个重要的日子,提醒自己不要错过。会议将在公司会议室举行,这是一个讨论未来发展方向的重要机会。 31 | 时间:2024年6月19日,2024年第25周,周二,13时。 32 | 回答: 33 | - 年:2024 - 月:6月 - 日:21 - 时:15 34 | 35 | 示例3: 36 | 句子:下个月第一个周六去杭州玩。 37 | 时间:2005年7月15日,2005年第28周,周六,0时。 38 | 回答: 39 | - 年:2005 - 月:8月 - 周:31 - 星期几:周六 40 | 41 | 示例4: 42 | 句子:上周末我们去的那个小镇真是太美了。 43 | 时间:1999年12月2日,1999年第48周,周二,8时。 44 | 回答: 45 | - 年:1999 - 周:47 - 星期几:周六,周日 46 | 47 | 示例5: 48 | 句子:再过半小时就要宣讲了,记得准备材料。 49 | 时间:2020年6月22日,2020年第25周,周一,9时。 50 | 回答: 51 | - 年:2020 - 月:6月 - 日:22 - 时:10 52 | 53 | 示例6: 54 | 句子:10000米长跑比赛的开始时间是3分47秒前。 55 | 时间:1987年2月17日,1987年第7周,周三,19时。 56 | 回答: 57 | - 年:1987 - 月:2 - 日:17 - 时:19 58 | 59 | 示例7: 60 | 句子:上个月的这个时候我们还在筹备音乐会。每天都是忙碌而充实的日子,我们为音乐会的顺利举办而努力奋斗着。彩排、布景、节目安排,每一个细节都需要精心安排和准备。 61 | 时间:1995年11月24日,1995年第48周,周二,17时。 62 | 回答: 63 | - 年:1995 - 月:10 - 日:24 64 | 65 | 示例8: 66 | 句子:我的朋友非常喜欢运动,他认为运动有助于增强身体素质。 67 | 时间:2015年1月23日,2015年第4周,周四,7时。 68 | 回答: 69 | 无 70 | 71 | en: | 72 | Example 1: 73 | Sentence: I remember you went to the UAE in April the year before last. Which places in the UAE are fun? Which do you prefer, Dubai or Abu Dhabi? Are the desert views spectacular? 74 | Time: August 20, 1992, 34th week of 1992, Monday, at 18. 75 | Answer: 76 | - Year: 1990 - Month: 4 77 | 78 | Example 2: 79 | Sentence: Remember to attend the meeting at 3 PM the day after tomorrow. I carefully marked this important day on my calendar to remind myself not to miss it. The meeting will be held in the company conference room, and it's an important opportunity to discuss future development directions. 80 | Time: June 19, 2024, 25th week of 2024, Tuesday, at 13. 81 | Answer: 82 | - Year: 2024 - Month: 6 - Day: 21 - Hour: 15 83 | 84 | Example 3: 85 | Sentence: Next month on the first Saturday, let's go to Hangzhou. 86 | Time: July 15, 2005, 28th week of 2005, Saturday, at 0. 
87 | Answer: 88 | - Year: 2005 - Month: 8 - Week: 31 - Day of Week: 6 89 | 90 | Example 4: 91 | Sentence: The small town we visited last weekend was truly beautiful. 92 | Time: December 2, 1999, 48th week of 1999, Tuesday, at 8. 93 | Answer: 94 | - Year: 1999 - Week: 47 - Day of Week: 6, 7 95 | 96 | Example 5: 97 | Sentence: The presentation will start in half an hour, remember to prepare the materials. 98 | Time: June 22, 2020, 25th week of 2020, Monday, at 9. 99 | Answer: 100 | - Year: 2020 - Month: 6 - Day: 22 - Hour: 10 101 | 102 | Example 6: 103 | Sentence: The start time for the 10,000-meter race was 3 minutes and 47 seconds ago. 104 | Time: February 17, 1987, 7th week of 1987, Wednesday, at 19. 105 | Answer: 106 | - Year: 1987 - Month: 2 - Day: 17 - Hour: 19 107 | 108 | Example 7: 109 | Sentence: At this time last month, we were still preparing for the concert. Every day was busy and fulfilling, and we worked hard for the successful holding of the concert. Rehearsals, set design, and program arrangements - every detail needed careful planning and preparation. 110 | Time: November 24, 1995, 48th week of 1995, Tuesday, at 17. 111 | Answer: 112 | - Year: 1995 - Month: 10 - Day: 24 113 | 114 | Example 8: 115 | Sentence: My friend loves sports very much and believes that exercise helps improve physical fitness. 116 | Time: January 23, 2015, 4th week of 2015, Thursday, at 7. 
class PrintMemoryWorker(MemoryBaseWorker):
    """
    Formats the retrieved long-term memories into a printable report and stores
    it in the workflow context.
    """
    FILE_PATH: str = __file__

    def _run(self):
        """
        Builds the memory report:

        1. Fetches the retrieved memories and sorts them newest-first.
        2. Buckets each node into observation / insight / expired lists,
           de-duplicating expired content.
        3. Renders 'print_template' and writes the result under RESULT.
        """
        # get long-term memory, newest first
        memory_node_list: List[MemoryNode] = self.memory_manager.get_memories(RETRIEVE_MEMORY_NODES)
        memory_node_list = sorted(memory_node_list, key=lambda x: x.timestamp, reverse=True)

        observation_memory_list: List[str] = []
        insight_memory_list: List[str] = []
        expired_memory_list: List[str] = []

        # per-bucket running counters
        i = 0
        j = 0
        k = 0
        # remove duplicate expired content
        expired_content_set = set()
        for node in memory_node_list:
            if not node.content:
                continue

            # keyword arg for consistency with other DatetimeHandler call sites
            dt_handler = DatetimeHandler(dt=node.timestamp)
            dt = dt_handler.datetime_format("%Y%m%d %H:%M:%S")
            if StoreStatusEnum(node.store_status) is StoreStatusEnum.EXPIRED:
                if node.content in expired_content_set:
                    continue
                expired_content_set.add(node.content)
                i += 1
                # fix: balanced brackets around the timestamp (was "{dt}]")
                expired_memory_list.append(f"[{dt}] {i}. {node.content}")

            elif MemoryTypeEnum(node.memory_type) in [MemoryTypeEnum.OBSERVATION, MemoryTypeEnum.OBS_CUSTOMIZED]:
                j += 1
                # fix: balanced brackets for the timestamp and the status suffix
                # (the "[status(" group was never closed)
                observation_memory_list.append(f"[{dt}] {j}. {node.content} "
                                               f"[status({node.obs_reflected},{node.obs_updated})]")

            elif MemoryTypeEnum(node.memory_type) is MemoryTypeEnum.INSIGHT:
                k += 1
                insight_memory_list.append(f"[{dt}] {k}. {node.content}")

        result: str = self.prompt_handler.print_template.format(
            user_name=self.user_name,
            target_name=self.target_name,
            observation_memory="\n".join(observation_memory_list),
            insight_memory="\n".join(insight_memory_list),
            expired_memory="\n".join(expired_memory_list)).strip()
        self.set_workflow_context(RESULT, result)
{node.content}") 59 | 60 | result: str = self.prompt_handler.print_template.format( 61 | user_name=self.user_name, 62 | target_name=self.target_name, 63 | observation_memory="\n".join(observation_memory_list), 64 | insight_memory="\n".join(insight_memory_list), 65 | expired_memory="\n".join(expired_memory_list)).strip() 66 | self.set_workflow_context(RESULT, result) 67 | -------------------------------------------------------------------------------- /memoryscope/core/worker/frontend/print_memory_worker.yaml: -------------------------------------------------------------------------------- 1 | print_template: 2 | cn: | 3 | ========== {user_name}关于{target_name}的长期记忆 ========== 4 | ----- 观察记忆 ----- 5 | {observation_memory} 6 | 7 | ----- 洞察记忆 ----- 8 | {insight_memory} 9 | 10 | ----- 过期记忆 ----- 11 | {expired_memory} 12 | 13 | en: | 14 | ========== The {user_name}'s long-term memory about {target_name} ========== 15 | ----- observation memory ----- 16 | {observation_memory} 17 | 18 | ----- insight memory ----- 19 | {insight_memory} 20 | 21 | ----- expired memory ----- 22 | {expired_memory} -------------------------------------------------------------------------------- /memoryscope/core/worker/frontend/read_message_worker.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from memoryscope.constants.common_constants import RESULT 4 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker 5 | from memoryscope.scheme.message import Message 6 | 7 | 8 | class ReadMessageWorker(MemoryBaseWorker): 9 | """ 10 | Fetches unmemorized chat messages. 11 | """ 12 | 13 | def _run(self): 14 | """ 15 | Executes the primary function to fetch unmemorized chat messages. 
16 | """ 17 | chat_messages_not_memorized: List[List[Message]] = [] 18 | for messages in self.chat_messages: 19 | if not messages: 20 | continue 21 | 22 | if messages[0].memorized: 23 | continue 24 | 25 | contain_flag = False 26 | 27 | for msg in messages: 28 | if msg.role_name == self.target_name: 29 | contain_flag = True 30 | break 31 | 32 | if contain_flag: 33 | chat_messages_not_memorized.append(messages) 34 | 35 | contextual_msg_max_count: int = self.chat_kwargs["contextual_msg_max_count"] 36 | chat_message_scatter = [] 37 | for messages in chat_messages_not_memorized[-contextual_msg_max_count:]: 38 | chat_message_scatter.extend(messages) 39 | chat_message_scatter.sort(key=lambda _: _.time_created) 40 | self.set_workflow_context(RESULT, chat_message_scatter) 41 | -------------------------------------------------------------------------------- /memoryscope/core/worker/frontend/semantic_rank_worker.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict 2 | 3 | from memoryscope.constants.common_constants import RETRIEVE_MEMORY_NODES, QUERY_WITH_TS, RANKED_MEMORY_NODES 4 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker 5 | from memoryscope.scheme.memory_node import MemoryNode 6 | 7 | 8 | class SemanticRankWorker(MemoryBaseWorker): 9 | """ 10 | The `SemanticRankWorker` class processes queries by retrieving memory nodes, 11 | removing duplicates, ranking them based on semantic relevance using a model, 12 | assigning scores, sorting the nodes, and storing the ranked nodes back, 13 | while logging relevant information. 
14 | """ 15 | 16 | def _parse_params(self, **kwargs): 17 | self.enable_ranker: bool = self.memoryscope_context.meta_data["enable_ranker"] 18 | self.output_memory_max_count: int = self.memoryscope_context.meta_data["output_memory_max_count"] 19 | 20 | def _run(self): 21 | """ 22 | Executes the primary workflow of the SemanticRankWorker which includes: 23 | - Retrieves query and timestamp from context. 24 | - Fetches memory nodes. 25 | - Removes duplicate nodes. 26 | - Ranks nodes semantically. 27 | - Assigns scores to nodes. 28 | - Sorts nodes by score. 29 | - Saves the ranked nodes back with logging. 30 | 31 | If no memory nodes are retrieved or if the ranking model fails, 32 | appropriate warnings are logged. 33 | """ 34 | # query 35 | query, _ = self.get_workflow_context(QUERY_WITH_TS) 36 | memory_node_list: List[MemoryNode] = self.memory_manager.get_memories(RETRIEVE_MEMORY_NODES) 37 | if not memory_node_list: 38 | self.logger.warning("Retrieve memory nodes is empty!") 39 | return 40 | 41 | if not self.enable_ranker or len(memory_node_list) <= self.output_memory_max_count: 42 | for node in memory_node_list: 43 | node.score_rank = node.score_recall 44 | self.logger.warning("use score_recall instead of score_rank!") 45 | 46 | else: 47 | # drop repeated 48 | memory_node_dict: Dict[str, MemoryNode] = {n.content.strip(): n for n in memory_node_list if 49 | n.content.strip()} 50 | memory_node_list = list(memory_node_dict.values()) 51 | 52 | response = self.rank_model.call(query=query, documents=[n.content for n in memory_node_list]) 53 | if not response.status or not response.rank_scores: 54 | return 55 | 56 | # set score 57 | for idx, score in response.rank_scores.items(): 58 | if idx >= len(memory_node_list): 59 | self.logger.warning(f"Idx={idx} exceeds the maximum length of rank_scores!") 60 | continue 61 | memory_node_list[idx].score_rank = score 62 | 63 | # sort by score 64 | memory_node_list = sorted(memory_node_list, key=lambda n: n.score_rank, reverse=True) 65 
| 66 | # log ranked nodes 67 | self.logger.info(f"Rank stage: query={query}") 68 | for node in memory_node_list: 69 | self.logger.info(f"Rank stage: Content={node.content}, Score={node.score_rank}") 70 | 71 | # save ranked nodes back to memory 72 | self.memory_manager.set_memories(RANKED_MEMORY_NODES, memory_node_list, log_repeat=False) 73 | -------------------------------------------------------------------------------- /memoryscope/core/worker/frontend/set_query_worker.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from memoryscope.constants.common_constants import QUERY_WITH_TS 4 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker 5 | 6 | 7 | class SetQueryWorker(MemoryBaseWorker): 8 | """ 9 | The `SetQueryWorker` class is responsible for setting a query and its associated timestamp 10 | into the context, utilizing either provided chat parameters or details from the most recent 11 | chat message. 12 | """ 13 | 14 | def _run(self): 15 | """ 16 | Executes the worker's primary function, which involves determining the query and its 17 | timestamp, then storing these values within the context. 18 | 19 | If 'query' is found within `self.chat_kwargs`, it is considered as the query input. 20 | Otherwise, the content of the last message in `self.chat_messages` is used as the query, 21 | along with its creation timestamp. 
22 | """ 23 | query = "" # Default query value 24 | timestamp = int(datetime.datetime.now().timestamp()) # Current timestamp as default 25 | 26 | if "query" in self.chat_kwargs: 27 | # set query if exists 28 | query = self.chat_kwargs["query"] 29 | if not query: 30 | query = "" 31 | query = query.strip() 32 | 33 | # set ts if exists 34 | _timestamp = self.chat_kwargs.get("timestamp") 35 | if _timestamp and isinstance(_timestamp, int): 36 | timestamp = _timestamp 37 | 38 | # Store the determined query and its timestamp in the context 39 | self.set_workflow_context(QUERY_WITH_TS, (query, timestamp)) 40 | -------------------------------------------------------------------------------- /memoryscope/enumeration/__init__.py: -------------------------------------------------------------------------------- 1 | from .action_status_enum import ActionStatusEnum 2 | from .language_enum import LanguageEnum 3 | from .memory_type_enum import MemoryTypeEnum 4 | from .message_role_enum import MessageRoleEnum 5 | from .model_enum import ModelEnum 6 | from .store_status_enum import StoreStatusEnum 7 | 8 | __all__ = [ 9 | "ActionStatusEnum", 10 | "LanguageEnum", 11 | "MemoryTypeEnum", 12 | "MessageRoleEnum", 13 | "ModelEnum", 14 | "StoreStatusEnum" 15 | ] -------------------------------------------------------------------------------- /memoryscope/enumeration/action_status_enum.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class ActionStatusEnum(str, Enum): 5 | """ 6 | Enumeration representing various statuses of a memory node. 7 | 8 | Each status reflects a different state of the node in terms of its lifecycle or content: 9 | - NEW: Indicates a newly created node. 10 | - MODIFIED: Signifies that the node has been altered. 11 | - CONTENT_MODIFIED: Specifies changes in the actual content of the node. 12 | - NONE: do nothing. 13 | - DELETE: delete memories. 
class MemoryTypeEnum(str, Enum):
    """
    Closed set of memory-content categories.

    Members:
        CONVERSATION: memories built from raw conversation turns.
        OBSERVATION: observational memories extracted from chats.
        INSIGHT: insights derived from analysis of observations.
        OBS_CUSTOMIZED: user-customized observational memories.
    """
    CONVERSATION = "conversation"  # raw conversation-based memory
    OBSERVATION = "observation"    # extracted observation
    INSIGHT = "insight"            # derived insight
    OBS_CUSTOMIZED = "obs_customized"  # customized observation
class ModelEnum(str, Enum):
    """
    Kinds of models wired into the system.

    Members:
        GENERATION_MODEL: produces text content (LLM generation).
        EMBEDDING_MODEL: turns inputs into numerical embedding vectors.
        RANK_MODEL: scores documents so they can be ordered by relevance.
    """
    GENERATION_MODEL = "generation_model"  # text generation
    EMBEDDING_MODEL = "embedding_model"    # embedding vectors
    RANK_MODEL = "rank_model"              # relevance ranking
13 | """ 14 | memory_id: str = Field(default_factory=lambda: uuid4().hex, description="unique id for memory") 15 | 16 | user_name: str = Field("", description="the user who owns the memory") 17 | 18 | target_name: str = Field("", description="target name described by the memory") 19 | 20 | meta_data: Dict[str, str] = Field({}, description="meta data infos") 21 | 22 | content: str = Field("", description="memory content") 23 | 24 | key: str = Field("", description="memory key") 25 | 26 | key_vector: List[float] = Field([], description="memory key embedding result") 27 | 28 | value: str = Field("", description="memory value") 29 | 30 | score_recall: float = Field(0, description="embedding similarity score used in recall stage") 31 | 32 | score_rank: float = Field(0, description="rank model score used in rank stage") 33 | 34 | score_rerank: float = Field(0, description="rerank score used in rerank stage") 35 | 36 | memory_type: str = Field("", description="conversation / observation / insight...") 37 | 38 | action_status: str = Field("none", description="new / content_modified / modified / deleted / none") 39 | 40 | store_status: str = Field("valid", description="store_status: valid / expired") 41 | 42 | vector: List[float] = Field([], description="content embedding result") 43 | 44 | timestamp: int = Field(default_factory=lambda: int(datetime.datetime.now().timestamp()), 45 | description="timestamp of the memory node") 46 | 47 | dt: str = Field("", description="dt of the memory node") 48 | 49 | obs_reflected: int = Field(0, description="if the observation is reflected: 0/1") 50 | 51 | obs_updated: int = Field(0, description="if the observation has updated user profile or insight: 0/1") 52 | 53 | def __init__(self, **kwargs): 54 | super().__init__(**kwargs) 55 | self.dt = datetime.datetime.fromtimestamp(self.timestamp).strftime("%Y%m%d") 56 | 57 | @property 58 | def node_keys(self): 59 | return list(self.model_json_schema()["properties"].keys()) 60 | 61 | def 
class Message(BaseModel):
    """
    Represents a structured message object with details about the sender, content, and metadata.

    Attributes:
        role (str): The role of the message sender (e.g., 'user', 'assistant', 'system').
        role_name (str): Optional name associated with the role of the message sender.
        content (str): The actual content or text of the message.
        time_created (int): Epoch-second timestamp indicating when the message was created.
        memorized (bool): Flag to indicate if the message has been saved or remembered.
        meta_data (Dict[str, str]): Additional data or context attached to the message.
    """
    # Required: which participant produced the message.
    role: str = Field(..., description="The role of the message sender (user, assistant, system)")

    role_name: str = Field("", description="Name describing the role of the message sender")

    # Required: the message text itself.
    content: str = Field(..., description="The primary content of the message")

    # NOTE(review): default_factory uses naive local time — confirm whether
    # callers expect UTC epoch seconds.
    time_created: int = Field(default_factory=lambda: int(datetime.datetime.now().timestamp()),
                              description="Timestamp marking the message creation time")

    memorized: bool = Field(False, description="Indicates if the message is flagged for memory retention")

    meta_data: Dict[str, str] = Field({}, description="Supplementary data attached to the message")
" 18 | "key: index, value: rank score") 19 | 20 | m_type: ModelEnum = Field(ModelEnum.GENERATION_MODEL, description="One of LLM, EMB, RANK.") 21 | 22 | status: bool = Field(True, description="Indicates whether the model call was successful.") 23 | 24 | details: str = Field("", description="The details information for model call, " 25 | "usually for storage of raw response or failure messages.") 26 | 27 | raw: Any = Field("", description="Raw response from model call") 28 | 29 | meta_data: Dict[str, Any] = Field({}, description="meta data for model response") 30 | 31 | def __str__(self, max_size=100, **kwargs): 32 | result = {} 33 | for key, value in self.model_dump().items(): 34 | if key == "raw" or not value: 35 | continue 36 | 37 | if isinstance(value, str): 38 | result[key] = value 39 | elif isinstance(value, list | dict): 40 | result[key] = f"{str(value)[:max_size]}... size={len(value)}" 41 | elif isinstance(value, ModelEnum): 42 | result[key] = value.value 43 | return json.dumps(result, **kwargs) 44 | 45 | 46 | ModelResponseGen = Generator[ModelResponse, None, None] 47 | -------------------------------------------------------------------------------- /quick-start-demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | if os.environ.get('DASHSCOPE_API_KEY', None) is None \ 4 | and os.environ.get('OPENAI_API_KEY', None) is None: 5 | raise RuntimeError(f""" 6 | Missing api key(dashscope api key or openai api key. 
7 | `https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key` or 8 | `https://openai.com/`""") 9 | 10 | from memoryscope import cli 11 | cli() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | llama-index==0.10.45 2 | llama-index-embeddings-dashscope>=0.1.3 3 | llama-index-llms-dashscope>=0.1.2 4 | llama-index-postprocessor-dashscope-rerank-custom>=0.1.0 5 | dashscope>=1.19.1 6 | llama-index-vector-stores-elasticsearch>=0.2.0 7 | elasticsearch>=8.14.0 8 | pyfiglet>=1.0.2 9 | termcolor>=2.4.0 10 | fire>=0.6.0 11 | questionary>=2.0.1 12 | pydantic>=2.7.1 13 | pyyaml>=6.0.1 14 | numpy>=1.26.4 15 | rich>=13.0.0 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | # 1. remove old temp folders 3 | rm -rf dist build 4 | 5 | # 2. then, build 6 | python setup.py sdist bdist_wheel 7 | 8 | # 3. 
finally, upload 9 | twine upload dist/* 10 | 11 | rm -rf dist build && python setup.py sdist bdist_wheel && twine upload dist/* 12 | """ 13 | 14 | import os 15 | 16 | import setuptools 17 | 18 | with open("README.md", "r", encoding="utf-8") as fh: 19 | long_description = fh.read() 20 | 21 | 22 | def _process_requirements(): 23 | packages = open('requirements.txt').read().strip().split('\n') 24 | requires = [] 25 | for pkg in packages: 26 | if pkg.startswith('git+ssh'): 27 | return_code = os.system('pip install {}'.format(pkg)) 28 | assert return_code == 0, 'error, status_code is: {}, exit!'.format(return_code) 29 | else: 30 | requires.append(pkg) 31 | return requires 32 | 33 | 34 | def package_files(directory): 35 | paths = [] 36 | for (path, directories, filenames) in os.walk(directory): 37 | for filename in filenames: 38 | if filename.endswith('yaml'): 39 | paths.append(os.path.join('..', path, filename)) 40 | return paths 41 | 42 | 43 | extra_files = package_files('memoryscope') 44 | 45 | authors = [ 46 | {"name": "Li Yu", "email": "jinli.yl@alibaba-inc.com"}, 47 | {"name": "Tiancheng Qin", "email": "qiancheng.qtc@alibaba-inc.com"}, 48 | {"name": "Qingxu Fu", "email": "fuqingxu.fqx@alibaba-inc.com"}, 49 | {"name": "Sen Huang", "email": "huangsen.huang@alibaba-inc.com"}, 50 | {"name": "Xianzhe Xu", "email": "xianzhe.xxz@alibaba-inc.com"}, 51 | {"name": "Zhaoyang Liu", "email": "jingmu.lzy@alibaba-inc.com"}, 52 | {"name": "Boyin Liu", "email": "liuboyin.lby@alibaba-inc.com"}, 53 | ] 54 | 55 | setuptools.setup( 56 | name="memoryscope", 57 | version="0.1.1.0", 58 | author=', '.join([author['name'] for author in authors]), 59 | author_email=', '.join([author['email'] for author in authors]), 60 | description="MemoryScope is a powerful and flexible long term memory system for LLM chatbots. 
class TestLLIEmbedding(unittest.TestCase):
    """Exercises LlamaIndexEmbeddingModel against a live embedding backend."""

    def setUp(self):
        # Build the model directly from keyword arguments instead of an
        # intermediate config dict.
        self.emb = LlamaIndexEmbeddingModel(
            module_name="openai_embedding",
            model_name="text-embedding-3-large",
            clazz="models.base_embedding_model",
        )
        self.logger = Logger.get_logger()

    def test_single_embedding(self):
        """Embed one string and log the response type and vector count."""
        response = self.emb.call(text="您吃了吗?")
        self.logger.info(response.m_type)
        self.logger.info(len(response.embedding_results))

    def test_batch_embedding(self):
        """Embed a small batch of strings in a single call."""
        batch = ["您吃了吗?",
                 "吃了吗您?"]
        response = self.emb.call(text=batch)
        print()
        self.logger.info(response)

    def test_async_embedding(self):
        """Drive the async embedding entry point to completion and log it."""
        batch = ["您吃了吗?",
                 "吃了吗您?"]
        # run the coroutine and wait for its result
        response = asyncio.run(self.emb.async_call(text=batch))
        print()
        self.logger.info(response)
27 | ans = self.llm.call(stream=False, prompt=prompt) 28 | self.logger.info(ans.message.content) 29 | 30 | def test_llm_messages(self): 31 | messages = [Message(role="system", content="you are a helpful assistant."), 32 | Message(role="user", content="你如何看待黄金上涨?")] 33 | ans = self.llm.call(stream=False, messages=messages) 34 | self.logger.info(ans.message.content) 35 | 36 | def test_llm_prompt_stream(self): 37 | prompt = "你如何看待黄金上涨?" 38 | ans = self.llm.call(stream=True, prompt=prompt) 39 | self.logger.info("-----start-----") 40 | for a in ans: 41 | sys.stdout.write(a.delta) 42 | sys.stdout.flush() 43 | time.sleep(0.1) 44 | self.logger.info("-----end-----") 45 | 46 | def test_llm_messages_stream(self): 47 | messages = [Message(role="system", content="you are a helpful assistant."), 48 | Message(role="user", content="你如何看待黄金上涨?")] 49 | ans = self.llm.call(stream=True, messages=messages) 50 | self.logger.info("-----start-----") 51 | for a in ans: 52 | sys.stdout.write(a.delta) 53 | sys.stdout.flush() 54 | time.sleep(0.1) 55 | self.logger.info("-----end-----") 56 | 57 | def test_async_llm_messages(self): 58 | 59 | messages = [Message(role="system", content="you are a helpful assistant."), 60 | Message(role="user", content="你如何看待黄金上涨?")] 61 | 62 | ans = asyncio.run(self.llm.async_call(messages=messages)) 63 | self.logger.info(ans.message.content) 64 | -------------------------------------------------------------------------------- /tests/models/test_models_lli_rank.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import unittest 3 | 4 | from memoryscope.core.models.llama_index_rank_model import LlamaIndexRankModel 5 | 6 | 7 | class TestLLIReRank(unittest.TestCase): 8 | """Tests for LlamaIndexRerankModel""" 9 | 10 | def setUp(self): 11 | config = { 12 | "module_name": "dashscope_rank", 13 | "model_name": "gte-rerank", 14 | "clazz": "models.llama_index_rerank_model" 15 | } 16 | self.reranker = 
def validate_path():
    """
    Make the repository root the working directory and importable.

    The repo root is assumed to be two levels above this file
    (tests/other/ -> repo root); it is both chdir'ed into and appended to
    ``sys.path`` so tests can be launched from the base directory.
    """
    import os
    import sys

    # Fix: the original had a dead `os.path.dirname(__file__)` expression
    # statement whose result was discarded.
    root_dir_assume = os.path.abspath(os.path.dirname(__file__) + "/../..")
    os.chdir(root_dir_assume)
    sys.path.append(root_dir_assume)
class CLI:
    """Thin command-line wrapper that echoes its keyword arguments."""

    def run(self, **kwargs):
        """Print every keyword argument as a `name: value` line."""
        for name, val in kwargs.items():
            print(f"{name}: {val}")