├── .flake8
├── .github
│   └── workflows
│       ├── deploy_sphinx_docs.yml
│       ├── docker-image-arm.yml
│       └── docker-image.yml
├── .gitignore
├── .pre-commit-config.yaml
├── Dockerfile
├── DockerfileArm
├── LICENSE
├── README.md
├── README_JP.md
├── README_ZH.md
├── clear-vector-store.py
├── docker-compose.yml
├── docs
│   ├── README.md
│   ├── contribution.md
│   ├── contribution_zh.md
│   ├── images
│   │   ├── framework.png
│   │   └── logo.png
│   ├── installation.md
│   ├── installation_zh.md
│   └── sphinx_doc
│       ├── Makefile
│       ├── assets
│       │   └── redirect.html
│       ├── build_sphinx_doc.sh
│       ├── en
│       │   └── source
│       │       ├── _static
│       │       │   └── custom.css
│       │       ├── _templates
│       │       │   ├── language_selector.html
│       │       │   └── layout.html
│       │       ├── conf.py
│       │       ├── docs
│       │       │   └── api.rst
│       │       ├── index.rst
│       │       └── modules.rst
│       ├── ja
│       │   └── source
│       │       └── index.rst
│       ├── requirements.txt
│       ├── template
│       │   ├── module.rst_t
│       │   └── package.rst_t
│       └── zh
│           └── source
│               ├── _static
│               │   └── custom.css
│               ├── _templates
│               │   ├── language_selector.html
│               │   └── layout.html
│               ├── conf.py
│               ├── docs
│               │   └── api.rst
│               ├── index.rst
│               └── modules.rst
├── examples
│   ├── advance
│   │   ├── custom_operator.md
│   │   ├── custom_operator_zh.md
│   │   └── replacement.yaml
│   ├── api
│   │   ├── agentscope_example.md
│   │   ├── agentscope_example.py
│   │   ├── autogen_example.md
│   │   ├── autogen_example.py
│   │   ├── chat_example.py
│   │   ├── simple_usages.ipynb
│   │   └── simple_usages_zh.ipynb
│   ├── cli
│   │   ├── CLI_README.md
│   │   └── CLI_README_ZH.md
│   └── docker
│       ├── entrypoint.sh
│       └── run_elastic_search.sh
├── memoryscope
│   ├── __init__.py
│   ├── constants
│   │   ├── __init__.py
│   │   ├── common_constants.py
│   │   └── language_constants.py
│   ├── contrib
│   │   ├── example_query_worker.py
│   │   └── example_query_worker.yaml
│   ├── core
│   │   ├── __init__.py
│   │   ├── chat
│   │   │   ├── __init__.py
│   │   │   ├── api_memory_chat.py
│   │   │   ├── base_memory_chat.py
│   │   │   ├── cli_memory_chat.py
│   │   │   └── memory_chat_prompt.yaml
│   │   ├── config
│   │   │   ├── __init__.py
│   │   │   ├── arguments.py
│   │   │   ├── config_manager.py
│   │   │   ├── demo_config.yaml
│   │   │   └── demo_config_zh.yaml
│   │   ├── memoryscope.py
│   │   ├── memoryscope_context.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   ├── base_model.py
│   │   │   ├── dummy_generation_model.py
│   │   │   ├── llama_index_embedding_model.py
│   │   │   ├── llama_index_generation_model.py
│   │   │   └── llama_index_rank_model.py
│   │   ├── operation
│   │   │   ├── __init__.py
│   │   │   ├── backend_operation.py
│   │   │   ├── base_operation.py
│   │   │   ├── base_workflow.py
│   │   │   ├── consolidate_memory_op.py
│   │   │   └── frontend_operation.py
│   │   ├── service
│   │   │   ├── __init__.py
│   │   │   ├── base_memory_service.py
│   │   │   └── memory_scope_service.py
│   │   ├── storage
│   │   │   ├── __init__.py
│   │   │   ├── base_memory_store.py
│   │   │   ├── base_monitor.py
│   │   │   ├── dummy_memory_store.py
│   │   │   ├── dummy_monitor.py
│   │   │   ├── llama_index_es_memory_store.py
│   │   │   └── llama_index_sync_elasticsearch.py
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── datetime_handler.py
│   │   │   ├── logger.py
│   │   │   ├── prompt_handler.py
│   │   │   ├── registry.py
│   │   │   ├── response_text_parser.py
│   │   │   ├── singleton.py
│   │   │   ├── timer.py
│   │   │   └── tool_functions.py
│   │   └── worker
│   │       ├── __init__.py
│   │       ├── backend
│   │       │   ├── __init__.py
│   │       │   ├── contra_repeat_worker.py
│   │       │   ├── contra_repeat_worker.yaml
│   │       │   ├── get_observation_with_time_worker.py
│   │       │   ├── get_observation_with_time_worker.yaml
│   │       │   ├── get_observation_worker.py
│   │       │   ├── get_observation_worker.yaml
│   │       │   ├── get_reflection_subject_worker.py
│   │       │   ├── get_reflection_subject_worker.yaml
│   │       │   ├── info_filter_worker.py
│   │       │   ├── info_filter_worker.yaml
│   │       │   ├── load_memory_worker.py
│   │       │   ├── long_contra_repeat_worker.py
│   │       │   ├── long_contra_repeat_worker.yaml
│   │       │   ├── update_insight_worker.py
│   │       │   ├── update_insight_worker.yaml
│   │       │   └── update_memory_worker.py
│   │       ├── base_worker.py
│   │       ├── dummy_worker.py
│   │       ├── frontend
│   │       │   ├── __init__.py
│   │       │   ├── extract_time_worker.py
│   │       │   ├── extract_time_worker.yaml
│   │       │   ├── fuse_rerank_worker.py
│   │       │   ├── print_memory_worker.py
│   │       │   ├── print_memory_worker.yaml
│   │       │   ├── read_message_worker.py
│   │       │   ├── retrieve_memory_worker.py
│   │       │   ├── semantic_rank_worker.py
│   │       │   └── set_query_worker.py
│   │       ├── memory_base_worker.py
│   │       └── memory_manager.py
│   ├── enumeration
│   │   ├── __init__.py
│   │   ├── action_status_enum.py
│   │   ├── language_enum.py
│   │   ├── memory_type_enum.py
│   │   ├── message_role_enum.py
│   │   ├── model_enum.py
│   │   └── store_status_enum.py
│   └── scheme
│       ├── __init__.py
│       ├── memory_node.py
│       ├── message.py
│       └── model_response.py
├── quick-start-demo.py
├── requirements.txt
├── setup.py
└── tests
    ├── __init__.py
    ├── models
    │   ├── test_models_lli_embedding.py
    │   ├── test_models_lli_generation.py
    │   └── test_models_lli_rank.py
    ├── other
    │   ├── init_test.py
    │   ├── read_prompt.yaml
    │   ├── read_yaml.py
    │   ├── test_attr.py
    │   └── test_cli.py
    ├── storages
    │   ├── test_storages_lli_es.py
    │   └── test_storages_lli_synces.py
    └── worker
        ├── test_workers_cn.py
        └── test_workers_en.py
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude = tests/*,examples/*,memoryscope/core/storage/llama_index_sync_elasticsearch.py
3 | max-line-length = 120
4 | inline-quotes = "
5 | avoid-escape = no
6 | ignore =
--------------------------------------------------------------------------------
/.github/workflows/deploy_sphinx_docs.yml:
--------------------------------------------------------------------------------
1 | name: deploy-sphinx-documentation-to-pages
2 |
3 | on:
4 | pull_request:
5 | types: [opened, synchronize]
6 | paths:
7 | - 'docs/sphinx_doc/**/*'
8 | push:
9 | branches:
10 | - main
11 |
12 | jobs:
13 | pages:
14 | runs-on: ubuntu-20.04
15 | steps:
16 | - name: Checkout
17 | uses: actions/checkout@v4
18 | - name: Setup Python
19 | uses: actions/setup-python@master
20 | with:
21 | python-version: '3.10'
22 | - name: Choose Pandoc
23 | shell: bash
24 | run: |
25 | case $RUNNER_OS in
26 | "Linux")
27 | printf 'INSTALLER_SUFFIX=1-amd64.deb' >> $GITHUB_ENV
28 | ;;
29 | "macOS")
30 | printf 'INSTALLER_SUFFIX=macOS.pkg' >> $GITHUB_ENV
31 | ;;
32 | *)
33 | printf 'Do not know how to install pandoc on %s\n' "$RUNNER_OS"
34 | exit 1
35 | ;;
36 | esac
37 | - name: Download Pandoc
38 | shell: bash
39 | env:
40 | GITHUB_TOKEN: ${{ github.token }}
41 | REPO: jgm/pandoc
42 | DOWNLOAD_URL: 'https://github.com/jgm/pandoc/releases/download/'
43 | run: |
44 | gh release download ${{ inputs.version }} \
45 | --repo "$REPO" \
46 | --pattern '*'${{ env.INSTALLER_SUFFIX }}
47 | printf 'INSTALLER_VERSION=%s' \
48 | "$(ls pandoc-*-${{ env.INSTALLER_SUFFIX }} | \
49 | sed 's/pandoc-\([0-9.]*\)-.*/\1/')" \
50 | >> $GITHUB_ENV
51 | - name: Install Pandoc
52 | shell: bash
53 | env:
54 | INSTALLER: pandoc-${{ env.INSTALLER_VERSION }}-${{ env.INSTALLER_SUFFIX }}
55 | run: |
56 | case $RUNNER_OS in
57 | "Linux")
58 | sudo apt install ./$INSTALLER
59 | ;;
60 | "macOS")
61 | sudo installer -pkg ./$INSTALLER -target '/'
62 | ;;
63 | *)
64 | echo "$RUNNER_OS not supported"
65 | exit 1
66 | ;;
67 | esac
68 | rm $INSTALLER
69 | - name: Install Sphinx Dependencies
70 | run: |
71 | python -m pip install --upgrade pip
72 | pip install -r requirements.txt
73 | pip install -r docs/sphinx_doc/requirements.txt
74 | - name: Build Documentation
75 | run: |
76 | cd docs/sphinx_doc
77 | bash build_sphinx_doc.sh
78 | - name: Upload Documentation
79 | uses: actions/upload-artifact@v3
80 | with:
81 | name: SphinxDoc
82 | path: 'docs/sphinx_doc/build/html'
83 | - name: Push Pages
84 | uses: peaceiris/actions-gh-pages@v3
85 | if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
86 | with:
87 | github_token: ${{ secrets.GITHUB_TOKEN }}
88 | publish_dir: 'docs/sphinx_doc/build/html'
89 |
--------------------------------------------------------------------------------
/.github/workflows/docker-image-arm.yml:
--------------------------------------------------------------------------------
1 | name: build-and-upload-docker-image-arm
2 |
3 | on:
4 | push:
5 | branches:
6 | - 'main'
7 |
8 | env:
9 | REGISTRY: ghcr.io
10 | IMAGE_NAME: ${{ github.repository }}_arm
11 |
12 |
13 | jobs:
14 | build-and-push-image:
15 | runs-on: ubuntu-latest
16 | permissions:
17 | contents: read
18 | packages: write
19 |
20 | steps:
21 | - name: Set up QEMU
22 | uses: docker/setup-qemu-action@v3
23 |
24 | - name: Set up Docker Buildx
25 | uses: docker/setup-buildx-action@v3
26 |
27 | - name: Checkout repository
28 | uses: actions/checkout@v4
29 |
30 | - name: Log in to the Container registry
31 | uses: docker/login-action@v3
32 | with:
33 | registry: ${{ env.REGISTRY }}
34 | username: ${{ github.actor }}
35 | password: ${{ secrets.GITHUB_TOKEN }}
36 |
37 | - name: Extract metadata (tags, labels) for Docker
38 | id: meta
39 | uses: docker/metadata-action@v4
40 | with:
41 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
42 |
43 | - name: Build and push Docker image
44 | uses: docker/build-push-action@v6
45 | with:
46 | context: .
47 | push: true
48 | platforms: linux/arm64
49 | file: DockerfileArm
50 | tags: ${{ steps.meta.outputs.tags }}
51 | labels: ${{ steps.meta.outputs.labels }}
--------------------------------------------------------------------------------
/.github/workflows/docker-image.yml:
--------------------------------------------------------------------------------
1 | # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
2 | name: build-and-upload-docker-image
3 |
4 | on:
5 | push:
6 | branches:
7 | - 'main'
8 |
9 | env:
10 | REGISTRY: ghcr.io
11 | IMAGE_NAME: ${{ github.repository }}
12 |
13 | jobs:
14 | build-and-push-image:
15 | runs-on: ubuntu-latest
16 | permissions:
17 | contents: read
18 | packages: write
19 |
20 | steps:
21 | - name: Checkout repository
22 | uses: actions/checkout@v3
23 |
24 | - name: Log in to the Container registry
25 | uses: docker/login-action@v2
26 | with:
27 | registry: ${{ env.REGISTRY }}
28 | username: ${{ github.actor }}
29 | password: ${{ secrets.GITHUB_TOKEN }}
30 |
31 | - name: Extract metadata (tags, labels) for Docker
32 | id: meta
33 | uses: docker/metadata-action@v4
34 | with:
35 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
36 |
37 | - name: Build and push Docker image
38 | uses: docker/build-push-action@v4
39 | with:
40 | context: .
41 | push: true
42 | file: Dockerfile
43 | tags: ${{ steps.meta.outputs.tags }}
44 | labels: ${{ steps.meta.outputs.labels }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | .idea/
132 |
133 | # macOS
134 | .DS_Store
135 |
136 | # vscode
137 | .vscode
138 |
139 | # docs
140 | docs/sphinx_doc/build/
141 |
142 | # Used to save loggings and files
143 | *runs/
144 | memoryscope.db
145 | tmp*.json
146 | tmp*.py
147 | cradle*
148 |
149 | # sphinx docs
150 |
151 | memoryscope*.rst
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v4.3.0
4 | hooks:
5 | - id: check-ast
6 | - id: check-yaml
7 | - id: check-xml
8 | - id: check-toml
9 | - id: check-docstring-first
10 | - id: check-json
11 | - id: detect-private-key
12 | - id: trailing-whitespace
13 | exclude: (README\.md|README_ZH\.md|README_JP\.md)$
14 | - id: end-of-file-fixer
15 | files: \.py$
16 | - id: check-merge-conflict
17 | - id: check-symlinks
18 | - id: mixed-line-ending
19 | - repo: https://github.com/PyCQA/flake8
20 | rev: 6.1.0
21 | hooks:
22 | - id: flake8
23 | - repo: https://github.com/pappasam/toml-sort
24 | rev: v0.23.1
25 | hooks:
26 | - id: toml-sort-fix
27 | - repo: https://github.com/srstevenson/nb-clean
28 | rev: 3.1.0
29 | hooks:
30 | - id: nb-clean
31 | args: [ --preserve-cell-outputs, --remove-empty-cells ]
32 | - repo: https://github.com/codespell-project/codespell
33 | rev: v2.2.6
34 | hooks:
35 | - id: codespell
36 | additional_dependencies: [ tomli ]
37 | exclude: |
38 | poetry.lock|
39 | (\/.*?\.[\w:]+)/pyproject.toml|
40 | (\/.*?\.[\w:]+)/poetry.lock
41 | args:
42 | [
43 | "--ignore-words-list",
44 | "astroid,gallary,momento,narl,ot,rouge,nin,gere,asend,ans,thur",
45 | ]
46 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # __ __ ____
2 | # | \/ | ___ _ __ ___ ___ _ __ _ _/ ___| ___ ___ _ __ ___
3 | # | |\/| |/ _ \ '_ ` _ \ / _ \| '__| | | \___ \ / __/ _ \| '_ \ / _ \
4 | # | | | | __/ | | | | | (_) | | | |_| |___) | (_| (_) | |_) | __/
5 | # |_| |_|\___|_| |_| |_|\___/|_| \__, |____/ \___\___/| .__/ \___|
6 | # |___/ |_|
7 |
8 | # Instruction
9 |
10 | # To construct docker image:
11 | # sudo docker build --network=host -t memoryscope .
12 |
13 | # To run docker image:
14 | # sudo docker run -it --rm --memory=4G --net=host memoryscope
15 | # To run docker image with arguments (refer to memoryscope/core/config/arguments.py):
16 | # sudo docker run -it --rm --memory=4G --net=host -e "OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" -e "language=en" -e "human_name=superman" -e "generation_backend=openai_generation" -e "generation_model=gpt-4o" -e "embedding_backend=openai_embedding" -e "embedding_model=text-embedding-3-small" -e "enable_ranker=False" memoryscope
17 |
18 | FROM python:3.11
19 |
20 | # (Not necessary) Change pip source
21 | RUN echo '[global]' > /etc/pip.conf && \
22 | echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
23 | echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf
24 |
25 | # Install Elastic Search
26 | RUN useradd -m elastic_search_user
27 | USER elastic_search_user
28 | WORKDIR /home/elastic_search_user/elastic_search
29 | # COPY elasticsearch-8.15.0-linux-x86_64.tar.gz ./elasticsearch-8.15.0-linux-x86_64.tar.gz
30 | RUN wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.15.0-linux-x86_64.tar.gz
31 | RUN tar -xzf elasticsearch-8.15.0-linux-x86_64.tar.gz
32 | WORKDIR /home/elastic_search_user/elastic_search/elasticsearch-8.15.0
33 | ENV DISCOVERY_TYPE=single-node \
34 | XPACK_SECURITY_ENABLED=false \
35 | XPACK_LICENSE_SELF_GENERATED_TYPE=trial
36 |
37 | # Change user back to root and fix ownership
38 | USER root
39 | RUN chown -R elastic_search_user:elastic_search_user /home/elastic_search_user/
40 | WORKDIR /memory_scope_project
41 |
42 | # (Not necessary) Install the majority of deps first, using the Docker build cache to accelerate future builds
43 | COPY requirements.txt ./
44 | RUN pip3 install -r requirements.txt
45 |
46 | # Enter working dir
47 | WORKDIR /memory_scope_project
48 | COPY . .
49 | # RUN pip3 install poetry
50 | # RUN poetry install
51 | RUN pip3 install -r requirements.txt
52 |
53 | # Launch!
54 | # CMD ["bash"]
55 | CMD ["bash", "examples/docker/entrypoint.sh"]
--------------------------------------------------------------------------------
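The `-e` flags in the run command above map one-to-one onto the fields of `Arguments` (refer to memoryscope/core/config/arguments.py). A minimal sketch of the equivalent programmatic configuration, following the `Arguments` usage shown in clear-vector-store.py:

```python
# Sketch only: the same settings passed above via `-e` environment variables,
# expressed through the Python API. Requires a reachable Elasticsearch, as set
# up inside the image.
from memoryscope import Arguments, MemoryScope

arguments = Arguments(
    language="en",
    human_name="superman",
    generation_backend="openai_generation",
    generation_model="gpt-4o",
    embedding_backend="openai_embedding",
    embedding_model="text-embedding-3-small",
    enable_ranker=False,
)

ms = MemoryScope(arguments=arguments)
```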
/DockerfileArm:
--------------------------------------------------------------------------------
1 | # __ __ ____
2 | # | \/ | ___ _ __ ___ ___ _ __ _ _/ ___| ___ ___ _ __ ___
3 | # | |\/| |/ _ \ '_ ` _ \ / _ \| '__| | | \___ \ / __/ _ \| '_ \ / _ \
4 | # | | | | __/ | | | | | (_) | | | |_| |___) | (_| (_) | |_) | __/
5 | # |_| |_|\___|_| |_| |_|\___/|_| \__, |____/ \___\___/| .__/ \___|
6 | # |___/ |_|
7 |
8 | # Instruction
9 |
10 | # To construct docker image:
11 | # sudo docker build -f DockerfileArm --network=host -t memoryscope .
12 |
13 | # To run docker image:
14 | # sudo docker run -it --rm --memory=4G --net=host memoryscope
15 | # To run docker image with arguments (refer to memoryscope/core/config/arguments.py):
16 | # sudo docker run -it --rm --memory=4G --net=host -e "OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" -e "language=en" -e "human_name=superman" -e "generation_backend=openai_generation" -e "generation_model=gpt-4o" -e "embedding_backend=openai_embedding" -e "embedding_model=text-embedding-3-small" -e "enable_ranker=False" memoryscope
17 | # docker run -it --rm ghcr.io/modelscope/memoryscope_arm /bin/bash
18 | FROM python:3.11
19 |
20 | # (Not necessary) Change pip source
21 | RUN echo '[global]' > /etc/pip.conf && \
22 | echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
23 | echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf
24 |
25 | # Install Elastic Search
26 | RUN useradd -m elastic_search_user
27 | USER elastic_search_user
28 | WORKDIR /home/elastic_search_user/elastic_search
29 | RUN wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.15.2-linux-aarch64.tar.gz
30 | RUN tar -xzf elasticsearch-8.15.2-linux-aarch64.tar.gz
31 | RUN mv /home/elastic_search_user/elastic_search/elasticsearch-8.15.2 /home/elastic_search_user/elastic_search/elasticsearch-8.15.0
32 | WORKDIR /home/elastic_search_user/elastic_search/elasticsearch-8.15.0
33 | ENV DISCOVERY_TYPE=single-node \
34 | XPACK_SECURITY_ENABLED=false \
35 | XPACK_LICENSE_SELF_GENERATED_TYPE=trial
36 |
37 | # Change user back to root and fix ownership
38 | USER root
39 | RUN chown -R elastic_search_user:elastic_search_user /home/elastic_search_user/
40 | WORKDIR /memory_scope_project
41 |
42 | # (Not necessary) Install the majority of deps first, using the Docker build cache to accelerate future builds
43 | COPY requirements.txt ./
44 | RUN pip3 install -r requirements.txt
45 |
46 | # Enter working dir
47 | WORKDIR /memory_scope_project
48 | COPY . .
49 | # RUN pip3 install poetry
50 | # RUN poetry install
51 | RUN pip3 install -r requirements.txt
52 |
53 | # Launch!
54 | # CMD ["bash"]
55 | CMD ["bash", "examples/docker/entrypoint.sh"]
56 |
57 |
--------------------------------------------------------------------------------
/README_JP.md:
--------------------------------------------------------------------------------
1 | [**English**](./README.md) | [**中文**](./README_ZH.md) | 日本語
2 |
3 | # MemoryScope
4 |
5 |
6 |
7 | あなたのLLMチャットボットに強力で柔軟な長期記憶システムを装備しましょう。
8 |
9 | [](https://pypi.org/project/memoryscope/)
10 | [](https://pypi.org/project/memoryscope/)
11 | [](./LICENSE)
12 | [](https://modelscope.github.io/MemoryScope/en/index.html#welcome-to-memoryscope-tutorial)
13 | [](https://modelscope.github.io/MemoryScope/en/docs/api.html)
14 | [](https://modelscope.github.io/MemoryScope/en/docs/contribution.html)
15 |
16 | ----
17 | ## 📰 ニュース
18 |
19 | - **[2024-09-10]** MemoryScope v0.1.1.0をリリースしました。 [PyPI](https://pypi.org/simple/memoryscope/)でも入手可能です!
20 | ----
21 | ## 🌟 MemoryScopeとは?
22 | MemoryScopeは、LLMチャットボットに強力で柔軟な長期記憶能力を提供し、その能力を構築するためのフレームワークを提供します。
23 | 個人アシスタントや感情的な伴侶などのシナリオに適用でき、長期記憶を通じてユーザーの基本情報やさまざまな習慣や好みを覚え続けることができます。
24 | これにより、ユーザーはLLMを使用する際に徐々に「理解されている」感覚を体験することができます。
25 |
26 | ### デモ
27 |
28 |
29 |
30 |
31 | ### フレームワーク
32 |
33 |
34 |
35 |
36 | 💾 メモリデータベース: MemoryScopeは、システム内に記録されたすべての記憶片を保存するためのベクトルデータベース(デフォルトは*ElasticSearch*)を備えています。
37 |
38 | 🔧 ワーカーライブラリ: MemoryScopeは、長期記憶の能力を個々のワーカーに原子化し、クエリ情報のフィルタリング、観察の抽出、洞察の更新など、20以上のワーカーを含みます。
39 |
40 | 🛠️ オペレーションライブラリ: ワーカーパイプラインに基づいて、メモリサービスのオペレーションを構築し、メモリの取得やメモリの統合などの主要な機能を実現します。
41 |
42 | - メモリの取得: ユーザークエリが到着すると、この操作は意味的に関連する記憶片を返します。
43 | クエリが時間に言及している場合は、対応する時間の記憶片も返します。
44 | - メモリの統合: この操作は、一連のユーザークエリを受け取り、クエリから抽出された重要なユーザー情報を統合された*観察*としてメモリデータベースに保存します。
45 | - 反映と再統合: 定期的に、この操作は新たに記録された*観察*を反映し、*洞察*を形成および更新します。
46 | その後、メモリの再統合を実行して、記憶片間の矛盾や重複が適切に処理されるようにします。
47 |
48 | ⚙️ ベストプラクティス:
49 |
50 | - MemoryScopeは、長期記憶のコア機能に基づいて、長期記憶を持つ対話インターフェース(API)と長期記憶を持つコマンドライン対話の実践(CLI)を実装しています。
51 | - MemoryScopeは、現在人気のあるエージェントフレームワーク(AutoGen、AgentScope)を組み合わせて、ベストプラクティスを提供します。
52 |
53 | ### 主な特徴
54 |
55 | ⚡ 低い応答時間(RT):
56 | - システム内のバックエンド操作(メモリの統合、反映と再統合)は、フロントエンド操作(メモリの取得)と分離されています。
57 | - バックエンド操作は通常(および推奨される)キューに入れられるか、定期的に実行されるため、システムのユーザー応答時間(RT)はフロントエンド操作のみに依存し、約500ミリ秒です。
58 |
59 | 🌲 階層的で一貫性のある記憶:
60 | - システムに保存される記憶片は階層構造になっており、*洞察*は同様のテーマの*観察*の集約から得られる高レベルの情報です。
61 | - 記憶片間の矛盾や重複は定期的に処理され、一貫性が保たれます。
62 | - ユーザーの虚偽の内容はフィルタリングされ、LLMの幻覚を避けることができます。
63 |
64 | ⏰ 時間感覚:
65 | - メモリの取得とメモリの統合を実行する際に時間感覚があり、クエリが時間に言及している場合に正確な関連情報を取得できます。
66 |
67 | ----
68 |
69 | ## 💼 サポートされているモデルAPI
70 |
71 | | バックエンド | タスク | サポートされているモデルの一部 |
72 | |-------------------|------------|------------------------------------------------------------------------|
73 | | openai_backend | Generation | gpt-4o, gpt-4o-mini, gpt-4, gpt-3.5-turbo |
74 | | | Embedding | text-embedding-ada-002, text-embedding-3-large, text-embedding-3-small |
75 | | dashscope_backend | Generation | qwen-max, qwen-plus, qwen2-72b-instruct                                 |
76 | | | Embedding | text-embedding-v1, text-embedding-v2 |
77 | | | Reranker | gte-rerank |
78 |
79 | 将来的には、より多くのモデルインターフェースとローカルデプロイメントのLLMおよび埋め込みサービスをサポートする予定です。
80 |
81 | ## 🚀 インストール
82 | インストール方法については、[Installation.md](docs/installation.md)を参照してください。
83 |
84 | ## 🍕 クイックスタート
85 | - [簡単な使用法(クイックスタート)](./examples/api/simple_usages.ipynb)
86 | - [AutoGenとの連携](./examples/api/autogen_example.md)
87 | - [MemoryScopeチャットボットとのCLI](./examples/cli/README.md)
88 | - [高度なカスタマイズ](./examples/advance/custom_operator.md)
89 |
90 | ## 💡 貢献
91 |
92 | 貢献は常に奨励されています!
93 |
94 | プルリクエストをコミットする前に、このリポジトリにpre-commitフックをインストールすることを強くお勧めします。
95 | これらのフックは、gitコミットを行うたびに実行される小さなハウスキーピングスクリプトであり、フォーマットとリンティングを自動的に処理します。
96 | ```shell
97 | pip install -e .
98 | pre-commit install
99 | ```
100 |
101 | 詳細については、[貢献ガイド](./docs/contribution.md)を参照してください。
102 |
103 | ## 📖 引用
104 |
105 | MemoryScopeを論文で使用する場合は、以下の引用を追加してください:
106 |
107 | ```
108 | @software{MemoryScope,
109 | author = {Li Yu and
110 | Tiancheng Qin and
111 | Qingxu Fu and
112 | Sen Huang and
113 | Xianzhe Xu and
114 | Zhaoyang Liu and
115 | Boyin Liu},
116 | month = {09},
117 | title = {{MemoryScope}},
118 | url = {https://github.com/modelscope/MemoryScope},
119 | year = {2024}
120 | }
121 | ```
122 |
--------------------------------------------------------------------------------
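The API chat practice described in the README above reduces to a few calls. A minimal sketch, assuming a `default_memory_chat` accessor and a `chat_with_memory` method (hypothetical names here; the maintained examples live under examples/api/):

```python
# Sketch of the memory-chat API described in the README above.
# `default_memory_chat` and `chat_with_memory` are assumed names; see the
# examples under examples/api/ for authoritative usage.
from memoryscope import Arguments, MemoryScope

arguments = Arguments(
    language="en",
    memory_chat_class="api_memory_chat",
    generation_backend="openai_generation",
    generation_model="gpt-4o",
    embedding_backend="openai_embedding",
    embedding_model="text-embedding-3-small",
    enable_ranker=False,
)

ms = MemoryScope(arguments=arguments)
memory_chat = ms.default_memory_chat  # assumed accessor
answer = memory_chat.chat_with_memory(query="My favorite color is blue.")  # assumed method
print(answer)
```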
/clear-vector-store.py:
--------------------------------------------------------------------------------
1 | """
2 | Warning!
3 |
4 | This script purges the entire vector store!
5 |
6 | """
7 |
8 | from memoryscope import MemoryScope, Arguments
9 |
10 | arguments = Arguments(
11 | language="en",
12 | human_name="user",
13 | assistant_name="AI",
14 | memory_chat_class="api_memory_chat",
15 | generation_backend="openai_generation",
16 | generation_model="gpt-4o",
17 | embedding_backend="openai_embedding",
18 | embedding_model="text-embedding-3-small",
19 | enable_ranker=False,
20 | )
21 |
22 | ms = MemoryScope(arguments=arguments)
23 | es_store = ms.context.memory_store.es_store
24 | es_store.sync_delete_all()
25 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | memory_scope_main:
3 | image: ghcr.io/modelscope/memoryscope:main
4 | # image: ghcr.io/modelscope/memoryscope_arm:main # For ARM architecture
5 | environment:
6 | DASHSCOPE_API_KEY: "sk-0000000000"
7 | # OPENAI_API_KEY: "sk-0000000000"
8 | volumes:
9 | - ./memoryscope/core/config:/memory_scope_project/memoryscope/memoryscope/core/config
10 | deploy:
11 | resources:
12 | limits:
13 | memory: 4G
14 | stdin_open: true
15 | tty: true
16 | # Please execute `docker compose run memory_scope_main` instead of `docker compose up`
17 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # MemoryScope Documentation
2 |
3 | ## Build Documentation
4 |
5 | Please use the following commands to build the Sphinx docs for MemoryScope.
6 |
7 | ```shell
8 | # step 1: Install dependencies
9 | pip install sphinx sphinx-autobuild sphinx_rtd_theme myst-parser sphinxcontrib-mermaid autodoc_pydantic nbsphinx
10 |
11 | # step 2: go into the sphinx_doc dir
12 | cd docs/sphinx_doc
13 |
14 | # step 3: build the sphinx doc
15 | ./build_sphinx_doc.sh
16 |
17 | # step 4: view sphinx_doc/build/html/index.html using your browser
18 | cd docs/sphinx_doc/build/html && python -m http.server 8899
19 | ```
20 |
--------------------------------------------------------------------------------
/docs/contribution.md:
--------------------------------------------------------------------------------
1 | # Contribute to MemoryScope
2 | Our community thrives on the diverse ideas and contributions of its members. Whether you're fixing a bug, adding a new feature, improving the documentation, or adding examples, your help is welcome. Here's how you can contribute:
3 | ## Report Bugs and Ask For New Features?
4 | Did you find a bug or have a feature request? Please first check the issue tracker to see if it has already been reported. If not, feel free to open a new issue. Include as much detail as possible:
5 | - A descriptive title
6 | - Clear description of the issue
7 | - Steps to reproduce the problem
8 | - The version of MemoryScope you are using
9 | - Any relevant code snippets or error messages
10 | ## Contribute to Codebase
11 | ### Fork and Clone the Repository
12 | To work on an issue or a new feature, start by forking the MemoryScope repository and then cloning your fork locally.
13 | ```bash
14 | git clone https://github.com/your-username/memoryscope.git
15 | cd memoryscope
16 | ```
17 | ### Create a New Branch
18 | Create a new branch for your work. This helps keep proposed changes organized and separate from the `main` branch.
19 | ```bash
20 | git checkout -b your-feature-branch-name
21 | ```
22 | ### Making Changes
23 | With your new branch checked out, you can now make your changes to the code. Remember to keep your changes as focused as possible. If you're addressing multiple issues or features, it's better to create separate branches and pull requests for each.
24 | Compared with the release install, the developer setup adds `pre-commit` hooks that run format checks on every commit:
25 | ```bash
26 | # Install the developer version
27 | pip install -e .
28 | # Install pre-commit hooks
29 | pre-commit install
30 | ```
31 | ### Commit Your Changes
32 | Once you've made your changes, it's time to commit them. Write clear and concise commit messages that explain your changes.
33 | ```bash
34 | git add -A
35 | git commit -m "A brief description of the changes"
36 | ```
37 | `pre-commit` may raise some errors. Please resolve them as indicated by the messages and commit again.
38 | ### Submit a Pull Request
39 | When you're ready for feedback, submit a pull request to the MemoryScope `main` branch. In your pull request description, explain the changes you've made and any other relevant context.
40 | We will review your pull request. This process might involve some discussion, additional changes on your part, or both.
41 | ### Code Review
42 | Wait for us to review your pull request. We may suggest some changes or improvements. Keep an eye on your GitHub notifications and be responsive to any feedback.
43 |
--------------------------------------------------------------------------------
/docs/contribution_zh.md:
--------------------------------------------------------------------------------
1 | # 贡献到MemoryScope
2 | 我们的社区因其成员的多样化思想和贡献而兴旺发展。无论是修复一个错误,添加一个新功能,改进文档,还是添加示例,我们都欢迎您的帮助。以下是您做出贡献的方法:
3 | ## 报告错误和提出新功能
4 | 当您发现一个错误或者有一个功能请求,请首先检查问题跟踪器,查看它是否已经被报告。如果没有,随时可以开设一个新的问题。请包含尽可能多的细节:
5 | - 简明扼要的标题
6 | - 清晰地描述问题
7 | - 提供重现问题的步骤
8 | - 提供所使用的MemoryScope版本
9 | - 提供所有相关代码片段或错误信息
10 | ## 对代码库做出贡献
11 | ### Fork和Clone仓库
12 | 要处理一个问题或新功能,首先要Fork仓库,然后将你的Fork克隆到本地。
13 | ```bash
14 | git clone git@github.com:modelscope/MemoryScope.git
15 | cd MemoryScope
16 | ```
17 | ### 创建一个新分支
18 | 为您的工作创建一个新分支。这有助于保持拟议更改的组织性,并与`main`分支分离。
19 | ```bash
20 | git checkout -b your-feature-branch-name
21 | ```
22 | ### 做出修改
23 | 我们非常推荐每一个贡献者在代码提交前,安装`pre-commit`钩子工具,
24 | 能够帮助在每一次git提交的时候,进行自动化的代码格式校验。
25 | ```bash
26 | # 安装开发者版本
27 | pip install -e .
28 | # 安装 pre-commit 钩子
29 | pre-commit install
30 | ```
31 |
32 | ### 提交您的修改
33 |
34 | 修改完成之后就是提交它们的时候了。请提供清晰而简洁的提交信息,以解释您的修改内容。
35 |
36 | ```bash
37 | git add -A
38 | git commit -m "修改内容的简要描述"
39 | ```
40 |
41 | 运行时您可能会收到 `pre-commit` 给出的错误信息。请根据错误信息修改您的代码然后再次提交。
42 |
43 | ### 提交 Pull Request
44 |
45 | 当您准备好您的修改分支后,向MemoryScope的 `main` 分支提交一个Pull Request。在您的Pull Request描述中,解释您所做的修改以及其他相关的信息。
46 |
47 | 我们将审查您的Pull Request。这个过程可能涉及一些讨论以及额外的代码修改。
48 |
49 | ### 代码审查
50 |
51 | 等待我们审核您的Pull Request。我们可能会提供一些更改或改进建议。请留意您的GitHub通知,并对反馈做出响应。
--------------------------------------------------------------------------------
/docs/images/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modelscope/MemoryScope/715b75e520b1029885647afc6b37419182ca94ce/docs/images/framework.png
--------------------------------------------------------------------------------
/docs/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modelscope/MemoryScope/715b75e520b1029885647afc6b37419182ca94ce/docs/images/logo.png
--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
1 | # Installing MemoryScope
2 |
3 | ## I. Install with Docker [Recommended] [x86_64]
4 |
5 | 1. Clone the repository and edit settings
6 | ```bash
7 | # clone project
8 | git clone https://github.com/modelscope/memoryscope
9 | cd memoryscope
10 | # edit configuration, e.g. add api keys
11 | vim memoryscope/core/config/demo_config.yaml
12 | ```
13 |
14 | 2. Build Docker image
15 | ```bash
16 | sudo docker build --network=host -t memoryscope .
17 | ```
18 | If you are using an ARM-based machine, change the command above to: `sudo docker build -f DockerfileArm --network=host -t memoryscope .`
19 |
20 | 3. Launch Docker container
21 | ```bash
22 | sudo docker run -it --rm --net=host memoryscope
23 | ```
24 |
25 | > [!Important]
26 | > To inspect how memories change during the conversation, modify the command in step 3 to `sudo docker run -it --name=memoryscope_container --rm --net=host memoryscope`;
27 | > then start a new terminal window and execute `sudo docker exec -it memoryscope_container python quick-start-demo.py --config_path=memoryscope/core/config/demo_config.yaml`;
28 | > in the second window, input `/list_memory refresh_time=5` to inspect the memory in real time.
29 |
30 | ## II. Install with Docker Compose [Recommended] [x86_64]
31 |
32 | 1. Clone the repository and edit settings
33 | ```bash
34 | # clone project
35 | git clone https://github.com/modelscope/memoryscope
36 | cd memoryscope
37 | # edit configuration, e.g. add api keys
38 | vim memoryscope/core/config/demo_config.yaml
39 | ```
40 |
41 | 2. Edit `docker-compose.yml` to change the environment variable.
42 | ```yaml
43 | OPENAI_API_KEY: "sk-0000000000"
44 | ```
45 |
46 | 3. Run `docker-compose run memory_scope_main` to build and launch the MemoryScope CLI interface. (For the ARM architecture, edit `docker-compose.yml` and change `image: ghcr.io/modelscope/memoryscope:main` to `image: ghcr.io/modelscope/memoryscope_arm:main`.)
47 |
48 |
49 | ## III. Install from PyPI
50 |
51 | 1. Install from PyPI
52 | ```bash
53 | pip install memoryscope
54 | ```
55 |
56 | 2. Run the Elasticsearch service; refer to the [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/getting-started.html).
57 | The Docker method is recommended:
58 | ```bash
59 | sudo docker run -p 9200:9200 \
60 | -e "discovery.type=single-node" \
61 | -e "xpack.security.enabled=false" \
62 | -e "xpack.license.self_generated.type=trial" \
63 | docker.elastic.co/elasticsearch/elasticsearch:8.13.2
64 | ```
65 |
66 | 3. Test Chinese / Dashscope Configuration
67 | ```bash
68 | export DASHSCOPE_API_KEY="sk-0000000000"
69 | memoryscope --language="cn" \
70 | --memory_chat_class="cli_memory_chat" \
71 | --human_name="用户" \
72 | --assistant_name="AI" \
73 | --generation_backend="dashscope_generation" \
74 | --generation_model="qwen-max" \
75 | --embedding_backend="dashscope_embedding" \
76 | --embedding_model="text-embedding-v2" \
77 | --enable_ranker=True \
78 | --rank_backend="dashscope_rank" \
79 | --rank_model="gte-rerank"
80 | ```
81 |
82 | 4. Test English / OpenAI Configuration
83 | ```bash
84 | export OPENAI_API_KEY="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
85 | memoryscope --language="en" \
86 | --memory_chat_class="cli_memory_chat" \
87 | --human_name="User" \
88 | --assistant_name="AI" \
89 | --generation_backend="openai_generation" \
90 | --generation_model="gpt-4o" \
91 | --embedding_backend="openai_embedding" \
92 | --embedding_model="text-embedding-3-small" \
93 | --enable_ranker=False
94 | ```
95 |
96 | ## IV. Install from source
97 |
98 | 1. Clone the repository and edit settings
99 | ```bash
100 | # clone project
101 | git clone https://github.com/modelscope/memoryscope
102 | cd memoryscope
103 | # edit configuration, e.g. add api keys
104 | vim memoryscope/core/config/demo_config.yaml
105 | ```
106 |
107 | 2. Install
108 | ```bash
109 | pip install -e .
110 | ```
111 |
112 | 3. Run the Elasticsearch service; refer to the [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/getting-started.html).
113 | The Docker method is recommended:
114 | ```bash
115 | sudo docker run -p 9200:9200 \
116 | -e "discovery.type=single-node" \
117 | -e "xpack.security.enabled=false" \
118 | -e "xpack.license.self_generated.type=trial" \
119 | docker.elastic.co/elasticsearch/elasticsearch:8.13.2
120 | ```
121 |
122 | 4. Launch MemoryScope; see also the [CLI documents](../examples/cli/CLI_README.md)
123 | ```bash
124 | export OPENAI_API_KEY="sk-0000000000"
125 | python quick-start-demo.py --config_path=memoryscope/core/config/demo_config.yaml
126 | ```
127 |
--------------------------------------------------------------------------------
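Before launching MemoryScope against the Elasticsearch container started in the steps above, it can help to confirm that the service is reachable. A small check using the `requests` package (any HTTP client works); with `xpack.security.enabled=false`, Elasticsearch answers plain HTTP on port 9200:

```python
# Quick reachability check for the local Elasticsearch started above.
import requests

resp = requests.get("http://localhost:9200")  # plain HTTP: security is disabled
resp.raise_for_status()
print(resp.json()["version"]["number"])       # e.g. "8.13.2"
```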
/docs/installation_zh.md:
--------------------------------------------------------------------------------
1 | # MemoryScope 安装指南
2 |
3 | ## 一、使用 Docker 安装 [推荐]
4 |
5 | 1. 克隆仓库并编辑配置
6 | ```bash
7 | # 克隆项目
8 | git clone https://github.com/modelscope/memoryscope
9 | cd memoryscope
10 | # 编辑配置,例如添加 API 密钥
11 | vim memoryscope/core/config/demo_config_zh.yaml
12 | ```
13 |
14 | 2. 构建 Docker 镜像
15 | ```bash
16 | sudo docker build --network=host -t memoryscope .
17 | ```
18 | 备注:如果是arm架构的电脑,则必须使用另一个命令:`sudo docker build -f DockerfileArm --network=host -t memoryscope .`
19 |
20 | 3. 启动 Docker 容器
21 | ```bash
22 | sudo docker run -it --rm --net=host memoryscope
23 | ```
24 |
25 |
26 | > [!Important]
27 | > 如果需要观察Memory的变化请调整第3步的运行命令。首先执行 `sudo docker run -it --name=memoryscope_container --rm --net=host memoryscope`启动memoryscope;
28 | > 然后新建命令行窗口,运行`sudo docker exec -it memoryscope_container python quick-start-demo.py --config_path=memoryscope/core/config/demo_config_zh.yaml`;
29 | > 在第二个窗口,继续输入`/list_memory refresh_time=5`来检查实时的memory
30 |
31 | ## 二、使用 Docker Compose 安装 [推荐] [x86_64]
32 |
33 | 1. 克隆仓库并编辑配置
34 | ```bash
35 | # 克隆项目
36 | git clone https://github.com/modelscope/memoryscope
37 | cd memoryscope
38 | # 编辑配置,例如添加 API 密钥
39 | vim memoryscope/core/config/demo_config_zh.yaml
40 | ```
41 |
42 | 2. 编辑 `docker-compose.yml` 文件以更改环境变量。
43 | ```
44 | DASHSCOPE_API_KEY: "sk-0000000000"
45 | ```
46 |
47 | 3. 运行 `docker-compose run memory_scope_main` 命令来构建并启动 MemoryScope CLI 界面。(备注:如果是arm架构,还需要手动将docker-compose.yml中的`ghcr.io/modelscope/memoryscope:main`修改成`ghcr.io/modelscope/memoryscope_arm:main`)
48 |
49 |
50 | ## 三、通过 PYPI 安装
51 |
52 | 1. 从 PyPI 安装:
53 | ```bash
54 | pip install memoryscope
55 | ```
56 |
57 | 2. 运行 Elasticsearch 服务,参照 [Elasticsearch 文档](https://www.elastic.co/guide/cn/elasticsearch/reference/current/getting-started.html)。
58 | 推荐使用 Docker 方法:
59 | ```
60 | sudo docker run -p 9200:9200 \
61 | -e "discovery.type=single-node" \
62 | -e "xpack.security.enabled=false" \
63 | -e "xpack.license.self_generated.type=trial" \
64 | docker.elastic.co/elasticsearch/elasticsearch:8.13.2
65 | ```
66 |
67 | 3. 测试中文 / Dashscope 对话配置:
68 | ```bash
69 | export DASHSCOPE_API_KEY="sk-0000000000"
70 | memoryscope --language="cn" \
71 | --memory_chat_class="cli_memory_chat" \
72 | --human_name="用户" \
73 | --assistant_name="AI" \
74 | --generation_backend="dashscope_generation" \
75 | --generation_model="qwen-max" \
76 | --embedding_backend="dashscope_embedding" \
77 | --embedding_model="text-embedding-v2" \
78 | --enable_ranker=True \
79 | --rank_backend="dashscope_rank" \
80 | --rank_model="gte-rerank"
81 | ```
82 |
83 | 4. 测试英文 / OpenAI 对话配置:
84 | ```bash
85 | export OPENAI_API_KEY="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
86 | memoryscope --language="en" \
87 | --memory_chat_class="cli_memory_chat" \
88 | --human_name="User" \
89 | --assistant_name="AI" \
90 | --generation_backend="openai_generation" \
91 | --generation_model="gpt-4o" \
92 | --embedding_backend="openai_embedding" \
93 | --embedding_model="text-embedding-3-small" \
94 | --enable_ranker=False
95 | ```
96 |
97 |
98 | ## 四、从源码安装
99 |
100 | 1. 克隆仓库并编辑设置
101 | ```bash
102 | # 克隆项目
103 | git clone https://github.com/modelscope/memoryscope
104 | cd memoryscope
105 | # 编辑配置,例如添加 API 密钥
106 | vim memoryscope/core/config/demo_config_zh.yaml
107 | ```
108 |
109 | 2. 安装依赖
110 | ```bash
111 | pip install -e .
112 | ```
113 |
114 | 3. 运行 Elasticsearch 服务,参照 [Elasticsearch 文档](https://www.elastic.co/guide/cn/elasticsearch/reference/current/getting-started.html)。
115 | 推荐使用 Docker 方法:
116 | ```
117 | sudo docker run -p 9200:9200 \
118 | -e "discovery.type=single-node" \
119 | -e "xpack.security.enabled=false" \
120 | -e "xpack.license.self_generated.type=trial" \
121 | docker.elastic.co/elasticsearch/elasticsearch:8.13.2
122 | ```
123 |
124 | 4. 启动 MemoryScope,同时参考 [CLI 文档](../examples/cli/CLI_README_ZH.md)
125 | ```bash
126 | export DASHSCOPE_API_KEY="sk-0000000000"
127 | python quick-start-demo.py --config_path=memoryscope/core/config/demo_config_zh.yaml
128 | ```
129 |
130 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile
2 |
3 | SPHINXBUILD = sphinx-build
4 | SPHINXPROJ = MemoryScope-Doc
5 | ASSETSDIR = assets
6 | BUILDDIR = build/html
7 | SOURCEDIR_EN = en/source
8 | BUILDDIR_EN = build/html/en
9 | SOURCEDIR_ZH = zh/source
10 | BUILDDIR_ZH = build/html/zh
11 |
12 | # English document
13 | en:
14 | @$(SPHINXBUILD) -b html "$(SOURCEDIR_EN)" "$(BUILDDIR_EN)"
15 | @echo
16 | @echo "Build finished. The HTML pages are in $(BUILDDIR_EN)"
17 |
18 | # Chinese document
19 | zh:
20 | @$(SPHINXBUILD) -b html "$(SOURCEDIR_ZH)" "$(BUILDDIR_ZH)"
21 | @echo
22 | @echo "Build finished. The HTML pages are in $(BUILDDIR_ZH)"
23 |
24 | index:
25 | @cp "$(ASSETSDIR)/redirect.html" "$(BUILDDIR)/index.html"
26 |
27 | %: Makefile
28 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR_EN)" "$(BUILDDIR_EN)" $(O)
29 |
30 | all: en zh index
31 |
32 | .PHONY: all en zh index
--------------------------------------------------------------------------------
/docs/sphinx_doc/assets/redirect.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <html>
3 | <head>
4 |     <meta charset="utf-8">
5 |     <meta http-equiv="refresh" content="0; url=./en/index.html">
6 |     <title>MemoryScope Documentation</title>
7 | </head>
8 | <body>
9 |     <p>Redirecting to English documentation...</p>
10 |     <p>If you are not redirected, <a href="./en/index.html">click here</a>.</p>
11 | </body>
12 | </html>
13 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/build_sphinx_doc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # remove build
4 | rm -rf build/html/*
5 | rm -rf en/source/memoryscope*.rst
6 | rm -rf zh/source/memoryscope*.rst
7 | rm -rf ja/source/memoryscope*.rst
8 |
9 | # copy related files
10 | cd ../../
11 |
12 | cp README.md docs/sphinx_doc/en/source/README.md
13 | cp docs/installation.md docs/sphinx_doc/en/source/docs/installation.md
14 | cp docs/contribution.md docs/sphinx_doc/en/source/docs/contribution.md
15 | cp -r docs/images docs/sphinx_doc/en/source/docs/images
16 | cp -r examples docs/sphinx_doc/en/source/examples
17 |
18 | cp README_ZH.md docs/sphinx_doc/zh/source/README.md
19 | cp docs/installation_zh.md docs/sphinx_doc/zh/source/docs/installation.md
20 | cp docs/contribution_zh.md docs/sphinx_doc/zh/source/docs/contribution.md
21 | cp -r docs/images docs/sphinx_doc/zh/source/docs/images
22 | cp -r examples docs/sphinx_doc/zh/source/examples
23 |
24 | cp README_JP.md docs/sphinx_doc/ja/source/README.md
25 | cp docs/installation_jp.md docs/sphinx_doc/ja/source/docs/installation.md
26 | cp docs/contribution_jp.md docs/sphinx_doc/ja/source/docs/contribution.md
27 | cp -r docs/images docs/sphinx_doc/ja/source/docs/images
28 | cp -r examples docs/sphinx_doc/ja/source/examples
29 |
30 | # build
31 | cd docs/sphinx_doc
32 | sphinx-apidoc -f -o en/source ../../memoryscope -t template -e
33 | sphinx-apidoc -f -o zh/source ../../memoryscope -t template -e
34 | sphinx-apidoc -f -o ja/source ../../memoryscope -t template -e
35 |
36 | # rebuild the docs, then remove the copied source files
37 | make clean all
38 |
39 | rm en/source/README.md
40 | rm en/source/docs/installation.md
41 | rm en/source/docs/contribution.md
42 | rm -rf en/source/docs/images
43 | rm -rf en/source/examples
44 |
45 | rm zh/source/README.md
46 | rm zh/source/docs/installation.md
47 | rm zh/source/docs/contribution.md
48 | rm -rf zh/source/docs/images
49 | rm -rf zh/source/examples
50 |
51 | rm ja/source/README.md
52 | rm ja/source/docs/installation.md
53 | rm ja/source/docs/contribution.md
54 | rm -rf ja/source/docs/images
55 | rm -rf ja/source/examples
56 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/en/source/_static/custom.css:
--------------------------------------------------------------------------------
1 | .language-selector a {
2 | color: white;
3 | width: 20px;
4 | }
--------------------------------------------------------------------------------
/docs/sphinx_doc/en/source/_templates/language_selector.html:
--------------------------------------------------------------------------------
1 |
2 |
6 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/en/source/_templates/layout.html:
--------------------------------------------------------------------------------
1 |
2 | {% extends "!layout.html" %} {% block sidebartitle %} {{ super() }} {% include
3 | "language_selector.html" %} {% endblock %}
4 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/en/source/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Configuration file for the Sphinx documentation builder.
3 | #
4 | # This file only contains a selection of the most common options. For a full
5 | # list see the documentation:
6 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
7 |
8 | # -- Path setup --------------------------------------------------------------
9 |
10 | # If extensions (or modules to document with autodoc) are in another directory,
11 | # add these directories to sys.path here. If the directory is relative to the
12 | # documentation root, use os.path.abspath to make it absolute, like shown here.
13 | #
14 | import os
15 | import sys
16 |
17 | sys.path.insert(0, os.path.abspath("../../../../../MemoryScope"))
18 |
19 |
20 | # -- Project information -----------------------------------------------------
21 |
22 | language = "en"
23 |
24 | project = "MemoryScope"
25 | copyright = "2024, Alibaba Tongyi Lab"
26 | author = "EcoML team of Alibaba Tongyi Lab"
27 |
28 |
29 | # -- General configuration ---------------------------------------------------
30 |
31 | # Add any Sphinx extension module names here, as strings. They can be
32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
33 | # ones.
34 | extensions = [
35 | "sphinx.ext.autodoc",
36 | "sphinx.ext.autosummary",
37 | "sphinx.ext.viewcode",
38 | "sphinx.ext.napoleon",
39 | "sphinxcontrib.mermaid",
40 | "myst_parser",
41 | "sphinx.ext.autosectionlabel",
42 | "sphinxcontrib.autodoc_pydantic",
43 | "nbsphinx"
44 | ]
45 |
46 | autodoc_pydantic_model_show_json = True
47 | autodoc_pydantic_settings_show_json = True
48 |
49 | # Prefix document path to section labels, otherwise autogenerated labels would
50 | # look like 'heading' rather than 'path/to/file:heading'
51 | autosectionlabel_prefix_document = True
52 | autosummary_generate = True
53 | autosummary_ignore_module_all = False
54 |
55 | autodoc_member_order = "bysource"
56 |
57 | # If true, '()' will be appended to :func: etc. cross-reference text.
58 | add_function_parentheses = False
59 |
60 | # If true, the current module name will be prepended to all description
61 | # unit titles (such as .. function::).
62 | add_module_names = True
63 |
64 | autodoc_default_flags = ["members"]
65 |
66 | autodoc_default_options = {
67 | "members": True,
68 | "member-order": "bysource",
69 | "special-members": "__init__",
70 | }
71 | # Add any paths that contain templates here, relative to this directory.
72 | templates_path = ["_templates"]
73 |
74 | # List of patterns, relative to source directory, that match files and
75 | # directories to ignore when looking for source files.
76 | # This pattern also affects html_static_path and html_extra_path.
77 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
78 |
79 | # -- Options for HTML output -------------------------------------------------
80 |
81 | # The theme to use for HTML and HTML Help pages. See the documentation for
82 | # a list of builtin themes.
83 | #
84 | html_theme = "sphinx_rtd_theme"
85 |
86 | # html_logo = "_static/logo.png"
87 |
88 | # Add any paths that contain custom static files (such as style sheets) here,
89 | # relative to this directory. They are copied after the builtin static files,
90 | # so a file named "default.css" will overwrite the builtin "default.css".
91 | html_static_path = ["_static"]
92 |
93 | html_theme_options = {
94 | # "logo_only": True,
95 | "navigation_depth": 4,
96 | }
97 |
98 | source_suffix = {
99 | ".rst": "restructuredtext",
100 | ".md": "markdown",
101 | }
102 |
103 | html_css_files = [
104 | "custom.css",
105 | ]
106 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/en/source/docs/api.rst:
--------------------------------------------------------------------------------
1 | .. _api:
2 |
3 |
4 | MemoryScope API Documentation
5 | =============================
6 |
7 | Enumeration
8 | ===========
9 |
10 | .. automodule:: memoryscope.enumeration
11 | :members:
12 |
13 | Scheme
14 | ======
15 | .. automodule:: memoryscope.scheme
16 | :members:
17 |
18 | Config
19 | ======
20 | .. automodule:: memoryscope.core.config
21 | :members:
22 |
23 |
24 | Models
25 | ======
26 | .. automodule:: memoryscope.core.models
27 | :members:
28 |
29 |
30 |
31 | Storage
32 | =======
33 | .. automodule:: memoryscope.core.storage
34 | :members:
35 |
36 |
37 | Worker
38 | ======
39 | Base
40 | ----
41 |
42 | .. automodule:: memoryscope.core.worker
43 | :members:
44 |
45 | Frontend
46 | --------
47 | .. automodule:: memoryscope.core.worker.frontend
48 | :members:
49 |
50 | Backend
51 | --------
52 | .. automodule:: memoryscope.core.worker.backend
53 | :members:
54 |
55 | Operation
56 | =========
57 | .. automodule:: memoryscope.core.operation
58 | :members:
59 |
60 | Service
61 | =======
62 | .. automodule:: memoryscope.core.service
63 | :members:
64 |
65 | Chat
66 | ====
67 | .. automodule:: memoryscope.core.chat
68 | :members:
69 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/en/source/index.rst:
--------------------------------------------------------------------------------
1 | .. MemoryScope documentation master file, created by
2 | sphinx-quickstart on Fri Jan 5 17:53:54 2024.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | :github_url: https://github.com/modelscope/memoryscope
7 |
8 | MemoryScope Documentation
9 | =========================
10 |
11 | Welcome to MemoryScope Tutorial
12 | -------------------------------
13 |
14 | .. image:: docs/images/logo.png
15 | :align: center
16 |
17 | MemoryScope provides LLM chatbots with powerful and flexible long-term memory capabilities, offering a framework for building such abilities.
18 | It can be applied to scenarios like personal assistants and emotional companions, continuously learning through long-term memory to remember users' basic information as well as various habits and preferences.
19 | This allows users to gradually experience a sense of "understanding" when using the LLM.
20 |
21 | .. image:: docs/images/framework.png
22 | :align: center
23 |
24 | Framework
25 | ^^^^^^^^^^^^^^^^^^^^
26 |
27 | 💾 Memory Database: MemoryScope is equipped with a vector database (default is *ElasticSearch*) to store all memory fragments recorded in the system.
28 |
29 | 🔧 Worker Library: MemoryScope atomizes the capabilities of long-term memory into individual workers, including over 20 workers for tasks such as query information filtering, observation extraction, and insight updating.
30 |
31 | 🛠️ Operation Library: Based on the worker pipeline, it constructs the operations for memory services, realizing key capabilities such as memory retrieval and memory consolidation.
32 |
33 | - Memory Retrieval: Upon arrival of a user query, this operation returns the semantically related memory pieces
34 | and/or those from the corresponding time if the query involves reference to time.
35 | - Memory Consolidation: This operation takes in a batch of user queries and returns important user information
36 | extracted from the queries as consolidated *observations* to be stored in the memory database.
37 | - Reflection and Re-consolidation: At regular intervals, this operation performs reflection upon newly recorded *observations*
38 | to form and update *insights*. Then, memory re-consolidation is performed to ensure contradictions and repetitions
39 | among memory pieces are properly handled.
40 |
41 | .. toctree::
42 | :maxdepth: 2
43 | :caption: MemoryScope Tutorial
44 |
45 | About MemoryScope
46 | Installation
47 | Cli Client
48 | Simple Usages
49 | Advanced usage
50 | Contribution
51 |
52 |
53 | .. toctree::
54 | :maxdepth: 6
55 | :caption: MemoryScope API Reference
56 |
57 | API
58 |
--------------------------------------------------------------------------------
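The retrieval/consolidation split described in index.rst above also shows up at the API level. A minimal sketch; `default_memory_service` and the method names are assumptions based on the modules under memoryscope/core/service/ and memoryscope/core/operation/, not a confirmed interface:

```python
# Sketch of the two operation families described in index.rst.
# The accessor and method names below are assumptions; see
# memoryscope/core/service/ and memoryscope/core/operation/ for the real interface.
from memoryscope import Arguments, MemoryScope

ms = MemoryScope(arguments=Arguments(language="en"))
service = ms.default_memory_service  # assumed accessor

# Frontend operation: low-latency retrieval on the user's critical path (~500 ms).
memories = service.retrieve_memory(query="When is my flight to Beijing?")

# Backend operations: consolidation and reflection run off the critical path,
# typically queued or scheduled at regular intervals.
service.consolidate_memory()
```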
/docs/sphinx_doc/en/source/modules.rst:
--------------------------------------------------------------------------------
1 | memoryscope
2 | ===========
3 |
4 | .. toctree::
5 | :maxdepth: 4
6 |
7 | memoryscope
8 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/ja/source/index.rst:
--------------------------------------------------------------------------------
1 | .. MemoryScope documentation master file, created by
2 | sphinx-quickstart on Fri Jan 5 17:53:54 2024.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | :github_url: https://github.com/modelscope/memoryscope
7 |
8 | MemoryScope ドキュメント
9 | =========================
10 |
11 | MemoryScopeに関するドキュメントへようこそ
12 | ------------------------------------------------
13 |
14 | .. image:: ./docs/images/logo.png
15 | :align: center
16 |
17 | MemoryScopeは、LLMチャットボットに強力で柔軟な長期記憶能力を提供し、長期記憶能力を構築するためのフレームワークを提供します。
18 | MemoryScopeは、個人アシスタントや感情的な伴侶などの記憶シナリオに使用でき、長期記憶能力を通じてユーザーの基本情報やさまざまな習慣や好みを覚え続けることができます。
19 | これにより、ユーザーはLLMを使用する際に徐々に「理解されている」感覚を体験することができます。
20 |
21 | .. image:: docs/images/framework.png
22 | :align: center
23 |
24 | フレームワーク
25 | ^^^^^^^^^^^^^^^^^^^^
26 |
27 | 💾 メモリデータベース: MemoryScopeは、システム内に記録されたすべての記憶片を保存するためのベクトルデータベース(デフォルトは*ElasticSearch*)を備えています。
28 |
29 | 🔧 ワーカーライブラリ: MemoryScopeは、長期記憶の能力を個々のワーカーに原子化し、クエリ情報のフィルタリング、観察の抽出、洞察の更新など、20以上のワーカーを含みます。
30 |
31 | 🛠️ オペレーションライブラリ: ワーカーパイプラインに基づいて、メモリサービスのオペレーションを構築し、メモリの取得やメモリの統合などの主要な機能を実現します。
32 |
33 | - メモリの取得: ユーザークエリが到着すると、この操作は意味的に関連する記憶片を返します。
34 | クエリが時間に言及している場合は、対応する時間の記憶片も返します。
35 | - メモリの統合: この操作は、一連のユーザークエリを受け取り、クエリから抽出された重要なユーザー情報を統合された*観察*としてメモリデータベースに保存します。
36 | - 反映と再統合: 定期的に、この操作は新たに記録された*観察*を反映し、*洞察*を形成および更新します。
37 | その後、メモリの再統合を実行して、記憶片間の矛盾や重複が適切に処理されるようにします。
38 |
39 | .. toctree::
40 | :maxdepth: 2
41 | :caption: MemoryScope チュートリアル
42 |
43 | MemoryScopeについて
44 | インストール
45 | CLIクライアント
46 | 簡単な使用法
47 | 高度な使用法
48 | 貢献
49 |
50 |
51 | .. toctree::
52 | :maxdepth: 6
53 | :caption: MemoryScope APIリファレンス
54 |
55 | API
56 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/requirements.txt:
--------------------------------------------------------------------------------
1 | loguru
2 | tiktoken
3 | pillow
4 | requests
5 | openai
6 | numpy
7 | sphinx
8 | sphinx-autobuild
9 | sphinx_rtd_theme
10 | sphinxcontrib-mermaid
11 | myst-parser
12 | autodoc_pydantic
13 | nbsphinx
14 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/template/module.rst_t:
--------------------------------------------------------------------------------
1 | {{ basename | heading }}
2 | .. automodule:: {{ qualname }}
3 | {%- for option in automodule_options %}
4 | :{{ option }}:
5 | {%- endfor %}
--------------------------------------------------------------------------------
/docs/sphinx_doc/template/package.rst_t:
--------------------------------------------------------------------------------
1 | {%- macro automodule(modname, options) -%}
2 | .. automodule:: {{ modname }}
3 | {%- for option in options %}
4 | :{{ option }}:
5 | {%- endfor %}
6 | {%- endmacro %}
7 |
8 | {{- pkgname | heading }}
9 |
10 | {{ automodule(pkgname, automodule_options) }}
11 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/zh/source/_static/custom.css:
--------------------------------------------------------------------------------
1 | .language-selector a {
2 | color: white;
3 | width: 20px;
4 | }
--------------------------------------------------------------------------------
/docs/sphinx_doc/zh/source/_templates/language_selector.html:
--------------------------------------------------------------------------------
1 |
2 |
6 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/zh/source/_templates/layout.html:
--------------------------------------------------------------------------------
1 |
2 | {% extends "!layout.html" %} {% block sidebartitle %} {{ super() }} {% include
3 | "language_selector.html" %} {% endblock %}
4 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/zh/source/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Configuration file for the Sphinx documentation builder.
3 | #
4 | # This file only contains a selection of the most common options. For a full
5 | # list see the documentation:
6 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
7 |
8 | # -- Path setup --------------------------------------------------------------
9 |
10 | # If extensions (or modules to document with autodoc) are in another directory,
11 | # add these directories to sys.path here. If the directory is relative to the
12 | # documentation root, use os.path.abspath to make it absolute, like shown here.
13 | #
14 | import os
15 | import sys
16 |
17 | sys.path.insert(0, os.path.abspath("../../../../../MemoryScope"))
18 |
19 |
20 | # -- Project information -----------------------------------------------------
21 |
22 | language = "zh"
23 |
24 | project = "MemoryScope"
25 | copyright = "2024, Alibaba Tongyi Lab"
26 | author = "EcoML team of Alibaba Tongyi Lab"
27 |
28 |
29 | # -- General configuration ---------------------------------------------------
30 |
31 | # Add any Sphinx extension module names here, as strings. They can be
32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
33 | # ones.
34 | extensions = [
35 | "sphinx.ext.autodoc",
36 | "sphinx.ext.autosummary",
37 | "sphinx.ext.viewcode",
38 | "sphinx.ext.napoleon",
39 | "sphinxcontrib.mermaid",
40 | "myst_parser",
41 | "sphinx.ext.autosectionlabel",
42 | "sphinxcontrib.autodoc_pydantic",
43 | "nbsphinx"
44 | ]
45 |
46 | autodoc_pydantic_model_show_json = True
47 | autodoc_pydantic_settings_show_json = True
48 |
49 | # Prefix document path to section labels, otherwise autogenerated labels would
50 | # look like 'heading' rather than 'path/to/file:heading'
51 | autosectionlabel_prefix_document = True
52 | autosummary_generate = True
53 | autosummary_ignore_module_all = False
54 |
55 | autodoc_member_order = "bysource"
56 |
57 | # If true, '()' will be appended to :func: etc. cross-reference text.
58 | add_function_parentheses = False
59 |
60 | # If true, the current module name will be prepended to all description
61 | # unit titles (such as .. function::).
62 | add_module_names = True
63 |
64 | autodoc_default_flags = ["members"]
65 |
66 | autodoc_default_options = {
67 | "members": True,
68 | "member-order": "bysource",
69 | "special-members": "__init__",
70 | }
71 | # Add any paths that contain templates here, relative to this directory.
72 | templates_path = ["_templates"]
73 |
74 | # List of patterns, relative to source directory, that match files and
75 | # directories to ignore when looking for source files.
76 | # This pattern also affects html_static_path and html_extra_path.
77 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
78 |
79 | # -- Options for HTML output -------------------------------------------------
80 |
81 | # The theme to use for HTML and HTML Help pages. See the documentation for
82 | # a list of builtin themes.
83 | #
84 | html_theme = "sphinx_rtd_theme"
85 |
86 | # html_logo = "_static/logo.png"
87 |
88 | # Add any paths that contain custom static files (such as style sheets) here,
89 | # relative to this directory. They are copied after the builtin static files,
90 | # so a file named "default.css" will overwrite the builtin "default.css".
91 | html_static_path = ["_static"]
92 |
93 | html_theme_options = {
94 | # "logo_only": True,
95 | "navigation_depth": 4,
96 | }
97 |
98 | source_suffix = {
99 | ".rst": "restructuredtext",
100 | ".md": "markdown",
101 | }
102 |
103 | html_css_files = [
104 | "custom.css",
105 | ]
106 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/zh/source/docs/api.rst:
--------------------------------------------------------------------------------
1 | .. _api:
2 |
3 |
4 | MemoryScope API Reference
5 |
6 |
7 | Enumeration
8 | ===========
9 |
10 | .. automodule:: memoryscope.enumeration
11 | :members:
12 |
13 | Scheme
14 | ======
15 | .. automodule:: memoryscope.scheme
16 | :members:
17 |
18 | Config
19 | ======
20 | .. automodule:: memoryscope.core.config
21 | :members:
22 |
23 |
24 | Models
25 | ======
26 | .. automodule:: memoryscope.core.models
27 | :members:
28 |
29 |
30 |
31 | Storage
32 | =======
33 | .. automodule:: memoryscope.core.storage
34 | :members:
35 |
36 |
37 | Worker
38 | ======
39 | Base
40 | ----
41 |
42 | .. automodule:: memoryscope.core.worker
43 | :members:
44 |
45 | Frontend
46 | --------
47 | .. automodule:: memoryscope.core.worker.frontend
48 | :members:
49 |
50 | Backend
51 | --------
52 | .. automodule:: memoryscope.core.worker.backend
53 | :members:
54 |
55 | Operation
56 | =========
57 | .. automodule:: memoryscope.core.operation
58 | :members:
59 |
60 | Service
61 | =======
62 | .. automodule:: memoryscope.core.service
63 | :members:
64 |
65 | Chat
66 | ====
67 | .. automodule:: memoryscope.core.chat
68 | :members:
69 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/zh/source/index.rst:
--------------------------------------------------------------------------------
1 | .. MemoryScope documentation master file, created by
2 | sphinx-quickstart on Fri Jan 5 17:53:54 2024.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | :github_url: https://github.com/modelscope/memoryscope
7 |
8 | MemoryScope Documentation
9 | =========================
10 |
11 | Welcome to the MemoryScope documentation
12 | -----------------------------------------
13 |
14 | .. image:: ./docs/images/logo.png
15 | :align: center
16 |
17 | MemoryScope provides LLM chatbots with powerful and flexible long-term memory capabilities, along with a framework for building them.
18 | MemoryScope can be used in memory scenarios such as personal assistants and emotional companions; through long-term memory it keeps learning and remembering the user's basic information as well as various habits and preferences, so that users gradually feel a tacit understanding when using the LLM.
19 |
20 | .. image:: docs/images/framework.png
21 | :align: center
22 |
23 | Core Framework
24 | ^^^^^^^^^^^^^^^^^^^^
25 |
26 | 💾 Memory Database: MemoryScope is equipped with a vector database (default: *Elasticsearch*) to store all memory fragments recorded in the system.
27 |
28 | 🔧 Core Worker Library: MemoryScope atomizes the capability of long-term memory into individual workers and includes more than 20 of them, covering query information filtering, observation extraction, insight updating, and more.
29 |
30 | 🛠️ Core Operation Library: Based on worker pipelines, it builds the core operations of the memory service, realizing core capabilities such as memory retrieval and memory consolidation.
31 |
32 | - Memory Retrieval: When the user enters a dialogue turn, this operation returns semantically related memory fragments. If the input refers to a point in time, memory fragments from the corresponding time are returned as well.
33 | - Memory Consolidation: This operation takes a batch of the user's dialogue inputs, extracts important user information from them, and stores it in the memory database as memory fragments in the form of *observations*.
34 | - Reflection and Re-consolidation: At regular intervals, this operation reflects on newly recorded *observations* to form and update memory fragments in the form of *insights*.
35 |   It then performs memory re-consolidation to ensure that contradictions and duplications among memory fragments are handled properly.
36 |
37 | .. toctree::
38 | :maxdepth: 2
39 |    :caption: MemoryScope Tutorials
40 |
41 |    About MemoryScope
42 |    Installation
43 |    Command-Line Terminal
44 |    Simple Examples
45 |    Advanced Usage
46 |    Contribution
47 |
48 | .. toctree::
49 | :maxdepth: 6
50 |    :caption: MemoryScope API Reference
51 |
52 | API
53 |
54 |
--------------------------------------------------------------------------------
/docs/sphinx_doc/zh/source/modules.rst:
--------------------------------------------------------------------------------
1 | memoryscope
2 | ===========
3 |
4 | .. toctree::
5 | :maxdepth: 4
6 |
7 | memoryscope
8 |
--------------------------------------------------------------------------------
/examples/advance/custom_operator.md:
--------------------------------------------------------------------------------
1 | # Custom Operator and Worker
2 |
3 | 1. Create a new worker named `example_query_worker.py` in the `contrib` directory:
4 | ```bash
5 | vim memoryscope/contrib/example_query_worker.py
6 | ```
7 |
8 | 2. Write the program for the new custom worker. Note that the class name must match the filename, which is `ExampleQueryWorker`:
9 | ```python
10 | import datetime
11 | from memoryscope.constants.common_constants import QUERY_WITH_TS
12 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker
13 |
14 | class ExampleQueryWorker(MemoryBaseWorker):
15 | def _run(self):
16 | timestamp = int(datetime.datetime.now().timestamp()) # Current timestamp as default
17 | assert "query" in self.chat_kwargs
18 | query = self.chat_kwargs["query"]
19 | if not query:
20 | query = ""
21 | else:
22 | query = query.strip() + "\n You must add a `meow~` at the end of each of your answers."
23 | # Store the determined query and its timestamp in the context
24 | self.set_workflow_context(QUERY_WITH_TS, (query, timestamp))
25 | ```
26 |
27 | 3. Create a YAML startup file (copying `demo_config.yaml`):
28 | ```
29 | cp memoryscope/core/config/demo_config.yaml examples/advance/replacement.yaml
30 | vim examples/advance/replacement.yaml
31 | ```
32 |
33 | 4. At the bottom, insert the definition of the new worker so that it replaces the previous default `set_query` worker, then update the operation's workflow:
34 | ```
35 | rewrite_query:
36 | class: contrib.example_query_worker
37 | generation_model: generation_model
38 | ```
39 | ```
40 | retrieve_memory:
41 | class: core.operation.frontend_operation
42 | workflow: rewrite_query,[extract_time|retrieve_obs_ins,semantic_rank],fuse_rerank
43 | description: "retrieve long-term memory"
44 | ```
45 |
46 | 5. Verify:
47 | ```
48 | python quick-start-demo.py --config_path=examples/advance/replacement.yaml
49 | ```
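
If you prefer launching from Python rather than the demo script, the following minimal sketch (assuming the same `replacement.yaml`) uses the `MemoryScope(config_path=...)` entry point shown in `examples/api/chat_example.py`:
```python
from memoryscope import MemoryScope

# Load the customized configuration; the rewrite_query worker defined above now
# runs at the start of the retrieve_memory workflow.
with MemoryScope(config_path="examples/advance/replacement.yaml") as ms:
    memory_chat = ms.default_memory_chat
    response = memory_chat.chat_with_memory(query="What is my hobby?")
    print(response.message.content)  # if the custom worker takes effect, answers end with `meow~`
```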
--------------------------------------------------------------------------------
/examples/advance/custom_operator_zh.md:
--------------------------------------------------------------------------------
1 | # Custom Operator and Worker
2 |
3 | 1. Create a new worker named `example_query_worker.py` under the `contrib` directory:
4 | ```bash
5 | vim memoryscope/contrib/example_query_worker.py
6 | ```
7 |
8 | 2. Write the program for the new custom worker. Note that the class name must match the filename, i.e. `ExampleQueryWorker`:
9 | ```python
10 | import datetime
11 | from memoryscope.constants.common_constants import QUERY_WITH_TS
12 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker
13 |
14 | class ExampleQueryWorker(MemoryBaseWorker):
15 |
16 | def _run(self):
17 |
18 | timestamp = int(datetime.datetime.now().timestamp()) # Current timestamp as default
19 |
20 | assert "query" in self.chat_kwargs
21 | query = self.chat_kwargs["query"]
22 | if not query:
23 | query = ""
24 | else:
25 |             query = query.strip() + "\n You must add a `meow~` at the end of each of your answers."
26 |
27 | # Store the determined query and its timestamp in the context
28 | self.set_workflow_context(QUERY_WITH_TS, (query, timestamp))
29 | ```
30 |
31 | 3. Create a YAML startup file (copying `demo_config_zh.yaml`):
32 | ```
33 | cp memoryscope/core/config/demo_config_zh.yaml examples/advance/replacement.yaml
34 | vim examples/advance/replacement.yaml
35 | ```
36 |
37 | 4. At the bottom, insert the definition of the new worker so that it replaces the previous default `set_query` worker, then update the operation's workflow:
38 | ```
39 | rewrite_query:
40 | class: contrib.example_query_worker
41 | generation_model: generation_model
42 | ```
43 | ```
44 | retrieve_memory:
45 | class: core.operation.frontend_operation
46 | workflow: rewrite_query,[extract_time|retrieve_obs_ins,semantic_rank],fuse_rerank
47 | description: "retrieve long-term memory"
48 | ```
49 |
50 | 5. Verify:
51 | ```
52 | python quick-start-demo.py --config_path=examples/advance/replacement.yaml
53 | ```
54 |
--------------------------------------------------------------------------------
/examples/api/agentscope_example.md:
--------------------------------------------------------------------------------
1 | # Working with AgentScope
2 |
3 | 1. First, make sure that you have installed AgentScope as well as MemoryScope.
4 | ```
5 | pip install agentscope memoryscope
6 | ```
7 |
8 |
9 | 2. Then, ensure that Elasticsearch is up and running; see the [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/getting-started.html).
10 | The Docker method is recommended:
11 | ```
12 | sudo docker run -p 9200:9200 \
13 | -e "discovery.type=single-node" \
14 | -e "xpack.security.enabled=false" \
15 | -e "xpack.license.self_generated.type=trial" \
16 | docker.elastic.co/elasticsearch/elasticsearch:8.13.2
17 | ```
18 |
19 | 3. Finally, we can start the AgentScope demo.
20 | ```
21 | python examples/api/agentscope_example.py
22 | ```
--------------------------------------------------------------------------------
/examples/api/agentscope_example.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Union, Sequence
2 |
3 | import agentscope
4 | from agentscope.agents import AgentBase, UserAgent
5 | from agentscope.message import Msg
6 |
7 | from memoryscope import MemoryScope, Arguments
8 |
9 |
10 | class MemoryScopeAgent(AgentBase):
11 | def __init__(self, name: str, arguments: Arguments, **kwargs) -> None:
12 | # Disable AgentScope memory and use MemoryScope memory instead
13 | super().__init__(name, use_memory=False, **kwargs)
14 |
15 | # Create a memory client in MemoryScope
16 | self.memory_scope = MemoryScope(arguments=arguments)
17 | self.memory_chat = self.memory_scope.default_memory_chat
18 |
19 | def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg:
20 | # Generate response
21 | response = self.memory_chat.chat_with_memory(query=x.content)
22 |
23 | # Wrap the response in a message object in AgentScope
24 | msg = Msg(name=self.name, content=response.message.content, role="assistant")
25 |
26 | # Print/speak the message in this agent's voice
27 | self.speak(msg)
28 |
29 | return msg
30 |
31 | def close(self):
32 | # Close the backend service of MemoryScope
33 | self.memory_scope.close()
34 |
35 |
36 | def main():
37 | # Setting of MemoryScope
38 | arguments = Arguments(
39 | language="cn",
40 | human_name="用户",
41 | assistant_name="AI",
42 | memory_chat_class="api_memory_chat",
43 | generation_backend="dashscope_generation",
44 | generation_model="qwen-max",
45 | embedding_backend="dashscope_embedding",
46 | embedding_model="text-embedding-v2",
47 | rank_backend="dashscope_rank",
48 | rank_model="gte-rerank")
49 |
50 | # Initialize AgentScope
51 | agentscope.init(project="MemoryScope")
52 |
53 | memoryscope_agent = MemoryScopeAgent(name="Assistant", arguments=arguments)
54 |
55 | user_agent = UserAgent()
56 |
57 | # Dialog
58 | msg = None
59 | while True:
60 | # User input
61 | msg = user_agent(msg)
62 | if msg.content == "exit":
63 | break
64 | # Agent speaks
65 | msg = memoryscope_agent(msg)
66 |
67 | # End memory
68 | memoryscope_agent.close()
69 |
70 |
71 | if __name__ == "__main__":
72 | main()
73 |
--------------------------------------------------------------------------------
/examples/api/autogen_example.md:
--------------------------------------------------------------------------------
1 | # Working with AutoGen
2 |
3 | 1. First, make sure that you have installed AutoGen as well as MemoryScope.
4 | ```
5 | pip install pyautogen memoryscope
6 | ```
7 |
8 |
9 | 2. Then, ensure that Elasticsearch is up and running; see the [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/getting-started.html).
10 | The Docker method is recommended:
11 | ```
12 | sudo docker run -p 9200:9200 \
13 | -e "discovery.type=single-node" \
14 | -e "xpack.security.enabled=false" \
15 | -e "xpack.license.self_generated.type=trial" \
16 | docker.elastic.co/elasticsearch/elasticsearch:8.13.2
17 | ```
18 |
19 | 3. Finally, we can start the AutoGen demo.
20 | ```
21 | python examples/api/autogen_example.py
22 | ```
--------------------------------------------------------------------------------
/examples/api/autogen_example.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Union, Literal, Dict, List, Any, Tuple
2 |
3 | from autogen import Agent, ConversableAgent, UserProxyAgent
4 |
5 | from memoryscope import MemoryScope, Arguments
6 |
7 |
8 | class MemoryScopeAgent(ConversableAgent):
9 | def __init__(
10 | self,
11 | name: str = "assistant",
12 | system_message: Optional[str] = "",
13 | human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER",
14 | llm_config: Optional[Union[Dict, bool]] = None,
15 | arguments: Arguments = None,
16 | **kwargs,
17 | ):
18 | super().__init__(
19 | name=name,
20 | system_message=system_message,
21 | human_input_mode=human_input_mode,
22 | llm_config=llm_config,
23 | **kwargs,
24 | )
25 |
26 | # Create a memory client in MemoryScope
27 | self.memory_scope = MemoryScope(arguments=arguments)
28 | self.memory_chat = self.memory_scope.default_memory_chat
29 |
30 | self.register_reply([Agent, None], MemoryScopeAgent.generate_reply_with_memory, remove_other_reply_funcs=True)
31 |
32 | def generate_reply_with_memory(
33 | self,
34 | messages: Optional[List[Dict]] = None,
35 | sender: Optional[Agent] = None,
36 | config: Optional[Any] = None,
37 | ) -> Tuple[bool, Union[str, Dict, None]]:
38 | # Generate response
39 |
40 | contents = []
41 | for message in messages:
42 | if message.get("role") != self.name:
43 | contents.append(message.get("content", ""))
44 |
45 | query = contents[-1]
46 | response = self.memory_chat.chat_with_memory(query=query)
47 | return True, response.message.content
48 |
49 | def close(self):
50 | self.memory_scope.close()
51 |
52 |
53 | def main():
54 | # Create the agent of MemoryScope
55 | arguments = Arguments(
56 | language="cn",
57 | human_name="用户",
58 | assistant_name="AI",
59 | memory_chat_class="api_memory_chat",
60 | generation_backend="dashscope_generation",
61 | generation_model="qwen-max",
62 | embedding_backend="dashscope_embedding",
63 | embedding_model="text-embedding-v2",
64 | rank_backend="dashscope_rank",
65 | rank_model="gte-rerank"
66 | )
67 |
68 | assistant = MemoryScopeAgent("assistant", arguments=arguments)
69 |
70 | # Create the agent that represents the user in the conversation.
71 | user_proxy = UserProxyAgent("user", code_execution_config=False)
72 |
73 | # Let the assistant start the conversation. It will end when the user types exit.
74 | assistant.initiate_chat(user_proxy, message="有什么需要帮忙的吗?")
75 | assistant.close()
76 |
77 |
78 | if __name__ == "__main__":
79 | main()
80 |
--------------------------------------------------------------------------------
/examples/api/chat_example.py:
--------------------------------------------------------------------------------
1 | from memoryscope import MemoryScope, Arguments
2 |
3 | arguments = Arguments(
4 | language="cn",
5 | human_name="用户",
6 | assistant_name="AI",
7 | memory_chat_class="api_memory_chat",
8 | generation_backend="dashscope_generation",
9 | generation_model="qwen-max",
10 | embedding_backend="dashscope_embedding",
11 | embedding_model="text-embedding-v2",
12 | rank_backend="dashscope_rank",
13 | rank_model="gte-rerank",
14 | enable_ranker=True)
15 |
16 |
17 | def chat_example1():
18 | with MemoryScope(config_path="memoryscope/core/config/demo_config_zh.yaml") as ms:
19 | memory_chat = ms.default_memory_chat
20 | response = memory_chat.chat_with_memory(query="我的爱好是弹琴。")
21 | print("回答:\n" + response.message.content)
22 | print("记忆:\n" + response.meta_data["memories"])
23 |
24 |
25 | def chat_example2():
26 | with MemoryScope(arguments=arguments) as ms:
27 | memory_chat = ms.default_memory_chat
28 | response = memory_chat.chat_with_memory(query="我的爱好是弹琴。")
29 | print("回答:\n" + response.message.content)
30 | print("记忆:\n" + response.meta_data["memories"])
31 |
32 |
33 | def chat_example3():
34 | with MemoryScope(arguments=arguments) as ms:
35 | memory_chat = ms.default_memory_chat
36 | response = memory_chat.chat_with_memory(
37 | query="你知道我消灭了几个星系吗?",
38 |             temporary_memories="张三是一只哥斯拉,这是真实的,不是虚构的。\n前天张三消灭了星系0x4be?\n大前天张三消灭了星系0x5be?")
39 | print("回答:\n" + response.message.content)
40 |
41 |
42 | def chat_example4():
43 | with MemoryScope(arguments=arguments) as ms:
44 | memory_chat = ms.default_memory_chat
45 | memory_chat.run_service_operation("delete_all")
46 |
47 | response = memory_chat.chat_with_memory(query="我的爱好是弹琴。")
48 | print("回答1:\n" + response.message.content)
49 | result = memory_chat.run_service_operation("consolidate_memory")
50 | print(result)
51 |
52 | response = memory_chat.chat_with_memory(query="你知道我的乐器爱好是什么?", history_message_strategy=None)
53 | print("回答2:\n" + response.message.content)
54 | print("记忆2:\n" + response.meta_data["memories"])
55 |
56 |
57 | def chat_example5():
58 | with MemoryScope(arguments=arguments) as ms:
59 | memory_service = ms.default_memory_service
60 | memory_service.init_service()
61 |
62 | result = memory_service.list_memory()
63 | print(f"list_memory result={result}")
64 |
65 | result = memory_service.retrieve_memory()
66 | print(f"retrieve_memory result={result}")
67 |
68 | result = memory_service.consolidate_memory()
69 | print(f"consolidate_memory result={result}")
70 |
71 |
72 | def chat_example6():
73 | with MemoryScope(arguments=arguments) as ms:
74 | memory_chat = ms.default_memory_chat
75 | memory_chat.run_service_operation("delete_all", "张三")
76 | memory_chat.run_service_operation("delete_all", "李四")
77 |
78 | print("李四=========================")
79 | response = memory_chat.chat_with_memory(query="我的爱好是弹琴。", role_name="李四")
80 | print("回答1:\n" + response.message.content)
81 | result = memory_chat.run_service_operation("consolidate_memory", role_name="李四")
82 | print(result)
83 | response = memory_chat.chat_with_memory(query="你知道我的乐器爱好是什么?", role_name="李四",
84 | history_message_strategy=None)
85 | print("回答2:\n" + response.message.content)
86 | print("记忆2:\n" + response.meta_data["memories"])
87 |
88 | print("张三=========================")
89 | response = memory_chat.chat_with_memory(query="我的爱好是打羽毛球。", role_name="张三")
90 | print("回答1:\n" + response.message.content)
91 | result = memory_chat.run_service_operation("consolidate_memory", role_name="张三")
92 | print(result)
93 | response = memory_chat.chat_with_memory(query="你知道我的运动爱好是什么?", role_name="张三",
94 | history_message_strategy=None)
95 | print("回答2:\n" + response.message.content)
96 | print("记忆2:\n" + response.meta_data["memories"])
97 |
98 |
99 | if __name__ == "__main__":
100 | # chat_example1()
101 | # chat_example2()
102 | # chat_example3()
103 | chat_example4()
104 | # chat_example5()
105 | # chat_example6()
106 |
--------------------------------------------------------------------------------
/examples/cli/CLI_README.md:
--------------------------------------------------------------------------------
1 | # The Cli Interface of MemoryScope
2 |
3 | ## Usage
4 | Before running, follow the [**Installation**](../../docs/installation.md#iii-install-from-pypi) guide in the README and start the Docker image first.
5 | MemoryScope can be launched in two different ways:
6 |
7 | ### 1. Using YAML Configuration File
8 |
9 | If you prefer to configure your settings via a YAML file, you can do so by providing the path to the configuration file as follows:
10 | ```bash
11 | memoryscope --config_path=memoryscope/core/config/demo_config.yaml
12 | ```
13 |
14 | ### 2. Using Command Line Arguments
15 |
16 | Alternatively, you can specify all the parameters directly on the command line:
17 |
18 | ```bash
19 | # Chinese / Dashscope
20 | memoryscope --language="cn" \
21 | --memory_chat_class="cli_memory_chat" \
22 | --human_name="用户" \
23 | --assistant_name="AI" \
24 | --generation_backend="dashscope_generation" \
25 | --generation_model="qwen-max" \
26 | --embedding_backend="dashscope_embedding" \
27 | --embedding_model="text-embedding-v2" \
28 | --enable_ranker=True \
29 | --rank_backend="dashscope_rank" \
30 | --rank_model="gte-rerank"
31 | # English / OpenAI
32 | memoryscope --language="en" \
33 | --memory_chat_class="cli_memory_chat" \
34 | --human_name="user" \
35 | --assistant_name="AI" \
36 | --generation_backend="openai_generation" \
37 | --generation_model="gpt-4o" \
38 | --embedding_backend="openai_embedding" \
39 | --embedding_model="text-embedding-3-small" \
40 | --enable_ranker=False
41 | ```
42 |
43 | Here are the available options that can be set through either method:
44 |
45 | - `--language`: The language used for the conversation.
46 | - `--memory_chat_class`: The class name for managing the chat history.
47 | - `--human_name`: The name of the human user.
48 | - `--assistant_name`: The name of the AI assistant.
49 | - `--generation_backend`: The backend used for generating responses.
50 | - `--generation_model`: The model used for generating responses.
51 | - `--embedding_backend`: The backend used for text embeddings.
52 | - `--embedding_model`: The model used for creating text embeddings.
53 | - `--enable_ranker`: A boolean indicating whether to enable the semantic ranker (default is `False`).
54 | - `--rank_backend`: The backend used for ranking responses.
55 | - `--rank_model`: The model used for ranking responses.
56 |
57 | ### 3. View Memory
58 | You can open two command-line windows following the method in step 2.
59 | In one window you can chat with the AI, while in the other you can inspect the AI's long-term memory about the user.
60 | Use `/help` to open the command-line help and find the `/list_memory` command along with the corresponding auto-refresh instruction.
61 | ```
62 | /list_memory refresh_time=5
63 | ```
64 | Then you can enjoy a pleasant conversation with the AI!
--------------------------------------------------------------------------------
/examples/cli/CLI_README_ZH.md:
--------------------------------------------------------------------------------
1 | # The CLI Interface of MemoryScope
2 |
3 | ## Usage
4 | Before running, follow the [**Installation**](../../docs/installation_zh.md#三通过-pypi-安装) guide in the README and start the Docker image first.
5 | MemoryScope can be launched in two different ways:
6 |
7 | ### 1. Using a YAML Configuration File
8 |
9 | If you prefer to configure your settings via a YAML file, provide the path to the configuration file as follows:
10 | ```bash
11 | memoryscope --config_path=memoryscope/core/config/demo_config_zh.yaml
12 | ```
13 |
14 | ### 2. Using Command-Line Arguments
15 |
16 | Alternatively, you can specify all the parameters directly on the command line:
17 |
18 | ```
19 | # Chinese
20 | memoryscope --language="cn" \
21 | --memory_chat_class="cli_memory_chat" \
22 | --human_name="用户" \
23 | --assistant_name="AI" \
24 | --generation_backend="dashscope_generation" \
25 | --generation_model="qwen-max" \
26 | --embedding_backend="dashscope_embedding" \
27 | --embedding_model="text-embedding-v2" \
28 | --enable_ranker=True \
29 | --rank_backend="dashscope_rank" \
30 | --rank_model="gte-rerank"
31 | # English
32 | memoryscope --language="en" \
33 | --memory_chat_class="cli_memory_chat" \
34 | --human_name="User" \
35 | --assistant_name="AI" \
36 | --generation_backend="openai_generation" \
37 | --generation_model="gpt-4o" \
38 | --embedding_backend="openai_embedding" \
39 | --embedding_model="text-embedding-3-small" \
40 | --enable_ranker=False
41 | ```
42 |
43 | Here are the available options that can be set through either method:
44 |
45 | - `--language`: The language used for the conversation.
46 | - `--memory_chat_class`: The class name for managing the chat history.
47 | - `--human_name`: The name of the human user.
48 | - `--assistant_name`: The name of the AI assistant.
49 | - `--generation_backend`: The backend used for generating responses.
50 | - `--generation_model`: The model used for generating responses.
51 | - `--embedding_backend`: The backend used for text embeddings.
52 | - `--embedding_model`: The model used for creating text embeddings.
53 | - `--enable_ranker`: A boolean indicating whether to enable the ranker (default is `False`).
54 | - `--rank_backend`: The backend used for ranking responses.
55 | - `--rank_model`: The model used for ranking responses.
56 |
57 | ### 3. View Memory
58 | You can open two command-line windows following the method in step 2.
59 | In one window you can chat with the AI, while in the other you can inspect the AI's long-term memory about the user.
60 | Use `/help` to open the command-line help and find the `/list_memory` command along with the corresponding auto-refresh instruction.
61 | ```
62 | /list_memory refresh_time=5
63 | ```
64 | Then you can enjoy a pleasant conversation with the AI!
--------------------------------------------------------------------------------
/examples/docker/entrypoint.sh:
--------------------------------------------------------------------------------
1 | sh examples/docker/run_elastic_search.sh
2 | python quick-start-demo.py --config_path=memoryscope/core/config/demo_config_zh.yaml
--------------------------------------------------------------------------------
/examples/docker/run_elastic_search.sh:
--------------------------------------------------------------------------------
1 | su - elastic_search_user -c "/home/elastic_search_user/elastic_search/elasticsearch-8.15.0/bin/elasticsearch -E xpack.security.enabled=false -E discovery.type=single-node -E xpack.license.self_generated.type=trial -d"
2 |
--------------------------------------------------------------------------------
/memoryscope/__init__.py:
--------------------------------------------------------------------------------
1 | """ Version of MemoryScope."""
2 | __version__ = "0.1.1.0"
3 | import fire
4 |
5 | from memoryscope.core.config.arguments import Arguments # noqa: F401
6 | from memoryscope.core.memoryscope import MemoryScope # noqa: F401
7 |
8 |
9 | def cli():
10 | fire.Fire(MemoryScope.cli_memory_chat)
11 |
--------------------------------------------------------------------------------
/memoryscope/constants/__init__.py:
--------------------------------------------------------------------------------
1 | from . import common_constants
2 | from . import language_constants
3 |
4 |
5 | __all__ = [
6 | "common_constants",
7 | "language_constants"
8 | ]
9 |
--------------------------------------------------------------------------------
/memoryscope/constants/common_constants.py:
--------------------------------------------------------------------------------
1 | # common_constants.py
2 | # This module defines constants used as keys throughout the application to maintain a consistent reference
3 | # for data structures related to workflow management, chat interactions, context storage, memory operations,
4 | # node processing, and temporal inference functionalities.
5 |
6 | WORKFLOW_NAME = "workflow_name"
7 |
8 | MEMORYSCOPE_CONTEXT = "memoryscope_context"
9 |
10 | RESULT = "result"
11 |
12 | MEMORIES = "memories"
13 |
14 | CHAT_MESSAGES = "chat_messages"
15 |
16 | CHAT_MESSAGES_SCATTER = "chat_messages_scatter"
17 |
18 | CHAT_KWARGS = "chat_kwargs"
19 |
20 | USER_NAME = "user_name"
21 |
22 | TARGET_NAME = "target_name"
23 |
24 | MEMORY_MANAGER = "memory_manager"
25 |
26 | QUERY_WITH_TS = "query_with_ts"
27 |
28 | RETRIEVE_MEMORY_NODES = "retrieve_memory_nodes"
29 |
30 | RANKED_MEMORY_NODES = "ranked_memory_nodes"
31 |
32 | NOT_REFLECTED_NODES = "not_reflected_nodes"
33 |
34 | NOT_UPDATED_NODES = "not_updated_nodes"
35 |
36 | EXTRACT_TIME_DICT = "extract_time_dict"
37 |
38 | NEW_OBS_NODES = "new_obs_nodes"
39 |
40 | NEW_OBS_WITH_TIME_NODES = "new_obs_with_time_nodes"
41 |
42 | INSIGHT_NODES = "insight_nodes"
43 |
44 | TODAY_NODES = "today_nodes"
45 |
46 | MERGE_OBS_NODES = "merge_obs_nodes"
47 |
48 | TIME_INFER = "time_infer"
49 |
--------------------------------------------------------------------------------
/memoryscope/contrib/example_query_worker.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | from memoryscope.constants.common_constants import QUERY_WITH_TS
4 | from memoryscope.constants.language_constants import NONE_WORD
5 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker
6 | from memoryscope.enumeration.message_role_enum import MessageRoleEnum
7 |
8 |
9 | class ExampleQueryWorker(MemoryBaseWorker):
10 | # NOTE: If you want to utilize the capabilities of the prompt handler, please be sure to include this sentence.
11 | FILE_PATH: str = __file__
12 |
13 | def _parse_params(self, **kwargs):
14 | self.rewrite_history_count: int = kwargs.get("rewrite_history_count", 2)
15 | self.generation_model_kwargs: dict = kwargs.get("generation_model_kwargs", {})
16 |
17 | def rewrite_query(self, query: str) -> str:
18 | chat_messages = self.chat_messages_scatter
19 | if len(chat_messages) <= 1:
20 | return query
21 |
22 | if chat_messages[-1].role == MessageRoleEnum.USER:
23 | chat_messages = chat_messages[:-1]
24 | chat_messages = chat_messages[-self.rewrite_history_count:]
25 |
26 | # get context
27 | context_list = []
28 | for message in chat_messages:
29 | context = message.content
30 | if len(context) > 200:
31 | context = context[:100] + context[-100:]
32 | if message.role == MessageRoleEnum.USER:
33 | context_list.append(f"{self.target_name}: {context}")
34 | elif message.role == MessageRoleEnum.ASSISTANT:
35 | context_list.append(f"Assistant: {context}")
36 |
37 | if not context_list:
38 | return query
39 |
40 | system_prompt = self.prompt_handler.rewrite_query_system
41 | user_query = self.prompt_handler.rewrite_query_query.format(query=query,
42 | context="\n".join(context_list))
43 | rewrite_query_message = self.prompt_to_msg(system_prompt=system_prompt,
44 | few_shot="",
45 | user_query=user_query)
46 | self.logger.info(f"rewrite_query_message={rewrite_query_message}")
47 |
48 | # Invoke the LLM to generate a response
49 | response = self.generation_model.call(messages=rewrite_query_message,
50 | **self.generation_model_kwargs)
51 |
52 | # Handle empty or unsuccessful responses
53 | if not response.status or not response.message.content:
54 | return query
55 |
56 | response_text = response.message.content
57 | self.logger.info(f"rewrite_query.response_text={response_text}")
58 |
59 | if not response_text or response_text.lower() == self.get_language_value(NONE_WORD):
60 | return query
61 |
62 | return response_text
63 |
64 | def _run(self):
65 | query = "" # Default query value
66 | timestamp = int(datetime.datetime.now().timestamp()) # Current timestamp as default
67 |
68 | if "query" in self.chat_kwargs:
69 | # set query if exists
70 | query = self.chat_kwargs["query"]
71 | if not query:
72 | query = ""
73 | query = query.strip()
74 |
75 | # set ts if exists
76 | _timestamp = self.chat_kwargs.get("timestamp")
77 | if _timestamp and isinstance(_timestamp, int):
78 | timestamp = _timestamp
79 |
80 | if self.rewrite_history_count > 0:
81 | t_query = self.rewrite_query(query=query)
82 | if t_query:
83 | query = t_query
84 |
85 | # Store the determined query and its timestamp in the context
86 | self.set_workflow_context(QUERY_WITH_TS, (query, timestamp))
87 |
--------------------------------------------------------------------------------
/memoryscope/contrib/example_query_worker.yaml:
--------------------------------------------------------------------------------
1 | rewrite_query_system:
2 | cn: |
3 | 任务: 消除指代问题并重写
4 | 要求: 检查提供的问题是否存在指代。如果存在指代,通过上下文信息重写问题,使其信息充足,能够单独回答。如果没有指代问题,则回答“无”。
5 | en: |
6 | Task: Eliminate referencing issues and rewrite
7 | Requirements: Check the provided questions for any references. If references exist, rewrite the questions using contextual information to make them sufficiently informative so they can be answered independently. If there are no referencing issues, respond with "None".
8 |
9 | rewrite_query_query:
10 | cn: |
11 | 上下文:
12 | {context}
13 | 问题:{query}
14 | 重写:
15 |
16 | en: |
17 | Context:
18 | {context}
19 | Question: {query}
20 | Rewrite:
21 |
22 |
--------------------------------------------------------------------------------
/memoryscope/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .memoryscope import MemoryScope
2 | from .memoryscope_context import MemoryscopeContext
3 |
4 | __all__ = [
5 | "MemoryScope",
6 | "MemoryscopeContext"
7 | ]
8 |
--------------------------------------------------------------------------------
/memoryscope/core/chat/__init__.py:
--------------------------------------------------------------------------------
1 | from .api_memory_chat import ApiMemoryChat
2 | from .base_memory_chat import BaseMemoryChat
3 | from .cli_memory_chat import CliMemoryChat
4 |
5 | __all__ = [
6 | "ApiMemoryChat",
7 | "BaseMemoryChat",
8 | "CliMemoryChat"
9 | ]
10 |
--------------------------------------------------------------------------------
/memoryscope/core/chat/base_memory_chat.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | from typing import Optional, Literal
3 |
4 | from memoryscope.core.service.base_memory_service import BaseMemoryService
5 | from memoryscope.core.utils.logger import Logger
6 |
7 |
8 | class BaseMemoryChat(metaclass=ABCMeta):
9 | """
10 | An abstract base class representing a chat system integrated with memory services.
11 | It outlines the method to initiate a chat session leveraging memory data, which concrete subclasses must implement.
12 | """
13 |
14 | def __init__(self, **kwargs):
15 | self.kwargs: dict = kwargs
16 | self.logger = Logger.get_logger()
17 |
18 | @property
19 | def memory_service(self) -> BaseMemoryService:
20 | """
21 | Abstract property to access the memory service.
22 |
23 | Raises:
24 | NotImplementedError: This method should be implemented in a subclass.
25 | """
26 | raise NotImplementedError
27 |
28 | @abstractmethod
29 | def chat_with_memory(self,
30 | query: str,
31 | role_name: Optional[str] = None,
32 | system_prompt: Optional[str] = None,
33 | memory_prompt: Optional[str] = None,
34 | temporary_memories: Optional[str] = None,
35 | history_message_strategy: Literal["auto", None] | int = "auto",
36 | remember_response: bool = True,
37 | **kwargs):
38 | """
39 |         The core function that carries out conversation with memory. It accepts the user's question through `query`
40 |         and returns the conversation result as a model response; the retrieved memories are stored in its meta_data under `memories`.
41 | Args:
42 | query (str): User's query, includes the user's question.
43 | role_name (str, optional): User's role name.
44 | system_prompt (str, optional): System prompt. Defaults to the system_prompt in "memory_chat_prompt.yaml".
45 |             memory_prompt (str, optional): Memory prompt. It takes effect when there is a memory and is placed in
46 |                 front of the retrieved memory. Defaults to the memory_prompt in "memory_chat_prompt.yaml".
47 | temporary_memories (str, optional): Manually added user memory in this function.
48 | history_message_strategy ("auto", None, int):
49 | - If it is set to "auto", the history messages in the conversation will retain those that have not
50 | yet been summarized. Default to "auto".
51 | - If it is set to None, no conversation history will be saved.
52 |             - If it is set to an integer value "n", the most recent "n" message pairs [user, assistant] will be retained.
53 |             remember_response (bool, optional): Flag indicating whether to save the AI's response to memory.
54 |                 Defaults to True.
55 | Returns:
56 | - ModelResponse: In non-streaming mode, returns a complete AI response.
57 | - ModelResponseGen: In streaming mode, returns a generator yielding AI response parts.
58 | - Memories: To obtain the memory by invoking the method of model_response.meta_data[MEMORIES]
59 | """
60 | raise NotImplementedError
61 |
62 | def start_backend_service(self, **kwargs):
63 | self.memory_service.start_backend_service(**kwargs)
64 |
65 | def run_service_operation(self, name: str, role_name: Optional[str] = None, **kwargs):
66 | return self.memory_service.run_operation(name, role_name=role_name, **kwargs)
67 |
68 | def run(self):
69 | """
70 |         Runs the chat system.
71 |
72 | This method should contain the logic to initiate and manage the chat process,
73 |         utilizing the memory service as needed. Subclasses are expected to override it.
74 | """
75 | pass
76 |
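A minimal usage sketch of `chat_with_memory`, mirroring `examples/api/chat_example.py`; the config path refers to the demo configuration shipped with the repository:
```python
from memoryscope import MemoryScope

with MemoryScope(config_path="memoryscope/core/config/demo_config.yaml") as ms:
    memory_chat = ms.default_memory_chat

    # Plain query; retrieved memories are exposed via meta_data["memories"].
    response = memory_chat.chat_with_memory(query="My hobby is playing the piano.")
    print(response.message.content)
    print(response.meta_data["memories"])

    # Inject ad-hoc memories and skip conversation history for this turn.
    response = memory_chat.chat_with_memory(
        query="Do you know my musical hobby?",
        temporary_memories="The user's hobby is playing the piano.",
        history_message_strategy=None)
    print(response.message.content)
```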
--------------------------------------------------------------------------------
/memoryscope/core/chat/memory_chat_prompt.yaml:
--------------------------------------------------------------------------------
1 | system_prompt:
2 | cn: |
3 | 你是一个名为MemoryScope的智能助理,请用中文简洁地回答用户问题。当前时间是{date_time}。
4 | en: |
5 | You are a helpful assistant named MemoryScope, please answer questions concisely in English. The current time is {date_time}.
6 |
7 | memory_prompt:
8 | cn: |
9 | 在回答用户问题时,请尽量忘记大部分不相关的信息。只有当信息与用户问题或对话内容非常相关时,才记住这些信息并加以使用。请确保你的回答简洁、准确,并聚焦于用户问题或对话主题。信息:
10 | en: |
11 | When responding to user questions, please try to forget most of the irrelevant information. Only remember and use the information if it is highly relevant to the current question or conversation. Ensure that your answers are concise, accurate, and focused on the user's current question or the topic of discussion. Information:
--------------------------------------------------------------------------------
/memoryscope/core/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .arguments import Arguments
2 | from .config_manager import ConfigManager
3 |
4 | __all__ = [
5 | "Arguments",
6 | "ConfigManager",
7 | ]
8 |
--------------------------------------------------------------------------------
/memoryscope/core/config/arguments.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass, field
2 | from typing import Literal, Dict
3 |
4 |
5 | @dataclass
6 | class Arguments(object):
7 | language: Literal["cn", "en"] = field(default="cn", metadata={"help": "support en & cn now"})
8 |
9 | thread_pool_max_workers: int = field(default=5, metadata={"help": "thread pool max workers"})
10 |
11 | memory_chat_class: str = field(default="cli_memory_chat", metadata={
12 | "help": "cli_memory_chat(Command-line interaction), api_memory_chat(API interface interaction), etc."})
13 |
14 | chat_stream: bool | None = field(default=None, metadata={
15 | "help": "In the case of cli_memory_chat, stream mode is recommended. For api_memory_chat mode, "
16 | "please use non-stream. If set to None, the value will be automatically determined."})
17 |
18 | human_name: str = field(default="user", metadata={"help": "Human user's name"})
19 |
20 |     assistant_name: str = field(default="AI", metadata={"help": "assistant's name"})
21 |
22 | consolidate_memory_interval_time: int | None = field(default=1, metadata={
23 | "help": "Memory backend service: If you feel that the token consumption is relatively high, "
24 | "please increase the time interval. When set to None, the value will not be updated."})
25 |
26 | reflect_and_reconsolidate_interval_time: int | None = field(default=15, metadata={
27 | "help": "Memory backend service: If you feel that the token consumption is relatively high, "
28 | "please increase the time interval. When set to None, the value will not be updated."})
29 |
30 | worker_params: Dict[str, dict] = field(default_factory=lambda: {}, metadata={
31 | "help": "dict format: worker_name -> param_key -> param_value"})
32 |
33 | generation_backend: str = field(default="dashscope_generation", metadata={
34 | "help": "global generation backend: openai_generation, dashscope_generation, etc."})
35 |
36 | generation_model: str = field(default="qwen-max", metadata={
37 | "help": "global generation model: gpt-4o, gpt-4o-mini, gpt-4-turbo, qwen-max, etc."})
38 |
39 | generation_params: dict = field(default_factory=lambda: {}, metadata={
40 | "help": "global generation params: max_tokens, top_p, temperature, etc."})
41 |
42 |     embedding_backend: str = field(default="dashscope_embedding", metadata={
43 | "help": "global embedding backend: openai_embedding, dashscope_embedding, etc."})
44 |
45 | embedding_model: str = field(default="text-embedding-v2", metadata={
46 | "help": "global embedding model: text-embedding-3-large, text-embedding-3-small, text-embedding-ada-002, "
47 | "text-embedding-v2, etc."})
48 |
49 | embedding_params: dict = field(default_factory=lambda: {})
50 |
51 | rank_backend: str = field(default="dashscope_rank", metadata={"help": "global rank backend: dashscope_rank, etc."})
52 |
53 | rank_model: str = field(default="gte-rerank", metadata={"help": "global rank model: gte-rerank, etc."})
54 |
55 | rank_params: dict = field(default_factory=lambda: {})
56 |
57 | es_index_name: str = field(default="memory_index")
58 |
59 | es_url: str = field(default="http://localhost:9200")
60 |
61 | retrieve_mode: str = field(default="dense", metadata={
62 | "help": "retrieve_mode: dense, sparse(not implemented), hybrid(not implemented)"})
63 |
64 | enable_ranker: bool = field(default=False, metadata={
65 | "help": "If a semantic ranking model is not available, MemoryScope will use cosine similarity scoring as a "
66 | "substitute. However, the ranking effectiveness will be somewhat compromised.",
67 | "map_yaml": "global->enable_ranker"})
68 |
69 | enable_today_contra_repeat: bool = field(default=True, metadata={
70 | "help": "Whether enable conflict resolution and deduplication for the day? "
71 | "Note that enabling this will increase token consumption.",
72 | "map_yaml": "global->enable_today_contra_repeat"})
73 |
74 | enable_long_contra_repeat: bool = field(default=False, metadata={
75 | "help": "Whether to enable long-term conflict resolution and deduplication. "
76 | "Note that enabling this will increase token consumption.",
77 | "map_yaml": "global->enable_long_contra_repeat"})
78 |
79 | output_memory_max_count: int = field(default=20, metadata={
80 | "help": "The maximum number of memories retrieved during memory recall.",
81 | "map_yaml": "global->output_memory_max_count"})
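For illustration, a minimal sketch of constructing `Arguments` with per-worker overrides. The `rewrite_query` worker name and its `rewrite_history_count` parameter come from `memoryscope/contrib/example_query_worker.py`; the backend and model names follow the English CLI example:
```python
from memoryscope import Arguments

# worker_params uses the documented format: worker_name -> param_key -> param_value.
arguments = Arguments(
    language="en",
    memory_chat_class="api_memory_chat",
    generation_backend="openai_generation",
    generation_model="gpt-4o",
    embedding_backend="openai_embedding",
    embedding_model="text-embedding-3-small",
    enable_ranker=False,
    worker_params={"rewrite_query": {"rewrite_history_count": 4}})
```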
82 |
--------------------------------------------------------------------------------
/memoryscope/core/memoryscope.py:
--------------------------------------------------------------------------------
1 | from concurrent.futures import ThreadPoolExecutor
2 | from datetime import datetime
3 |
4 | from memoryscope.core.chat.base_memory_chat import BaseMemoryChat
5 | from memoryscope.core.config.config_manager import ConfigManager
6 | from memoryscope.core.memoryscope_context import MemoryscopeContext
7 | from memoryscope.core.service.base_memory_service import BaseMemoryService
8 | from memoryscope.core.utils.tool_functions import init_instance_by_config
9 | from memoryscope.enumeration.language_enum import LanguageEnum
10 | from memoryscope.enumeration.model_enum import ModelEnum
11 |
12 |
13 | class MemoryScope(ConfigManager):
14 |
15 | def __init__(self, **kwargs):
16 | self._context: MemoryscopeContext = MemoryscopeContext()
17 | self._context.memory_scope_uuid = datetime.now().strftime(r"%Y%m%d_%H%M%S")
18 | super().__init__(**kwargs)
19 | self._init_context_by_config()
20 |
21 | def _init_context_by_config(self):
22 | # set global config
23 | global_conf = self.config["global"]
24 | self._context.language = LanguageEnum(global_conf["language"])
25 | self._context.thread_pool = ThreadPoolExecutor(max_workers=global_conf["thread_pool_max_workers"])
26 | self._context.meta_data.update({
27 | "enable_ranker": global_conf["enable_ranker"],
28 | "enable_today_contra_repeat": global_conf["enable_today_contra_repeat"],
29 | "enable_long_contra_repeat": global_conf["enable_long_contra_repeat"],
30 | "output_memory_max_count": global_conf["output_memory_max_count"],
31 | })
32 |
33 | if not global_conf["enable_ranker"]:
34 | self.logger.warning("If a semantic ranking model is not available, MemoryScope will use cosine similarity "
35 | "scoring as a substitute. However, the ranking effectiveness will be somewhat "
36 | "compromised.")
37 |
38 | # init memory_chat
39 | memory_chat_conf_dict = self.config["memory_chat"]
40 | if memory_chat_conf_dict:
41 | for name, conf in memory_chat_conf_dict.items():
42 | self._context.memory_chat_dict[name] = init_instance_by_config(conf, name=name, context=self._context)
43 |
44 | # set memory_service
45 | memory_service_conf_dict = self.config["memory_service"]
46 | assert memory_service_conf_dict
47 | for name, conf in memory_service_conf_dict.items():
48 | self._context.memory_service_dict[name] = init_instance_by_config(conf, name=name, context=self._context)
49 |
50 | # init model
51 | model_conf_dict = self.config["model"]
52 | assert model_conf_dict
53 | for name, conf in model_conf_dict.items():
54 | self._context.model_dict[name] = init_instance_by_config(conf, name=name)
55 |
56 | # init memory_store
57 | memory_store_conf = self.config["memory_store"]
58 | assert memory_store_conf
59 | emb_model_name: str = memory_store_conf[ModelEnum.EMBEDDING_MODEL.value]
60 | embedding_model = self._context.model_dict[emb_model_name]
61 | self._context.memory_store = init_instance_by_config(memory_store_conf, embedding_model=embedding_model)
62 |
63 | # init monitor
64 | monitor_conf = self.config["monitor"]
65 | if monitor_conf:
66 | self._context.monitor = init_instance_by_config(monitor_conf)
67 |
68 | # set worker config
69 | self._context.worker_conf_dict = self.config["worker"]
70 |
71 | def close(self):
72 |         # wait for services to stop
73 | for _, service in self._context.memory_service_dict.items():
74 | service.stop_backend_service(wait_service=True)
75 |
76 | self._context.thread_pool.shutdown()
77 |
78 | self._context.memory_store.close()
79 |
80 | if self._context.monitor:
81 | self._context.monitor.close()
82 |
83 | self.logger.close()
84 |
85 | def __enter__(self):
86 | return self
87 |
88 | def __exit__(self, exc_type, exc_val, exc_tb):
89 | if exc_type is not None:
90 | self.logger.warning(f"An exception occurred: {exc_type.__name__}: {exc_val}\n{exc_tb}")
91 | self.close()
92 |
93 | @property
94 | def context(self):
95 | return self._context
96 |
97 | @property
98 | def memory_chat_dict(self):
99 | return self._context.memory_chat_dict
100 |
101 | @property
102 | def memory_service_dict(self):
103 | return self._context.memory_service_dict
104 |
105 | @property
106 | def default_memory_chat(self) -> BaseMemoryChat:
107 | return list(self.memory_chat_dict.values())[0]
108 |
109 | @property
110 | def default_memory_service(self) -> BaseMemoryService:
111 | return list(self.memory_service_dict.values())[0]
112 |
113 | @classmethod
114 | def cli_memory_chat(cls, **kwargs):
115 | with cls(**kwargs) as ms:
116 | memory_chat = ms.default_memory_chat
117 | memory_chat.run()
118 |
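For reference, a minimal sketch of the two entry points defined above, assuming the demo configuration shipped with the repository:
```python
from memoryscope import MemoryScope

# Context-manager usage: __exit__ calls close(), which stops backend services,
# the thread pool, the memory store, and the monitor.
with MemoryScope(config_path="memoryscope/core/config/demo_config.yaml") as ms:
    response = ms.default_memory_chat.chat_with_memory(query="Hello!")
    print(response.message.content)

# Equivalent entry point for an interactive CLI session via the classmethod.
MemoryScope.cli_memory_chat(config_path="memoryscope/core/config/demo_config.yaml")
```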
--------------------------------------------------------------------------------
/memoryscope/core/memoryscope_context.py:
--------------------------------------------------------------------------------
1 | from concurrent.futures import ThreadPoolExecutor
2 | from dataclasses import dataclass, field
3 |
4 | from memoryscope.enumeration.language_enum import LanguageEnum
5 | from memoryscope.core.utils.singleton import singleton
6 |
7 | @singleton
8 | @dataclass
9 | class MemoryscopeContext(object):
10 | """
11 | The context class archives all configs utilized by store, monitor, services and workers.
12 | """
13 |
14 | language: LanguageEnum = LanguageEnum.EN
15 |
16 | thread_pool: ThreadPoolExecutor | None = None
17 |
18 | memory_store = None
19 |
20 | monitor = None
21 |
22 | memory_chat_dict: dict = field(default_factory=lambda: {}, metadata={"help": "name -> memory_chat"})
23 |
24 | memory_service_dict: dict = field(default_factory=lambda: {}, metadata={"help": "name -> memory_service"})
25 |
26 | model_dict: dict = field(default_factory=lambda: {}, metadata={"help": "name -> model"})
27 |
28 | worker_conf_dict: dict = field(default_factory=lambda: {}, metadata={"help": "name -> worker_conf"})
29 |
30 | meta_data: dict = field(default_factory=lambda: {})
31 |
32 | memory_scope_uuid: str = ""
33 |
34 | print_workflow_dynamic: bool = False
35 |
36 | log_elasticsearch_dynamic: bool = False
37 |
38 |
39 | def get_memoryscope_uuid():
40 | ms_context = MemoryscopeContext()
41 | if ms_context.memory_scope_uuid:
42 | return ms_context.memory_scope_uuid
43 | else:
44 | # raise RuntimeError("MemoryscopeContext is not initialized yet. Please initialize it first.")
45 | return "memory_scope_uuid_not_registered"
46 |
47 | def get_memoryscope_context():
48 | ms_context = MemoryscopeContext()
49 | if ms_context.memory_scope_uuid:
50 | return ms_context
51 | else:
52 | # raise RuntimeError("MemoryscopeContext is not initialized yet. Please initialize it first.")
53 | return "memory_scope_uuid_not_registered"
54 |
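Because the class is wrapped in `@singleton`, repeated construction is expected to return the same cached instance. A small sketch, assuming the decorator in `memoryscope/core/utils/singleton.py` caches the first instance:
```python
from memoryscope.core.memoryscope_context import MemoryscopeContext, get_memoryscope_uuid

ctx_a = MemoryscopeContext()
ctx_b = MemoryscopeContext()
assert ctx_a is ctx_b  # the singleton decorator hands back the same object

ctx_a.memory_scope_uuid = "20240801_120000"
print(get_memoryscope_uuid())  # -> "20240801_120000"
```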
--------------------------------------------------------------------------------
/memoryscope/core/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_model import BaseModel
2 | from .dummy_generation_model import DummyGenerationModel
3 | from .llama_index_embedding_model import LlamaIndexEmbeddingModel
4 | from .llama_index_generation_model import LlamaIndexGenerationModel
5 | from .llama_index_rank_model import LlamaIndexRankModel
6 |
7 | __all__ = [
8 | "BaseModel",
9 | "DummyGenerationModel",
10 | "LlamaIndexEmbeddingModel",
11 | "LlamaIndexGenerationModel",
12 | "LlamaIndexRankModel"
13 | ]
14 |
--------------------------------------------------------------------------------
/memoryscope/core/models/dummy_generation_model.py:
--------------------------------------------------------------------------------
1 | import time
2 | from typing import List
3 |
4 | from llama_index.core.base.llms.types import ChatMessage
5 |
6 | from memoryscope.core.models.base_model import BaseModel, MODEL_REGISTRY
7 | from memoryscope.enumeration.message_role_enum import MessageRoleEnum
8 | from memoryscope.enumeration.model_enum import ModelEnum
9 | from memoryscope.scheme.message import Message
10 | from memoryscope.scheme.model_response import ModelResponse, ModelResponseGen
11 |
12 |
13 | class DummyGenerationModel(BaseModel):
14 | """
15 | The `DummyGenerationModel` class serves as a placeholder model for generating responses.
16 | It processes input prompts or sequences of messages, adapting them into a structure compatible
17 | with chat interfaces. It also facilitates the generation of mock (dummy) responses for testing,
18 | supporting both immediate and streamed output.
19 | """
20 | m_type: ModelEnum = ModelEnum.GENERATION_MODEL
21 |
22 | MODEL_REGISTRY.register("dummy_generation", object)
23 |
24 | def before_call(self, model_response: ModelResponse, **kwargs):
25 | """
26 | Prepares the input data before making a call to the language model.
27 | It accepts either a 'prompt' directly or a list of 'messages'.
28 | If 'prompt' is provided, it sets the data accordingly.
29 | If 'messages' are provided, it constructs a list of ChatMessage objects from the list.
30 | Raises an error if neither 'prompt' nor 'messages' are supplied.
31 |
32 | Args:
33 | model_response: model_response
34 | **kwargs: Arbitrary keyword arguments including 'prompt' and 'messages'.
35 |
36 | Raises:
37 |             RuntimeError: When neither 'prompt' nor 'messages' is provided.
38 | """
39 | prompt: str = kwargs.pop("prompt", "")
40 | messages: List[Message] | List[dict] = kwargs.pop("messages", [])
41 |
42 | if prompt:
43 | data = {"prompt": prompt}
44 | elif messages:
45 | if isinstance(messages[0], dict):
46 | data = {"messages": [ChatMessage(role=msg["role"], content=msg["content"]) for msg in messages]}
47 | else:
48 | data = {"messages": [ChatMessage(role=msg.role, content=msg.content) for msg in messages]}
49 | else:
50 | raise RuntimeError("prompt and messages are both empty!")
51 | data.update(**kwargs)
52 | model_response.meta_data["data"] = data
53 |
54 | def after_call(self,
55 | model_response: ModelResponse,
56 | stream: bool = False,
57 | **kwargs) -> ModelResponse | ModelResponseGen:
58 | """
59 | Processes the model's response post-call, optionally streaming the output or returning it as a whole.
60 |
61 | This method modifies the input `model_response` by resetting its message content and, based on the `stream`
62 | parameter, either yields the response in a generated stream or returns the complete response directly.
63 |
64 | Args:
65 | model_response (ModelResponse): The initial response object to be processed.
66 | stream (bool, optional): Flag indicating whether to stream the response. Defaults to False.
67 | **kwargs: Additional keyword arguments (not used in this implementation).
68 |
69 | Returns:
70 | ModelResponse | ModelResponseGen: If `stream` is True, a generator yielding updated `ModelResponse` objects;
71 | otherwise, a modified `ModelResponse` object with the complete content.
72 | """
73 | model_response.message = Message(role=MessageRoleEnum.ASSISTANT, content="")
74 |
75 | call_result = ["-" for _ in range(10)]
76 | if stream:
77 | def gen() -> ModelResponseGen:
78 | for delta in call_result:
79 | model_response.message.content += delta
80 | model_response.delta = delta
81 | time.sleep(0.1)
82 | yield model_response
83 |
84 | return gen()
85 | else:
86 | model_response.message.content = "".join(call_result)
87 | return model_response
88 |
89 | def _call(self, model_response: ModelResponse, stream: bool = False, **kwargs):
90 | return model_response
91 |
92 | async def _async_call(self, model_response: ModelResponse, **kwargs):
93 | return model_response
94 |
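A sketch of consuming the dummy model in both modes; treat the bare constructor and the `stream` keyword of `call` as assumptions, since models are normally built and invoked through the config system:
```python
from memoryscope.core.models.dummy_generation_model import DummyGenerationModel

model = DummyGenerationModel()  # assumption: no extra constructor arguments needed

# Non-streaming: a complete ModelResponse whose content is the joined dashes.
response = model.call(messages=[{"role": "user", "content": "hi"}], stream=False)
print(response.message.content)  # "----------"

# Streaming: a generator of ModelResponse objects carrying incremental deltas.
for chunk in model.call(messages=[{"role": "user", "content": "hi"}], stream=True):
    print(chunk.delta, end="")
```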
--------------------------------------------------------------------------------
/memoryscope/core/models/llama_index_embedding_model.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from llama_index.embeddings.dashscope import DashScopeEmbedding
4 | from llama_index.embeddings.openai import OpenAIEmbedding
5 |
6 | from memoryscope.core.models.base_model import BaseModel, MODEL_REGISTRY
7 | from memoryscope.enumeration.model_enum import ModelEnum
8 | from memoryscope.scheme.model_response import ModelResponse
9 | from memoryscope.core.utils.logger import Logger
10 |
11 |
12 | class LlamaIndexEmbeddingModel(BaseModel):
13 | """
14 |     Manages text embeddings through the embedding backbones registered below (DashScope and
15 |     OpenAI) within the LlamaIndex framework, supporting both sync and async modes.
16 | """
17 | m_type: ModelEnum = ModelEnum.EMBEDDING_MODEL
18 |
19 | def __init__(self, *args, **kwargs):
20 | super().__init__(*args, **kwargs)
21 | self.logger = Logger.get_logger("llama_index_embedding_model")
22 |
23 | @classmethod
24 | def register_model(cls, model_name: str, model_class: type):
25 | """
26 | Registers a new embedding model class with the model registry.
27 |
28 | Args:
29 | model_name (str): The name to register the model under.
30 | model_class (type): The class of the model to register.
31 | """
32 | MODEL_REGISTRY.register(model_name, model_class)
33 |
34 | MODEL_REGISTRY.register("dashscope_embedding", DashScopeEmbedding)
35 | MODEL_REGISTRY.register("openai_embedding", OpenAIEmbedding)
36 |
37 | def before_call(self, model_response: ModelResponse, **kwargs):
38 | text: str | List[str] = kwargs.pop("text", "")
39 | if isinstance(text, str):
40 | text = [text]
41 | model_response.meta_data["data"] = dict(texts=text)
42 | self.logger.info("Embedding Model:\n" + text[0])
43 |
44 | def after_call(self, model_response: ModelResponse, **kwargs) -> ModelResponse:
45 | embeddings = model_response.raw
46 | if not embeddings:
47 | model_response.details = "empty embeddings"
48 | model_response.status = False
49 | return model_response
50 | if len(embeddings) == 1:
51 | # return list[float]
52 | embeddings = embeddings[0]
53 |
54 | model_response.embedding_results = embeddings
55 | return model_response
56 |
57 | def _call(self, model_response: ModelResponse, **kwargs):
58 | """
59 | Executes a synchronous call to generate embeddings for the input data.
60 |
61 |         This method calls `get_text_embedding_batch` on the underlying model, passing the
62 |         prepared input stored in `model_response.meta_data["data"]`, and writes the raw
63 |         result to `model_response.raw` for `after_call` to unpack.
64 |
65 | Args:
66 | **kwargs: Additional keyword arguments that might be used in the embedding process.
67 |
68 |         Returns:
69 |             None: The raw embeddings are stored on `model_response` rather than returned.
70 | """
71 | model_response.raw = self.model.get_text_embedding_batch(**model_response.meta_data["data"])
72 |
73 | async def _async_call(self, model_response: ModelResponse, **kwargs):
74 | """
75 | Executes an asynchronous call to generate embeddings for the input data.
76 |
77 | Similar to `_call`, but uses the asynchronous `aget_text_embedding_batch` method
78 |         of the model. The awaited result is written to `model_response.raw` for
79 |         `after_call` to unpack.
80 |
81 | Args:
82 | **kwargs: Additional keyword arguments for the embedding process, if any.
83 |
84 |         Returns:
85 |             None: The raw embeddings are stored on `model_response` rather than returned.
86 | """
87 | model_response.raw = await self.model.aget_text_embedding_batch(**model_response.meta_data["data"])
88 |
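For orientation, a small usage sketch; it assumes the inherited BaseModel.call routes
through before_call/_call/after_call and that model_name-based construction selects one of
the registered backbones (both assumptions):

    from memoryscope.core.models.llama_index_embedding_model import LlamaIndexEmbeddingModel

    emb_model = LlamaIndexEmbeddingModel(model_name="dashscope_embedding")  # construction is assumed

    response = emb_model.call(text=["I like apples.", "It rained yesterday."])
    if response.status:
        # Two inputs -> a list of vectors; a single string input would be
        # unwrapped to one List[float] by after_call.
        print(len(response.embedding_results))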
--------------------------------------------------------------------------------
/memoryscope/core/models/llama_index_rank_model.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from llama_index.core.data_structs import Node
4 | from llama_index.core.schema import NodeWithScore
5 | from llama_index.postprocessor.dashscope_rerank import DashScopeRerank
6 |
7 | from memoryscope.core.models.base_model import BaseModel, MODEL_REGISTRY
8 | from memoryscope.enumeration.model_enum import ModelEnum
9 | from memoryscope.scheme.model_response import ModelResponse
10 | from memoryscope.core.utils.logger import Logger
11 |
12 |
13 | class LlamaIndexRankModel(BaseModel):
14 | """
15 | The LlamaIndexRankModel class is designed to rerank documents according to their relevance
16 | to a provided query, utilizing the DashScope Rerank model. It transforms document lists
17 | and queries into a compatible format for ranking, manages the ranking process, and allocates
18 | rank scores to individual documents.
19 | """
20 | m_type: ModelEnum = ModelEnum.RANK_MODEL
21 |
22 | MODEL_REGISTRY.register("dashscope_rank", DashScopeRerank)
23 |
24 | def __init__(self, *args, **kwargs):
25 | super().__init__(*args, **kwargs)
26 | self.logger = Logger.get_logger("llama_index_rank_model")
27 |
28 | def before_call(self, model_response: ModelResponse, **kwargs):
29 | """
30 | Prepares necessary data before the ranking call by extracting the query and documents,
31 | ensuring they are valid, and initializing nodes with dummy scores.
32 |
33 | Args:
34 |             model_response (ModelResponse): The response object whose meta_data will hold the prepared input.
35 | **kwargs: Keyword arguments containing 'query' and 'documents'.
36 | """
37 | query: str = kwargs.pop("query", "")
38 | documents: List[str] = kwargs.pop("documents", [])
39 | if isinstance(documents, str):
40 | documents = [documents]
41 | assert query and documents and all(documents), \
42 | f"query or documents is empty! query={query}, documents={len(documents)}"
43 | assert len(documents) < 500, \
44 |             "The DashScope rerank model accepts fewer than 500 input documents!"
45 | # Using -1.0 as dummy scores
46 | nodes = [NodeWithScore(node=Node(text=doc), score=-1.0) for doc in documents]
47 |
48 | model_response.meta_data.update({
49 | "data": {"nodes": nodes, "query_str": query, "top_n": len(documents)},
50 | "documents_map": {doc: idx for idx, doc in enumerate(documents)},
51 | })
52 |
53 | def after_call(self, model_response: ModelResponse, **kwargs) -> ModelResponse:
54 | """
55 | Processes the model response post-ranking, assigning calculated rank scores to each document
56 | based on their index in the original document list.
57 |
58 | Args:
59 | model_response (ModelResponse): The initial response from the ranking model.
60 | **kwargs: Additional keyword arguments (unused).
61 |
62 | Returns:
63 | ModelResponse: Updated response with rank scores assigned to documents.
64 | """
65 | if not model_response.rank_scores:
66 | model_response.rank_scores = {}
67 |
68 | documents_map = model_response.meta_data["documents_map"]
69 | for node in model_response.raw:
70 | text = node.node.text
71 | idx = documents_map[text]
72 | model_response.rank_scores[idx] = node.score
73 |
74 | self.logger.info(self.logger.format_rank_message(model_response))
75 | return model_response
76 |
77 | def _call(self, model_response: ModelResponse, **kwargs):
78 | """
79 | Executes the ranking process by passing prepared data to the model's postprocessing method.
80 |
81 | Args:
82 | **kwargs: Keyword arguments (unused).
83 |
84 |         Returns:
85 |             None: The ranked nodes are written to `model_response.raw` for `after_call` to score.
86 | """
87 | self.model.top_n = model_response.meta_data["data"]["top_n"]
88 | model_response.meta_data["data"].pop("top_n")
89 | model_response.raw = self.model.postprocess_nodes(**model_response.meta_data["data"])
90 |
91 | async def _async_call(self, **kwargs) -> ModelResponse:
92 | """
93 |         Asynchronous ranking is not implemented for this model.
94 |
95 |         Args:
96 |             **kwargs: Keyword arguments (unused).
97 |
98 |         Raises:
99 |             NotImplementedError: Always raised; use the synchronous `_call` instead.
100 | """
101 | raise NotImplementedError
102 |
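A hedged usage sketch (again assuming BaseModel.call chains the hooks above and that
model_name-based construction works as in the other models; scores are illustrative):

    from memoryscope.core.models.llama_index_rank_model import LlamaIndexRankModel

    rank_model = LlamaIndexRankModel(model_name="dashscope_rank")  # construction is assumed

    response = rank_model.call(query="What fruit do I like?",
                               documents=["I like apples.", "It rained yesterday."])
    # rank_scores maps each document's original index to its rerank score,
    # e.g. {0: 0.92, 1: 0.05} (illustrative values).
    print(response.rank_scores)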
--------------------------------------------------------------------------------
/memoryscope/core/operation/__init__.py:
--------------------------------------------------------------------------------
1 | from .backend_operation import BackendOperation
2 | from .base_operation import BaseOperation
3 | from .base_workflow import BaseWorkflow
4 | from .consolidate_memory_op import ConsolidateMemoryOp
5 | from .frontend_operation import FrontendOperation
6 |
7 | __all__ = [
8 | "BackendOperation",
9 | "BaseOperation",
10 | "BaseWorkflow",
11 | "ConsolidateMemoryOp",
12 | "FrontendOperation"
13 | ]
14 |
--------------------------------------------------------------------------------
/memoryscope/core/operation/backend_operation.py:
--------------------------------------------------------------------------------
1 | import time
2 | import threading
3 |
4 | from memoryscope.core.operation.base_operation import OPERATION_TYPE
5 | from memoryscope.core.operation.frontend_operation import FrontendOperation
6 |
7 |
8 | class BackendOperation(FrontendOperation):
9 | """
10 |     BackendOperation extends FrontendOperation to run an operation periodically in the background.
11 | It manages operation status, loop control, and integrates with a global context for thread management.
12 | """
13 | operation_type: OPERATION_TYPE = "backend"
14 |
15 | def __init__(self, interval_time: int, **kwargs):
16 | super().__init__(**kwargs)
17 |
18 | self._interval_time: int = interval_time
19 |
20 | self._operation_status_run: bool = False
21 | self._loop_switch: bool = False
22 | self._backend_task = None
23 |
24 | def init_workflow(self, **kwargs):
25 | """
26 | Initializes the workflow by setting up workers with provided keyword arguments.
27 |
28 | Args:
29 | **kwargs: Arbitrary keyword arguments to be passed during worker initialization.
30 | """
31 | self.init_workers(is_backend=True, **kwargs)
32 |
33 | def _loop_operation(self, **kwargs):
34 | """
35 |         Runs while _loop_switch is True: sleeps in one-second steps for _interval_time seconds
36 |         (so a stop signal is noticed promptly), then runs the operation once per target name, skipping the round if a previous run is still in progress.
37 | """
38 | while self._loop_switch:
39 | for _ in range(self._interval_time):
40 | if self._loop_switch:
41 | time.sleep(1)
42 | else:
43 | break
44 |
45 | if self._loop_switch:
46 | if self._operation_status_run:
47 | continue
48 |
49 | self._operation_status_run = True
50 |
51 | if len(self.target_names) > 1:
52 | self.logger.warning("current version is not stable under target_names.size > 1!")
53 |
54 | for target_name in self.target_names:
55 | try:
56 | self.run_operation(target_name=target_name, **kwargs)
57 | except Exception as e:
58 | self.logger.exception(f"op_name={self.name} target_name={target_name} encounter exception. "
59 | f"args={e.args}")
60 |
61 | self._operation_status_run = False
62 |
63 | def start_operation_backend(self, **kwargs):
64 | """
65 | Initiates the background operation loop if it's not already running.
66 | Sets the _loop_switch to True and submits the _loop_operation to a thread from the global thread pool.
67 | """
68 | if not self._loop_switch:
69 | self._loop_switch = True
70 | self._backend_task = self.thread_pool.submit(self._loop_operation, **kwargs)
71 | self.logger.info(f"start operation={self.name}...")
72 |
73 | def stop_operation_backend(self, wait_operation: bool = False):
74 | """
75 | Stops the background operation loop by setting the _loop_switch to False.
76 | """
77 | self._loop_switch = False
78 | if self._backend_task:
79 | if wait_operation:
80 | self._backend_task.result()
81 | self.logger.info(f"stop operation={self.name}...")
82 | else:
83 | self.logger.info(f"send stop signal to operation={self.name}...")
84 |
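The intended lifecycle, sketched under the assumption that a fully wired BackendOperation
is available (construction happens in the service layer and is omitted here):

    import time

    def demo_lifecycle(op):
        # 'op' stands in for a constructed BackendOperation.
        op.start_operation_backend()                    # submits _loop_operation to the thread pool
        time.sleep(5)                                   # let the loop tick a few times
        op.stop_operation_backend(wait_operation=True)  # flip _loop_switch, then wait for the task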
--------------------------------------------------------------------------------
/memoryscope/core/operation/base_operation.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | from typing import Literal, List
3 |
4 | from memoryscope.scheme.message import Message
5 |
6 | OPERATION_TYPE = Literal["frontend", "backend"]
7 |
8 |
9 | class BaseOperation(metaclass=ABCMeta):
10 | """
11 | An abstract base class representing an operation that can be categorized as either frontend or backend.
12 |
13 | Attributes:
14 | operation_type (OPERATION_TYPE): Specifies the type of operation, defaulting to "frontend".
15 | name (str): The name of the operation.
16 | description (str): A description of the operation.
17 | """
18 |
19 | operation_type: OPERATION_TYPE = "frontend"
20 |
21 | def __init__(self,
22 | name: str,
23 | user_name: str,
24 | target_names: List[str],
25 | chat_messages: List[List[Message]],
26 | description: str):
27 | """
28 | Initializes a new instance of the BaseOperation.
29 | """
30 | self.name: str = name
31 | self.user_name: str = user_name
32 | self.target_names: List[str] = target_names
33 | self.chat_messages: List[List[Message]] = chat_messages
34 | self.description: str = description
35 |
36 | def init_workflow(self, **kwargs):
37 | """
38 | Initialize the workflow with additional keyword arguments if needed.
39 |
40 | Args:
41 | **kwargs: Additional parameters for initializing the workflow.
42 | """
43 | pass
44 |
45 | @abstractmethod
46 | def run_operation(self, target_name: str, **kwargs):
47 | """
48 | Abstract method to define the operation to be run.
49 | Subclasses must implement this method.
50 |
51 | Args:
52 |             target_name (str): The name of the target (typically the human user).
53 | **kwargs: Keyword arguments for running the operation.
54 |
55 | Raises:
56 | NotImplementedError: If the subclass does not implement this method.
57 | """
58 | raise NotImplementedError
59 |
60 | def start_operation_backend(self, **kwargs):
61 | """
62 | Placeholder method for running an operation specific to the backend.
63 | Intended to be overridden by subclasses if backend operations are required.
64 | """
65 | pass
66 |
67 | def stop_operation_backend(self, wait_operation: bool = False):
68 | """
69 | Placeholder method to stop any ongoing backend operations.
70 | Should be implemented in subclasses where backend operations are managed.
71 | """
72 | pass
73 |
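A toy subclass, to show that run_operation is the only abstract requirement (the class and
its logic are illustrative, not part of the library):

    from memoryscope.core.operation.base_operation import BaseOperation

    class EchoOperation(BaseOperation):
        """Counts message groups that mention the target; purely illustrative."""

        def run_operation(self, target_name: str, **kwargs):
            count = sum(1 for msgs in self.chat_messages
                        if any(m.role_name == target_name for m in msgs))
            return f"{self.name}: {count} message groups mention {target_name}"

    op = EchoOperation(name="echo", user_name="user", target_names=["alice"],
                       chat_messages=[], description="toy operation")
    print(op.run_operation("alice"))  # "echo: 0 message groups mention alice"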
--------------------------------------------------------------------------------
/memoryscope/core/operation/consolidate_memory_op.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from memoryscope.constants.common_constants import CHAT_KWARGS, CHAT_MESSAGES, RESULT, TARGET_NAME, USER_NAME
4 | from memoryscope.core.operation.backend_operation import BackendOperation
5 | from memoryscope.scheme.message import Message
6 |
7 |
8 | class ConsolidateMemoryOp(BackendOperation):
9 |
10 | def __init__(self,
11 | message_lock,
12 | contextual_msg_min_count: int = 0,
13 | **kwargs):
14 | super().__init__(**kwargs)
15 | self.message_lock = message_lock
16 | self.contextual_msg_min_count: int = contextual_msg_min_count
17 |
18 | def run_operation(self, target_name: str, **kwargs):
19 | """
20 | Executes an operation after preparing the chat context, checking message memory status,
21 | and updating workflow status accordingly.
22 |
23 | If the number of not-memorized messages is less than the contextual message count,
24 | the operation is skipped. Otherwise, it sets up the chat context, runs the workflow,
25 | captures the result, and updates the memory status.
26 |
27 | Args:
28 |             target_name (str): The name of the target (typically the human user).
29 | **kwargs: Keyword arguments for chat operation configuration.
30 |
31 | Returns:
32 | Any: The result obtained from running the workflow.
33 | """
34 |
35 | chat_messages: List[List[Message]] = []
36 | for messages in self.chat_messages:
37 | if not messages:
38 | continue
39 |
40 | if messages[0].memorized:
41 | continue
42 |
43 | contain_flag = False
44 |
45 | for msg in messages:
46 | if msg.role_name == target_name:
47 | contain_flag = True
48 | break
49 |
50 | if contain_flag:
51 | chat_messages.append(messages)
52 |
53 | if not chat_messages:
54 | self.logger.info(f"empty not_memorized chat_messages for target_name={target_name}.")
55 | return
56 |
57 | if len(chat_messages) < self.contextual_msg_min_count:
58 | self.logger.info(f"not_memorized_size={len(chat_messages)} < {self.contextual_msg_min_count}, skip.")
59 | return
60 |
61 | # prepare kwargs
62 | workflow_kwargs = {
63 | CHAT_MESSAGES: chat_messages,
64 | CHAT_KWARGS: {**kwargs, **self.kwargs},
65 | TARGET_NAME: target_name,
66 | USER_NAME: self.user_name,
67 | }
68 |
69 | # Execute the workflow with the prepared context
70 | self.run_workflow(**workflow_kwargs)
71 |
72 | # Retrieve the result from the context after workflow execution
73 | result = self.workflow_context.get(RESULT)
74 |
75 | # set message memorized
76 | with self.message_lock:
77 | for messages in chat_messages:
78 | for msg in messages:
79 | msg.memorized = True
80 |
81 | return result
82 |
--------------------------------------------------------------------------------
/memoryscope/core/operation/frontend_operation.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from memoryscope.constants.common_constants import RESULT, CHAT_MESSAGES, CHAT_KWARGS, TARGET_NAME, USER_NAME
4 | from memoryscope.core.operation.base_operation import BaseOperation, OPERATION_TYPE
5 | from memoryscope.core.operation.base_workflow import BaseWorkflow
6 | from memoryscope.scheme.message import Message
7 |
8 |
9 | class FrontendOperation(BaseWorkflow, BaseOperation):
10 | operation_type: OPERATION_TYPE = "frontend"
11 |
12 | def __init__(self,
13 | name: str,
14 | user_name: str,
15 | target_names: List[str],
16 | chat_messages: List[List[Message]],
17 | description: str,
18 | **kwargs):
19 | super().__init__(name=name, **kwargs)
20 | BaseOperation.__init__(self,
21 | name=name,
22 | user_name=user_name,
23 | target_names=target_names,
24 | chat_messages=chat_messages,
25 | description=description)
26 |
27 | def init_workflow(self, **kwargs):
28 | """
29 | Initializes the workflow by setting up workers with provided keyword arguments.
30 |
31 | Args:
32 | **kwargs: Arbitrary keyword arguments to be passed during worker initialization.
33 | """
34 | self.init_workers(**kwargs)
35 |
36 | def run_operation(self, target_name: str, **kwargs):
37 | """
38 | Executes the main operation of reading recent chat messages, initializing workflow,
39 | and returning the result of the workflow execution.
40 |
41 | Args:
42 |             target_name (str): The name of the target (typically the human user).
43 | **kwargs: Additional keyword arguments used in the operation context.
44 |
45 | Returns:
46 | Any: The result obtained from executing the workflow.
47 | """
48 |
49 | # prepare kwargs
50 | workflow_kwargs = {
51 | CHAT_MESSAGES: self.chat_messages,
52 | CHAT_KWARGS: {**kwargs, **self.kwargs},
53 | TARGET_NAME: target_name,
54 | USER_NAME: self.user_name,
55 | }
56 |
57 | # Execute the workflow with the prepared context
58 | self.run_workflow(**workflow_kwargs)
59 |
60 | # Retrieve the result from the context after workflow execution
61 | return self.workflow_context.get(RESULT)
62 |
--------------------------------------------------------------------------------
/memoryscope/core/service/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_memory_service import BaseMemoryService
2 | from .memory_scope_service import MemoryScopeService
3 |
4 | __all__ = [
5 | "BaseMemoryService",
6 | "MemoryScopeService"
7 | ]
8 |
--------------------------------------------------------------------------------
/memoryscope/core/service/base_memory_service.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | from typing import List, Dict
3 |
4 | from memoryscope.constants.language_constants import DEFAULT_HUMAN_NAME
5 | from memoryscope.core.memoryscope_context import MemoryscopeContext
6 | from memoryscope.core.operation.base_operation import BaseOperation
7 | from memoryscope.core.utils.logger import Logger
8 | from memoryscope.scheme.message import Message
9 |
10 |
11 | class BaseMemoryService(metaclass=ABCMeta):
12 | """
13 | An abstract base class for managing memory operations within a multithreaded context.
14 | It sets up the infrastructure for operation handling, message storage, and synchronization,
15 | along with logging capabilities and customizable configurations.
16 | """
17 |
18 | def __init__(self,
19 | memory_operations: Dict[str, dict],
20 | context: MemoryscopeContext,
21 | assistant_name: str = None,
22 | human_name: str = None,
23 | **kwargs):
24 | """
25 | Initializes the BaseMemoryService with operation definitions, keys for memory access,
26 | and additional keyword arguments for flexibility.
27 |
28 | Args:
29 | memory_operations (Dict[str, dict]): A dictionary defining available memory operations.
30 | context (MemoryscopeContext): runtime context.
31 |             assistant_name (str): The assistant's display name. Defaults to "AI" when empty.
32 |             human_name (str): The human's display name. Defaults to the language-specific DEFAULT_HUMAN_NAME.
33 | **kwargs: Additional parameters to customize service behavior.
34 | """
35 | self._operations_conf: Dict[str, dict] = memory_operations
36 | self._context: MemoryscopeContext = context
37 | self._human_name: str = human_name
38 | self._assistant_name: str = assistant_name
39 | self._kwargs = kwargs
40 |
41 | if not self._human_name:
42 | self._human_name = DEFAULT_HUMAN_NAME[self._context.language]
43 | if not self._assistant_name:
44 | self._assistant_name = "AI"
45 |
46 | self._operation_dict: Dict[str, BaseOperation] = {}
47 | self._chat_messages: List[List[Message]] = []
48 | self._role_names: List[str] = []
49 |
50 | self.logger = Logger.get_logger()
51 |
52 | @property
53 | def human_name(self) -> str:
54 | return self._human_name
55 |
56 | @property
57 | def assistant_name(self) -> str:
58 | return self._assistant_name
59 |
60 | def get_chat_messages_scatter(self, recent_n_pair: int) -> List[Message]:
61 | chat_messages_scatter: List[Message] = []
62 | for messages in self._chat_messages[-recent_n_pair:]:
63 | chat_messages_scatter.extend(messages)
64 | return chat_messages_scatter
65 |
66 | @property
67 | def op_description_dict(self) -> Dict[str, str]:
68 | """
69 | Property to retrieve a dictionary mapping operation keys to their descriptions.
70 | Returns:
71 | Dict[str, str]: A dictionary where keys are operation identifiers and values are their descriptions.
72 | """
73 | return {k: v.description for k, v in self._operation_dict.items()}
74 |
75 | @abstractmethod
76 | def add_messages_pair(self, messages: List[Message]):
77 | raise NotImplementedError
78 |
79 | @abstractmethod
80 | def register_operation(self, name: str, operation_config: dict, **kwargs):
81 | raise NotImplementedError
82 |
83 | @abstractmethod
84 | def init_service(self, **kwargs):
85 | raise NotImplementedError
86 |
87 | def start_backend_service(self, name: str = None, **kwargs):
88 | pass
89 |
90 | def stop_backend_service(self, wait_service: bool = False):
91 | pass
92 |
93 | @abstractmethod
94 | def run_operation(self, name: str, role_name: str = "", **kwargs):
95 | raise NotImplementedError
96 |
97 | def __getattr__(self, name: str):
98 | assert name in self._operation_dict, f"operation={name} is not registered!"
99 | return lambda **kwargs: self.run_operation(name=name, **kwargs)
100 |
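The __getattr__ hook above turns every registered operation into a callable attribute. A
hedged illustration (the operation name is an example and must exist in the loaded config):

    def demo_dispatch(service):
        # 'service' stands in for an initialized BaseMemoryService subclass.
        # Attribute access falls through to __getattr__, which returns a lambda
        # bound to run_operation, so these two calls are equivalent:
        a = service.retrieve_memory(query="my favorite fruit")
        b = service.run_operation(name="retrieve_memory", query="my favorite fruit")
        return a, b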
--------------------------------------------------------------------------------
/memoryscope/core/storage/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_memory_store import BaseMemoryStore
2 | from .base_monitor import BaseMonitor
3 | from .dummy_memory_store import DummyMemoryStore
4 | from .dummy_monitor import DummyMonitor
5 | from .llama_index_es_memory_store import LlamaIndexEsMemoryStore
6 | from .llama_index_sync_elasticsearch import (
7 | # get_elasticsearch_client,
8 | # _mode_must_match_retrieval_strategy,
9 | # _to_elasticsearch_filter,
10 | # _to_llama_similarities,
11 | ESCombinedRetrieveStrategy,
12 | SyncElasticsearchStore
13 | )
14 |
15 | __all__ = [
16 | "BaseMemoryStore",
17 | "BaseMonitor",
18 | "DummyMemoryStore",
19 | "DummyMonitor",
20 | "LlamaIndexEsMemoryStore",
21 | "ESCombinedRetrieveStrategy",
22 | "SyncElasticsearchStore"
23 | ]
--------------------------------------------------------------------------------
/memoryscope/core/storage/base_memory_store.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | from typing import Dict, List
3 |
4 | from memoryscope.scheme.memory_node import MemoryNode
5 |
6 |
7 | class BaseMemoryStore(metaclass=ABCMeta):
8 | """
9 | An abstract base class defining the interface for a memory store which handles memory nodes.
10 |     It outlines essential operations such as retrieval, updating, flushing, and closing of the store.
11 | """
12 |
13 | @abstractmethod
14 | def retrieve_memories(self,
15 | query: str = "",
16 | top_k: int = 3,
17 | filter_dict: Dict[str, List[str]] = None) -> List[MemoryNode]:
18 | """
19 | Retrieves a list of MemoryNode objects that are most relevant to the query,
20 | considering a filter dictionary for additional constraints. The number of nodes returned
21 | is limited by top_k.
22 |
23 | Args:
24 | query (str): The query string used to find relevant memories.
25 | top_k (int): The maximum number of MemoryNode objects to return.
26 | filter_dict (Dict[str, List[str]]): A dictionary with keys representing filter fields
27 | and values as lists of strings for filtering criteria.
28 |
29 | Returns:
30 | List[MemoryNode]: A list of MemoryNode objects sorted by relevance to the query,
31 | limited to top_k items.
32 | """
33 | pass
34 |
35 | @abstractmethod
36 | async def a_retrieve_memories(self,
37 | query: str = "",
38 | top_k: int = 3,
39 | filter_dict: Dict[str, List[str]] = None) -> List[MemoryNode]:
40 | """
41 | Asynchronously retrieves a list of MemoryNode objects that best match the query,
42 | respecting a filter dictionary, with the result size capped at top_k.
43 |
44 | Args:
45 | query (str): The text to search for in memory nodes.
46 | top_k (int): Maximum number of nodes to return.
47 | filter_dict (Dict[str, List[str]]): Filters to apply on memory nodes.
48 |
49 | Returns:
50 | List[MemoryNode]: A list of up to top_k MemoryNode objects matching the criteria.
51 | """
52 | pass
53 |
54 | @abstractmethod
55 | def batch_insert(self, nodes: List[MemoryNode]):
56 | pass
57 |
58 | @abstractmethod
59 | def batch_update(self, nodes: List[MemoryNode], update_embedding: bool = True):
60 | pass
61 |
62 | @abstractmethod
63 | def batch_delete(self, nodes: List[MemoryNode]):
64 | pass
65 |
66 | def flush(self):
67 | """
68 | Flushes any pending memory updates or operations to ensure data consistency.
69 | This method should be overridden by subclasses to provide the specific flushing mechanism.
70 | """
71 | pass
72 |
73 | @abstractmethod
74 | def close(self):
75 | """
76 | Closes the memory store, releasing any resources associated with it.
77 | Subclasses must implement this method to define how the memory store is properly closed.
78 | """
79 | pass
80 |
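To make the contract concrete, a minimal non-persistent implementation might look like the
sketch below; it ignores embeddings and filter_dict and does naive substring matching
(purely illustrative, not a library component):

    from typing import Dict, List

    from memoryscope.core.storage.base_memory_store import BaseMemoryStore
    from memoryscope.scheme.memory_node import MemoryNode

    class InMemoryStore(BaseMemoryStore):
        """Toy store: nodes in a dict keyed by memory_id; no vector search."""

        def __init__(self):
            self._nodes: Dict[str, MemoryNode] = {}

        def retrieve_memories(self, query="", top_k=3, filter_dict=None) -> List[MemoryNode]:
            hits = [n for n in self._nodes.values() if query in n.content]
            return hits[:top_k]

        async def a_retrieve_memories(self, query="", top_k=3, filter_dict=None) -> List[MemoryNode]:
            return self.retrieve_memories(query, top_k, filter_dict)

        def batch_insert(self, nodes: List[MemoryNode]):
            self._nodes.update({n.memory_id: n for n in nodes})

        def batch_update(self, nodes: List[MemoryNode], update_embedding: bool = True):
            self.batch_insert(nodes)  # upsert semantics in this toy version

        def batch_delete(self, nodes: List[MemoryNode]):
            for n in nodes:
                self._nodes.pop(n.memory_id, None)

        def close(self):
            self._nodes.clear()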
--------------------------------------------------------------------------------
/memoryscope/core/storage/base_monitor.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 |
3 |
4 | class BaseMonitor(metaclass=ABCMeta):
5 | """
6 | An abstract base class defining the interface for monitor classes.
7 | Subclasses should implement the methods defined here to provide concrete monitoring behavior.
8 | """
9 |
10 | def __init__(self, **kwargs):
11 | pass
12 |
13 | @abstractmethod
14 | def add(self):
15 | """
16 | Abstract method to add data or events to the monitor.
17 | This method should be implemented by subclasses to define how data is added into the monitoring system.
18 |
19 | :return: None
20 | """
21 |
22 | @abstractmethod
23 | def add_token(self):
24 | """
25 | Abstract method to add a token or a specific type of identifier to the monitor.
26 | Subclasses should implement this to specify how tokens are managed within the monitoring context.
27 |
28 | :return: None
29 | """
30 |
31 | def flush(self):
32 | """
33 | Method to flush any buffered data in the monitor.
34 | Intended to ensure that all pending recorded data is processed or written out.
35 |
36 | :return: None
37 | """
38 | pass
39 |
40 | def close(self):
41 | """
42 | Method to close the monitor, performing necessary cleanup operations.
43 | This could include releasing resources, closing files, or any other termination tasks.
44 |
45 | :return: None
46 | """
47 | pass
48 |
--------------------------------------------------------------------------------
/memoryscope/core/storage/dummy_memory_store.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List
2 |
3 | from memoryscope.core.models.base_model import BaseModel
4 | from memoryscope.core.storage.base_memory_store import BaseMemoryStore
5 | from memoryscope.scheme.memory_node import MemoryNode
6 |
7 |
8 | class DummyMemoryStore(BaseMemoryStore):
9 | """
10 | Placeholder implementation of a memory storage system interface. Defines methods for querying, updating,
11 | and closing memory nodes with asynchronous capabilities, leveraging an embedding model for potential
12 | semantic retrieval. Actual storage operations are not implemented.
13 | """
14 |
15 | def __init__(self, embedding_model: BaseModel, **kwargs):
16 | """
17 | Initializes the DummyMemoryStore with an embedding model and additional keyword arguments.
18 |
19 | Args:
20 | embedding_model (BaseModel): The model used to embed data for potential similarity-based retrieval.
21 | **kwargs: Additional keyword arguments for configuration or future expansion.
22 | """
23 | self.embedding_model: BaseModel = embedding_model
24 | self.kwargs = kwargs
25 |
26 | def retrieve_memories(self,
27 | query: str = "",
28 | top_k: int = 3,
29 | filter_dict: Dict[str, List[str]] = None) -> List[MemoryNode]:
30 | pass
31 |
32 | async def a_retrieve_memories(self,
33 | query: str = "",
34 | top_k: int = 3,
35 | filter_dict: Dict[str, List[str]] = None) -> List[MemoryNode]:
36 | pass
37 |
38 | def batch_insert(self, nodes: List[MemoryNode]):
39 | pass
40 |
41 | def batch_update(self, nodes: List[MemoryNode], update_embedding: bool = True):
42 | pass
43 |
44 | def batch_delete(self, nodes: List[MemoryNode]):
45 | pass
46 |
47 | def close(self):
48 | pass
49 |
--------------------------------------------------------------------------------
/memoryscope/core/storage/dummy_monitor.py:
--------------------------------------------------------------------------------
1 | from memoryscope.core.storage.base_monitor import BaseMonitor
2 |
3 |
4 | class DummyMonitor(BaseMonitor):
5 | """
6 | DummyMonitor serves as a placeholder or mock class extending BaseMonitor,
7 | providing empty method bodies for 'add', 'add_token', and 'close' operations.
8 | This can be used for testing or in situations where a full monitor implementation is not required.
9 | """
10 |
11 | def add(self):
12 | """
13 | Placeholder for adding data to the monitor.
14 | This method currently does nothing.
15 | """
16 | pass
17 |
18 | def add_token(self):
19 | """
20 | Placeholder for adding a token to the monitored data.
21 | This method currently does nothing.
22 | """
23 | pass
24 |
25 | def close(self):
26 | """
27 | Placeholder for closing the monitor and performing any necessary cleanup.
28 | This method currently does nothing.
29 | """
30 | pass
31 |
--------------------------------------------------------------------------------
/memoryscope/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .datetime_handler import DatetimeHandler
2 | from .logger import Logger
3 | from .prompt_handler import PromptHandler
4 | from .registry import Registry
5 | from .response_text_parser import ResponseTextParser
6 | from .timer import Timer
7 | from .tool_functions import (
8 | underscore_to_camelcase,
9 | camelcase_to_underscore,
10 | init_instance_by_config,
11 | prompt_to_msg,
12 | char_logo,
13 | md5_hash,
14 | contains_keyword,
15 | cosine_similarity
16 | )
17 |
18 | __all__ = [
19 | "DatetimeHandler",
20 | "Logger",
21 | "PromptHandler",
22 | "Registry",
23 | "ResponseTextParser",
24 | "Timer",
25 | "underscore_to_camelcase",
26 | "camelcase_to_underscore",
27 | "init_instance_by_config",
28 | "prompt_to_msg",
29 | "char_logo",
30 | "md5_hash",
31 | "contains_keyword",
32 | "cosine_similarity"
33 | ]
34 |
--------------------------------------------------------------------------------
/memoryscope/core/utils/registry.py:
--------------------------------------------------------------------------------
1 | """
2 | Registry for different modules.
3 | Init class according to the class name and verify the input parameters.
4 | """
5 | from typing import Dict, Any, List
6 |
7 |
8 | class Registry(object):
9 | """
10 | A registry to manage and instantiate various modules by their names, ensuring the uniqueness of registered entries.
11 | It supports both individual and bulk registration of modules, as well as retrieval of modules by name.
12 |
13 | Attributes:
14 | name (str): The name of the registry.
15 | module_dict (Dict[str, Any]): A dictionary holding registered modules where keys are module names and values are
16 | the modules themselves.
17 | """
18 |
19 | def __init__(self, name: str):
20 | """
21 | Initializes the Registry with a given name.
22 |
23 | Args:
24 | name (str): The name to identify this registry.
25 | """
26 | self.name: str = name
27 | self.module_dict: Dict[str, Any] = {}
28 |
29 | def register(self, module_name: str = None, module: Any = None):
30 | """
31 |         Registers a single module in the registry.
32 |
33 |         Args:
34 |             module_name (str): The name under which to register the module. Defaults to `module.__name__`.
35 |             module (Any): The module (class, function, or object) to be registered.
36 |
37 |         Raises:
38 |             KeyError: If the module name is already registered.
39 | """
40 | assert module is not None
41 | if module_name is None:
42 | module_name = module.__name__
43 |
44 | if module_name in self.module_dict:
45 | raise KeyError(f'{module_name} is already registered in {self.name}')
46 | self.module_dict[module_name] = module
47 |
48 | def batch_register(self, modules: List[Any] | Dict[str, Any]):
49 | """
50 | Registers multiple modules in the registry in a single call. Accepts either a list of modules or a dictionary
51 | mapping names to modules.
52 |
53 | Args:
54 | modules (List[Any] | Dict[str, Any]): A list of modules or a dictionary mapping module names to the modules.
55 |
56 | Raises:
57 | NotImplementedError: If the input is neither a list nor a dictionary.
58 | """
59 | if isinstance(modules, list):
60 | module_name_dict = {m.__name__: m for m in modules}
61 | elif isinstance(modules, dict):
62 | module_name_dict = modules
63 | else:
64 | raise NotImplementedError("Input must be a list or a dictionary.")
65 | self.module_dict.update(module_name_dict)
66 |
67 | def __getitem__(self, module_name: str):
68 | """
69 | Retrieves a registered module by its name using index notation.
70 |
71 | Args:
72 | module_name (str): The name of the module to retrieve.
73 |
74 | Returns:
75 | A registered module corresponding to the given name.
76 |
77 | Raises:
78 | AssertionError: If the specified module is not found in the registry.
79 | """
80 | assert module_name in self.module_dict, f"{module_name} not found in {self.name}"
81 | return self.module_dict[module_name]
82 |
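Usage sketch; note that batch_register updates the dict directly, so it skips the
duplicate-name check that register enforces:

    from memoryscope.core.utils.registry import Registry

    class DummyModel:
        pass

    registry = Registry("models")
    registry.register("dummy", DummyModel)           # explicit name
    registry.register(module=DummyModel)             # name falls back to "DummyModel"
    registry.batch_register({"alias": DummyModel})   # bulk path, no duplicate check

    print(registry["dummy"] is DummyModel)           # True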
--------------------------------------------------------------------------------
/memoryscope/core/utils/response_text_parser.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import List
3 |
4 | from memoryscope.constants.language_constants import NONE_WORD
5 | from memoryscope.core.utils.logger import Logger
6 | from memoryscope.enumeration.language_enum import LanguageEnum
7 |
8 |
9 | class ResponseTextParser(object):
10 | """
11 | The `ResponseTextParser` class is designed to parse and process response texts. It provides methods to extract
12 | patterns from the text and filter out unnecessary information, while also logging the processing steps and outcomes.
13 | """
14 |
15 | PATTERN_V1 = re.compile(r"<(.*?)>") # Regular expression pattern to match content within angle brackets
16 |
17 | def __init__(self, response_text: str, language: LanguageEnum, logger_prefix: str = ""):
18 | # Strips leading and trailing whitespace from the response text
19 | self.response_text: str = response_text.strip()
20 | self.language: LanguageEnum = language
21 |
22 | # The prefix of log. Defaults to "".
23 | self.logger_prefix: str = logger_prefix
24 |
25 | # Initializes a logger instance for logging parsing activities
26 | self.logger: Logger = Logger.get_logger()
27 |
28 | def parse_v1(self) -> List[List[str]]:
29 | """
30 | Extract specific patterns from the text which match content within angle brackets.
31 |
32 | Returns:
33 |             List[List[str]]: Per line, the contents matched within angle brackets.
34 | """
35 | result = []
36 | for line in self.response_text.split("\n"):
37 | line = line.strip()
38 | if not line:
39 | continue
40 | matches = [match.group(1) for match in self.PATTERN_V1.finditer(line)]
41 | if matches:
42 | result.append(matches)
43 | self.logger.info(f"{self.logger_prefix} response_text={self.response_text} result={result}", stacklevel=2)
44 | return result
45 |
46 | def parse_v2(self) -> List[str]:
47 | """
48 |         Extract non-empty lines, dropping any line that equals the language-specific NONE_WORD.
49 |
50 |         Returns:
51 |             List[str]: The retained lines.
52 | """
53 | result = []
54 | for line in self.response_text.split("\n"):
55 | line = line.strip()
56 | if not line or line.lower() == NONE_WORD.get(self.language):
57 | continue
58 | result.append(line)
59 | self.logger.info(f"{self.logger_prefix} response_text={self.response_text} result={result}", stacklevel=2)
60 | return result
61 |
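A small example of both parsers (it assumes LanguageEnum exposes an EN member and that the
English NONE_WORD is "none"; both are assumptions here):

    from memoryscope.core.utils.response_text_parser import ResponseTextParser
    from memoryscope.enumeration.language_enum import LanguageEnum

    text = "1 <2> <likes apples>\n2 <1> <small talk>\nnone"
    parser = ResponseTextParser(text, LanguageEnum.EN, logger_prefix="demo")

    print(parser.parse_v1())  # [['2', 'likes apples'], ['1', 'small talk']]
    print(parser.parse_v2())  # the two scored lines; the trailing "none" line is dropped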
--------------------------------------------------------------------------------
/memoryscope/core/utils/singleton.py:
--------------------------------------------------------------------------------
1 | def singleton(cls):
2 | _instance = {}
3 |
4 |     def _singleton(*args, **kwargs):
5 |         if cls not in _instance:
6 |             _instance[cls] = cls(*args, **kwargs)
7 | return _instance[cls]
8 |
9 | return _singleton
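Usage sketch; the closure keys instances by class, and constructor arguments are honored
only on the first instantiation:

    from memoryscope.core.utils.singleton import singleton

    @singleton
    class Config:
        def __init__(self, env: str = "dev"):
            self.env = env

    a = Config(env="prod")
    b = Config(env="test")   # ignored: the cached instance is returned
    print(a is b, a.env)     # True prod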
--------------------------------------------------------------------------------
/memoryscope/core/utils/timer.py:
--------------------------------------------------------------------------------
1 | import time
2 | from typing import Literal
3 |
4 | from memoryscope.core.utils.logger import Logger
5 |
6 | TIME_LOG_TYPE = Literal["end", "wrap", "none"]
7 |
8 |
9 | class Timer(object):
10 | """
11 | A class used to measure the execution time of code blocks. It supports logging the elapsed time and can be
12 | customized to display time in seconds or milliseconds.
13 | """
14 |
15 | def __init__(self,
16 | name: str,
17 | time_log_type: TIME_LOG_TYPE = "end",
18 | use_ms: bool = True,
19 | stack_level: int = 2,
20 | float_precision: int = 4,
21 | **kwargs):
22 |
23 | """
24 | Initializes the `Timer` instance with the provided args and sets up a logger
25 |
26 | Args:
27 | name (str): The log name.
28 |             time_log_type (TIME_LOG_TYPE): The log type. Defaults to "end".
29 | use_ms (bool): Use 'ms' as the timescale or not. Defaults to True.
30 | stack_level (int): The stack level of log. Defaults to 2.
31 | float_precision (int): The precision of cost time. Defaults to 4.
32 |
33 | """
34 |
35 | self.name: str = name
36 | self.time_log_type: TIME_LOG_TYPE = time_log_type
37 | self.use_ms: bool = use_ms
38 | self.stack_level: int = stack_level
39 | self.float_precision: int = float_precision
40 | self.kwargs: dict = kwargs
41 |
42 | # time recorder
43 | self.t_start = 0
44 | self.t_end = 0
45 | self.cost = 0
46 |
47 | self.logger = Logger.get_logger()
48 |
49 | def _set_cost(self):
50 | """
51 |         Compute the elapsed time since __enter__, in milliseconds when use_ms is set.
52 | """
53 | self.t_end = time.time()
54 | self.cost = self.t_end - self.t_start
55 | if self.use_ms:
56 | self.cost *= 1000
57 |
58 | @property
59 | def cost_str(self):
60 | """
61 |         Format the elapsed time as a string, honoring float_precision.
62 |         """
63 |         self._set_cost()
64 |         if self.use_ms:
65 |             return f"cost={self.cost:.{self.float_precision}f}ms"
66 |         else:
67 |             return f"cost={self.cost:.{self.float_precision}f}s"
68 |
69 | def __enter__(self, *args, **kwargs):
70 | """
71 | Begin timing.
72 | """
73 | self.t_start = time.time()
74 | if self.time_log_type == "wrap":
75 | self.logger.info(f"----- {self.name}.begin -----")
76 | return self
77 |
78 | def __exit__(self, exc_type, exc_value, exc_tb):
79 | """
80 | End timing and print the formatted log.
81 | """
82 | if self.time_log_type == "none":
83 | return
84 |
85 | lines = []
86 | if self.time_log_type == "wrap":
87 | lines.append(f"----- {self.name}.end -----")
88 | else:
89 | lines.append(self.name)
90 |
91 | lines.append(self.cost_str)
92 |
93 | if self.kwargs:
94 | for k, v in self.kwargs.items():
95 | if isinstance(v, float):
96 | float_style = f".{self.float_precision}f"
97 | line = f"{k}={v:{float_style}}"
98 | else:
99 | line = f"{k}={v}"
100 | lines.append(line)
101 |
102 | self.logger.info(" ".join(lines), stacklevel=self.stack_level)
103 |
104 |
105 | def timer(func):
106 | """
107 | A decorator function that measures the execution time of the wrapped function.
108 |
109 | Args:
110 | func (Callable): The function to be wrapped and timed.
111 |
112 | Returns:
113 | Callable: The wrapper function that includes timing functionality.
114 | """
115 |
116 | def wrapper(*args, **kwargs):
117 | """
118 | The wrapper function that manages the timing of the original function.
119 |
120 | Args:
121 | *args: Variable length argument list for the decorated function.
122 | **kwargs: Arbitrary keyword arguments for the decorated function.
123 |
124 | Returns:
125 | Any: The result of the decorated function.
126 | """
127 | with Timer(name=func.__name__, **kwargs):
128 | return func(*args, **kwargs)
129 |
130 | return wrapper
131 |
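Both forms in one sketch (the extra rows=100 keyword lands in Timer.kwargs and is appended
to the exit log):

    import time

    from memoryscope.core.utils.timer import Timer, timer

    # Context-manager form: logs "----- load_index.begin -----" on entry and,
    # on exit, "----- load_index.end ----- cost=...ms rows=100".
    with Timer("load_index", time_log_type="wrap", rows=100):
        time.sleep(0.01)

    # Decorator form: wraps the call in Timer(name=func.__name__).
    @timer
    def embed_batch():
        time.sleep(0.01)

    embed_batch()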
--------------------------------------------------------------------------------
/memoryscope/core/worker/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_worker import BaseWorker
2 | from .dummy_worker import DummyWorker
3 | from .memory_base_worker import MemoryBaseWorker
4 | from .memory_manager import MemoryManager
5 |
6 | __all__ = [
7 | "BaseWorker",
8 | "DummyWorker",
9 | "MemoryBaseWorker",
10 | "MemoryManager"
11 | ]
--------------------------------------------------------------------------------
/memoryscope/core/worker/backend/__init__.py:
--------------------------------------------------------------------------------
1 | from .contra_repeat_worker import ContraRepeatWorker
2 | from .get_observation_with_time_worker import GetObservationWithTimeWorker
3 | from .get_observation_worker import GetObservationWorker
4 | from .get_reflection_subject_worker import GetReflectionSubjectWorker
5 | from .info_filter_worker import InfoFilterWorker
6 | from .load_memory_worker import LoadMemoryWorker
7 | from .long_contra_repeat_worker import LongContraRepeatWorker
8 | from .update_insight_worker import UpdateInsightWorker
9 | from .update_memory_worker import UpdateMemoryWorker
10 |
11 | __all__ = [
12 | "ContraRepeatWorker",
13 | "GetObservationWithTimeWorker",
14 | "GetObservationWorker",
15 | "GetReflectionSubjectWorker",
16 | "InfoFilterWorker",
17 | "LoadMemoryWorker",
18 | "LongContraRepeatWorker",
19 | "UpdateInsightWorker",
20 | "UpdateMemoryWorker"
21 | ]
22 |
--------------------------------------------------------------------------------
/memoryscope/core/worker/backend/get_observation_with_time_worker.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from memoryscope.constants.common_constants import NEW_OBS_WITH_TIME_NODES
4 | from memoryscope.constants.language_constants import COLON_WORD
5 | from memoryscope.core.utils.datetime_handler import DatetimeHandler
6 | from memoryscope.core.worker.backend.get_observation_worker import GetObservationWorker
7 | from memoryscope.scheme.message import Message
8 |
9 |
10 | class GetObservationWithTimeWorker(GetObservationWorker):
11 | """
12 | A specialized worker class that extends GetObservationWorker functionality to handle
13 | retrieval of observations which include associated timestamp information from chat messages.
14 | """
15 | FILE_PATH: str = __file__
16 | OBS_STORE_KEY: str = NEW_OBS_WITH_TIME_NODES
17 |
18 | def filter_messages(self) -> List[Message]:
19 | """
20 | Filters the chat messages to only include those which contain time-related keywords.
21 |
22 | Returns:
23 | List[Message]: A list of filtered messages that mention time.
24 | """
25 | filter_messages = []
26 | for msg in self.chat_messages_scatter:
27 | # Checks if the message content has any time reference words
28 | if DatetimeHandler.has_time_word(query=msg.content, language=self.language):
29 | filter_messages.append(msg)
30 | return filter_messages
31 |
32 | def build_message(self, filter_messages: List[Message]) -> List[Message]:
33 | """
34 | Constructs a prompt message for obtaining observations with timestamp information
35 | based on filtered chat messages.
36 |
37 | This method processes each filtered message with the timestamp information.
38 | It then organizes these timestamped messages into a structured prompt that includes a system prompt,
39 | few-shot examples, and the concatenated user queries.
40 |
41 | Args:
42 | filter_messages (List[Message]): A list of Message objects that have been filtered for processing.
43 |
44 | Returns:
45 | List[Message]: A list containing the newly constructed Message object for further interaction.
46 | """
47 | user_query_list = []
48 | for i, msg in enumerate(filter_messages):
49 | # Create a DatetimeHandler instance for each message's timestamp and format it
50 | dt_handler = DatetimeHandler(dt=msg.time_created)
51 | dt = dt_handler.string_format(string_format=self.prompt_handler.time_string_format, language=self.language)
52 | # Append formatted timestamp-query pairs to the user_query_list
53 | user_query_list.append(f"{i + 1} {dt} {self.target_name}{self.get_language_value(COLON_WORD)}{msg.content}")
54 |
55 | # Construct the system prompt with the count of observations
56 | system_prompt = self.prompt_handler.get_observation_with_time_system.format(num_obs=len(user_query_list),
57 | user_name=self.target_name)
58 |
59 | # Retrieve the few-shot examples for the prompt
60 | few_shot = self.prompt_handler.get_observation_with_time_few_shot.format(user_name=self.target_name)
61 |
62 | # Format the user query section with the concatenated list of timestamped queries
63 | user_query = self.prompt_handler.get_observation_with_time_user_query.format(
64 | user_query="\n".join(user_query_list),
65 | user_name=self.target_name)
66 |
67 | # Assemble the final message for observation retrieval
68 | get_observation_message_wt = self.prompt_to_msg(system_prompt=system_prompt,
69 | few_shot=few_shot,
70 | user_query=user_query)
71 |
72 | # Log the constructed message for debugging purposes
73 | self.logger.info(f"get_observation_message_wt={get_observation_message_wt}")
74 |
75 | # Return the newly created message
76 | return get_observation_message_wt
77 |
--------------------------------------------------------------------------------
/memoryscope/core/worker/backend/info_filter_worker.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from memoryscope.constants.language_constants import COLON_WORD
4 | from memoryscope.core.utils.response_text_parser import ResponseTextParser
5 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker
6 | from memoryscope.scheme.message import Message
7 |
8 |
9 | class InfoFilterWorker(MemoryBaseWorker):
10 | """
11 |     This worker filters the chat message history (`self.chat_messages_scatter`), retaining only the
12 |     messages that carry significant information. It constructs a prompt from these filtered messages,
13 |     asks an LLM to score each one, parses the generated response to extract the scores, and ultimately
14 |     keeps only the messages whose scores fall within `preserved_scores`.
15 | """
16 | FILE_PATH: str = __file__
17 |
18 | def _parse_params(self, **kwargs):
19 | self.preserved_scores: str = kwargs.get("preserved_scores", "2,3")
20 | self.info_filter_msg_max_size: int = kwargs.get("info_filter_msg_max_size", 200)
21 | self.generation_model_kwargs: dict = kwargs.get("generation_model_kwargs", {})
22 |
23 | def _run(self):
24 | """
25 | Filters user messages in the chat, generates a prompt incorporating these messages,
26 | utilizes an LLM to rate the information score for each message,
27 | and updates `self.chat_messages` to only include messages with designated scores.
28 |
29 | This method executes the following steps:
30 | 1. Filters out non-user messages and truncates long messages.
31 | 2. Constructs a prompt with user messages for LLM input.
32 | 3. Calls the LLM model with the constructed prompt.
33 | 4. Parses the LLM's response to extract message scores.
34 |         5. Retains messages in `self.chat_messages_scatter` based on their scores.
35 | """
36 | # filter user msg
37 | info_messages: List[Message] = []
38 | for msg in self.chat_messages_scatter:
39 | if msg.memorized:
40 | continue
41 |
42 | # TODO: add memory for all messages
43 | if msg.role_name != self.target_name:
44 | continue
45 |
46 | if len(msg.content) >= self.info_filter_msg_max_size:
47 | half_size = int(self.info_filter_msg_max_size * 0.5 + 0.5)
48 | msg.content = msg.content[: half_size] + msg.content[-half_size:]
49 | info_messages.append(msg)
50 |
51 | if not info_messages:
52 | self.logger.warning("info_messages is empty!")
53 | self.continue_run = False
54 | return
55 |
56 | # generate prompt
57 | user_query_list = []
58 | for i, msg in enumerate(info_messages):
59 | user_query_list.append(f"{i + 1} {self.target_name}{self.get_language_value(COLON_WORD)} {msg.content}")
60 | system_prompt = self.prompt_handler.info_filter_system.format(batch_size=len(info_messages),
61 | user_name=self.target_name)
62 | few_shot = self.prompt_handler.info_filter_few_shot.format(user_name=self.target_name)
63 | user_query = self.prompt_handler.info_filter_user_query.format(user_query="\n".join(user_query_list))
64 | info_filter_message = self.prompt_to_msg(system_prompt=system_prompt, few_shot=few_shot, user_query=user_query)
65 | self.logger.info(f"info_filter_message={info_filter_message}")
66 |
67 | # call llm
68 | response = self.generation_model.call(messages=info_filter_message, **self.generation_model_kwargs)
69 |
70 | # return if empty
71 | if not response.status or not response.message.content:
72 | self.continue_run = False
73 | return
74 | response_text = response.message.content
75 |
76 | # parse text
77 | info_score_list = ResponseTextParser(response_text, self.language, self.__class__.__name__).parse_v1()
78 | if len(info_score_list) != len(info_messages):
79 | self.logger.warning(f"score_size != messages_size, {len(info_score_list)} vs {len(info_messages)}")
80 |
81 | # filter messages
82 | filtered_messages: List[Message] = []
83 | for info_score in info_score_list:
84 | if not info_score:
85 | continue
86 |
87 | if len(info_score) != 2:
88 | self.logger.warning(f"info_score={info_score} is invalid!")
89 | continue
90 |
91 | idx, score = info_score
92 |
93 | idx = int(idx) - 1
94 | if idx >= len(info_messages):
95 | self.logger.warning(f"idx={idx} is invalid! info_messages.size={len(info_messages)}")
96 | continue
97 | message = info_messages[idx]
98 |
99 | if score in self.preserved_scores:
100 | message.meta_data["info_score"] = score
101 | filtered_messages.append(message)
102 | self.logger.info(f"info filter stage: keep {message.content}")
103 |
104 | if not filtered_messages:
105 | self.logger.warning("filtered_messages is empty!")
106 | self.continue_run = False
107 | return
108 |
109 | self.chat_messages_scatter = filtered_messages
110 |
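One detail worth noting: preserved_scores stays a plain string, so the check
`score in self.preserved_scores` is substring membership, as the lines below illustrate:

    preserved_scores = "2,3"        # the default value
    print("2" in preserved_scores)  # True  (kept)
    print("3" in preserved_scores)  # True  (kept)
    print("1" in preserved_scores)  # False (dropped)
    print("," in preserved_scores)  # True  (an edge case of the substring check)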
--------------------------------------------------------------------------------
/memoryscope/core/worker/backend/load_memory_worker.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from memoryscope.constants.common_constants import NOT_REFLECTED_NODES, NOT_UPDATED_NODES, INSIGHT_NODES, TODAY_NODES
4 | from memoryscope.core.utils.datetime_handler import DatetimeHandler
5 | from memoryscope.core.utils.timer import timer
6 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker
7 | from memoryscope.enumeration.memory_type_enum import MemoryTypeEnum
8 | from memoryscope.enumeration.store_status_enum import StoreStatusEnum
9 | from memoryscope.scheme.memory_node import MemoryNode
10 |
11 |
12 | class LoadMemoryWorker(MemoryBaseWorker):
13 | def _parse_params(self, **kwargs):
14 | self.retrieve_not_reflected_top_k: int = kwargs.get("retrieve_not_reflected_top_k", 0)
15 | self.retrieve_not_updated_top_k: int = kwargs.get("retrieve_not_updated_top_k", 0)
16 | self.retrieve_insight_top_k: int = kwargs.get("retrieve_insight_top_k", 0)
17 | self.retrieve_today_top_k: int = kwargs.get("retrieve_today_top_k", 0)
18 |
19 | @timer
20 | def retrieve_not_reflected_memory(self):
21 | """
22 | Retrieves top-K not reflected memories based on the query and stores them in the memory handler.
23 | """
24 | if not self.retrieve_not_reflected_top_k:
25 | return
26 |
27 | filter_dict = {
28 | "user_name": self.user_name,
29 | "target_name": self.target_name,
30 | "store_status": StoreStatusEnum.VALID.value,
31 | "memory_type": [MemoryTypeEnum.OBSERVATION.value, MemoryTypeEnum.OBS_CUSTOMIZED.value],
32 | "obs_reflected": 0,
33 | }
34 | nodes: List[MemoryNode] = self.memory_store.retrieve_memories(top_k=self.retrieve_not_reflected_top_k,
35 | filter_dict=filter_dict)
36 | self.memory_manager.set_memories(NOT_REFLECTED_NODES, nodes)
37 |
38 | @timer
39 | def retrieve_not_updated_memory(self):
40 | """
41 | Retrieves top-K not updated memories based on the query and stores them in the memory handler.
42 | """
43 | if not self.retrieve_not_updated_top_k:
44 | return
45 |
46 | filter_dict = {
47 | "user_name": self.user_name,
48 | "target_name": self.target_name,
49 | "store_status": StoreStatusEnum.VALID.value,
50 | "memory_type": [MemoryTypeEnum.OBSERVATION.value, MemoryTypeEnum.OBS_CUSTOMIZED.value],
51 | "obs_updated": 0,
52 | }
53 | nodes: List[MemoryNode] = self.memory_store.retrieve_memories(top_k=self.retrieve_not_updated_top_k,
54 | filter_dict=filter_dict)
55 | self.memory_manager.set_memories(NOT_UPDATED_NODES, nodes)
56 |
57 | @timer
58 | def retrieve_insight_memory(self):
59 | """
60 | Retrieves top-K insight memories based on the query and stores them in the memory handler.
61 | """
62 | if not self.retrieve_insight_top_k:
63 | return
64 |
65 | filter_dict = {
66 | "user_name": self.user_name,
67 | "target_name": self.target_name,
68 | "store_status": StoreStatusEnum.VALID.value,
69 | "memory_type": MemoryTypeEnum.INSIGHT.value,
70 | }
71 | nodes: List[MemoryNode] = self.memory_store.retrieve_memories(top_k=self.retrieve_insight_top_k,
72 | filter_dict=filter_dict)
73 | self.memory_manager.set_memories(INSIGHT_NODES, nodes)
74 |
75 | @timer
76 | def retrieve_today_memory(self, dt: str):
77 | """
78 | Retrieves top-K memories from today based on the query and stores them in the memory handler.
79 |
80 | Args:
81 | dt (str): The date string to filter today's memories.
82 | """
83 | if not self.retrieve_today_top_k:
84 | return
85 |
86 | filter_dict = {
87 | "user_name": self.user_name,
88 | "target_name": self.target_name,
89 | "store_status": StoreStatusEnum.VALID.value,
90 | "memory_type": [MemoryTypeEnum.OBSERVATION.value, MemoryTypeEnum.OBS_CUSTOMIZED.value],
91 | "dt": dt,
92 | }
93 | nodes: List[MemoryNode] = self.memory_store.retrieve_memories(top_k=self.retrieve_today_top_k,
94 | filter_dict=filter_dict)
95 |
96 | self.memory_manager.set_memories(TODAY_NODES, nodes)
97 |
98 | def _run(self):
99 | """
100 | Initiates multithread tasks to retrieve various types of memory data including
101 | not reflected, not updated, insights, and data from today. After submitting all tasks,
102 | it waits for their completion by calling `gather_thread_result`.
103 |
104 | This method serves as the controller for data retrieval operations, enhancing efficiency
105 | by handling tasks concurrently.
106 | """
107 |
108 |         # Today's date string, used to filter today's memories
109 | dt = DatetimeHandler().datetime_format()
110 | self.submit_thread_task(self.retrieve_not_reflected_memory)
111 | self.submit_thread_task(self.retrieve_not_updated_memory)
112 | self.submit_thread_task(self.retrieve_insight_memory)
113 | self.submit_thread_task(self.retrieve_today_memory, dt=dt)
114 |
115 | # Waits for all submitted tasks to complete
116 | for _ in self.gather_thread_result():
117 | pass
118 |
--------------------------------------------------------------------------------
/memoryscope/core/worker/backend/update_memory_worker.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict
2 |
3 | from memoryscope.constants.common_constants import RESULT
4 | from memoryscope.core.utils.datetime_handler import DatetimeHandler
5 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker
6 | from memoryscope.enumeration.action_status_enum import ActionStatusEnum
7 | from memoryscope.enumeration.memory_type_enum import MemoryTypeEnum
8 | from memoryscope.scheme.memory_node import MemoryNode
9 |
10 |
11 | class UpdateMemoryWorker(MemoryBaseWorker):
12 |
13 | def _parse_params(self, **kwargs):
14 | self.method: str = kwargs.get("method", "")
15 | self.memory_key: str = kwargs.get("memory_key", "")
16 |
17 | def from_query(self):
18 | """
19 | Creates a MemoryNode from the provided query if present in chat_kwargs.
20 |
21 | Returns:
22 |             List[MemoryNode]: A list containing a single MemoryNode created from the query, or None if no non-empty query is present.
23 | """
24 | if "query" not in self.chat_kwargs:
25 | return
26 |
27 | query = self.chat_kwargs["query"].strip()
28 | if not query:
29 | return
30 |
31 | dt_handler = DatetimeHandler()
32 | node = MemoryNode(user_name=self.user_name,
33 | target_name=self.target_name,
34 | content=query,
35 | memory_type=MemoryTypeEnum.OBS_CUSTOMIZED.value,
36 | action_status=ActionStatusEnum.NEW.value,
37 | timestamp=dt_handler.timestamp)
38 | return [node]
39 |
40 | def from_memory_key(self):
41 | """
42 | Retrieves memories based on the memory key if it exists.
43 |
44 | Returns:
45 | List[MemoryNode]: A list of MemoryNode objects retrieved using the memory key.
46 | """
47 | if not self.memory_key:
48 | return
49 |
50 | return self.memory_manager.get_memories(keys=self.memory_key)
51 |
52 | def delete_all(self):
53 | """
54 | Marks all memories for deletion by setting their action_status to 'DELETE'.
55 |
56 | Returns:
57 | List[MemoryNode]: A list of all MemoryNode objects marked for deletion.
58 | """
59 | nodes: List[MemoryNode] = self.memory_manager.get_memories(keys="all")
60 | for node in nodes:
61 | node.action_status = ActionStatusEnum.DELETE.value
62 | self.logger.info(f"delete_all.size={len(nodes)}")
63 | return nodes
64 |
65 | def delete_memory(self):
66 | """
67 | Marks specific memories for deletion based on query or memory_id present in chat_kwargs.
68 |
69 | Returns:
70 | List[MemoryNode]: A list of MemoryNode objects marked for deletion based on the query or memory_id.
71 | """
72 | if "query" in self.chat_kwargs:
73 | query = self.chat_kwargs["query"].strip()
74 | if not query:
75 | return
76 |
77 | i = 0
78 | nodes: List[MemoryNode] = self.memory_manager.get_memories(keys="all")
79 | for node in nodes:
80 | if node.content == query:
81 | i += 1
82 | node.action_status = ActionStatusEnum.DELETE.value
83 | self.logger.info(f"delete_memory.query.size={len(nodes)}")
84 | return nodes
85 |
86 | elif "memory_id" in self.chat_kwargs:
87 | memory_id = self.chat_kwargs["memory_id"].strip()
88 | if not memory_id:
89 | return
90 |
91 | i = 0
92 | nodes: List[MemoryNode] = self.memory_manager.get_memories(keys="all")
93 | for node in nodes:
94 | if node.memory_id == memory_id:
95 | i += 1
96 | node.action_status = ActionStatusEnum.DELETE.value
97 | self.logger.info(f"delete_memory.memory_id.size={len(nodes)}")
98 | return nodes
99 |
100 | return []
101 |
102 | def _run(self):
103 | """
104 | Executes a memory update method provided via the 'method' attribute.
105 |
106 | The method specified by the 'method' attribute is invoked,
107 | which updates memories accordingly.
108 | """
109 | method = self.method.strip()
110 | if not hasattr(self, method):
111 | self.logger.info(f"method={method} is missing!")
112 | return
113 |
114 | updated_nodes: Dict[str, List[MemoryNode]] = self.memory_manager.update_memories(nodes=getattr(self, method)())
115 | line = ["[MEMORY ACTIONS]:"]
116 | for action, nodes in updated_nodes.items():
117 | for node in nodes:
118 | line.append(f"{action} {node.memory_type}: {node.content} ({node.store_status})")
119 | self.set_workflow_context(RESULT, "\n".join(line))
120 |
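The _run above dispatches to from_query / from_memory_key / delete_all / delete_memory purely by the configured method name, via hasattr/getattr. A minimal sketch of that dispatch pattern (the class and method names here are illustrative, not part of memoryscope):

class Dispatcher:
    def delete_all(self):
        return "delete_all called"

    def run(self, method: str):
        method = method.strip()
        if not hasattr(self, method):
            return f"method={method} is missing!"
        # Look up the bound method by name and invoke it.
        return getattr(self, method)()

d = Dispatcher()
print(d.run("delete_all"))  # delete_all called
print(d.run("nope"))        # method=nope is missing!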
--------------------------------------------------------------------------------
/memoryscope/core/worker/dummy_worker.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | from memoryscope.constants.common_constants import RESULT, WORKFLOW_NAME, CHAT_KWARGS
4 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker
5 |
6 |
7 | class DummyWorker(MemoryBaseWorker):
8 | def _run(self):
9 | """
10 |         Executes the dummy worker's run logic: it logs the workflow entry, captures the current
11 |         timestamp and file path, and sets the result context with details about the workflow execution.
12 |
13 | This method utilizes the BaseWorker's capabilities to interact with the workflow context.
14 | """
15 | workflow_name = self.get_workflow_context(WORKFLOW_NAME)
16 | chat_kwargs = self.get_workflow_context(CHAT_KWARGS)
17 | self.logger.info(f"Entering workflow={workflow_name}.dummy_worker!")
18 | # Records the current timestamp as an integer
19 | ts = int(datetime.datetime.now().timestamp())
20 | # Retrieves the current file's path
21 | file_path = __file__
22 | self.set_workflow_context(RESULT, f"test {workflow_name} kwargs={chat_kwargs} file_path={file_path} \nts={ts}")
23 |
--------------------------------------------------------------------------------
/memoryscope/core/worker/frontend/__init__.py:
--------------------------------------------------------------------------------
1 | from .extract_time_worker import ExtractTimeWorker
2 | from .fuse_rerank_worker import FuseRerankWorker
3 | from .print_memory_worker import PrintMemoryWorker
4 | from .read_message_worker import ReadMessageWorker
5 | from .retrieve_memory_worker import RetrieveMemoryWorker
6 | from .semantic_rank_worker import SemanticRankWorker
7 | from .set_query_worker import SetQueryWorker
8 |
9 | __all__ = [
10 | "ExtractTimeWorker",
11 | "FuseRerankWorker",
12 | "PrintMemoryWorker",
13 | "ReadMessageWorker",
14 | "RetrieveMemoryWorker",
15 | "SemanticRankWorker",
16 | "SetQueryWorker"
17 | ]
18 |
--------------------------------------------------------------------------------
/memoryscope/core/worker/frontend/extract_time_worker.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Dict
3 |
4 | from memoryscope.constants.common_constants import QUERY_WITH_TS, EXTRACT_TIME_DICT
5 | from memoryscope.constants.language_constants import DATATIME_KEY_MAP
6 | from memoryscope.core.utils.datetime_handler import DatetimeHandler
7 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker
8 |
9 |
10 | class ExtractTimeWorker(MemoryBaseWorker):
11 | """
12 | A specialized worker class designed to identify and extract time-related information
13 | from text generated by an LLM, translating date-time keywords based on the set language,
14 | and storing this extracted data within a shared context.
15 | """
16 |
17 | EXTRACT_TIME_PATTERN = r"-\s*(\S+)[::]\s*(\S+)"
18 | FILE_PATH: str = __file__
19 |
20 | def _parse_params(self, **kwargs):
21 | self.generation_model_kwargs: dict = kwargs.get("generation_model_kwargs", {})
22 |
23 | def _run(self):
24 | """
25 | Executes the primary logic of identifying and extracting time data from an LLM's response.
26 |
27 | This method first checks if the input query contains any datetime keywords. If not, it logs and returns.
28 | It then constructs a prompt with contextual information including formatted timestamps and calls the LLM.
29 | The response is parsed for time-related data using regex, translated via a language-specific key map,
30 | and the resulting time data is stored in the shared context.
31 | """
32 | query, query_timestamp = self.get_workflow_context(QUERY_WITH_TS)
33 |
34 | # Identify if the query contains datetime keywords
35 | contain_datetime = DatetimeHandler.has_time_word(query, self.language)
36 | if not contain_datetime:
37 | self.logger.info(f"contain_datetime={contain_datetime}")
38 | return
39 |
40 | # Prepare the prompt with necessary contextual details
41 | query_time_str = DatetimeHandler(dt=query_timestamp).string_format(self.prompt_handler.time_string_format,
42 | self.language)
43 | system_prompt = self.prompt_handler.extract_time_system
44 | few_shot = self.prompt_handler.extract_time_few_shot
45 | user_query = self.prompt_handler.extract_time_user_query.format(query=query, query_time_str=query_time_str)
46 | extract_time_message = self.prompt_to_msg(system_prompt=system_prompt, few_shot=few_shot, user_query=user_query)
47 | self.logger.info(f"extract_time_message={extract_time_message}")
48 |
49 | # Invoke the LLM to generate a response
50 | response = self.generation_model.call(messages=extract_time_message, **self.generation_model_kwargs)
51 |
52 | # Handle empty or unsuccessful responses
53 | if not response.status or not response.message.content:
54 | return
55 | response_text = response.message.content
56 |
57 | # Extract time information from the LLM's response using regex
58 | extract_time_dict: Dict[str, str] = {}
59 | matches = re.findall(self.EXTRACT_TIME_PATTERN, response_text)
60 | key_map: dict = self.get_language_value(DATATIME_KEY_MAP)
61 | for key, value in matches:
62 |             if key in key_map:
63 | extract_time_dict[key_map[key]] = value
64 | self.logger.info(f"response_text={response_text} matches={matches} filters={extract_time_dict}")
65 | self.set_workflow_context(EXTRACT_TIME_DICT, extract_time_dict)
66 |
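How EXTRACT_TIME_PATTERN behaves: the character class [::] accepts both the full-width colon (U+FF1A) and the ASCII colon, so the same regex parses both Chinese and English LLM responses. A runnable check against responses shaped like the few-shot answers:

import re

EXTRACT_TIME_PATTERN = r"-\s*(\S+)[::]\s*(\S+)"

print(re.findall(EXTRACT_TIME_PATTERN, "- Year: 2024 - Month: 6 - Hour: 15"))
# [('Year', '2024'), ('Month', '6'), ('Hour', '15')]
print(re.findall(EXTRACT_TIME_PATTERN, "- 年:2024 - 月:6月"))
# [('年', '2024'), ('月', '6月')]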
--------------------------------------------------------------------------------
/memoryscope/core/worker/frontend/extract_time_worker.yaml:
--------------------------------------------------------------------------------
1 | time_string_format:
2 | cn: |
3 | {year}年{month}{day}日,{year}年第{week}周,{weekday},{hour}时。
4 | en: |
5 | {month} {day}, {year}, {week}th week of {year}, {weekday}, at {hour}.
6 |
7 |
8 | extract_time_system:
9 | cn: |
10 | 任务:从语句与语句发生的时间,推断并提取语句内容中指向的时间段。
11 | 回答尽可能完整的时间段。
12 | 回答的格式严格遵照示例中的已有格式规范。
13 | 若语句不涉及时间则回答无。
14 | en: |
15 | Task: From the sentences and the time when they occurred, infer and extract the time periods indicated in the content of the sentences.
16 | Answer with the most complete time periods possible.
17 | The format of the answers must strictly adhere to the specifications in the examples provided.
18 | If the sentence does not involve time, respond with "none."
19 |
20 |
21 | extract_time_few_shot:
22 | cn: |
23 | 示例1:
24 | 句子:我记得你前年四月份去了阿联酋,阿联酋有哪些好玩的地方?迪拜和阿布扎比你更喜欢哪个?沙漠的景色壮观吗?
25 | 时间:1992年8月20日,1992年第34周,周一,18时。
26 | 回答:
27 | - 年:1990 - 月:4月
28 |
29 | 示例2:
30 | 句子:后天下午三点的会议记得参加。我在日历上仔细标注了这个重要的日子,提醒自己不要错过。会议将在公司会议室举行,这是一个讨论未来发展方向的重要机会。
31 | 时间:2024年6月19日,2024年第25周,周二,13时。
32 | 回答:
33 | - 年:2024 - 月:6月 - 日:21 - 时:15
34 |
35 | 示例3:
36 | 句子:下个月第一个周六去杭州玩。
37 | 时间:2005年7月15日,2005年第28周,周六,0时。
38 | 回答:
39 | - 年:2005 - 月:8月 - 周:31 - 星期几:周六
40 |
41 | 示例4:
42 | 句子:上周末我们去的那个小镇真是太美了。
43 | 时间:1999年12月2日,1999年第48周,周二,8时。
44 | 回答:
45 | - 年:1999 - 周:47 - 星期几:周六,周日
46 |
47 | 示例5:
48 | 句子:再过半小时就要宣讲了,记得准备材料。
49 | 时间:2020年6月22日,2020年第25周,周一,9时。
50 | 回答:
51 | - 年:2020 - 月:6月 - 日:22 - 时:10
52 |
53 | 示例6:
54 | 句子:10000米长跑比赛的开始时间是3分47秒前。
55 | 时间:1987年2月17日,1987年第7周,周三,19时。
56 | 回答:
57 | - 年:1987 - 月:2 - 日:17 - 时:19
58 |
59 | 示例7:
60 | 句子:上个月的这个时候我们还在筹备音乐会。每天都是忙碌而充实的日子,我们为音乐会的顺利举办而努力奋斗着。彩排、布景、节目安排,每一个细节都需要精心安排和准备。
61 | 时间:1995年11月24日,1995年第48周,周二,17时。
62 | 回答:
63 | - 年:1995 - 月:10 - 日:24
64 |
65 | 示例8:
66 | 句子:我的朋友非常喜欢运动,他认为运动有助于增强身体素质。
67 | 时间:2015年1月23日,2015年第4周,周四,7时。
68 | 回答:
69 | 无
70 |
71 | en: |
72 | Example 1:
73 | Sentence: I remember you went to the UAE in April the year before last. Which places in the UAE are fun? Which do you prefer, Dubai or Abu Dhabi? Are the desert views spectacular?
74 | Time: August 20, 1992, 34th week of 1992, Monday, at 18.
75 | Answer:
76 | - Year: 1990 - Month: 4
77 |
78 | Example 2:
79 | Sentence: Remember to attend the meeting at 3 PM the day after tomorrow. I carefully marked this important day on my calendar to remind myself not to miss it. The meeting will be held in the company conference room, and it's an important opportunity to discuss future development directions.
80 | Time: June 19, 2024, 25th week of 2024, Tuesday, at 13.
81 | Answer:
82 | - Year: 2024 - Month: 6 - Day: 21 - Hour: 15
83 |
84 | Example 3:
85 | Sentence: Next month on the first Saturday, let's go to Hangzhou.
86 | Time: July 15, 2005, 28th week of 2005, Saturday, at 0.
87 | Answer:
88 | - Year: 2005 - Month: 8 - Week: 31 - Day of Week: 6
89 |
90 | Example 4:
91 | Sentence: The small town we visited last weekend was truly beautiful.
92 | Time: December 2, 1999, 48th week of 1999, Tuesday, at 8.
93 | Answer:
94 | - Year: 1999 - Week: 47 - Day of Week: 6, 7
95 |
96 | Example 5:
97 | Sentence: The presentation will start in half an hour, remember to prepare the materials.
98 | Time: June 22, 2020, 25th week of 2020, Monday, at 9.
99 | Answer:
100 | - Year: 2020 - Month: 6 - Day: 22 - Hour: 10
101 |
102 | Example 6:
103 | Sentence: The start time for the 10,000-meter race was 3 minutes and 47 seconds ago.
104 | Time: February 17, 1987, 7th week of 1987, Wednesday, at 19.
105 | Answer:
106 | - Year: 1987 - Month: 2 - Day: 17 - Hour: 19
107 |
108 | Example 7:
109 | Sentence: At this time last month, we were still preparing for the concert. Every day was busy and fulfilling, and we worked hard for the successful holding of the concert. Rehearsals, set design, and program arrangements - every detail needed careful planning and preparation.
110 | Time: November 24, 1995, 48th week of 1995, Tuesday, at 17.
111 | Answer:
112 | - Year: 1995 - Month: 10 - Day: 24
113 |
114 | Example 8:
115 | Sentence: My friend loves sports very much and believes that exercise helps improve physical fitness.
116 | Time: January 23, 2015, 4th week of 2015, Thursday, at 7.
117 | Answer:
118 | None
119 |
120 |
121 | extract_time_user_query:
122 | cn: |
123 | 句子:{query}
124 | 时间:{query_time_str}
125 | 回答:
126 |
127 | en: |
128 | Sentence: {query}
129 | Time: {query_time_str}
130 | Answer:
131 |
132 |
133 |
134 |
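The time_string_format entries above are plain str.format templates; DatetimeHandler.string_format fills the placeholders. A sketch of the English template with hand-picked values (the placeholder names are read off the template itself; the real values come from DatetimeHandler):

template = "{month} {day}, {year}, {week}th week of {year}, {weekday}, at {hour}."
print(template.format(month="June", day=19, year=2024, week=25,
                      weekday="Tuesday", hour=13))
# June 19, 2024, 25th week of 2024, Tuesday, at 13.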
--------------------------------------------------------------------------------
/memoryscope/core/worker/frontend/print_memory_worker.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from memoryscope.constants.common_constants import RETRIEVE_MEMORY_NODES, RESULT
4 | from memoryscope.core.utils.datetime_handler import DatetimeHandler
5 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker
6 | from memoryscope.enumeration.memory_type_enum import MemoryTypeEnum
7 | from memoryscope.enumeration.store_status_enum import StoreStatusEnum
8 | from memoryscope.scheme.memory_node import MemoryNode
9 |
10 |
11 | class PrintMemoryWorker(MemoryBaseWorker):
12 | """
13 | Formats the memories to print.
14 | """
15 | FILE_PATH: str = __file__
16 |
17 | def _run(self):
18 | """
19 |         Executes the primary function, which involves:
20 |         1. Fetching the memories.
21 |         2. Formatting them with 'print_template'.
22 |         3. Setting the formatted string back into the worker's context.
23 | """
24 | # get long-term memory
25 | memory_node_list: List[MemoryNode] = self.memory_manager.get_memories(RETRIEVE_MEMORY_NODES)
26 | memory_node_list = sorted(memory_node_list, key=lambda x: x.timestamp, reverse=True)
27 |
28 | observation_memory_list: List[str] = []
29 | insight_memory_list: List[str] = []
30 | expired_memory_list: List[str] = []
31 |
32 | i = 0
33 | j = 0
34 | k = 0
35 | # remove duplicate content
36 | expired_content_set = set()
37 | for node in memory_node_list:
38 | if not node.content:
39 | continue
40 |
41 | dt_handler = DatetimeHandler(node.timestamp)
42 | dt = dt_handler.datetime_format("%Y%m%d %H:%M:%S")
43 | if StoreStatusEnum(node.store_status) is StoreStatusEnum.EXPIRED:
44 | if node.content in expired_content_set:
45 | continue
46 | else:
47 | expired_content_set.add(node.content)
48 | i += 1
49 |                 expired_memory_list.append(f"[{dt}] {i}. {node.content}")
50 |
51 | elif MemoryTypeEnum(node.memory_type) in [MemoryTypeEnum.OBSERVATION, MemoryTypeEnum.OBS_CUSTOMIZED]:
52 | j += 1
53 |                 observation_memory_list.append(f"[{dt}] {j}. {node.content} "
54 |                                                f"[status({node.obs_reflected},{node.obs_updated})]")
55 |
56 | elif MemoryTypeEnum(node.memory_type) is MemoryTypeEnum.INSIGHT:
57 | k += 1
58 |                 insight_memory_list.append(f"[{dt}] {k}. {node.content}")
59 |
60 | result: str = self.prompt_handler.print_template.format(
61 | user_name=self.user_name,
62 | target_name=self.target_name,
63 | observation_memory="\n".join(observation_memory_list),
64 | insight_memory="\n".join(insight_memory_list),
65 | expired_memory="\n".join(expired_memory_list)).strip()
66 | self.set_workflow_context(RESULT, result)
67 |
--------------------------------------------------------------------------------
/memoryscope/core/worker/frontend/print_memory_worker.yaml:
--------------------------------------------------------------------------------
1 | print_template:
2 | cn: |
3 | ========== {user_name}关于{target_name}的长期记忆 ==========
4 | ----- 观察记忆 -----
5 | {observation_memory}
6 |
7 | ----- 洞察记忆 -----
8 | {insight_memory}
9 |
10 | ----- 过期记忆 -----
11 | {expired_memory}
12 |
13 | en: |
14 |     ========== {user_name}'s long-term memory about {target_name} ==========
15 | ----- observation memory -----
16 | {observation_memory}
17 |
18 | ----- insight memory -----
19 | {insight_memory}
20 |
21 | ----- expired memory -----
22 | {expired_memory}
--------------------------------------------------------------------------------
/memoryscope/core/worker/frontend/read_message_worker.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from memoryscope.constants.common_constants import RESULT
4 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker
5 | from memoryscope.scheme.message import Message
6 |
7 |
8 | class ReadMessageWorker(MemoryBaseWorker):
9 | """
10 | Fetches unmemorized chat messages.
11 | """
12 |
13 | def _run(self):
14 | """
15 | Executes the primary function to fetch unmemorized chat messages.
16 | """
17 | chat_messages_not_memorized: List[List[Message]] = []
18 | for messages in self.chat_messages:
19 | if not messages:
20 | continue
21 |
22 | if messages[0].memorized:
23 | continue
24 |
25 | contain_flag = False
26 |
27 | for msg in messages:
28 | if msg.role_name == self.target_name:
29 | contain_flag = True
30 | break
31 |
32 | if contain_flag:
33 | chat_messages_not_memorized.append(messages)
34 |
35 | contextual_msg_max_count: int = self.chat_kwargs["contextual_msg_max_count"]
36 | chat_message_scatter = []
37 | for messages in chat_messages_not_memorized[-contextual_msg_max_count:]:
38 | chat_message_scatter.extend(messages)
39 |         chat_message_scatter.sort(key=lambda msg: msg.time_created)
40 | self.set_workflow_context(RESULT, chat_message_scatter)
41 |
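The tail of _run flattens only the last contextual_msg_max_count unmemorized batches and re-sorts the individual messages by creation time. The same pattern on plain (time_created, text) tuples, as a standalone sketch:

batches = [[(3, "c")], [(1, "a"), (2, "b")], [(5, "e"), (4, "d")]]
contextual_msg_max_count = 2
flat = [m for batch in batches[-contextual_msg_max_count:] for m in batch]
flat.sort(key=lambda m: m[0])
print(flat)  # [(1, 'a'), (2, 'b'), (4, 'd'), (5, 'e')]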
--------------------------------------------------------------------------------
/memoryscope/core/worker/frontend/semantic_rank_worker.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict
2 |
3 | from memoryscope.constants.common_constants import RETRIEVE_MEMORY_NODES, QUERY_WITH_TS, RANKED_MEMORY_NODES
4 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker
5 | from memoryscope.scheme.memory_node import MemoryNode
6 |
7 |
8 | class SemanticRankWorker(MemoryBaseWorker):
9 | """
10 | The `SemanticRankWorker` class processes queries by retrieving memory nodes,
11 | removing duplicates, ranking them based on semantic relevance using a model,
12 | assigning scores, sorting the nodes, and storing the ranked nodes back,
13 | while logging relevant information.
14 | """
15 |
16 | def _parse_params(self, **kwargs):
17 | self.enable_ranker: bool = self.memoryscope_context.meta_data["enable_ranker"]
18 | self.output_memory_max_count: int = self.memoryscope_context.meta_data["output_memory_max_count"]
19 |
20 | def _run(self):
21 | """
22 |         Executes the primary workflow of the SemanticRankWorker, which includes:
23 |         - Retrieving the query and timestamp from the context.
24 |         - Fetching memory nodes.
25 |         - Removing duplicate nodes.
26 |         - Ranking nodes semantically.
27 |         - Assigning scores to nodes.
28 |         - Sorting nodes by score.
29 |         - Saving the ranked nodes back, with logging.
30 |
31 | If no memory nodes are retrieved or if the ranking model fails,
32 | appropriate warnings are logged.
33 | """
34 | # query
35 | query, _ = self.get_workflow_context(QUERY_WITH_TS)
36 | memory_node_list: List[MemoryNode] = self.memory_manager.get_memories(RETRIEVE_MEMORY_NODES)
37 | if not memory_node_list:
38 | self.logger.warning("Retrieve memory nodes is empty!")
39 | return
40 |
41 | if not self.enable_ranker or len(memory_node_list) <= self.output_memory_max_count:
42 |             self.logger.warning("use score_recall instead of score_rank!")
43 |             for node in memory_node_list:
44 |                 node.score_rank = node.score_recall
45 |
46 | else:
47 | # drop repeated
48 | memory_node_dict: Dict[str, MemoryNode] = {n.content.strip(): n for n in memory_node_list if
49 | n.content.strip()}
50 | memory_node_list = list(memory_node_dict.values())
51 |
52 | response = self.rank_model.call(query=query, documents=[n.content for n in memory_node_list])
53 | if not response.status or not response.rank_scores:
54 | return
55 |
56 | # set score
57 | for idx, score in response.rank_scores.items():
58 | if idx >= len(memory_node_list):
59 | self.logger.warning(f"Idx={idx} exceeds the maximum length of rank_scores!")
60 | continue
61 | memory_node_list[idx].score_rank = score
62 |
63 | # sort by score
64 | memory_node_list = sorted(memory_node_list, key=lambda n: n.score_rank, reverse=True)
65 |
66 | # log ranked nodes
67 | self.logger.info(f"Rank stage: query={query}")
68 | for node in memory_node_list:
69 | self.logger.info(f"Rank stage: Content={node.content}, Score={node.score_rank}")
70 |
71 | # save ranked nodes back to memory
72 | self.memory_manager.set_memories(RANKED_MEMORY_NODES, memory_node_list, log_repeat=False)
73 |
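The ranking branch above can be reproduced on plain strings: dedup by stripped content (an insertion-ordered dict keeps one entry per key), apply index-keyed scores shaped like ModelResponse.rank_scores, then sort descending. The scores below are made up for illustration:

contents = ["likes tea", "likes tea ", "plays chess"]
deduped = list({c.strip(): None for c in contents if c.strip()})  # ['likes tea', 'plays chess']
rank_scores = {0: 0.42, 1: 0.91}  # index -> score; hypothetical values
order = sorted(enumerate(deduped), key=lambda pair: rank_scores.get(pair[0], 0.0), reverse=True)
print([content for _, content in order])  # ['plays chess', 'likes tea']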
--------------------------------------------------------------------------------
/memoryscope/core/worker/frontend/set_query_worker.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | from memoryscope.constants.common_constants import QUERY_WITH_TS
4 | from memoryscope.core.worker.memory_base_worker import MemoryBaseWorker
5 |
6 |
7 | class SetQueryWorker(MemoryBaseWorker):
8 | """
9 | The `SetQueryWorker` class is responsible for setting a query and its associated timestamp
10 | into the context, utilizing either provided chat parameters or details from the most recent
11 | chat message.
12 | """
13 |
14 | def _run(self):
15 | """
16 | Executes the worker's primary function, which involves determining the query and its
17 | timestamp, then storing these values within the context.
18 |
19 | If 'query' is found within `self.chat_kwargs`, it is considered as the query input.
20 | Otherwise, the content of the last message in `self.chat_messages` is used as the query,
21 | along with its creation timestamp.
22 | """
23 | query = "" # Default query value
24 | timestamp = int(datetime.datetime.now().timestamp()) # Current timestamp as default
25 |
26 | if "query" in self.chat_kwargs:
27 | # set query if exists
28 | query = self.chat_kwargs["query"]
29 | if not query:
30 | query = ""
31 | query = query.strip()
32 |
33 | # set ts if exists
34 | _timestamp = self.chat_kwargs.get("timestamp")
35 | if _timestamp and isinstance(_timestamp, int):
36 | timestamp = _timestamp
37 |
38 | # Store the determined query and its timestamp in the context
39 | self.set_workflow_context(QUERY_WITH_TS, (query, timestamp))
40 |
--------------------------------------------------------------------------------
/memoryscope/enumeration/__init__.py:
--------------------------------------------------------------------------------
1 | from .action_status_enum import ActionStatusEnum
2 | from .language_enum import LanguageEnum
3 | from .memory_type_enum import MemoryTypeEnum
4 | from .message_role_enum import MessageRoleEnum
5 | from .model_enum import ModelEnum
6 | from .store_status_enum import StoreStatusEnum
7 |
8 | __all__ = [
9 | "ActionStatusEnum",
10 | "LanguageEnum",
11 | "MemoryTypeEnum",
12 | "MessageRoleEnum",
13 | "ModelEnum",
14 | "StoreStatusEnum"
15 | ]
--------------------------------------------------------------------------------
/memoryscope/enumeration/action_status_enum.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 |
4 | class ActionStatusEnum(str, Enum):
5 | """
6 | Enumeration representing various statuses of a memory node.
7 |
8 | Each status reflects a different state of the node in terms of its lifecycle or content:
9 | - NEW: Indicates a newly created node.
10 | - MODIFIED: Signifies that the node has been altered.
11 | - CONTENT_MODIFIED: Specifies changes in the actual content of the node.
12 | - NONE: do nothing.
13 | - DELETE: delete memories.
14 | """
15 | NEW = "new"
16 |
17 | MODIFIED = "modified"
18 |
19 | CONTENT_MODIFIED = "content_modified"
20 |
21 | NONE = "none"
22 |
23 | DELETE = "delete"
24 |
--------------------------------------------------------------------------------
/memoryscope/enumeration/language_enum.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 |
4 | class LanguageEnum(str, Enum):
5 | """
6 | An enumeration representing supported languages.
7 |
8 | Members:
9 | - CN: Represents the Chinese language.
10 | - EN: Represents the English language.
11 | """
12 | CN = "cn"
13 |
14 | EN = "en"
15 |
--------------------------------------------------------------------------------
/memoryscope/enumeration/memory_type_enum.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 |
4 | class MemoryTypeEnum(str, Enum):
5 | """
6 | Defines an enumeration for different types of memory categories.
7 |
8 | Each member represents a distinct type of memory content:
9 | - CONVERSATION: Represents conversation-based memories.
10 | - OBSERVATION: Denotes observational memories.
11 | - INSIGHT: Indicates insightful memories derived from analysis.
12 | - OBS_CUSTOMIZED: Customized observational memories.
13 | """
14 | CONVERSATION = "conversation"
15 |
16 | OBSERVATION = "observation"
17 |
18 | INSIGHT = "insight"
19 |
20 | OBS_CUSTOMIZED = "obs_customized"
21 |
--------------------------------------------------------------------------------
/memoryscope/enumeration/message_role_enum.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 |
4 | class MessageRoleEnum(str, Enum):
5 | """
6 | Enumeration for different message roles within a conversation context.
7 |
8 | This enumeration includes predefined roles such as User, Assistant, and System,
9 | which can be used to categorize messages in chat interfaces, AI interactions, or
10 | any system that involves distinct participant roles.
11 | """
12 | USER = "user" # Represents a message sent by the user.
13 |
14 | ASSISTANT = "assistant" # Represents a response or action performed by an assistant.
15 |
16 | SYSTEM = "system" # Represents system-level messages or actions.
17 |
--------------------------------------------------------------------------------
/memoryscope/enumeration/model_enum.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 |
4 | class ModelEnum(str, Enum):
5 | """
6 | An enumeration representing different types of models used within the system.
7 |
8 | Members:
9 | GENERATION_MODEL: Represents a model responsible for generating content.
10 | EMBEDDING_MODEL: Represents a model tasked with creating embeddings, typically used for transforming data into a
11 | numerical form suitable for machine learning tasks.
12 | RANK_MODEL: Denotes a model that specializes in ranking, often used to order items based on relevance.
13 | """
14 | GENERATION_MODEL = "generation_model"
15 |
16 | EMBEDDING_MODEL = "embedding_model"
17 |
18 | RANK_MODEL = "rank_model"
19 |
--------------------------------------------------------------------------------
/memoryscope/enumeration/store_status_enum.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 |
4 | class StoreStatusEnum(str, Enum):
5 | VALID = "valid"
6 |
7 | EXPIRED = "expired"
8 |
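Because StoreStatusEnum (like the other enums in this package) subclasses str, stored string values round-trip cleanly, which is what worker code such as StoreStatusEnum(node.store_status) is StoreStatusEnum.EXPIRED relies on. A quick check, re-declaring the enum so the snippet stands alone:

from enum import Enum

class StoreStatusEnum(str, Enum):  # mirrors the definition above
    VALID = "valid"
    EXPIRED = "expired"

print(StoreStatusEnum("expired") is StoreStatusEnum.EXPIRED)  # True: lookup by value
print(StoreStatusEnum.VALID == "valid")                       # True: compares as a str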
--------------------------------------------------------------------------------
/memoryscope/scheme/__init__.py:
--------------------------------------------------------------------------------
1 | from .memory_node import MemoryNode
2 | from .message import Message
3 | from .model_response import ModelResponse, ModelResponseGen
4 |
5 | __all__ = [
6 | "MemoryNode",
7 | "Message",
8 | "ModelResponse",
9 | "ModelResponseGen"
10 | ]
11 |
--------------------------------------------------------------------------------
/memoryscope/scheme/memory_node.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from typing import Dict, List
3 | from uuid import uuid4
4 |
5 | from pydantic import Field, BaseModel
6 |
7 |
8 | class MemoryNode(BaseModel):
9 | """
10 | Represents a memory node with comprehensive attributes to store memory information including unique ID,
11 |     user details, content, metadata, and scoring metrics.
12 | Automatically handles timestamp conversion to date format during initialization.
13 | """
14 | memory_id: str = Field(default_factory=lambda: uuid4().hex, description="unique id for memory")
15 |
16 | user_name: str = Field("", description="the user who owns the memory")
17 |
18 | target_name: str = Field("", description="target name described by the memory")
19 |
20 | meta_data: Dict[str, str] = Field({}, description="meta data infos")
21 |
22 | content: str = Field("", description="memory content")
23 |
24 | key: str = Field("", description="memory key")
25 |
26 | key_vector: List[float] = Field([], description="memory key embedding result")
27 |
28 | value: str = Field("", description="memory value")
29 |
30 | score_recall: float = Field(0, description="embedding similarity score used in recall stage")
31 |
32 | score_rank: float = Field(0, description="rank model score used in rank stage")
33 |
34 | score_rerank: float = Field(0, description="rerank score used in rerank stage")
35 |
36 | memory_type: str = Field("", description="conversation / observation / insight...")
37 |
38 | action_status: str = Field("none", description="new / content_modified / modified / deleted / none")
39 |
40 | store_status: str = Field("valid", description="store_status: valid / expired")
41 |
42 | vector: List[float] = Field([], description="content embedding result")
43 |
44 | timestamp: int = Field(default_factory=lambda: int(datetime.datetime.now().timestamp()),
45 | description="timestamp of the memory node")
46 |
47 | dt: str = Field("", description="dt of the memory node")
48 |
49 | obs_reflected: int = Field(0, description="if the observation is reflected: 0/1")
50 |
51 | obs_updated: int = Field(0, description="if the observation has updated user profile or insight: 0/1")
52 |
53 | def __init__(self, **kwargs):
54 | super().__init__(**kwargs)
55 | self.dt = datetime.datetime.fromtimestamp(self.timestamp).strftime("%Y%m%d")
56 |
57 | @property
58 | def node_keys(self):
59 | return list(self.model_json_schema()["properties"].keys())
60 |
61 | def __getitem__(self, key: str):
62 | return self.model_dump().get(key)
63 |
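A small usage sketch of MemoryNode (assuming the memoryscope package is importable): dt is derived from timestamp in __init__, and __getitem__ allows dict-style reads. Note that datetime.fromtimestamp uses the local timezone, so the dt value depends on where this runs:

from memoryscope.scheme.memory_node import MemoryNode

node = MemoryNode(user_name="alice", target_name="alice",
                  content="likes oolong tea", memory_type="observation",
                  timestamp=1718762400)
print(node.dt)          # e.g. "20240619", depending on local timezone
print(node["content"])  # dict-style access via __getitem__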
--------------------------------------------------------------------------------
/memoryscope/scheme/message.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from typing import Dict
3 |
4 | from pydantic import Field, BaseModel
5 |
6 |
7 | class Message(BaseModel):
8 | """
9 | Represents a structured message object with details about the sender, content, and metadata.
10 |
11 | Attributes:
12 | role (str): The role of the message sender (e.g., 'user', 'assistant', 'system').
13 | role_name (str): Optional name associated with the role of the message sender.
14 | content (str): The actual content or text of the message.
15 | time_created (int): Timestamp indicating when the message was created.
16 | memorized (bool): Flag to indicate if the message has been saved or remembered.
17 | meta_data (Dict[str, str]): Additional data or context attached to the message.
18 | """
19 | role: str = Field(..., description="The role of the message sender (user, assistant, system)")
20 |
21 | role_name: str = Field("", description="Name describing the role of the message sender")
22 |
23 | content: str = Field(..., description="The primary content of the message")
24 |
25 | time_created: int = Field(default_factory=lambda: int(datetime.datetime.now().timestamp()),
26 | description="Timestamp marking the message creation time")
27 |
28 | memorized: bool = Field(False, description="Indicates if the message is flagged for memory retention")
29 |
30 | meta_data: Dict[str, str] = Field({}, description="Supplementary data attached to the message")
31 |
--------------------------------------------------------------------------------
/memoryscope/scheme/model_response.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import Generator, List, Dict, Any
3 |
4 | from pydantic import BaseModel, Field
5 |
6 | from memoryscope.enumeration.model_enum import ModelEnum
7 | from memoryscope.scheme.message import Message
8 |
9 |
10 | class ModelResponse(BaseModel):
11 | message: Message | None = Field(None, description="generation model result")
12 |
13 | delta: str = Field("", description="New text that just streamed in (only used when streaming)")
14 |
15 | embedding_results: List[List[float]] | List[float] = Field([], description="embedding vector")
16 |
17 |     rank_scores: Dict[int, float] = Field({}, description="The rank score of each document. "
18 |                                                           "key: index, value: rank score")
19 |
20 | m_type: ModelEnum = Field(ModelEnum.GENERATION_MODEL, description="One of LLM, EMB, RANK.")
21 |
22 | status: bool = Field(True, description="Indicates whether the model call was successful.")
23 |
24 | details: str = Field("", description="The details information for model call, "
25 | "usually for storage of raw response or failure messages.")
26 |
27 | raw: Any = Field("", description="Raw response from model call")
28 |
29 | meta_data: Dict[str, Any] = Field({}, description="meta data for model response")
30 |
31 | def __str__(self, max_size=100, **kwargs):
32 | result = {}
33 | for key, value in self.model_dump().items():
34 | if key == "raw" or not value:
35 | continue
36 |
37 | if isinstance(value, str):
38 | result[key] = value
39 | elif isinstance(value, list | dict):
40 | result[key] = f"{str(value)[:max_size]}... size={len(value)}"
41 | elif isinstance(value, ModelEnum):
42 | result[key] = value.value
43 | return json.dumps(result, **kwargs)
44 |
45 |
46 | ModelResponseGen = Generator[ModelResponse, None, None]
47 |
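A usage sketch of ModelResponse.__str__ (assuming the memoryscope package is importable): the raw payload and empty fields are skipped, long lists and dicts are truncated to max_size characters, and, as the isinstance branches are written, boolean fields such as status fall through and are omitted too:

from memoryscope.scheme.message import Message
from memoryscope.scheme.model_response import ModelResponse

resp = ModelResponse(message=Message(role="assistant", content="hello"),
                     rank_scores={0: 0.9, 1: 0.1})
print(resp)  # compact JSON: message (truncated dict repr), rank_scores, m_type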
--------------------------------------------------------------------------------
/quick-start-demo.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | if os.environ.get('DASHSCOPE_API_KEY', None) is None \
4 | and os.environ.get('OPENAI_API_KEY', None) is None:
5 | raise RuntimeError(f"""
6 | Missing api key(dashscope api key or openai api key.
7 | `https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key` or
8 | `https://openai.com/`""")
9 |
10 | from memoryscope import cli
11 | cli()
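If you prefer to set the key from Python instead of the shell, an equivalent sketch (the key value below is a placeholder, not a real credential):

import os
os.environ.setdefault("DASHSCOPE_API_KEY", "sk-your-key-here")  # placeholder value

from memoryscope import cli
cli()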
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | llama-index==0.10.45
2 | llama-index-embeddings-dashscope>=0.1.3
3 | llama-index-llms-dashscope>=0.1.2
4 | llama-index-postprocessor-dashscope-rerank-custom>=0.1.0
5 | dashscope>=1.19.1
6 | llama-index-vector-stores-elasticsearch>=0.2.0
7 | elasticsearch>=8.14.0
8 | pyfiglet>=1.0.2
9 | termcolor>=2.4.0
10 | fire>=0.6.0
11 | questionary>=2.0.1
12 | pydantic>=2.7.1
13 | pyyaml>=6.0.1
14 | numpy>=1.26.4
15 | rich>=13.0.0
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """
2 | # 1. remove old temp folders
3 | rm -rf dist build
4 |
5 | # 2. then, build
6 | python setup.py sdist bdist_wheel
7 |
8 | # 3. finally, upload
9 | twine upload dist/*
10 |
11 | rm -rf dist build && python setup.py sdist bdist_wheel && twine upload dist/*
12 | """
13 |
14 | import os
15 |
16 | import setuptools
17 |
18 | with open("README.md", "r", encoding="utf-8") as fh:
19 | long_description = fh.read()
20 |
21 |
22 | def _process_requirements():
23 | packages = open('requirements.txt').read().strip().split('\n')
24 | requires = []
25 | for pkg in packages:
26 | if pkg.startswith('git+ssh'):
27 | return_code = os.system('pip install {}'.format(pkg))
28 | assert return_code == 0, 'error, status_code is: {}, exit!'.format(return_code)
29 | else:
30 | requires.append(pkg)
31 | return requires
32 |
33 |
34 | def package_files(directory):
35 | paths = []
36 | for (path, directories, filenames) in os.walk(directory):
37 | for filename in filenames:
38 | if filename.endswith('yaml'):
39 | paths.append(os.path.join('..', path, filename))
40 | return paths
41 |
42 |
43 | extra_files = package_files('memoryscope')
44 |
45 | authors = [
46 | {"name": "Li Yu", "email": "jinli.yl@alibaba-inc.com"},
47 | {"name": "Tiancheng Qin", "email": "qiancheng.qtc@alibaba-inc.com"},
48 | {"name": "Qingxu Fu", "email": "fuqingxu.fqx@alibaba-inc.com"},
49 | {"name": "Sen Huang", "email": "huangsen.huang@alibaba-inc.com"},
50 | {"name": "Xianzhe Xu", "email": "xianzhe.xxz@alibaba-inc.com"},
51 | {"name": "Zhaoyang Liu", "email": "jingmu.lzy@alibaba-inc.com"},
52 | {"name": "Boyin Liu", "email": "liuboyin.lby@alibaba-inc.com"},
53 | ]
54 |
55 | setuptools.setup(
56 | name="memoryscope",
57 | version="0.1.1.0",
58 | author=', '.join([author['name'] for author in authors]),
59 | author_email=', '.join([author['email'] for author in authors]),
60 | description="MemoryScope is a powerful and flexible long term memory system for LLM chatbots. It consists of a "
61 | "memory database and three customizable system operations, which can be flexibly combined to provide "
62 | "robust long term memory services for your LLM chatbot.",
63 | long_description=long_description,
64 | long_description_content_type="text/markdown",
65 | url="https://github.com/modelscope/memoryscope",
66 | project_urls={
67 | "Bug Tracker": "https://github.com/modelscope/memoryscope/issues",
68 | },
69 | classifiers=[
70 | "Programming Language :: Python :: 3",
71 | "License :: OSI Approved :: Apache Software License",
72 | "Operating System :: OS Independent",
73 | ],
74 | package_dir={"": "."},
75 | package_data={"": extra_files},
76 | include_package_data=True,
77 | entry_points={
78 | 'console_scripts': ['memoryscope=memoryscope:cli'],
79 | },
80 | packages=setuptools.find_packages(where="."),
81 | python_requires=">=3.10",
82 | install_requires=_process_requirements(),
83 | )
84 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modelscope/MemoryScope/715b75e520b1029885647afc6b37419182ca94ce/tests/__init__.py
--------------------------------------------------------------------------------
/tests/models/test_models_lli_embedding.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | sys.path.append(".") # noqa: E402
4 |
5 | import asyncio
6 | import unittest
7 |
8 | from memoryscope.core.models.llama_index_embedding_model import LlamaIndexEmbeddingModel
9 | from memoryscope.core.utils.logger import Logger
10 |
11 |
12 | class TestLLIEmbedding(unittest.TestCase):
13 | """Tests for LlamaIndexEmbeddingModel"""
14 |
15 | def setUp(self):
16 | config = {
17 | "module_name": "openai_embedding",
18 | "model_name": "text-embedding-3-large",
19 | "clazz": "models.base_embedding_model"
20 | }
21 | self.emb = LlamaIndexEmbeddingModel(**config)
22 | self.logger = Logger.get_logger()
23 |
24 | def test_single_embedding(self):
25 | text = "您吃了吗?"
26 | result = self.emb.call(text=text)
27 | self.logger.info(result.m_type)
28 | self.logger.info(len(result.embedding_results))
29 |
30 | def test_batch_embedding(self):
31 | texts = ["您吃了吗?",
32 | "吃了吗您?"]
33 | result = self.emb.call(text=texts)
34 | print()
35 | self.logger.info(result)
36 |
37 | def test_async_embedding(self):
38 | texts = ["您吃了吗?",
39 | "吃了吗您?"]
40 |         # Call the async function and wait for its result
41 | result = asyncio.run(self.emb.async_call(text=texts))
42 | print()
43 | self.logger.info(result)
44 |
--------------------------------------------------------------------------------
/tests/models/test_models_lli_generation.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | sys.path.append(".") # pylint: disable=E402
4 |
5 | import unittest
6 | import time
7 | import asyncio
8 | from memoryscope.scheme.message import Message
9 | from memoryscope.core.models.llama_index_generation_model import LlamaIndexGenerationModel
10 | from memoryscope.core.utils.logger import Logger
11 |
12 |
13 | class TestLLILLM(unittest.TestCase):
14 | """Tests for LlamaIndexGenerationModel"""
15 |
16 | def setUp(self):
17 | config = {
18 | "module_name": "openai_generation",
19 | "model_name": "gpt-3.5-turbo",
20 | "clazz": "models.llama_index_generation_model",
21 | }
22 | self.llm = LlamaIndexGenerationModel(**config)
23 | self.logger = Logger.get_logger()
24 |
25 | def test_llm_prompt(self):
26 | prompt = "你是谁?"
27 | ans = self.llm.call(stream=False, prompt=prompt)
28 | self.logger.info(ans.message.content)
29 |
30 | def test_llm_messages(self):
31 | messages = [Message(role="system", content="you are a helpful assistant."),
32 | Message(role="user", content="你如何看待黄金上涨?")]
33 | ans = self.llm.call(stream=False, messages=messages)
34 | self.logger.info(ans.message.content)
35 |
36 | def test_llm_prompt_stream(self):
37 | prompt = "你如何看待黄金上涨?"
38 | ans = self.llm.call(stream=True, prompt=prompt)
39 | self.logger.info("-----start-----")
40 | for a in ans:
41 | sys.stdout.write(a.delta)
42 | sys.stdout.flush()
43 | time.sleep(0.1)
44 | self.logger.info("-----end-----")
45 |
46 | def test_llm_messages_stream(self):
47 | messages = [Message(role="system", content="you are a helpful assistant."),
48 | Message(role="user", content="你如何看待黄金上涨?")]
49 | ans = self.llm.call(stream=True, messages=messages)
50 | self.logger.info("-----start-----")
51 | for a in ans:
52 | sys.stdout.write(a.delta)
53 | sys.stdout.flush()
54 | time.sleep(0.1)
55 | self.logger.info("-----end-----")
56 |
57 | def test_async_llm_messages(self):
58 |
59 | messages = [Message(role="system", content="you are a helpful assistant."),
60 | Message(role="user", content="你如何看待黄金上涨?")]
61 |
62 | ans = asyncio.run(self.llm.async_call(messages=messages))
63 | self.logger.info(ans.message.content)
64 |
--------------------------------------------------------------------------------
/tests/models/test_models_lli_rank.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import unittest
3 |
4 | from memoryscope.core.models.llama_index_rank_model import LlamaIndexRankModel
5 |
6 |
7 | class TestLLIReRank(unittest.TestCase):
8 | """Tests for LlamaIndexRerankModel"""
9 |
10 | def setUp(self):
11 | config = {
12 | "module_name": "dashscope_rank",
13 | "model_name": "gte-rerank",
14 | "clazz": "models.llama_index_rerank_model"
15 | }
16 | self.reranker = LlamaIndexRankModel(**config)
17 |
18 | def test_rerank(self):
19 | query = "吃啥?"
20 | documents = ["您吃了吗?",
21 | "吃了吗您?"]
22 | result = self.reranker.call(
23 | documents=documents,
24 | query=query)
25 | print(result)
26 |
27 | def test_async_rerank(self):
28 | query = "吃啥?"
29 | documents = ["您吃了吗?",
30 | "吃了吗您?"]
31 | result = asyncio.run(self.reranker.async_call(
32 | documents=documents,
33 | query=query))
34 | print(result)
35 |
--------------------------------------------------------------------------------
/tests/other/init_test.py:
--------------------------------------------------------------------------------
1 | def validate_path():
2 | import os, sys
3 |
4 | os.path.dirname(__file__)
5 | root_dir_assume = os.path.abspath(os.path.dirname(__file__) + "/../..")
6 | os.chdir(root_dir_assume)
7 | sys.path.append(root_dir_assume)
8 |
9 |
10 | validate_path() # validate path so you can run from base directory
11 |
--------------------------------------------------------------------------------
/tests/other/read_prompt.yaml:
--------------------------------------------------------------------------------
1 | a:
2 | cn: c
3 | en: e
--------------------------------------------------------------------------------
/tests/other/read_yaml.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | sys.path.append(".") # noqa: E402
4 |
5 | from memoryscope.core.utils.prompt_handler import PromptHandler
6 |
7 | if __name__ == "__main__":
8 | file_path: str = __file__
9 | print(file_path)
10 | handler = PromptHandler(__file__, language="cn", prompt_file="read_prompt", )
11 | print(handler.prompt_dict)
12 |
--------------------------------------------------------------------------------
/tests/other/test_attr.py:
--------------------------------------------------------------------------------
1 | class MyClass:
2 | def __init__(self):
3 | self.existing_attribute = "I exist"
4 |
5 | def do(self, name: str, **kwargs):
6 | print("do %s %s" % (name, kwargs))
7 |
8 | def __getattr__(self, name):
9 | return lambda **kwargs: self.do(name, **kwargs)
10 |
11 |
12 | # Create an instance of the class
13 | obj = MyClass()
14 |
15 | obj.haha(a=1, b=2)
16 |
--------------------------------------------------------------------------------
/tests/other/test_cli.py:
--------------------------------------------------------------------------------
1 | import fire
2 |
3 |
4 | class CLI:
5 | def run(self, **kwargs):
6 | """
7 | 打印传入的 kwargs
8 | """
9 | for key, value in kwargs.items():
10 | print(f"{key}: {value}")
11 |
12 |
13 | if __name__ == '__main__':
14 | fire.Fire(CLI().run)
15 |
--------------------------------------------------------------------------------