├── .github ├── mergify.yml └── workflows │ ├── integration.yml │ ├── lint.yml │ ├── publish_dev_package.yml │ ├── publish_release_package.yml │ ├── pull_request.yml │ ├── release_event.yml │ └── update_branch_doc_event.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── OWNERS ├── README.md ├── codecov.yml ├── docs ├── Makefile ├── README.md ├── draft │ ├── collection.md │ ├── connections.md │ ├── index.md │ ├── partition.md │ ├── schema.md │ ├── search.md │ └── utility.md ├── make.bat └── source │ ├── _templates │ ├── autosummaryclass.rst │ └── layout.html │ ├── about.rst │ ├── api │ ├── api.rst │ ├── collection.rst │ ├── connections.rst │ ├── future.rst │ ├── milvus_index.rst │ ├── partition.rst │ ├── schema.rst │ ├── search.rst │ └── utility.rst │ ├── conf.py │ ├── contribute.rst │ ├── index.rst │ ├── install.rst │ ├── param.rst │ ├── res │ ├── Intro_to_Indexes.md │ └── about_documentation.md │ ├── results.rst │ └── tutorial.rst ├── examples ├── collection.py ├── connections.py ├── example.py ├── example_index.py ├── hello_milvus.py └── partition.py ├── pylint.conf ├── pymilvus_orm ├── __init__.py ├── collection.py ├── connections.py ├── constants.py ├── default_config.py ├── exceptions.py ├── future.py ├── index.py ├── mutation.py ├── partition.py ├── prepare.py ├── schema.py ├── search.py ├── types.py └── utility.py ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── conftest.py ├── mock_milvus.py ├── mock_result.py ├── pytest.ini ├── test_collection.py ├── test_connections.py ├── test_index.py ├── test_partition.py ├── test_schema.py ├── test_types.py ├── test_utility.py └── utils.py /.github/mergify.yml: -------------------------------------------------------------------------------- 1 | pull_request_rules: 2 | - name: Test passed 3 | conditions: 4 | - base=main 5 | - "status-success=Code lint check (3.6)" 6 | - "status-success=Run Python Tests (3.6)" 7 | actions: 8 | label: 9 | add: 10 | - ci-passed 11 | 12 | 
-------------------------------------------------------------------------------- /.github/workflows/integration.yml: -------------------------------------------------------------------------------- 1 | name: Master branch integration 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | name: Run Python Tests 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: [3.6] 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install pytest 28 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 29 | 30 | - name: Test with pytest 31 | run: | 32 | make codecov 33 | 34 | - name: Upload coverage to Codecov 35 | uses: codecov/codecov-action@v1 36 | with: 37 | token: ${{ secrets.CODECOV_TOKEN }} 38 | flags: unittests -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: PyLint 2 | 3 | # Run this workflow every time a new commit pushed to main repository 4 | on: 5 | pull_request: 6 | branches: 7 | - main 8 | 9 | 10 | jobs: 11 | code-lint: 12 | name: Code lint check 13 | runs-on: ubuntu-18.04 14 | strategy: 15 | matrix: 16 | python-version: [3.6] 17 | steps: 18 | - name: Checkout code 19 | uses: actions/checkout@v2 20 | - name: Set up python 21 | uses: actions/setup-python@v2 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install requirements 25 | run: | 26 | pip install -r requirements.txt 27 | - name: Run pylint 28 | shell: bash 29 | run: | 30 | pylint --rcfile=pylint.conf pymilvus_orm 31 | -------------------------------------------------------------------------------- 
/.github/workflows/publish_dev_package.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distributions 📦 to TestPyPI 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'main' 7 | 8 | jobs: 9 | build-n-publish: 10 | name: Build and publish Python 🐍 distributions 📦 to TestPyPI 11 | runs-on: ubuntu-18.04 12 | 13 | steps: 14 | - name: Check out from Git 15 | uses: actions/checkout@master 16 | - name: Get history and tags for SCM versioning 17 | run: | 18 | git fetch --prune --unshallow 19 | git fetch --depth=1 origin +refs/tags/*:refs/tags/* 20 | - name: Set up Python 3.7 21 | uses: actions/setup-python@v1 22 | with: 23 | python-version: 3.7 24 | - name: Install pypa/build 25 | run: >- 26 | python -m 27 | pip install 28 | build 29 | --user 30 | - name: Build a binary wheel and a source tarball 31 | run: >- 32 | python -m 33 | build 34 | --sdist 35 | --wheel 36 | --outdir dist/ 37 | . 38 | - name: Publish distribution 📦 to Test PyPI 39 | uses: pypa/gh-action-pypi-publish@master 40 | with: 41 | password: ${{ secrets.TOKEN_TEST_PYPI }} 42 | repository_url: https://test.pypi.org/legacy/ 43 | 44 | -------------------------------------------------------------------------------- /.github/workflows/publish_release_package.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distributions 📦 to PyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v2.[0-9]+.[0-9]+**' 7 | jobs: 8 | build-n-publish: 9 | name: Build and publish Python 🐍 distributions 📦 to PyPI 10 | runs-on: ubuntu-18.04 11 | 12 | steps: 13 | - name: Check out from Git 14 | uses: actions/checkout@master 15 | - name: Get history and tags for SCM versioning 16 | run: | 17 | git fetch --prune --unshallow 18 | git fetch --depth=1 origin +refs/tags/*:refs/tags/* 19 | - name: Set up Python 3.7 20 | uses: actions/setup-python@v1 21 | with: 22 | python-version: 3.7 23 | - name: Install pypa/build 24 | run: >- 25 | python 
-m 26 | pip install 27 | build 28 | --user 29 | - name: Build a binary wheel and a source tarball 30 | run: >- 31 | python -m 32 | build 33 | --sdist 34 | --wheel 35 | --outdir dist/ 36 | . 37 | - name: Publish distribution 📦 to PyPI 38 | if: startsWith(github.ref, 'refs/tags') 39 | uses: pypa/gh-action-pypi-publish@master 40 | with: 41 | password: ${{ secrets.TOKEN_PYPI }} 42 | 43 | -------------------------------------------------------------------------------- /.github/workflows/pull_request.yml: -------------------------------------------------------------------------------- 1 | name: Test on pull request 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | name: Run Python Tests 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: [3.6] 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install pytest 28 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 29 | 30 | - name: Test with pytest 31 | run: | 32 | make unittest -------------------------------------------------------------------------------- /.github/workflows/release_event.yml: -------------------------------------------------------------------------------- 1 | name: dispatch new release event 2 | on: 3 | release: 4 | types: [published] 5 | 6 | jobs: 7 | dispatch: 8 | name: Dispatch event 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Tag name 12 | id: tag_name 13 | run: | 14 | echo ::set-output name=SOURCE_TAG::${GITHUB_REF#refs/tags/} 15 | - name: Dispatch tag name 16 | id: dispatch_tag_name 17 | run: | 18 | curl \ 19 | -X POST \ 20 | -H "Accept: application/vnd.github.v3+json" \ 21 | 
"https://api.github.com/repos/milvus-io/web-content/actions/workflows/updateApiReference.yml/dispatches" \ 22 | -d '{"ref":"master", "inputs": { "tagName": "${{ steps.tag_name.outputs.SOURCE_TAG }}", "repoName": "${{ github.event.repository.name }}" } }' \ 23 | -u ".:${{secrets.DOC_TOKEN}}" -------------------------------------------------------------------------------- /.github/workflows/update_branch_doc_event.yml: -------------------------------------------------------------------------------- 1 | name: Dispatch event to milvus-io/docs if update docs files 2 | 3 | on: 4 | push: 5 | branches-ignore: 6 | - 'main' 7 | paths: 8 | - 'docs/**' 9 | 10 | jobs: 11 | dispatch_event: 12 | name: Dispatch event 13 | runs-on: ubuntu-latest 14 | steps: 15 | - id: extract_branch 16 | name: Extract branch name 17 | shell: bash 18 | run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" 19 | - id: dispatch_branch_name 20 | name: Dispatch branch name if update any docs 21 | run: | 22 | curl \ 23 | -X POST \ 24 | -H "Accept: application/vnd.github.v3+json" \ 25 | "https://api.github.com/repos/milvus-io/web-content/actions/workflows/updateApiByBranchEvent.yml/dispatches" \ 26 | -d '{"ref":"master", "inputs": { "branchName": "${{ steps.extract_branch.outputs.branch }}", "repoName": "${{ github.event.repository.name }}" } }' \ 27 | -u ".:${{secrets.DOC_TOKEN}}" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE 2 | ## Pycharm IDE 3 | /.idea 4 | **/.idea/* 5 | .vscode 6 | 7 | # Python files 8 | *.pyc 9 | dist 10 | docs/build/ 11 | docs/_build/ 12 | *.egg 13 | *.egg-info 14 | *.eggs/ 15 | **/__pycache__/ 16 | .pytest*/ 17 | /build/ 18 | .remember 19 | venv/ 20 | 21 | # Env 22 | .env 23 | gtags.conf 24 | 25 | # Local Temp 26 | temp/ 27 | *.swp 28 | *.swo 29 | assets/ 30 | TODO 31 | 32 | # GitHub 33 | .coverage 34 | htmlcov/ 35 | debug/ 36 
| .codecov.yml 37 | 38 | # Code coverage 39 | coverage.xml 40 | 41 | # macOS 42 | .DS_Store 43 | 44 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # pymilvus 0.0.1(TBD) 2 | 3 | ## Task 4 | - Get value of metadata fields from search result output (#90) 5 | - Improve interface documentation (#89) 6 | - Add examples in partition.py (#88) 7 | - add example for index.py (#87) 8 | - Format collection exception raising (#84) 9 | - Add dim param example (#86) 10 | - Add search for hello_milvus (#85) 11 | - Add wrapper to SearchResult-related class (#83) 12 | - Improve hello_milvus file (#81) 13 | - update return format in index.py (#82) 14 | - Move return type behind the return annotation in partition.py (#79) 15 | - update exception in index.py and add index.rst (#80) 16 | - Format returns (#78) 17 | - Improve the docstring of connections.Connections (#77) 18 | - Add interface connections (#76) 19 | - Add annotions in partition.py (#74) 20 | - Add exceptions and errors in collection (#75) 21 | - Add exceptions and errors class (#70) 22 | - Implement and add example to Collection.search (#73) 23 | - Add requirements for installing and add hello_milvus (#72) 24 | - Add index related examples (#71) 25 | - Add util implements (#63) 26 | - Fix partition and add partition example (#67) 27 | - Add interface documentation about collection (#69) 28 | - Update unittest (#68) 29 | - Test trigger readocs (#66) 30 | - Add case when CollectionSchema specifies the primary field (#61) 31 | - Referencing other interfaces in interface comments (#65) 32 | - Fix todo lists (#64) 33 | - Update todo items (#62) 34 | - Fix mistake return type (#60) 35 | - Add TODO marks (#59) 36 | - Fix bug for inserting when collection schema has a primary field (#58) 37 | - Add create index test for binary data (#56) 38 | - Update tests in test_partition (#57) 39 | - Add 
codecov config file (#55) 40 | - Add check in some interfaces (#54) 41 | - Add test for create index (#53) 42 | - Update example (#52) 43 | - Add copyright and modify params and documentation (#51) 44 | - Delete auto_id param for collection schema (#49) 45 | - Add an example (#50) 46 | - Add example for testing collection (#48) 47 | - Add codecov badge in README.md (#47) 48 | - Update GitHub Actions (#46) 49 | - Fix some partition unittests (#45) 50 | - Add partition impl in MockMilvus (#44) 51 | - Update mergify (#43) 52 | - Implement mock milvus (#42) 53 | - Update index and partition unittests (#41) 54 | - Implement Collection.insert, Support list and tuple (#40) 55 | - Add example for collection init method (#36) 56 | - Update doc: add util doc 57 | - Update github actions (#35) 58 | - Update unittest (#32) 59 | - Add to_dict for CollectionSchema and FieldSchema (#31) 60 | - Implement the init method of the v1 of collection (#26) 61 | - Fix bug for html bug (#29) 62 | - Fix bug: access attribute from None (#25) 63 | - Add example to Collection.load & Collection.release (#28) 64 | - Update index interfaces (#27) 65 | - Impl CollectionSchema and FieldSchema (#24) 66 | - Add example for proprety methods of collection(#17) 67 | - Mock milvus.Milvus in test_connections.py (#21) 68 | - Fix compile error (#23) 69 | - Add mergify file (#22) 70 | - Update interface for partition module (#20) 71 | - Update documentation (#19) 72 | - Realized the properties of collection (#16) 73 | - Update interfaces in collection module (#14) 74 | - Update interfaces in utility module (#11) 75 | - Implement the management class of milvus connections (#10) 76 | - Add GitHub Actions to run unittests (#12) 77 | - rename milvus_orm to pymilvus_orm (#9) 78 | - Update doc (#8) 79 | - Add the annotation of apis (#7) 80 | - Add annotation for apis (#6) 81 | - Add sdk interface draft (#5) 82 | - Initialize the api framework #2 83 | - Initialize the api framework 84 | - Establish the initial 
directory structure and copy the document files (#1) 85 | - Initial commit -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to pymilvus-orm 2 | 3 | We welcome all kinds of contributions. Simply file an issue stating your reason 4 | and plans for making the change, update CHANGELOG.md, and create a pull request 5 | to the current active branch. Make sure to refer to the issue you filed in your 6 | PR's description. Cheers! 7 | 8 | 9 | ## What contributions can I make? 10 | 11 | Any contributions are allowed without changing the project architecture and interfaces here. 12 | You are welcome to make contributions. 13 | 14 | 15 | ## How can I contribute? 16 | 17 | ### Development environment 18 | 19 | You are recommended to develop in a virtual environment created by a Python environment management tool, 20 | for example, virtualenv or conda. When your virtual environment is activated, run the command `pip install -r requirements.txt` 21 | to install dependent packages. 22 | 23 | 24 | ### Coding Style 25 | Before submitting a pull request, make sure the code passes the style check. Run the command `pylint --rcfile=pylint.conf pymilvus_orm` 26 | to check it. 27 | 28 | 29 | ## Run unit test with code coverage 30 | 31 | Before submitting your PR, make sure you have run the unit tests, and your code coverage rate is >= 90%. 32 | 33 | ```shell 34 | $ pytest --cov=pymilvus_orm --cov-report=html 35 | ``` 36 | 37 | You may need a running Milvus server when you run the unit tests. See more details on [Milvus server](https://github.com/milvus-io/milvus). 38 | 39 | 40 | ## Update CHANGELOG.md 41 | 42 | Add issue tips to CHANGELOG.md, and make sure all issue tips are sorted by issue number in ascending order. 
43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | unittest: 3 | PYTHONPATH=`pwd` pytest --cov=pymilvus_orm tests -x -rxXs 4 | 5 | lint: 6 | PYTHONPATH=`pwd` pylint --rcfile=pylint.conf pymilvus_orm 7 | 8 | codecov: 9 | PYTHONPATH=`pwd` pytest --cov=pymilvus_orm --cov-report=xml tests -x -rxXs 10 | 11 | example: 12 | PYTHONPATH=`pwd` python examples/example.py 13 | 14 | example_index: 15 | PYTHONPATH=`pwd` python examples/example_index.py 16 | -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- 1 | filters: 2 | ".*": 3 | reviewers: 4 | - bigsheeper 5 | - DragonDriver 6 | - fishpenguin 7 | - scsven 8 | - xiaocai2333 9 | - XuanYang-cn 10 | approvers: 11 | - czs007 12 | - scsven 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### The content of this repository had been merged into [pymilvus](https://github.com/milvus-io/pymilvus) repository. This repository was archived on August 31th, and no changes to it were allowed ever since. Please find newest codes or assistance at pymilvus repository. 
2 | 3 | --- 4 | 5 | # Milvus Python SDK 6 | 7 | [![version](https://img.shields.io/pypi/v/pymilvus-orm.svg?color=blue)](https://pypi.org/project/pymilvus-orm/) 8 | [![Supported Python Versions](https://img.shields.io/pypi/pyversions/pymilvus-orm?logo=python&logoColor=blue)](https://pypi.org/project/pymilvus-orm/) 9 | [![Downloads](https://pepy.tech/badge/pymilvus-orm)](https://pepy.tech/project/pymilvus-orm) 10 | [![Downloads](https://pepy.tech/badge/pymilvus-orm/month)](https://pepy.tech/project/pymilvus-orm/month) 11 | [![Downloads](https://pepy.tech/badge/pymilvus-orm/week)](https://pepy.tech/project/pymilvus-orm/week) 12 | [![license](https://img.shields.io/hexpm/l/plug.svg?color=green)](https://github.com/milvus-io/pymilvus-orm/blob/master/LICENSE) 13 | [![Mergify Status][mergify-status]][mergify] 14 | 15 | [mergify]: https://mergify.io 16 | [mergify-status]: https://img.shields.io/endpoint.svg?url=https://gh.mergify.io/badges/milvus-io/pymilvus-orm&style=plastic 17 | 18 | Another Python SDK for [Milvus](https://github.com/milvus-io/milvus). To contribute code to this project, please read our [contribution guidelines](https://github.com/milvus-io/milvus/blob/master/CONTRIBUTING.md) first. If you have some ideas or encounter a problem, you can find us in the Slack channel [#py-milvus](https://milvusio.slack.com/archives/C024XTWMT4L). 
19 | 20 | 21 | ## Compatibility 22 | The following table shows Milvus versions and recommended PyMilvus-ORM versions: 23 | 24 | |Milvus version| Recommended PyMilvus-ORM version | 25 | |:-----:|:-----:| 26 | | 2.0.0-RC1 | 2.0.0rc1 | 27 | | 2.0.0-RC2 | 2.0.0rc2 | 28 | | 2.0.0-RC4 | 2.0.0rc4 | 29 | 30 | 31 | ## Installation 32 | 33 | You can install PyMilvus-ORM via `pip3` for Python 3.6+: 34 | 35 | ```shell 36 | # Note this will only install the latest stable version 37 | $ pip3 install pymilvus-orm 38 | ``` 39 | 40 | You can install a specific version of PyMilvus-ORM by: 41 | 42 | ```shell 43 | $ pip3 install pymilvus-orm==2.0.0rc4 44 | ``` 45 | 46 | You can upgrade PyMilvus-ORM to the latest stable version by: 47 | 48 | ```shell 49 | $ pip3 install --upgrade pymilvus-orm 50 | ``` 51 | 52 | 53 | ## Documentation 54 | 55 | Documentation is available online: https://pymilvus-orm.readthedocs.io/. 56 | 57 | 58 | ## Packages 59 | 60 | ### Released packages 61 | 62 | The release of PyMilvus ORM is managed on GitHub, and GitHub Actions will package and upload each version to PyPI. 63 | 64 | The release version number of PyMilvus ORM follows PEP440, the format is x.y.z, and the corresponding git tag name is vx.y.z (x/y/z are numbers from 0 to 9). 65 | 66 | For example, after PyMilvus ORM 1.0.1 is released, a tag named v1.0.1 can be found on GitHub, and a package with version 1.0.1 can be downloaded on PyPI. 67 | 68 | ### Developing packages 69 | 70 | The commits on the development branch of each version will be packaged and uploaded to Test PyPI. Development branches refer to branches such as 1.0 and 1.1, and version releases are generated from the development branches, such as 1.0.1 and 1.0.2. 71 | 72 | The package name generated by the development branch is `x.y.z.dev<N>`, where `<N>` is the number of commits that differ from the most recent release. 73 | 74 | For example, after the release of 1.0.1, two commits were submitted on the 1.0 branch. 
At this time, the automatic packaging version number of the development branch is 1.0.1.dev2. 75 | 76 | To install a specific version package on test.pypi.org, you need to append the parameter --extra-index-url after pip, for example: 77 | ```shell 78 | $ python3 -m pip install --extra-index-url https://test.pypi.org/simple/ pymilvus-orm==x.y.z.dev 79 | ``` 80 | 81 | 82 | ## License 83 | [Apache License 2.0](LICENSE) 84 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | target: 0% 5 | patch: 6 | target: 0% 7 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Documentation Structure 2 | 3 | All the documentation files store under folder `source`. There are two default file: 4 | - **conf.py**: configuration file about documentation. 
5 | - **index.rst**: corresponds to the index page of the documentation website. All other pages 6 | need to be listed here. For example, there is a page named `param`, and under 7 | folder `source` a file named `param.rst` exists, `param` needs to be added to index.rst 8 | under `.. toctree::`. 9 | 10 | 11 | # Documentation Update 12 | Except for index.rst, every rst file corresponds to a page on the documentation website. Add a new rst file if 13 | you want to add a new page on a website or modify rst files to update existing page contents. 14 | 15 | 16 | # Browse documentation locally 17 | 18 | ## Setup environment 19 | 20 | * create a virtual environment 21 | Here we show how to create a virtual environment using the built-in tool ``venv``; you can also choose 22 | [Virtualenv](https://virtualenv.pypa.io/en/latest/). 23 | ```shell 24 | $ python3 -m venv venv 25 | ``` 26 | 27 | After the command above, a new virtual environment is created and stored in the folder `venv` under the current path. 28 | Then, activate it. 29 | ```shell 30 | $ source venv/bin/activate 31 | ``` 32 | 33 | Next, install required third-party packages. 34 | ```shell 35 | $ pip install -r requirements.txt 36 | ``` 37 | 38 | ## Build documentation locally 39 | ```shell 40 | $ cd docs 41 | $ make html 42 | ``` 43 | 44 | The documentation is generated under the directory `build/html`. 45 | 46 | ## Browse documentation locally 47 | To preview it, you can open index.html in your browser. 48 | 49 | Or run a web server in directory `build/html`: 50 | ```shell 51 | $ python -m http.server 52 | ``` 53 | 54 | Then open your browser to `http://localhost:8000`. 55 | 56 | # Submit documentation change 57 | After changing the documentation, please use Git to commit your modification and push it to your remote repository, 58 | then open a pull request to repository `milvus-io/pymilvus-orm`. 
59 | 60 | -------------------------------------------------------------------------------- /docs/draft/collection.md: -------------------------------------------------------------------------------- 1 | #### pymilvus.Collection 2 | 3 | --- 4 | 5 | 6 | 7 | ##### Accessing and constructing collection 8 | 9 | | Methods | Descriptions | 参数 | 返回值 | 10 | | -------------------------------------------------- | :------------------------------------------- | ------------------------------------------------------------ | -------------- | 11 | | Collection(name, data=None, schema=None, **kwargs) | 创建Collection,如果不存在同名的,则新建一个 | name 类型 string
data 类型是 pandas.DataFrame
schema 类型 是CollectionSchema
kwargs 可传入参数是 primary_key = field_name | Collection对象 | 12 | | | | | | 13 | 14 | 15 | 16 | ##### Manipulating and querying collection meta 17 | 18 | | Properties | Descriptions | 参数 | 返回值 | 19 | | ----------------------- | ----------------------------- | ---- | ----------------------- | 20 | | Collection.schema | Return the collection schema. | / | CollectionSchema 对象 | 21 | | Collection.description | 返回自定义描述 | / | 类型 string,自定义描述 | 22 | | Collection.name | 返回collection名字 | / | 类型 string, 名字 | 23 | | Collection.is_empty | 是否为空 | / | 类型 boolean | 24 | | Collection.num_entities | 返回行数 | / | 类型int | 25 | 26 | 27 | 28 | ##### Manipulating, loading, and querying collection 29 | 30 | | Methods | Descriptions | 参数 | 返回值 | 31 | | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | 32 | | Collection.drop(**kwargs) | Drop the collection, as well as its indexes. | kwargs reserved.目前为空 | None 或 Raise Exception | 33 | | Collection.load(field_names=None, index_names=None, partition_names=None, **kwargs) | Load the collection from disk to memory. | field_names 类型是 list(string)
index_names 类型是 list(string)
partition_names 类型是 list(string)
kwargs reserved.目前为空 | None或者Raise Exception | 34 | | Collection.release(**kwargs) | Release the collection from memory. | kwargs reserved.目前为空 | None或者Raise Exception | 35 | | Collection.insert(data, partition_name="", **kwargs) | Insert data into the collection, or into one of its partitions. | data 类型是 list-like(list, tuple) 对象或者pandas.DataFrame,data的维度需要和列的数目对齐
partition_name 类型是 string
kwargs可以是 _async=False, _callback | ids 类型是 list(int) or list(string)
或者 MutationFuture 或者Raise Exception | 36 | | Collection.search(data, anns_field, params, limit, expr="", partition_names=None, output_fields=None, **kwargs) | Vector similarity search with an optional boolean expression as filters. | data是 list-like(list, tuple) 或者 pd.Series
anns_field 类型是 string, 表示在哪个列上进行向量的近似查询
params 类型是 dict
limit 类型是 int
expr 类型是string
partition_names类型是 list(string)
output_fields类型是list(string)
kwargs 可以是 async=False | SearchResultFuture或者 SearchResult 或者Raise Exception | 37 | | Collection.query(expr="", output_fields=None, partition_names=None) | Query with a set of criteria, and results in a list of records that match the query exactly. | expr 类型是string
output_fields 类型是 list(string), 表示要返回哪些列的原始数据
partition_names类型是 list(string) | dict 或者Raise Exception | 38 | 39 | 40 | 41 | ##### Accessing and constructing partition 42 | 43 | | Methods | Descriptions | 参数 | 返回值 | 44 | | ---------------------------------------------------------- | ----------------------------------------------------- | -------------------------- | -------------------------------- | 45 | | Collection.partitions | Return all partitions of the collection. | / | list(Partition对象) | 46 | | Collection.partition(partition_name) | Return the partition corresponding to name. | partition_name类型是string | None或者Partition对象 | 47 | | Collection.create_partition(partition_name, description="") | Create a new partition if it does not exist. | | Partition对象或者Raise Exception | 48 | | Collection.has_partition(partition_name) | Checks if a specified partition exists. | partition_name类型是string | boolean | 49 | | Collection.drop_partition(partition_name, **kwargs) | Drop the partition and its corresponding index files. | partition_name类型是string | None或者Raise Exception | 50 | 51 | 52 | 53 | ##### Accessing and constructing index 54 | 55 | | Methods | Descriptions | 参数 | 返回值 | 56 | | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ----------------------------- | 57 | | Collection.indexes | Return all indexes of the collection. | / | list(Index对象) | 58 | | Collection.index(index_name) | Return the index corresponding to name. | index_name类型是 string | None或者Index对象 | 59 | | Collection.has_index(index_name) | Checks whether a specified index exists. | index_name类型是 string | bool | 60 | | Collection.create_index(field_name, index_name, index_params, **kwargs) | Create index on a specified column according to the index parameters. Return Index Object. | field_name类型是string
index_params类型是dict
index_name类型是 string | Index对象或者 Raise Exception | 61 | | Collection.drop_index(index_name, **kwargs) | Drop index and its corresponding index files. | index_name类型是string | None或者Raise Exception | 62 | 63 | -------------------------------------------------------------------------------- /docs/draft/connections.md: -------------------------------------------------------------------------------- 1 | | Methods | Descriptions | 参数 | 返回值 | 2 | | ---------------------------------------------- | :------------------------------- | ------------------------------------------------------------ | ----------------------- | 3 | | Connections.configure(**kwargs) | Configure connections. | milvus 客户端连接相关配置,包括 ip,port 等; | None或Raise Exception | 4 | | Connections.add_connection(alias, conn) | Add a connection using alias. | alias:待添加的 milvus 客户端连接 conn 的别名,conn:milvus 客户端连接; | None 或 Raise Exception | 5 | | Connections.remove_connection(alias) | Remove a connection by alias. | alias:待删除的 milvus 客户端连接别名; | None 或 Raise Exception | 6 | | Connections.create_connection(alias, **kwargs) | Create a connection named alias. | alias:待创建的 milvus 客户端连接别名,kwargs:客户端连接配置,包括 ip,port 等; | None 或 Raise Exception | 7 | | Connections.get_connection(alias) | Get a connection by alias. | alias:待使用的 milvus 客户端连接别名; | milvus 客户端连接 | 8 | -------------------------------------------------------------------------------- /docs/draft/index.md: -------------------------------------------------------------------------------- 1 | #### pymilvus.Index 2 | 3 | --- 4 | 5 | ##### Accessing and constructing Index 6 | 7 | | Methods | Descriptions | 参数 | 返回值 | 8 | | ---------------------------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ---------------------------- | 9 | | Index(collection, field_name, index_params, name="") | Create index on a specified column according to the index parameters. 
| collection类型是 Collection
name 类型是 string
field_name 类型是 string
index_params 类型是 dict | Index 对象或者 Raise Exception | 10 | | Index.name | Return the index name. | / | string | 11 | | Index.params | Return the index params. | / | dict (克隆) | 12 | | Index.collection_name | Return corresponding collection name. | / | string | 13 | | Index.field_name | Return corresponding column name. | / | string | 14 | 15 | ##### Manipulating 16 | 17 | | Methods | Descriptions | 参数 | 返回值 | 18 | | ------------ | --------------------------------------------- | ---- | ----------------------- | 19 | | Index.drop() | Drop index and its corresponding index files. | / | None 或者 Raise Exception | 20 | -------------------------------------------------------------------------------- /docs/draft/partition.md: -------------------------------------------------------------------------------- 1 | #### pymilvus.Partition 2 | 3 | --- 4 | 5 | ##### Manipulating and querying partition meta 6 | 7 | | Methods | Descriptions | 参数 | 返回值 | 8 | | ------------------------------------------- | -------------------------------------- | ---- | ------- | 9 | | Partition(collection, name, description="") | collection类型是 Collection
| | | 10 | | Partition.description | Return the description text. | / | string | 11 | | Partition.name | Return the partition name. | / | string | 12 | | Partition.is_empty | Return whether the partition is empty. | / | boolean | 13 | | Partition.num_entities | Return the number of entities. | / | int | 14 | 15 | ##### Manipulating, loading, and querying partition 16 | 17 | | Methods | Descriptions | 参数 | 返回值 | 18 | | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------- | 19 | | Partition.drop(**kwargs) | Drop the partition, as well as its corresponding index files. | kwargs reserved.目前为空 | None或者Raise Exception | 20 | | Partition.load(field_names=None, index_names=None, **kwargs) | Load the partition from disk to memory. | field_names 类型是list(string)
index_names类型是list(string)
kwargs reserved.目前为空 | None或者Raise Exception | 21 | | Partition.release(**kwargs) | Release the partition from memory. | kwargs reserved.目前为空 | None或者Raise Exception | 22 | | Partition.insert(data, **kwargs) | Insert data into partition. | data 类型是list-like(list, tuple, numpy.ndarray) 对象或者pandas.DataFrame,data的维度需要和列的数目对齐
kwargs可以是 sync=False | None或者InsertFuture或者Raise Exception | 23 | | Partition.search(data, anns_field, params, limit, expr=None, output_fields=None, **kwargs) | Vector similarity search with an optional boolean expression as filters. | data是 list-like(list, tuple),或者pd.Series
anns_field 类型是 string, 表示在哪个列上进行向量的近似查询
params 类型是 dict

limit 类型是 int
expr 类型是string
output_fields类型是list(string)
kwargs 可以是 async=False | SearchResultFuture或者 SearchResult 或者Raise Exception | 24 | | Partition.query(expr="", output_fields=None) | Query with a set of criteria, and results in a list of records that match the query exactly. | expr 类型是string
output_fields 类型是 list(string), 表示要返回哪些列的原始数据 | dict 或者Raise Exception | 25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/draft/schema.md: -------------------------------------------------------------------------------- 1 | ### CollectionSchema 2 | 3 | | Methods | Descriptions | 参数描述 | 返回值 | 4 | | -------------------- | -------------------------------------- | -------------------- | -------------------- | 5 | | CollectionSchema(fields, description="", **kwargs) | 构造一个CollectionSchema对象 | 参数fields是一个 list-like的对象,每个元素是FieldSchema对象
description 类型 string 自定义描述 | CollectionSchema对象或者Raise Exception | 6 | | CollectionSchema.fields | 返回所有的列 | / | list,每个元素是一个 FieldSchema 对象 | 7 | | CollectionSchema.description | 返回自定义描述 | / | string 自定义描述 | 8 | | CollectionSchema.primary_field | 返回主键列的FieldSchema | / | None 或 FieldSchema 对象 | 9 | | CollectionSchema.auto_id | 是否自动生成主键 | / | bool | 10 | | | | | | 11 | 12 | 13 | 14 | ### FieldSchema 15 | 16 | 17 | 18 | | Methods | Descriptions | 参数描述 | 返回值 | 19 | | --------------------------------------------------- | ----------------------- | ------------------------------------------------------------ | ------------------------------------------ | 20 | | FieldSchema(name, dtype, description="", **kwargs) | 构造一个FieldSchema对象 | name 参数类型是string
dtype参数类型是 名为 DataType 的 python enum
description 类型是 string,自定义描述 | FieldSchema对象或者Raise Exception | 21 | | | | | | 22 | | FieldSchema.name | 列名 | / | string | 23 | | FieldSchema.dtype | 返回数据类型 | / | DataType | 24 | | FieldSchema.description | 返回自定义描述 | / | string, 自定义描述 | 25 | | FieldSchema.xxx | 其他属性 | / | None 或者确定的值
比如ndim, str_len等 | 26 | 27 | 28 | 29 | #### DataType 30 | 31 | 32 | | DataType Enum | 33 | | ----------------------- | 34 | | DataType.BOOL | 35 | | DataType.INT8 | 36 | | DataType.INT16 | 37 | | DataType.INT32 | 38 | | DataType.INT64 | 39 | | DataType.FLOAT | 40 | | DataType.DOUBLE | 41 | | DataType.BINARY_VECTOR | 42 | | DataType.FLOAT_VECTOR | 43 | 44 | 45 | 46 | ### 例子 47 | 48 | ```python 49 | fields = [ 50 | FieldSchema("A", DataType.INT32, True), 51 | FieldSchema("B", DataType.INT64), 52 | FieldSchema("C", DataType.FLOAT), 53 | FieldSchema("Vec", DataType.FLOAT_VECTOR)] 54 | 55 | schema = Schema(fields, description = "This is a test collection.") 56 | 57 | assert len(schema.fields()) == len(fields) 58 | ``` 59 | -------------------------------------------------------------------------------- /docs/draft/search.md: -------------------------------------------------------------------------------- 1 | 2 | | Methods | Descriptions | 参数 | 返回值 | 3 | | --------------------------- | ------------------------------------------------------------ | ------------------------ | ----------------------- | 4 | | SearchResult(grpc_response) | Construct a Search Result from response. | 内部构造函数,用户用不到 | SearchResult对象 | 5 | | SearchResult.__iter__() | Iterate the Search Result. Every iteration returns a `Hits` coresponding to a query. | / | python generator | 6 | | SearchResult[n] | Return the `Hits` coresponding to the nth query. | int | Hits对象 | 7 | | SearchResult.__len__() | Return the number of query of Search Result. | / | int | 8 | | SearchResult.done() | 同步等待结果,幂等操作 | / | None或者Raise Exception | 9 | 10 | 11 | | Methods | Descriptions | 参数 | 返回值 | 12 | | ---------------- | ------------------------------------------------------------ | -------------- | --------------------------------- | 13 | | Hits(raw_data) | Construct a Hits object from response. | | Hits对象 | 14 | | Hits.__iter__() | Iterate the `Hits` object. 
Every iteration returns a `Hit` which represent a record coresponding to the query. | | python迭代器,每次迭代返回Hit对象 | 15 | | Hits[k] | Return the kth `Hit` coresponding to the query. | 参数k 类型 int | Hit对象 | 16 | | Hits.__len__() | Return the number of hit record. | / | int | 17 | | Hits.ids | Return the ids of all hit record. | / | list(int)或者list(string) | 18 | | Hits.distances | Return the distances of all hit record. | / | list(float) | 19 | 20 | 21 | | Methods | Descriptions | 参数 | 返回值 | 22 | | -------------- | ------------------------------------------------------------ | ---- | ----------- | 23 | | Hit(raw_data) | Construct a Hit object from response. A hit represent a record coresponding to the query. | | Hit对象 | 24 | | Hit.id | Return the id of the hit record. | / | int /string | 25 | | Hit.distance | Return the distance between the hit record and the query. | / | float | 26 | | Hit.score | Return the calculated score of the hit record, now the score is equal to distance. | / | float | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /docs/draft/utility.md: -------------------------------------------------------------------------------- 1 | #### pymilvus.utility 2 | 3 | --- 4 | 5 | 6 | 7 | ##### Checking job states 8 | 9 | | Methods | Description | 参数 | 返回值 | 10 | | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | 11 | | utility.loading_progress(collection_name, partition_names=[], using="default") | Show # loaded entities vs. # total entities | collection_name 类型是string
partition_names 类型是 list | dict{
num_loaded_entities: int,
num_total_entities:int} | 12 | | utility.wait_for_loading_complete(collection_name, partition_names=[], timeout=None,using="default") | Block until loading is done or Raise Exception after timeout. | collection_name 类型是 string
partition_names 类型是 list | None或Raise Exception | 13 | | utility.index_building_progress(collection_name, index_name="",using="default") | Show # indexed entities vs. # total entities | collection_name 类型是 string
index_name 类型是 string | dict{
num_indexed_entities: int,
num_total_entities:int} | 14 | | utility.wait_for_index_building_complete(collection_name, index_name, timeout = None,using="default") | Block until building is done or Raise Exception after timeout. | collection_name 类型是string
index_name 类型是 string
timeout 类型是 int (秒) | None或Raise Exception | 15 | | utility.has_collection(collection_name,using="default") | Checks whether a specified collection exists. | collection_name 类型是string | boolean | 16 | | utility.has_partition(collection_name, partition_name,using="default") | Checks if a specified partition exists in a collection. | collection_name 类型是string
partition_name 类型是 string | boolean | 17 | | utility.list_collections(timeout=None, using="default") | Returns a list of all collection names | | list(string) | 18 | 19 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/_templates/autosummaryclass.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. 
autoclass:: {{ objname }} -------------------------------------------------------------------------------- /docs/source/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends '!layout.html' %} 2 | {% block document %} 3 | {{super()}} 4 | 5 | Fork me on GitHub 6 | 7 | {% endblock %} 8 | -------------------------------------------------------------------------------- /docs/source/about.rst: -------------------------------------------------------------------------------- 1 | ========================= 2 | About this documentation 3 | ========================= 4 | 5 | 6 | .. mdinclude:: res/about_documentation.md 7 | 8 | -------------------------------------------------------------------------------- /docs/source/api/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | API Reference 4 | ============= 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | 9 | collection 10 | schema 11 | partition 12 | milvus_index 13 | search 14 | connections 15 | utility 16 | future 17 | -------------------------------------------------------------------------------- /docs/source/api/collection.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | Collection 3 | ========== 4 | 5 | The schema of a collection is fixed when the collection is created. A collection schema consists of many fields, 6 | and must contain a vector field. A field is to a collection what a column is to an RDBMS table. The data type is the same within one field. 7 | 8 | A collection is a set of entities, which are also called rows. An entity contains data of all fields. 9 | Each entity can be labeled; a group of entities with the same label is called a partition. An entity without a 10 | label will be tagged with a default label by Milvus. 
11 | 12 | Constructor 13 | ----------- 14 | 15 | +--------------------------------------------+---------------+ 16 | | Constructor | Description | 17 | +============================================+===============+ 18 | | `Collection() <#pymilvus_orm.Collection>`_ | Milvus client | 19 | +--------------------------------------------+---------------+ 20 | 21 | Attributes 22 | ---------- 23 | 24 | +-----------------------------------------------------------+---------------------------------------------------+ 25 | | Attributes | Description | 26 | +===========================================================+===================================================+ 27 | | `schema <#pymilvus_orm.Collection.schema>`_ | Return the schema of collection. | 28 | +-----------------------------------------------------------+---------------------------------------------------+ 29 | | `description <#pymilvus_orm.Collection.description>`_ | Return the description text about the collection. | 30 | +-----------------------------------------------------------+---------------------------------------------------+ 31 | | `name <#pymilvus_orm.Collection.name>`_ | Return the collection name. | 32 | +-----------------------------------------------------------+---------------------------------------------------+ 33 | | `is_empty <#pymilvus_orm.Collection.is_empty>`_ | Return whether the collection is empty. | 34 | +-----------------------------------------------------------+---------------------------------------------------+ 35 | | `num_entities <#pymilvus_orm.Collection.num_entities>`_ | Return the number of entities. | 36 | +-----------------------------------------------------------+---------------------------------------------------+ 37 | | `primary_field <#pymilvus_orm.Collection.primary_field>`_ | Return the primary field of collection. 
| 38 | +-----------------------------------------------------------+---------------------------------------------------+ 39 | | `partitions <#pymilvus_orm.Collection.partitions>`_ | Return all partitions of the collection. | 40 | +-----------------------------------------------------------+---------------------------------------------------+ 41 | | `indexes <#pymilvus_orm.Collection.indexes>`_ | Return all indexes of the collection. | 42 | +-----------------------------------------------------------+---------------------------------------------------+ 43 | 44 | 45 | 46 | Methods 47 | ------- 48 | 49 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 50 | | API | Description | 51 | +===================================================================+============================================================================+ 52 | | `drop() <#pymilvus_orm.Collection.drop>`_ | Drop the collection, as well as its corresponding index files. | 53 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 54 | | `load() <#pymilvus_orm.Collection.load>`_ | Load the collection from disk to memory. | 55 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 56 | | `release() <#pymilvus_orm.Collection.release>`_ | Release the collection from memory. | 57 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 58 | | `insert() <#pymilvus_orm.Collection.insert>`_ | Insert data into collection. 
| 59 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 60 | | `search() <#pymilvus_orm.Collection.search>`_ | Vector similarity search with an optional boolean expression as filters. | 61 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 62 | | `query() <#pymilvus_orm.Collection.query>`_ | Query with a set of criteria. | 63 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 64 | | `partition() <#pymilvus_orm.Collection.partition>`_ | Return the partition corresponding to name. | 65 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 66 | | `create_partition() <#pymilvus_orm.Collection.create_partition>`_ | Create the partition for the collection. | 67 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 68 | | `has_partition() <#pymilvus_orm.Collection.has_partition>`_ | Checks if a specified partition exists. | 69 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 70 | | `drop_partition() <#pymilvus_orm.Collection.drop_partition>`_ | Drop the partition and its corresponding index files. | 71 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 72 | | `index() <#pymilvus_orm.Collection.index>`_ | Return the index corresponding to name. 
| 73 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 74 | | `create_index() <#pymilvus_orm.Collection.create_index>`_ | Create index on a specified column according to the index parameters. | 75 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 76 | | `has_index() <#pymilvus_orm.Collection.has_index>`_ | Checks whether a specified index exists. | 77 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 78 | | `drop_index() <#pymilvus_orm.Collection.drop_index>`_ | Drop index and its corresponding index files. | 79 | +-------------------------------------------------------------------+----------------------------------------------------------------------------+ 80 | 81 | 82 | APIs References 83 | --------------- 84 | 85 | .. autoclass:: pymilvus_orm.Collection 86 | :member-order: bysource 87 | :special-members: __init__ 88 | :members: schema, description, name, is_empty, num_entities, primary_field, partitions, indexes, 89 | drop, load, release, insert, search, query, partition, create_partition, has_partition, drop_partition, 90 | index, create_index, has_index, drop_index 91 | -------------------------------------------------------------------------------- /docs/source/api/connections.rst: -------------------------------------------------------------------------------- 1 | .. _Connections: 2 | 3 | Connections 4 | =========== 5 | .. currentmodule:: pymilvus_orm 6 | 7 | Before connecting to Milvus, the user needs to configure the address and port of the service, and an alias can be assigned to the configuration. The role of `Connections` is to manage the configuration content of each connection and the corresponding connection object. 
Using the `Connections` object, users can either configure a single connection to a single service instance, or configure multiple connections to multiple different service instances. In PyMilvus-ORM, `Connections` is implemented as a singleton class. 8 | 9 | Constructor 10 | ----------- 11 | .. autosummary:: 12 | :toctree: api/ 13 | :template: autosummaryclass.rst 14 | 15 | +---------------------------------------------+---------------------------------------------------------------------------------+ 16 | | Constructor | Description | 17 | +=============================================+=================================================================================+ 18 | | `Connections() <#pymilvus_orm.Connection>`_ | A singleton class used to manage connections and correspoinding configurations. | 19 | +---------------------------------------------+---------------------------------------------------------------------------------+ 20 | 21 | 22 | Methods 23 | --------------------- 24 | .. autosummary:: 25 | :toctree: api/ 26 | 27 | +---------------------------------------------------------------------------------+---------------------------------------------------------+ 28 | | API | Description | 29 | +=================================================================================+=========================================================+ 30 | | `add_connection() <#pymilvus_orm.Connections.add_connection>`_ | Configures a connection, including address and port. | 31 | +---------------------------------------------------------------------------------+---------------------------------------------------------+ 32 | | `remove_connection(alias) <#pymilvus_orm.Connections.remove_connection>`_ | Delete a connection configuration. 
| 33 | +---------------------------------------------------------------------------------+---------------------------------------------------------+ 34 | | `connect([alias]) <#pymilvus_orm.Connections.connect>`_ | Create a connection object to connect to Milvus. | 35 | +---------------------------------------------------------------------------------+---------------------------------------------------------+ 36 | | `disconnect([alias]) <#pymilvus_orm.Connections.disconnect>`_ | Disconnect from Milvus and close the connection object. | 37 | +---------------------------------------------------------------------------------+---------------------------------------------------------+ 38 | | `get_connection([alias]) <#pymilvus_orm.Connections.get_connection>`_ | Retrieve a milvus connection by alias. | 39 | +---------------------------------------------------------------------------------+---------------------------------------------------------+ 40 | | `list_connections() <#pymilvus_orm.Connections.list_connections>`_ | List all connections. | 41 | +---------------------------------------------------------------------------------+---------------------------------------------------------+ 42 | | `get_connection_addr([alias]) <#pymilvus_orm.Connections.get_connection_addr>`_ | Retrieves connection's configuration by alias. | 43 | +---------------------------------------------------------------------------------+---------------------------------------------------------+ 44 | 45 | 46 | APIs 47 | ----- 48 | 49 | 50 | .. 
autoclass:: pymilvus_orm.Connections 51 | :member-order: bysource 52 | :members: add_connection, remove_connection, connect, disconnect, get_connection, list_connections, get_connection_addr 53 | -------------------------------------------------------------------------------- /docs/source/api/future.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | Future 3 | ====== 4 | 5 | 6 | SearchFuture 7 | ------------------ 8 | 9 | Constructor 10 | ~~~~~~~~~~~ 11 | 12 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 13 | | Constructor | Description | 14 | +======================================================================+========================================================================+ 15 | | `SearchFuture() <#pymilvus_orm.SearchFuture>`_ | Search future. | 16 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 17 | 18 | Attributes 19 | ~~~~~~~~~~ 20 | 21 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 22 | | API | Description | 23 | +======================================================================+========================================================================+ 24 | | `result() <#pymilvus_orm.SearchFuture.result>`_ | Return the search result. | 25 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 26 | | `cancel() <#pymilvus_orm.SearchFuture.cancel>`_ | Cancel the search request. | 27 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 28 | | `done() <#pymilvus_orm.SearchFuture.done>`_ | Wait for search request done. 
| 29 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 30 | 31 | 32 | APIs References 33 | ~~~~~~~~~~~~~~~ 34 | 35 | 36 | .. autoclass:: pymilvus_orm.SearchFuture 37 | :member-order: bysource 38 | :members: result, cancel, done 39 | 40 | 41 | MutationFuture 42 | -------------- 43 | 44 | Constructor 45 | ~~~~~~~~~~~ 46 | 47 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 48 | | Constructor | Description | 49 | +======================================================================+========================================================================+ 50 | | `MutationFuture() <#pymilvus_orm.MutationFuture>`_ | Mutationfuture. | 51 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 52 | 53 | Attributes 54 | ~~~~~~~~~~ 55 | 56 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 57 | | API | Description | 58 | +======================================================================+========================================================================+ 59 | | `result() <#pymilvus_orm.MutationFuture.result>`_ | Return the insert result. | 60 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 61 | | `cancel() <#pymilvus_orm.MutationFuture.cancel>`_ | Cancel the insert request. | 62 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 63 | | `done() <#pymilvus_orm.MutationFuture.done>`_ | Wait for insert request done. 
| 64 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 65 | 66 | 67 | APIs References 68 | ~~~~~~~~~~~~~~~ 69 | 70 | 71 | .. autoclass:: pymilvus_orm.MutationFuture 72 | :member-order: bysource 73 | :members: result, cancel, done 74 | -------------------------------------------------------------------------------- /docs/source/api/milvus_index.rst: -------------------------------------------------------------------------------- 1 | .. _index: 2 | 3 | Index 4 | ========= 5 | .. currentmodule:: pymilvus_orm 6 | 7 | An index belongs to a specific vector field in a collection, it helps accelerating search. 8 | 9 | Constructor 10 | ----------- 11 | .. autosummary:: 12 | :toctree: api/ 13 | :template: autosummaryclass.rst 14 | 15 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 16 | | Constructor | Description | 17 | +======================================================================+===========================================================================+ 18 | | `Index() <#pymilvus_orm.Index>`_ | Create index on a specified column according to the index parameters. | 19 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 20 | 21 | 22 | Attributes 23 | --------------------- 24 | .. autosummary:: 25 | :toctree: api/ 26 | 27 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 28 | | API | Description | 29 | +======================================================================+===========================================================================+ 30 | | `params <#pymilvus_orm.Index.params>`_ | Return the index params. 
| 31 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 32 | | `collection_name <#pymilvus_orm.Index.collection_name>`_ | Return corresponding collection name. | 33 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 34 | | `field_name <#pymilvus_orm.Index.field_name>`_ | Return corresponding field name. | 35 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 36 | | `drop <#pymilvus_orm.Index.drop>`_ | Drop index and its corresponding index files. | 37 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 38 | 39 | 40 | APIs 41 | ----- 42 | 43 | 44 | .. autoclass:: pymilvus_orm.Index 45 | :member-order: bysource 46 | :members: params, collection_name, field_name, drop 47 | -------------------------------------------------------------------------------- /docs/source/api/partition.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Partition 3 | ========= 4 | 5 | A partition is a group of entities in one collection with the same label. Entities inserted without a label 6 | will be tagged a default label by milvus. 7 | 8 | Partition is managable, which means managing a group of entities with the same label in one collection. 
9 | 10 | Constructor 11 | ----------- 12 | 13 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 14 | | Constructor | Description | 15 | +======================================================================+========================================================================+ 16 | | `Partition() <#pymilvus_orm.Partition>`_ | Milvus partition. | 17 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 18 | 19 | 20 | Attributes 21 | ---------- 22 | 23 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 24 | | API | Description | 25 | +======================================================================+========================================================================+ 26 | | `description <#pymilvus_orm.Partition.description>`_ | Return the description text. | 27 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 28 | | `name <#pymilvus_orm.Partition.name>`_ | Return the partition name. | 29 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 30 | | `is_empty <#pymilvus_orm.Partition.is_empty>`_ | Return whether the Partition is empty. | 31 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 32 | | `num_entities <#pymilvus_orm.Partition.num_entities>`_ | Return the number of entities. 
| 33 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 34 | 35 | 36 | Methods 37 | --------------------- 38 | 39 | 40 | +----------------------------------------------------------------------+-------------------------------------------------------------------------+ 41 | | API | Description | 42 | +======================================================================+=========================================================================+ 43 | | `drop() <#pymilvus_orm.Partition.drop>`_ | Drop the Partition, as well as its corresponding index files. | 44 | +----------------------------------------------------------------------+-------------------------------------------------------------------------+ 45 | | `load() <#pymilvus_orm.Partition.load>`_ | Load the Partition from disk to memory. | 46 | +----------------------------------------------------------------------+-------------------------------------------------------------------------+ 47 | | `release() <#pymilvus_orm.Partition.release>`_ | Release the Partition from memory. | 48 | +----------------------------------------------------------------------+-------------------------------------------------------------------------+ 49 | | `insert() <#pymilvus_orm.Partition.insert>`_ | Insert data into partition. | 50 | +----------------------------------------------------------------------+-------------------------------------------------------------------------+ 51 | | `search() <#pymilvus_orm.Partition.search>`_ | Vector similarity search with an optional boolean expression as filters.| 52 | +----------------------------------------------------------------------+-------------------------------------------------------------------------+ 53 | | `query() <#pymilvus_orm.Partition.query>`_ | Query with a set of criteria. 
| 54 | +----------------------------------------------------------------------+-------------------------------------------------------------------------+ 55 | 56 | API References 57 | --------------- 58 | 59 | 60 | .. autoclass:: pymilvus_orm.Partition 61 | :member-order: bysource 62 | :members: description, name, is_empty, num_entities, drop, load, release, insert, search, query 63 | 64 | -------------------------------------------------------------------------------- /docs/source/api/schema.rst: -------------------------------------------------------------------------------- 1 | .. _Schema: 2 | 3 | Schema 4 | ========= 5 | .. currentmodule:: pymilvus_orm 6 | 7 | CollectionSchema and FieldSchema. 8 | 9 | Constructor 10 | ----------- 11 | .. autosummary:: 12 | :toctree: api/ 13 | :template: autosummaryclass.rst 14 | 15 | +----------------------------------------------------------------------+---------------------------------------------+ 16 | | Constructor | Description | 17 | +======================================================================+=============================================+ 18 | | `CollectionSchema() <#pymilvus_orm.CollectionSchema>`_ | Schema of collection. | 19 | +----------------------------------------------------------------------+---------------------------------------------+ 20 | | `FieldSchema() <#pymilvus_orm.FieldSchema>`_ | Schema of field. | 21 | +----------------------------------------------------------------------+---------------------------------------------+ 22 | 23 | 24 | CollectionSchema Attributes 25 | --------------------------- 26 | .. 
autosummary:: 27 | :toctree: api/ 28 | 29 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 30 | | API | Description | 31 | +======================================================================+===========================================================================+ 32 | | `fields <#pymilvus_orm.CollectionSchema.fields>`_ | Return the fields of collection. | 33 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 34 | | `description <#pymilvus_orm.CollectionSchema.description>`_ | Return the description text about the collection. | 35 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 36 | | `primary_field() <#pymilvus_orm.CollectionSchema.primary_field>`_ | Return the primary key column of collection. | 37 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 38 | | `auto_id() <#pymilvus_orm.CollectionSchema.auto_id>`_ | Return whether the ids is automatically generated. | 39 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 40 | 41 | 42 | APIs 43 | ----- 44 | 45 | .. autoclass:: pymilvus_orm.CollectionSchema 46 | :member-order: bysource 47 | :members: fields, description, primary_field, auto_id 48 | 49 | 50 | FieldSchema Attributes 51 | ---------------------- 52 | .. 
autosummary:: 53 | :toctree: api/ 54 | 55 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 56 | | API | Description | 57 | +======================================================================+===========================================================================+ 58 | | `name <#pymilvus_orm.name>`_ | Return the name of field. | 59 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 60 | | `is_primary <#pymilvus_orm.FieldSchema.is_primary>`_ | Return whether the field is primary key column. | 61 | +----------------------------------------------------------------------+---------------------------------------------------------------------------+ 62 | 63 | 64 | APIs 65 | ----- 66 | 67 | 68 | .. autoclass:: pymilvus_orm.FieldSchema 69 | :member-order: bysource 70 | :members: name, is_primary 71 | 72 | 73 | -------------------------------------------------------------------------------- /docs/source/api/search.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | SearchResult 3 | ============ 4 | 5 | SearchResult 6 | ------------ 7 | 8 | Constructor 9 | ~~~~~~~~~~~ 10 | 11 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 12 | | Constructor | Description | 13 | +======================================================================+========================================================================+ 14 | | `SearchResult() <#pymilvus_orm.SearchResult>`_ | Search Result | 15 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 16 | 17 | Attributes 18 | ~~~~~~~~~~ 19 | 20 | 
+----------------------------------------------------------------------+------------------------------------------------------------------------+ 21 | | API | Description | 22 | +======================================================================+========================================================================+ 23 | | `iter(self) <#pymilvus_orm.SearchResult.\_\_iter\_\_>`_ | Iterate the search result. | 24 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 25 | | `self[item] <#pymilvus_orm.SearchResult.\_\_getitem\_\_>`_ | Return the Hits corresponding to the nth query. | 26 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 27 | | `len(self) <#pymilvus_orm.SearchResult.\_\_len\_\_>`_ | Return the number of query of search result. | 28 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 29 | 30 | 31 | APIs References 32 | ~~~~~~~~~~~~~~~ 33 | 34 | 35 | .. autoclass:: pymilvus_orm.SearchResult 36 | :member-order: bysource 37 | :members: __iter__, __getitem__, __len__ 38 | 39 | 40 | Hits 41 | ---- 42 | 43 | Constructor 44 | ~~~~~~~~~~~ 45 | 46 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 47 | | Constructor | Description | 48 | +======================================================================+========================================================================+ 49 | | `Hits() <#pymilvus_orm.Hits>`_ | Search result about specific query. 
| 50 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 51 | 52 | Attributes 53 | ~~~~~~~~~~ 54 | 55 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 56 | | API | Description | 57 | +======================================================================+========================================================================+ 58 | | `iter(self) <#pymilvus_orm.Hits.\_\_iter\_\_>`_ | Iterate the hits object. | 59 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 60 | | `self[item] <#pymilvus_orm.Hits.\_\_getitem\_\_>`_ | Return the hit record to the query. | 61 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 62 | | `len(self) <#pymilvus_orm.Hits.\_\_len\_\_>`_ | Return the number of hit records. | 63 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 64 | | `ids <#pymilvus_orm.Hits.ids>`_ | Return the ids of hit records. | 65 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 66 | | `distances <#pymilvus_orm.Hits.distances>`_ | Return the distances of hit records. | 67 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 68 | 69 | 70 | APIs References 71 | ~~~~~~~~~~~~~~~ 72 | 73 | 74 | .. 
autoclass:: pymilvus_orm.Hits 75 | :member-order: bysource 76 | :members: __iter__, __getitem__, __len__, ids, distances 77 | 78 | 79 | Hit 80 | --- 81 | 82 | Constructor 83 | ~~~~~~~~~~~ 84 | 85 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 86 | | Constructor | Description | 87 | +======================================================================+========================================================================+ 88 | | `Hit() <#pymilvus_orm.Hit>`_ | Search result about specific query. | 89 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 90 | 91 | Attributes 92 | ~~~~~~~~~~ 93 | 94 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 95 | | API | Description | 96 | +======================================================================+========================================================================+ 97 | | `id <#pymilvus_orm.Hit.id>`_ | Return the id of hit record. | 98 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 99 | | `distance <#pymilvus_orm.Hit.distance>`_ | Return the distance of hit record. | 100 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 101 | | `score <#pymilvus_orm.Hit.score>`_ | Return the score of hit record. | 102 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 103 | | `str(self) <#pymilvus_orm.Hit.\_\_str\_\_>`_ | Return the information of hit record. 
| 104 | +----------------------------------------------------------------------+------------------------------------------------------------------------+ 105 | 106 | 107 | APIs References 108 | ~~~~~~~~~~~~~~~ 109 | 110 | 111 | .. autoclass:: pymilvus_orm.Hit 112 | :member-order: bysource 113 | :members: id, distance, score, __str__ 114 | 115 | -------------------------------------------------------------------------------- /docs/source/api/utility.rst: -------------------------------------------------------------------------------- 1 | .. _utility: 2 | :toctree: api/ 3 | 4 | Utility 5 | ========== 6 | 7 | Methods 8 | ------- 9 | 10 | +------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------+ 11 | | API | Description | 12 | +====================================================================================================================================+==============================================+ 13 | | `loading_progress(collection_name, [partition_names,using]) <#pymilvus_orm.utility.loading_progress>`_ | Query the progress of loading. | 14 | +------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------+ 15 | | `wait_for_loading_complete(collection_name, [partition_names, timeout, using]) <#pymilvus_orm.utility.wait_for_loading_complete>`_ | Wait until loading is complete. | 16 | +------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------+ 17 | | `index_building_progress(collection_name, [using]) <#pymilvus_orm.utility.index_building_progress>`_ | Query the progress of index building. 
| 18 | +------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------+ 19 | | `wait_for_index_building_complete(collection_name, [timeout, using]) <#pymilvus_orm.utility.wait_for_index_building_complete>`_ | Wait until index building is complete. | 20 | +------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------+ 21 | | `has_collection(collection_name, [using]) <#pymilvus_orm.utility.has_collection>`_ | Check if a specified collection exists. | 22 | +------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------+ 23 | | `has_partition(collection_name, partition_name, [using]) <#pymilvus_orm.utility.has_partition>`_ | Check if a specified partition exists. | 24 | +------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------+ 25 | | `list_collections([timeout, using]) <#pymilvus_orm.utility.list_collections>`_ | List all collections. | 26 | +------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------+ 27 | 28 | APIs References 29 | --------------- 30 | 31 | .. 
automodule:: pymilvus_orm.utility 32 | :member-order: bysource 33 | :members: loading_progress, wait_for_loading_complete, index_building_progress, 34 | wait_for_index_building_complete, has_collection, has_partition, list_collections 35 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('../..')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'pymilvus-orm' 21 | copyright = '2021, Milvus' 22 | author = 'Milvus' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = '1.0.1' 26 | show_authors = True 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | 'sphinx.ext.autodoc', 36 | 'sphinx.ext.viewcode', 37 | 'sphinx.ext.githubpages', 38 | 'sphinx_copybutton', 39 | 'm2r', 40 | 'sphinx.ext.autosummary', 41 | 'sphinxcontrib.prettyspecialmethods' 42 | ] 43 | 44 | 45 | # Copy button configuration for python and shell 46 | # regexp for: 47 | # ">>> ", "... 
", "$ ", "In [digit]", "(char) $ " 48 | copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: |\(.\) \$ " 49 | copybutton_prompt_is_regexp = True 50 | 51 | # Add any source suffix 52 | source_suffix = ['.rst', '.md'] 53 | 54 | # Add any paths that contain templates here, relative to this directory. 55 | templates_path = ['_templates'] 56 | 57 | # The language for content autogenerated by Sphinx. Refer to documentation 58 | # for a list of supported languages. 59 | # 60 | # This is also used if you do content translation via gettext catalogs. 61 | # Usually you set "language" from the command line for these cases. 62 | language = 'python' 63 | 64 | # List of patterns, relative to source directory, that match files and 65 | # directories to ignore when looking for source files. 66 | # This pattern also affects html_static_path and html_extra_path. 67 | exclude_patterns = [] 68 | 69 | 70 | # -- Options for HTML output ------------------------------------------------- 71 | 72 | # The theme to use for HTML and HTML Help pages. See the documentation for 73 | # a list of builtin themes. 74 | # html_theme = 'alabaster' 75 | html_theme = 'sphinx_rtd_theme' 76 | html_show_sphinx = False 77 | copyright = '2019-2021 Zilliz. All rights reserved.' 78 | 79 | # Add any paths that contain custom static files (such as style sheets) here, 80 | # relative to this directory. They are copied after the builtin static files, 81 | # so a file named "default.css" will overwrite the builtin "default.css". 82 | -------------------------------------------------------------------------------- /docs/source/contribute.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | 5 | - `Open Issues`_ 6 | - `Submit Pull Requests`_ 7 | - `Github workflow`_ 8 | - `Contribution Guideline`_ 9 | 10 | Contributing is warmly welcomed. 
You can contribute to PyMilvus ORM project by opening issues and submitting pull 11 | requests on `PyMilvus ORM Github page `_. 12 | 13 | Open Issues 14 | =========== 15 | To request a new feature, report a bug or ask a question, it's recommended for you to **open an issue**. 16 | 17 | For a feature 18 | You can tell us why you need it and we will decide whether to implement it soon. 19 | If we think it's a good improvement, we will make it a feature request and start to work on it. It's 20 | also welcomed for you to open an issue with your PR as a solution. 21 | 22 | For a bug 23 | You need to tell us as much information as possible, better start with our 24 | `bug report template `_. 25 | With information, we can reproduce the bug easily and solve it later. 26 | 27 | For a question 28 | It's welcomed to ask any questions about PyMilvus ORM and Milvus, we are pleased to communicate with you. 29 | 30 | Submit Pull Requests 31 | ==================== 32 | 33 | If you have improvements to PyMilvus ORM, please submit pull requests(PR) to main, see workflow below. 34 | 35 | **PR for codes**, you need to tell us why we need it, mentioning an existing issue would be better. 36 | 37 | **PR for docs**, you also need to tell us why we need it. 38 | 39 | Your PRs will be reviewed and checked, merged into our project if approved. 40 | 41 | Github workflow 42 | =============== 43 | 44 | This is a brief instruction of Github workflow for beginners. 45 | 46 | * **Fork** the `PyMilvus ORM repository `_ on Github. 47 | 48 | * **Clone** your fork to your local machine with ``git clone git@github.com:/pymilvus-orm.git``. 49 | 50 | * Create a new branch with ``git checkout -b my_working_branch``. 51 | 52 | * Make your changes, commit, then push to your forked repository. 53 | 54 | * Visit Github and make your PR. 55 | 56 | If you already have an existing local repository, always update it before you start to make changes like below: 57 | 58 | .. 
code-block:: shell 59 | 60 | $ git remote add upstream git@github.com:milvus-io/pymilvus-orm.git 61 | $ git checkout main 62 | $ git pull upstream main 63 | $ git checkout -b my_working_branch 64 | 65 | 66 | Contribution guideline 67 | ====================== 68 | 69 | .. todo: 70 | More details about tests and pylint check . 71 | 72 | **1. Update CHANGELOG.md** 73 | 74 | If any improvement or feature is being added, you are recommended to open a new issue(if not exist) then 75 | record your change in file `CHANGELOG.md`. The format is: 76 | `- \#{GitHub issue number} - {Brief description for your change}` 77 | 78 | **2. Add unit tests for your codes** 79 | 80 | To run unit test in github action, you need to make sure the last commit message of PR starts with "[ci]". 81 | If you want to run unit test locally, under root folder of PyMilvus ORM project run `pytest --ip=${IP} --port=${PORT}`. 82 | 83 | **3. Pass pylint check** 84 | 85 | In the root directory, run ``pylint --rcfile=pylint.conf pymilvus_orm`` to make sure the rate is 10. 86 | 87 | **4. For documentations** 88 | 89 | You need to enter the ``docs`` directory and run ``make html``, please refer to 90 | `About this documentations `_. 91 | 92 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to PyMilvus-ORM's documentation! 2 | ======================================== 3 | 4 | Overview 5 | ======== 6 | 7 | PyMilvus-ORM is a python SDK for Milvus2.0 and is a recommended way to work with Milvus. This documentation covers 8 | everything you need to know about PyMilvus-ORM. 9 | 10 | :doc:`install` 11 | Instructions on how to install PyMilvus-ORM. 12 | 13 | :doc:`tutorial` 14 | A quick start to use PyMilvus-ORM. 15 | 16 | :doc:`api/api` 17 | The complete API documentation. 18 | 19 | :doc:`param` 20 | Index and relevant parameters. 
21 | 22 | :doc:`results` 23 | How to deal with search results. 24 | 25 | :doc:`changes` 26 | Changes in the latest PyMilvus-ORM. 27 | 28 | :doc:`contribute` 29 | Method of contribution, bug shooting and contribution guide. 30 | 31 | :doc:`faq` 32 | Some questions that come up often. 33 | 34 | :doc:`about` 35 | How this documentation is generated. 36 | 37 | 38 | .. toctree:: 39 | :hidden: 40 | :maxdepth: 8 41 | :caption: Table of Contents 42 | 43 | install 44 | tutorial 45 | api/api 46 | param 47 | results 48 | changes 49 | faq 50 | contribute 51 | about 52 | 53 | 54 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | Installing via pip 6 | ================== 7 | 8 | PyMilvus-ORM is in the `Python Package Index `_. 9 | 10 | PyMilvus-ORM only supports python3(>= 3.6), usually, it's ok to install PyMilvus-ORM like below. 11 | 12 | .. code-block:: shell 13 | 14 | $ python3 -m pip install pymilvus-orm==2.0.0rc2 15 | 16 | Installing in a virtual environment 17 | ==================================== 18 | 19 | It's recommended to use PyMilvus-ORM in a virtual environment, using virtual environment allows you to avoid 20 | installing Python packages globally which could break system tools or other projects. 21 | We use ``virtualenv`` as an example to demonstrate how to install and use PyMilvus-ORM in a virtual environment. 22 | See `virtualenv `_ for more information about why and how. 23 | 24 | 25 | .. code-block:: shell 26 | 27 | $ python3 -m pip install virtualenv 28 | $ virtualenv venv 29 | $ source venv/bin/activate 30 | (venv) $ pip install pymilvus-orm==2.0.0rc2 31 | 32 | If you want to exit the virtualenv ``venv``, you can use ``deactivate``. 33 | 34 | 35 | .. 
code-block:: shell 36 | 37 | (venv) $ deactivate 38 | $ 39 | 40 | 41 | Installing a specific PyMilvus-ORM version 42 | ========================================== 43 | 44 | Here we assume you are already in a virtual environment. 45 | 46 | Suitable PyMilvus-ORM version depends on Milvus version you are using. See `install pymilvus-orm `_ for recommended pymilvus-orm version. 47 | 48 | If you want to install a specific version of PyMilvus-ORM: 49 | 50 | .. code-block:: shell 51 | 52 | (venv) $ pip install pymilvus-orm==2.0.0rc2 53 | 54 | If you want to upgrade PyMilvus-ORM into the latest version published: 55 | 56 | .. code-block:: shell 57 | 58 | (venv) $ pip install --upgrade pymilvus-orm 59 | 60 | 61 | Installing from source 62 | ====================== 63 | 64 | This will install the latest PyMilvus-ORM into your virtual environment. 65 | 66 | .. code-block:: shell 67 | 68 | (venv) $ pip install git+https://github.com/milvus-io/pymilvus-orm.git 69 | 70 | Verifying installation 71 | ====================== 72 | 73 | Your installation is correct if the following command in the Python shell doesn't raise an exception. 74 | 75 | .. code-block:: shell 76 | 77 | (venv) $ python -c "from pymilvus_orm import Milvus, DataType" 78 | 79 | -------------------------------------------------------------------------------- /docs/source/param.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Index 3 | ===== 4 | 5 | 6 | .. mdinclude:: res/Intro_to_Indexes.md 7 | 8 | -------------------------------------------------------------------------------- /docs/source/res/Intro_to_Indexes.md: -------------------------------------------------------------------------------- 1 | 2 | [Milvus](https://github.com/milvus-io) supports creating indexes to accelerate approximate vector search. 
3 | 4 | To learn how to create an index by python client, see method [create_index()](api/collection.html#pymilvus_orm.Collection.create_index) and 5 | [index example](https://github.com/milvus-io/pymilvus-orm/tree/main/examples) . 6 | 7 | For more detailed information about indexes, please refer to [Milvus documentation index chapter.](https://milvus.io/docs/index.md) 8 | 9 | To learn how to choose an appropriate index for your application scenarios, please read [How to Select an Index in Milvus](https://medium.com/@milvusio/how-to-choose-an-index-in-milvus-4f3d15259212). 10 | 11 | To learn how to choose an appropriate index for a metric, see [Distance Metrics](https://www.milvus.io/docs/metric.md). 12 | 13 | ## Vector Index 14 | 15 | - `FLAT`_ 16 | - `IVF_FLAT`_ 17 | - `IVF_SQ8`_ 18 | - `IVF_PQ`_ 19 | - `HNSW`_ 20 | - `ANNOY`_ 21 | 22 | 23 | ### FLAT 24 | If FLAT index is used, the vectors are stored in an array of float/binary data without any compression. During 25 | searching vectors, all indexed vectors are decoded sequentially and compared to the query vectors. 26 | 27 | FLAT index provides 100% query recall rate. Compared to other indexes, it is the most efficient indexing method 28 | when the number of queries is small. 29 | 30 | The inserted and index-inbuilt vectors and index-dropped vectors are regarded as built with ``FLAT``. 31 | 32 | FLAT is the default index type in the server, so it's not necessary to create a FLAT index. When you search a 33 | collection without creating an index before, in fact you search a collection with FLAT index. 34 | 35 | - search parameters: 36 | **N/A** 37 | 38 | ```python 39 | # FLAT 40 | collection.search(data, anns_field, search_params, topK, expression) 41 | ``` 42 | 43 | ### IVF_FLAT 44 | 45 | **IVF** (*Inverted File*) is an index type based on quantization. It divides the points in space into `nlist` 46 | units by clustering method. 
During searching vectors, it compares the distances between the target vector 47 | and the center of all the units, and then select the `nprobe` nearest unit. Then, it compares all the vectors 48 | in these selected cells to get the final result. 49 | 50 | IVF_FLAT is the most basic IVF index, and the encoded data stored in each unit is consistent with the original data. 51 | 52 | - building parameters: 53 | 54 | **nlist**: Number of cluster units. 55 | 56 | ```python 57 | # IVF_FLAT 58 | collection.create_index(field_name=field_name, 59 | index_params={'index_type': 'IVF_FLAT', 60 | 'metric_type': 'L2', 61 | 'params': { 62 | 'nlist': 100 # int. 1~65536 63 | }}) 64 | ``` 65 | 66 | - search parameters: 67 | 68 | **nprobe**: Number of inverted file cell to probe. 69 | 70 | ```python 71 | # IVF_FLAT 72 | collection.search(data, anns_field, { 73 | "nprobe": 8 # int. 1~nlist(cpu), 1~min[2048, nlist](gpu) 74 | }, topK, expression) 75 | ``` 76 | 77 | ### IVF_PQ 78 | 79 | **PQ** (*Product Quantization*) uniformly decomposes the original high-dimensional vector space into 80 | Cartesian products of `m` low-dimensional vector spaces, and then quantizes the decomposed low-dimensional 81 | vector spaces. In the end, each vector is stored in `m` × `nbits` bits. Instead of calculating the distances 82 | between the target vector and the center of all the units, product quantization enables the calculation of 83 | distances between the target vector, and the clustering center of each low-dimensional space and greatly reduces 84 | the time complexity and space complexity of the algorithm. 85 | 86 | IVF_PQ performs IVF index clustering, and then quantizes the product of vectors. Its index file is even 87 | smaller than IVF_SQ8, but it also causes a loss of accuracy during searching. 88 | 89 | - building parameters: 90 | 91 | **nlist**: Number of cluster units. 92 | 93 | **m**: Number of factors of product quantization. 
**CPU-only** Milvus: `m ≡ dim (mod m)`; **GPU-enabled** Milvus: `m` ∈ {1, 2, 3, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 96}, and (dim / m) ∈ {1, 2, 3, 4, 6, 8, 10, 12, 16, 20, 24, 28, 32}. (`m` x 1024) ≥ `MaxSharedMemPerBlock` of your graphics card. 94 | 95 | **nbits**: Number of bits in which each low-dimensional vector is stored. 96 | 97 | ```python 98 | # IVF_PQ 99 | collection.create_index(field_name=field_name, 100 | index_params={'index_type': 'IVF_PQ', 101 | 'metric_type': 'L2', 102 | 'params': { 103 | 'nlist': 100, # int. 1~65536 104 | "m": 8 # int. 1~16. 8 by default 105 | }}) 106 | ``` 107 | 108 | - search parameters: 109 | 110 | **nprobe**: Number of inverted file cell to probe. 111 | 112 | ```python 113 | # IVF_PQ 114 | collection.search(data, anns_field, { 115 | "nprobe": 8 # int. 1~nlist(cpu), 1~min[2048, nlist](gpu) 116 | }, topK, expression) 117 | ``` 118 | 119 | ### IVF_SQ8 120 | 121 | **IVF_SQ8** does scalar quantization for each vector placed in the unit based on IVF. Scalar quantization 122 | converts each dimension of the original vector from a 4-byte floating-point number to a 1-byte unsigned integer, 123 | so the IVF_SQ8 index file occupies much less space than the IVF_FLAT index file. 124 | However, scalar quantization results in a loss of accuracy during searching vectors. 125 | 126 | - building parameters: 127 | 128 | **nlist**: Number of cluster units. 129 | 130 | ```python 131 | # IVF_SQ8 132 | collection.create_index(field_name=field_name, 133 | index_params={'index_type': 'IVF_SQ8', 134 | 'metric_type': 'L2', 135 | 'params': { 136 | 'nlist': 100, # int. 1~65536 137 | }}) 138 | ``` 139 | 140 | - search parameters: 141 | 142 | **nprobe**: Number of inverted file cell to probe. 143 | 144 | ```python 145 | # IVF_SQ8 146 | collection.search(data, anns_field, { 147 | "nprobe": 8 # int. 
1~nlist(cpu), 1~min[2048, nlist](gpu) 148 | }, topK, expression) 149 | ``` 150 | 151 | ### ANNOY 152 | 153 | **ANNOY** (*Approximate Nearest Neighbors Oh Yeah*) is an index that uses a hyperplane to divide a 154 | high-dimensional space into multiple subspaces, and then stores them in a tree structure. 155 | 156 | When searching for vectors, ANNOY follows the tree structure to find subspaces closer to the target vector, 157 | and then compares all the vectors in these subspaces (The number of vectors being compared should not be 158 | less than `search_k`) to obtain the final result. Obviously, when the target vector is close to the edge of 159 | a certain subspace, sometimes it is necessary to greatly increase the number of searched subspaces to obtain 160 | a high recall rate. Therefore, ANNOY uses `n_trees` different methods to divide the whole space, and searches 161 | all the dividing methods simultaneously to reduce the probability that the target vector is always at the edge of the subspace. 162 | 163 | - building parameters: 164 | 165 | **n_trees**: The number of methods of space division. 166 | 167 | ```python 168 | # ANNOY 169 | collection.create_index(field_name=field_name, 170 | index_params={'index_type': 'ANNOY', 171 | 'metric_type': 'L2', 172 | 'params': { 173 | "n_trees": 8 # int. 1~1024 174 | }}) 175 | ``` 176 | 177 | - search parameters: 178 | 179 | **search_k**: The number of nodes to search. -1 means 5% of the whole data. 180 | 181 | ```python 182 | # ANNOY 183 | collection.search(data, anns_field, { 184 | "search_k": -1 # int. {-1} U [top_k, n*n_trees], n represents vectors count. 185 | }, topK, expression) 186 | ``` 187 | 188 | ### HNSW 189 | 190 | **HNSW** (*Hierarchical Navigable Small World Graph*) is a graph-based indexing algorithm. It builds a 191 | multi-layer navigation structure for an image according to certain rules. 
In this structure, the upper 192 | layers are more sparse and the distances between nodes are farther; the lower layers are denser and 193 | the distances between nodes are closer. The search starts from the uppermost layer, finds the node closest 194 | to the target in this layer, and then enters the next layer to begin another search. After multiple iterations, 195 | it can quickly approach the target position. 196 | 197 | In order to improve performance, HNSW limits the maximum degree of nodes on each layer of the graph to `M`. 198 | In addition, you can use `efConstruction` (when building index) or `ef` (when searching targets) to specify a search range. 199 | 200 | - building parameters: 201 | 202 | **M**: Maximum degree of the node. 203 | 204 | **efConstruction**: Take the effect in stage of index construction. 205 | 206 | ```python 207 | # HNSW 208 | collection.create_index(field_name=field_name, 209 | index_params={'index_type': 'HNSW', 210 | 'metric_type': 'L2', 211 | 'params': { 212 | "M": 16, # int. 4~64 213 | "efConstruction": 40 # int. 8~512 214 | }}) 215 | ``` 216 | 217 | - search parameters: 218 | 219 | **ef**: Take the effect in stage of search scope, should be larger than `top_k`. 220 | 221 | ```python 222 | # HNSW 223 | collection.search(data, anns_field, { 224 | "ef": 64 # int. top_k~32768 225 | }, topK, expression) 226 | ``` 227 | 228 | 229 | -------------------------------------------------------------------------------- /docs/source/res/about_documentation.md: -------------------------------------------------------------------------------- 1 | 2 | This documentation is generated using the Sphinx documentation generator. The source files for the documentation are 3 | located in the *docs/* directory of the Pymilvus-ORM distribution. To generate the docs locally run the following command 4 | under directory *docs/*: 5 | 6 | ```shell 7 | $ make html 8 | ``` 9 | 10 | The documentation should be generated under directory *build/html*. 
11 | 12 | To preview it, you can open *index.html* in your browser. 13 | 14 | Or run a web server in that directory: 15 | 16 | ```shell 17 | $ python3 -m http.server 18 | ``` 19 | 20 | Then open your browser to http://localhost:8000. 21 | -------------------------------------------------------------------------------- /docs/source/results.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Search results 3 | =============== 4 | 5 | 6 | How to deal with search results 7 | -------------------------------- 8 | 9 | The invocation of `search()` is like this: 10 | 11 | >>> import random 12 | >>> dim = 128 13 | >>> nq = 10 14 | >>> query_vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] 15 | >>> anns_field = "vector field used to search" 16 | >>> search_params = {"metric_type": "L2", "params": {"nprobe": 10}} 17 | >>> limit = 10 # topk 18 | >>> expr = "FieldA > 10" # keep only records whose value on FieldA is greater than 10 19 | >>> results = collection.search(query_vectors, anns_field, search_params, limit, expr) 20 | 21 | The result object can be used as a 2-D array. `results[i]` (0 <= i < len(results)) represents topk results of i-th query 22 | vector, and `results[i][j]` (0 <= j < len( `results[i]` )) represents j-th result of i-th query vector. To get result id and distance, 23 | you can invoke like this: 24 | 25 | >>> id = results[i][j].id 26 | >>> distance = results[i][j].distance 27 | 28 | The results object can be iterated, so you can traverse the results with two-level loop: 29 | 30 | >>> for raw_result in results: 31 | ... for result in raw_result: 32 | ... id = result.id # result id 33 | ... distance = result.distance 34 | 35 | 36 | Meanwhile, the topk results provide attributes to separately access result ids and distances, 37 | so you can traverse the results like this: 38 | 39 | >>> for result in results: 40 | ... for id, dis in zip(result.ids, result.distances): 41 | ... 
print(f"id = {id}, distance = {dis}") 42 | 43 | -------------------------------------------------------------------------------- /docs/source/tutorial.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Tutorial 3 | ======== 4 | 5 | This is a basic introduction to Milvus by PyMilvus-ORM. 6 | 7 | For a runnable python script, 8 | checkout `example.py `_ on PyMilvus-ORM Github, 9 | or `hello milvus `_ on Milvus official website. It's a good recommended 10 | start to get started with Milvus and PyMilvus-ORM as well. 11 | 12 | 13 | .. note:: 14 | Here we use float vectors as example vector field data, if you want to learn example about binary vectors, see 15 | `binary vector example `_. 16 | 17 | 18 | Prerequisites 19 | ============= 20 | 21 | Before we start, there are some prerequisites. 22 | 23 | Make sure that: 24 | 25 | - You have a running Milvus instance. 26 | - PyMilvus-ORM is correctly :doc:`install`. 27 | 28 | Connect to Milvus 29 | ================= 30 | 31 | First of all, we need to import `pymilvus-orm`. 32 | 33 | >>> from pymilvus_orm import connections 34 | 35 | Then, we can make connection with Milvus server. 36 | By default Milvus runs on localhost in port 19530, so you can use default value to connect to Milvus. 37 | 38 | >>> host = '127.0.0.1' 39 | >>> port = '19530' 40 | >>> connections.add_connection(default={"host": host, "port": port}) 41 | >>> connections.connect(alias='default') 42 | 43 | After connecting, we can communicate with Milvus in the following ways. If you are confused about the 44 | terminology, see `Milvus Terminology `_ for explanations. 45 | 46 | 47 | Collection 48 | ========== 49 | 50 | Now let's create a new collection. Before we start, we can list all the collections already exist. For a brand 51 | new Milvus running instance, the result should be empty. 
52 | 53 | >>> from pymilvus_orm import list_collections 54 | >>> list_collections() 55 | [] 56 | 57 | Create Collection 58 | ================= 59 | 60 | To create collection, we could provide the schema for it. 61 | 62 | In this tutorial, we will create a collection with three fields: `id`, `year` and `embedding`. 63 | 64 | The type of 'id' field is `int64`, and it is set as primary field. 65 | The type of `year` field is `int64`, and the type of `embedding` is `FLOAT_VECTOR` whose `dim` is 128. 66 | 67 | Now we can create a collection: 68 | 69 | >>> from pymilvus_orm import Collection, DataType, FieldSchema, CollectionSchema 70 | >>> dim = 128 71 | >>> id_field = FieldSchema(name="id", dtype=DataType.INT64, description="primary_field") 72 | >>> year_field = FieldSchema(name="year", dtype=DataType.INT64, description="year") 73 | >>> embedding_field = FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=dim) 74 | >>> schema = CollectionSchema(fields=[id_field, year_field, embedding_field], primary_field='id', auto_id=True, description='desc of collection') 75 | >>> collection_name = "tutorial" 76 | >>> collection = Collection(name=collection_name, schema=schema) 77 | 78 | Then you can list collections and 'tutorial' will be in the result. 79 | 80 | >>> list_collections() 81 | ['tutorial'] 82 | 83 | You can also get info of the collection. 84 | 85 | >>> collection.description 86 | "desc of collection" 87 | 88 | 89 | This tutorial is a basic intro tutorial, building index won't be covered by this tutorial. 90 | If you want to go further into Milvus with indexes, it's recommended to check our 91 | `index examples `_. 92 | 93 | If you're already known about indexes from ``index examples``, and you want a full lists of params supported 94 | by PyMilvus-ORM, you check out `Index `_ 95 | chapter of the PyMilvus-ORM documentation. 96 | 97 | Further more, if you want to get a thorough view of indexes, check our official website for 98 | `Vector Index `_. 
99 | 100 | Create Partition 101 | ================ 102 | 103 | If you don't create a partition, there will be a default one called "``_default``", all the entities will be 104 | inserted into the "``_default``" partition. You can check it by ``Collection.partitions``: 105 | 106 | >>> collection.partitions 107 | [{"name": "_default", "description": "", "num_entities": 0}] 108 | 109 | You can provide a partition name to create a new partition. 110 | 111 | >>> collection.create_partition("new_partition") 112 | >>> collection.partitions 113 | [{"name": "_default", "description": "", "num_entities": 0}, {"name": "new_partition", "description": "", "num_entities": 0}] 114 | 115 | Insert Entities 116 | =============== 117 | 118 | An entity is a group of fields that corresponds to real world objects. In this tutorial, collection has three fields. 119 | Here is an example of 30 entities structured in list of list. 120 | .. note:: 121 | The field `id` was set as primary and auto_id above, so we shall not input the value for it when inserting. 122 | 123 | >>> import random 124 | >>> nb = 30 125 | >>> years = [i for i in range(nb)] 126 | >>> embeddings = [[random.random() for _ in range(dim)] for _ in range(nb)] 127 | >>> entities = [years, embeddings] 128 | >>> collection.insert(entities) 129 | 130 | .. note:: 131 | If ``partition_name`` isn't provided, these entities will be inserted into the "``_default``" partition, 132 | otherwise, they will be inserted into the specified partition. 133 | 134 | 135 | Search 136 | ====== 137 | 138 | Search Entities by Vector Similarity 139 | ------------------------------------ 140 | 141 | You can get entities by vector similarity. Assuming we have an ``embedding_A`` like below, and we want to get top 2 records whose year is greater than 20 142 | that are most similar to it. 143 | 144 | In the example below, we search the collection on ``embedding`` field. 145 | .. note:: 146 | Before searching, we need to load data into memory. 
147 | 148 | >>> nq = 10 149 | >>> embedding_A = [[random.random() for _ in range(dim)] for _ in range(nq)] 150 | >>> anns_field = "embedding" 151 | >>> search_params = {"metric_type": "L2", "params": {"nprobe": 10}} 152 | >>> limit = 2 153 | >>> expr = "year > 20" 154 | >>> collection.load() 155 | >>> results = collection.search(embedding_A, anns_field, search_params, limit, expr) 156 | 157 | .. note:: 158 | For more about the parameter expr, please refer to: https://github.com/milvus-io/milvus/blob/master/docs/design_docs/query_boolean_expr.md 159 | 160 | .. note:: 161 | If the collection is index-built, users need to specify search parameters, and pass parameter `search_params` like: `collection.search(..., search_params={...})`. 162 | You can refer to `Index params `_ for more details. 163 | 164 | .. note:: 165 | If parameter `partition_names` is specified, milvus executes search request on these partitions instead of the whole collection. 166 | 167 | The returned ``results`` is a 2-D like structure: one row per query vector (10 here), each holding its top 2 hits. For more clarity, we obtain 168 | the hits of the first query vector as below. If you want to know how to deal with search result in a better way, you can refer to 169 | `search result `_ in PyMilvus-ORM doc. 170 | 171 | >>> result = results[0] 172 | >>> embedding_1 = result[0] 173 | >>> embedding_2 = result[1] 174 | 175 | Then how do we get ids, distances and fields? It's as below. 176 | 177 | .. note:: 178 | Because vectors are randomly generated, the retrieved vector id and distance may differ. 179 | 180 | >>> embedding_1.id # id 181 | 1615279498011637002 182 | 183 | >>> embedding_1.distance # distance 184 | 1.0709768533706665 185 | 186 | 187 | Drop a Partition 188 | ---------------- 189 | 190 | You can also drop a partition. 191 | 192 | .. Danger:: 193 | Once you drop a partition, all the data in this partition will be deleted too. 
194 | 195 | >>> collection.drop_partition("new_partition") 196 | 197 | 198 | Drop a Collection 199 | ----------------- 200 | 201 | Finally, you can drop an entire collection. 202 | 203 | .. Danger:: 204 | Once you drop a collection, all the data in this collection will be deleted too. 205 | 206 | >>> collection.drop() 207 | 208 | -------------------------------------------------------------------------------- /examples/collection.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2020 Zilliz. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance 4 | # with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. See the License for the specific language governing permissions and limitations under the License. 
11 | 12 | from pymilvus_orm.collection import Collection 13 | from pymilvus_orm.connections import connections 14 | from pymilvus_orm.schema import FieldSchema, CollectionSchema 15 | from pymilvus_orm.types import DataType 16 | import random 17 | import numpy as np 18 | from sklearn import preprocessing 19 | import string 20 | 21 | default_dim = 128 22 | default_nb = 3000 23 | default_float_vec_field_name = "float_vector" 24 | default_binary_vec_field_name = "binary_vector" 25 | 26 | 27 | all_index_types = [ 28 | "FLAT", 29 | "IVF_FLAT", 30 | "IVF_SQ8", 31 | # "IVF_SQ8_HYBRID", 32 | "IVF_PQ", 33 | "HNSW", 34 | # "NSG", 35 | "ANNOY", 36 | "RHNSW_FLAT", 37 | "RHNSW_PQ", 38 | "RHNSW_SQ", 39 | "BIN_FLAT", 40 | "BIN_IVF_FLAT" 41 | ] 42 | 43 | default_index_params = [ 44 | {"nlist": 128}, 45 | {"nlist": 128}, 46 | {"nlist": 128}, 47 | # {"nlist": 128}, 48 | {"nlist": 128, "m": 16, "nbits": 8}, 49 | {"M": 48, "efConstruction": 500}, 50 | # {"search_length": 50, "out_degree": 40, "candidate_pool_size": 100, "knng": 50}, 51 | {"n_trees": 50}, 52 | {"M": 48, "efConstruction": 500}, 53 | {"M": 48, "efConstruction": 500, "PQM": 64}, 54 | {"M": 48, "efConstruction": 500}, 55 | {"nlist": 128}, 56 | {"nlist": 128} 57 | ] 58 | 59 | 60 | default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} 61 | default_binary_index = {"index_type": "BIN_FLAT", "params": {"nlist": 1024}, "metric_type": "JACCARD"} 62 | 63 | 64 | def gen_default_fields(): 65 | default_fields = [ 66 | FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True), 67 | FieldSchema(name="double", dtype=DataType.DOUBLE), 68 | FieldSchema(name=default_float_vec_field_name, dtype=DataType.FLOAT_VECTOR, dim=default_dim) 69 | ] 70 | default_schema = CollectionSchema(fields=default_fields, description="test collection") 71 | return default_schema 72 | 73 | 74 | def gen_default_fields_with_primary_key_1(): 75 | default_fields = [ 76 | FieldSchema(name="int64", dtype=DataType.INT64, 
is_primary=True), 77 | FieldSchema(name="double", dtype=DataType.DOUBLE), 78 | FieldSchema(name=default_float_vec_field_name, dtype=DataType.FLOAT_VECTOR, dim=default_dim) 79 | ] 80 | default_schema = CollectionSchema(fields=default_fields, description="test collection") 81 | return default_schema 82 | 83 | 84 | def gen_default_fields_with_primary_key_2(): 85 | default_fields = [ 86 | FieldSchema(name="int64", dtype=DataType.INT64), 87 | FieldSchema(name="double", dtype=DataType.DOUBLE), 88 | FieldSchema(name=default_float_vec_field_name, dtype=DataType.FLOAT_VECTOR, dim=default_dim) 89 | ] 90 | default_schema = CollectionSchema(fields=default_fields, description="test collection", primary_field="int64") 91 | return default_schema 92 | 93 | 94 | def gen_binary_schema(): 95 | binary_fields = [ 96 | FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True), 97 | FieldSchema(name="double", dtype=DataType.DOUBLE), 98 | FieldSchema(name=default_binary_vec_field_name, dtype=DataType.BINARY_VECTOR, dim=default_dim) 99 | ] 100 | default_schema = CollectionSchema(fields=binary_fields, description="test collection") 101 | return default_schema 102 | 103 | 104 | def gen_float_vectors(num, dim, is_normal=True): 105 | vectors = [[random.random() for _ in range(dim)] for _ in range(num)] 106 | vectors = preprocessing.normalize(vectors, axis=1, norm='l2') 107 | return vectors.tolist() 108 | 109 | 110 | def gen_float_data(nb, is_normal=False): 111 | vectors = gen_float_vectors(nb, default_dim, is_normal) 112 | entities = [ 113 | [i for i in range(nb)], 114 | [float(i) for i in range(nb)], 115 | vectors 116 | ] 117 | return entities 118 | 119 | 120 | def gen_dataframe(nb, is_normal=False): 121 | import pandas 122 | import numpy 123 | 124 | vectors = gen_float_vectors(nb, default_dim, is_normal) 125 | data = { 126 | "int64": [i for i in range(nb)], 127 | "float": numpy.array([i for i in range(nb)], dtype=numpy.float32), 128 | "float_vector": vectors 129 | } 130 | 131 | return 
pandas.DataFrame(data) 132 | 133 | 134 | def gen_binary_vectors(num, dim): 135 | raw_vectors = [] 136 | binary_vectors = [] 137 | for i in range(num): 138 | raw_vector = [random.randint(0, 1) for i in range(dim)] 139 | raw_vectors.append(raw_vector) 140 | binary_vectors.append(bytes(np.packbits(raw_vector, axis=-1).tolist())) 141 | return raw_vectors, binary_vectors 142 | 143 | 144 | def gen_binary_data(nb): 145 | raw_vectors, binary_vectors = gen_binary_vectors(nb, dim=default_dim) 146 | entities = [ 147 | [i for i in range(nb)], 148 | [float(i) for i in range(nb)], 149 | binary_vectors 150 | ] 151 | return entities 152 | 153 | 154 | def gen_unique_str(str_value=None): 155 | prefix = "".join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) 156 | return "collection_" + prefix if str_value is None else str_value + "_" + prefix 157 | 158 | 159 | def binary_support(): 160 | return ["BIN_FLAT", "BIN_IVF_FLAT"] 161 | 162 | 163 | def gen_simple_index(): 164 | index_params = [] 165 | for i in range(len(all_index_types)): 166 | if all_index_types[i] in binary_support(): 167 | continue 168 | dic = {"index_type": all_index_types[i], "metric_type": "L2"} 169 | dic.update({"params": default_index_params[i]}) 170 | index_params.append(dic) 171 | return index_params 172 | 173 | 174 | connections.connect(alias="default") 175 | 176 | 177 | def test_create_collection(): 178 | collection = Collection(name=gen_unique_str(), schema=gen_default_fields()) 179 | assert collection.is_empty is True 180 | assert collection.num_entities == 0 181 | collection.drop() 182 | 183 | 184 | def test_collection_only_name(): 185 | name = gen_unique_str() 186 | collection_temp = Collection(name=name, schema=gen_default_fields()) 187 | collection = Collection(name=name) 188 | data = gen_float_data(default_nb) 189 | collection.insert(data) 190 | collection.load() 191 | assert collection.is_empty is False 192 | assert collection.num_entities == default_nb 193 | collection.drop() 194 
| 195 | 196 | def test_collection_with_dataframe(): 197 | data = gen_dataframe(default_nb) 198 | collection = Collection.construct_from_dataframe(name=gen_unique_str(), dataframe=data, primary_field="int64") 199 | collection.load() 200 | assert collection.is_empty is False 201 | assert collection.num_entities == default_nb 202 | collection.drop() 203 | 204 | 205 | def test_create_index_float_vector(): 206 | data = gen_float_data(default_nb) 207 | collection = Collection(name=gen_unique_str(), data=data, schema=gen_default_fields()) 208 | for index_param in gen_simple_index(): 209 | collection.create_index(field_name=default_float_vec_field_name, index_params=index_param) 210 | assert len(collection.indexes) != 0 211 | collection.drop() 212 | 213 | 214 | def test_create_index_binary_vector(): 215 | collection = Collection(name=gen_unique_str(), schema=gen_binary_schema()) 216 | data = gen_binary_data(default_nb) 217 | collection.insert(data) 218 | collection.create_index(field_name=default_binary_vec_field_name, index_params=default_binary_index) 219 | assert len(collection.indexes) != 0 220 | collection.drop() 221 | 222 | 223 | def test_specify_primary_key(): 224 | data = gen_float_data(default_nb) 225 | collection = Collection(name=gen_unique_str(), data=data, schema=gen_default_fields_with_primary_key_1()) 226 | for index_param in gen_simple_index(): 227 | collection.create_index(field_name=default_float_vec_field_name, index_params=index_param) 228 | assert len(collection.indexes) != 0 229 | collection.drop() 230 | 231 | collection2 = Collection(name=gen_unique_str(), data=data, schema=gen_default_fields_with_primary_key_2()) 232 | for index_param in gen_simple_index(): 233 | collection2.create_index(field_name=default_float_vec_field_name, index_params=index_param) 234 | assert len(collection2.indexes) != 0 235 | collection2.drop() 236 | 237 | 238 | print("test collection") 239 | test_create_collection() 240 | print("test collection only name") 241 | 
test_collection_only_name() 242 | print("test collection with dataframe") 243 | test_collection_with_dataframe() 244 | print("test collection index float vector") 245 | test_create_index_float_vector() 246 | print("test collection binary vector") 247 | test_create_index_binary_vector() 248 | print("test collection specify primary key") 249 | test_specify_primary_key() 250 | print("test end") 251 | -------------------------------------------------------------------------------- /examples/connections.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2020 Zilliz. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance 4 | # with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. See the License for the specific language governing permissions and limitations under the License. 
11 | 12 | import logging 13 | 14 | try: 15 | from pymilvus_orm import connections 16 | except ImportError: 17 | from os.path import dirname, abspath 18 | import sys 19 | 20 | sys.path.append(dirname(dirname(abspath(__file__)))) 21 | 22 | from pymilvus_orm import connections 23 | 24 | LOGGER = logging.getLogger(__name__) 25 | 26 | print("start connection") 27 | conn = connections.connect() 28 | LOGGER.info(conn.list_collections()) 29 | print("end connection") 30 | 31 | -------------------------------------------------------------------------------- /examples/example.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from pymilvus_orm import * 4 | from pymilvus_orm.schema import * 5 | from pymilvus_orm.types import DataType 6 | import random 7 | 8 | # configure milvus hostname and port 9 | print(f"\nCreate connection...") 10 | connections.connect() 11 | 12 | # List all collection names 13 | print(f"\nList collections...") 14 | print(list_collections()) 15 | 16 | # Create a collection named 'demo_film_tutorial' 17 | print(f"\nCreate collection...") 18 | field1 = FieldSchema(name="release_year", dtype=DataType.INT64, description="int64", is_primary=True) 19 | field2 = FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, description="float vector", dim=8, is_primary=False) 20 | schema = CollectionSchema(fields=[field1, field2], description="collection description") 21 | collection = Collection(name='demo_film_tutorial', data=None, schema=schema) 22 | 23 | # List all collection names 24 | print(f"\nList collections...") 25 | print(list_collections()) 26 | 27 | print(f"\nGet collection name, schema and description...") 28 | print(collection.name) 29 | print(collection.schema) 30 | print(collection.description) 31 | 32 | # List all partition names in demo collection 33 | print(f"\nList partitions...") 34 | print(collection.partitions) 35 | 36 | # Create a partition named 'American' 37 | print(f"\nCreate 
partition...") 38 | partition_name = "American" 39 | partition = Partition(collection, partition_name) 40 | print(collection.partition(partition_name='American')) 41 | 42 | # List all partition names in demo collection 43 | print(f"\nList partitions...") 44 | print(collection.partitions) 45 | 46 | # Construct some entities 47 | The_Lord_of_the_Rings = [ 48 | { 49 | "id": 1, 50 | "title": "The_Fellowship_of_the_Ring", 51 | "release_year": 2001, 52 | "embedding": [random.random() for _ in range(8)] 53 | }, 54 | { 55 | "id": 2, 56 | "title": "The_Two_Towers", 57 | "release_year": 2002, 58 | "embedding": [random.random() for _ in range(8)] 59 | }, 60 | { 61 | "id": 3, 62 | "title": "The_Return_of_the_King", 63 | "release_year": 2003, 64 | "embedding": [random.random() for _ in range(8)] 65 | } 66 | ] 67 | 68 | # Transform 69 | ids = [k.get("id") for k in The_Lord_of_the_Rings] 70 | release_years = [k.get("release_year") for k in The_Lord_of_the_Rings] 71 | embeddings = [k.get("embedding") for k in The_Lord_of_the_Rings] 72 | 73 | data = [release_years, embeddings] 74 | 75 | # Insert into milvus 76 | print(f"\nInsert data...") 77 | partition.insert(data) 78 | 79 | # Count entities 80 | print(f"\nCount entities...") 81 | print(collection.num_entities) 82 | 83 | # TODO(wxyu): search 84 | 85 | # Drop a partition 86 | print(f"\nDrop partition...") 87 | partition.drop() 88 | 89 | # List all partition names in demo collection 90 | print(f"\nList partitions...") 91 | print(collection.partitions) 92 | 93 | # List all collection names 94 | print(f"\nList collections...") 95 | print(list_collections()) 96 | 97 | # Drop a collection 98 | print(f"\nDrop collection...") 99 | collection.drop() 100 | 101 | # List all collection names 102 | print(f"\nList collections...") 103 | print(list_collections()) 104 | 105 | # Calculate distance between vectors 106 | vectors_l = [[random.random() for _ in range(64)] for _ in range(3)] 107 | vectors_r = [[random.random() for _ in range(64)] for _ 
in range(5)] 108 | op_l = {"float_vectors": vectors_l} 109 | op_r = {"float_vectors": vectors_r} 110 | params = {"metric": "L2", "sqrt": True} 111 | results = utility.calc_distance(vectors_left=op_l, vectors_right=op_r, params=params) 112 | for i in range(len(results)): 113 | print(results[i]) 114 | -------------------------------------------------------------------------------- /examples/example_index.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from pymilvus_orm import * 4 | from pymilvus_orm.schema import * 5 | from pymilvus_orm.types import DataType 6 | import random 7 | 8 | # configure milvus hostname and port 9 | print(f"\nCreate connection...") 10 | connections.connect() 11 | 12 | # List all collection names 13 | print(f"\nList collections...") 14 | print(list_collections()) 15 | 16 | # Create a collection named 'demo_film_tutorial' 17 | print(f"\nCreate collection...") 18 | field1 = FieldSchema(name="release_year", dtype=DataType.INT64, description="int64", is_primary=True) 19 | field2 = FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, description="float vector", dim=128, is_primary=False) 20 | schema = CollectionSchema(fields=[field1, field2], description="collection description") 21 | collection = Collection(name='demo_film_tutorial', data=None, schema=schema) 22 | 23 | print(f"\nCreate index...") 24 | index_params = {"index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 100}} 25 | index = Index(collection, "embedding", index_params) 26 | print(index.params) 27 | 28 | print([index.params for index in collection.indexes]) 29 | 30 | print(f"\nDrop index...") 31 | index.drop() 32 | 33 | print([index.params for index in collection.indexes]) 34 | collection.drop() 35 | -------------------------------------------------------------------------------- /examples/hello_milvus.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 
2019-2020 Zilliz. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance 4 | # with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. See the License for the specific language governing permissions and limitations under the License. 11 | 12 | 13 | import random 14 | 15 | from pymilvus_orm import ( 16 | connections, FieldSchema, CollectionSchema, DataType, 17 | Collection, list_collections, 18 | ) 19 | 20 | 21 | def hello_milvus(): 22 | # create connection 23 | connections.connect() 24 | 25 | print(f"\nList collections...") 26 | print(list_collections()) 27 | 28 | # create collection 29 | dim = 128 30 | default_fields = [ 31 | FieldSchema(name="count", dtype=DataType.INT64, is_primary=True), 32 | FieldSchema(name="random_value", dtype=DataType.DOUBLE), 33 | FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim) 34 | ] 35 | default_schema = CollectionSchema(fields=default_fields, description="test collection") 36 | 37 | print(f"\nCreate collection...") 38 | collection = Collection(name="hello_milvus", schema=default_schema) 39 | 40 | print(f"\nList collections...") 41 | print(list_collections()) 42 | 43 | # insert data 44 | nb = 3000 45 | vectors = [[random.random() for _ in range(dim)] for _ in range(nb)] 46 | collection.insert( 47 | [ 48 | [i for i in range(nb)], 49 | [float(random.randrange(-20, -10)) for _ in range(nb)], 50 | vectors 51 | ] 52 | ) 53 | 54 | print(f"\nGet collection entities...") 55 | print(collection.num_entities) 56 | 57 | # create index and load table 58 | default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} 59 | 
print(f"\nCreate index...") 60 | collection.create_index(field_name="float_vector", index_params=default_index) 61 | print(f"\nload collection...") 62 | collection.load() 63 | 64 | # load and search 65 | topK = 5 66 | search_params = {"metric_type": "L2", "params": {"nprobe": 10}} 67 | import time 68 | start_time = time.time() 69 | print(f"\nSearch...") 70 | # define output_fields of search result 71 | res = collection.search( 72 | vectors[-2:], "float_vector", search_params, topK, 73 | "count > 100", output_fields=["count", "random_value"] 74 | ) 75 | end_time = time.time() 76 | 77 | # show result 78 | for hits in res: 79 | for hit in hits: 80 | # Get value of the random value field for search result 81 | print(hit, hit.entity.get("random_value")) 82 | print("search latency = %.4fs" % (end_time - start_time)) 83 | 84 | # drop collection 85 | collection.drop() 86 | 87 | 88 | hello_milvus() 89 | -------------------------------------------------------------------------------- /examples/partition.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2020 Zilliz. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance 4 | # with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. See the License for the specific language governing permissions and limitations under the License. 
from pymilvus_orm.collection import Collection
from pymilvus_orm.connections import connections
from pymilvus_orm.partition import Partition
from pymilvus_orm.schema import FieldSchema, CollectionSchema
from pymilvus_orm.types import DataType
from pymilvus_orm.utility import list_collections, has_partition
import random
from sklearn import preprocessing
import string

# Sizes and names shared by every helper in this example.
default_dim = 128
default_nb = 3000
default_float_vec_field_name = "float_vector"
default_segment_row_limit = 1000


# `all_index_types` and `default_index_params` are parallel lists: the entry at
# position i of one belongs to the entry at position i of the other, so the
# commented-out entries must stay aligned in both.
all_index_types = [
    "FLAT",
    "IVF_FLAT",
    "IVF_SQ8",
    # "IVF_SQ8_HYBRID",
    "IVF_PQ",
    "HNSW",
    # "NSG",
    "ANNOY",
    "RHNSW_FLAT",
    "RHNSW_PQ",
    "RHNSW_SQ",
    "BIN_FLAT",
    "BIN_IVF_FLAT"
]

default_index_params = [
    {"nlist": 128},
    {"nlist": 128},
    {"nlist": 128},
    # {"nlist": 128},
    {"nlist": 128, "m": 16, "nbits": 8},
    {"M": 48, "efConstruction": 500},
    # {"search_length": 50, "out_degree": 40, "candidate_pool_size": 100, "knng": 50},
    {"n_trees": 50},
    {"M": 48, "efConstruction": 500},
    {"M": 48, "efConstruction": 500, "PQM": 64},
    {"M": 48, "efConstruction": 500},
    {"nlist": 128},
    {"nlist": 128}
]


default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"}


def gen_default_fields(auto_id=True):
    """Build the three-field schema (int64 primary key, float, float vector) used by the example.

    NOTE: ``auto_id`` is accepted but never consulted; the schema is always
    created with ``auto_id=False``.
    """
    primary = FieldSchema(name="count", dtype=DataType.INT64, is_primary=True)
    scalar = FieldSchema(name="float", dtype=DataType.FLOAT)
    vector = FieldSchema(name=default_float_vec_field_name, dtype=DataType.FLOAT_VECTOR, dim=default_dim)
    return CollectionSchema(fields=[primary, scalar, vector], description="test collection",
                            segment_row_limit=default_segment_row_limit, auto_id=False)


def gen_vectors(num, dim, is_normal=True):
    """Return ``num`` random vectors of length ``dim`` as nested lists.

    NOTE: ``is_normal`` is accepted but never consulted; every row is always
    L2-normalised.
    """
    raw = [[random.random() for _ in range(dim)] for _ in range(num)]
    normalised = preprocessing.normalize(raw, axis=1, norm='l2')
    return normalised.tolist()


def gen_data(nb, is_normal=False):
    """Return column-wise insert data: ids ``0..nb-1``, the same ids as floats, and ``nb`` vectors."""
    vectors = gen_vectors(nb, default_dim, is_normal)
    ids = list(range(nb))
    floats = [float(i) for i in range(nb)]
    return [ids, floats, vectors]


def gen_unique_str(str_value=None):
    """Return ``<str_value>_<8 random alphanumerics>``; the stem is ``collection`` when omitted."""
    alphabet = string.ascii_letters + string.digits
    suffix = "".join(random.choice(alphabet) for _ in range(8))
    if str_value is None:
        return "collection_" + suffix
    return str_value + "_" + suffix


def binary_support():
    """Return the index types that apply to binary vectors."""
    return ["BIN_FLAT", "BIN_IVF_FLAT"]


def gen_simple_index():
    """Return one L2 index definition per non-binary entry of ``all_index_types``."""
    return [
        {"index_type": index_type, "metric_type": "L2", "params": params}
        for index_type, params in zip(all_index_types, default_index_params)
        if index_type not in binary_support()
    ]


def test_partition():
    """Walk through the partition life cycle: create, insert, load, search, release, drop."""
    connections.connect(alias="default")
    print("create collection")
    coll = Collection(name=gen_unique_str(), schema=gen_default_fields())
    print("create partition")
    part = Partition(coll, name=gen_unique_str())
    print(list_collections())
    assert has_partition(coll.name, part.name) == True

    rows = gen_data(default_nb)
    print("insert data to partition")
    part.insert(rows)
    assert part.is_empty is False
    assert part.num_entities == default_nb

    print("load partition")
    part.load()
    top_k = 5
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    print("search partition")
    # Query with the last two inserted vectors, restricted to ids above 100.
    query_vectors = rows[2][-2:]
    results = part.search(query_vectors, "float_vector", search_params, top_k, "count > 100")
    for hits in results:
        for hit in hits:
            print(hit)

    print("release partition")
    part.release()
    print("drop partition")
    part.drop()
    print("drop collection")
    coll.drop()


test_partition()
package is not installed 52 | pass 53 | -------------------------------------------------------------------------------- /pymilvus_orm/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2021 Zilliz. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 4 | # in compliance with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. See the License for the specific language governing permissions and limitations under 11 | # the License. 12 | 13 | VECTOR_COMMON_TYPE_PARAMS = ("dim",) 14 | 15 | 16 | 17 | CALC_DIST_IDS = "ids" 18 | CALC_DIST_FLOAT_VEC = "float_vectors" 19 | CALC_DIST_BIN_VEC = "bin_vectors" 20 | CALC_DIST_METRIC = "metric" 21 | CALC_DIST_L2 = "L2" 22 | CALC_DIST_IP = "IP" 23 | CALC_DIST_HAMMING = "HAMMING" 24 | CALC_DIST_TANIMOTO = "TANIMOTO" 25 | CALC_DIST_SQRT = "sqrt" 26 | CALC_DIST_DIM = "dim" 27 | -------------------------------------------------------------------------------- /pymilvus_orm/default_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2021 Zilliz. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 4 | # in compliance with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. 
class DefaultConfig:
    """Named default values for connection settings.

    NOTE(review): how each value is consumed is not visible in this file;
    the per-attribute notes below are inferred from the names only.
    """

    # presumably the connection alias used when none is given; confirm in connections.py
    DEFAULT_USING = "default"
    # presumably the server host and port; note the port is kept as a string
    DEFAULT_HOST = "localhost"
    DEFAULT_PORT = "19530"
    # presumably the transport handler and connection-pool kind; confirm against the client
    DEFAULT_HANDLER = "GRPC"
    DEFAULT_POOL = "SingletonThread"
class ParamError(ValueError):
    """An interface was called with an illegal parameter."""


class ResultError(ValueError):
    """An interface produced an illegal result."""


class ConnectError(ValueError):
    """Connecting to the server failed."""


class NotConnectError(ConnectError):
    """Disconnect error."""


class RepeatingConnectError(ConnectError):
    """A connection was attempted repeatedly."""


class ConnectionPoolError(ConnectError):
    """Waiting timeout error."""


class FutureTimeoutError(TimeoutError):
    """A future timed out."""


class DeprecatedError(AttributeError):
    """Deprecated."""


class VersionError(AttributeError):
    """Versions do not match."""


class MilvusException(Exception):
    """Base class for ORM errors that carry a numeric code next to the message."""

    def __init__(self, code, message):
        # Only the message is handed to Exception, so ``args`` is ``(message,)``.
        super().__init__(message)
        self._code = code
        self._message = message

    @property
    def code(self):
        """The code given at construction."""
        return self._code

    @property
    def message(self):
        """The message given at construction."""
        return self._message

    def __str__(self):
        return f"<{type(self).__name__}: (code={self._code}, message={self._message})>"


class CollectionExistException(MilvusException):
    """MilvusException subtype named for an already-existing collection."""


class CollectionNotExistException(MilvusException):
    """MilvusException subtype named for a missing collection."""


class InvalidDimensionException(MilvusException):
    """MilvusException subtype named for an invalid dimension."""


class InvalidMetricTypeException(MilvusException):
    """MilvusException subtype named for an invalid metric type."""


class IllegalCollectionNameException(MilvusException):
    """MilvusException subtype named for an illegal collection name."""


class DescribeCollectionException(MilvusException):
    """MilvusException subtype named for a describe-collection failure."""


class PartitionNotExistException(MilvusException):
    """MilvusException subtype named for a missing partition."""


class PartitionAlreadyExistException(MilvusException):
    """MilvusException subtype named for an already-existing partition."""


class InvalidArgumentException(MilvusException):
    """MilvusException subtype named for an invalid argument."""


class IndexConflictException(MilvusException):
    """MilvusException subtype named for a conflicting index."""


class IndexNotExistException(MilvusException):
    """MilvusException subtype named for a missing index."""


class CannotInferSchemaException(MilvusException):
    """MilvusException subtype named for a schema that cannot be inferred."""


class SchemaNotReadyException(MilvusException):
    """MilvusException subtype named for a schema that is not ready."""


class DataTypeNotMatchException(MilvusException):
    """MilvusException subtype named for a data type mismatch."""


class DataTypeNotSupportException(MilvusException):
    """MilvusException subtype named for an unsupported data type."""


class DataNotMatchException(MilvusException):
    """MilvusException subtype named for data that does not match."""


class ConnectionNotExistException(MilvusException):
    """MilvusException subtype named for a missing connection."""


class ConnectionConfigException(MilvusException):
    """MilvusException subtype named for a bad connection configuration."""


class PrimaryKeyException(MilvusException):
    """MilvusException subtype named for primary-key problems."""


class FieldsTypeException(MilvusException):
    """MilvusException subtype named for a wrong type of the fields container."""


class FieldTypeException(MilvusException):
    """MilvusException subtype named for a wrong type of a single field."""


class AutoIDException(MilvusException):
    """MilvusException subtype named for auto_id problems."""


class ExceptionsMessage:
    """Message texts used when raising the exceptions above.

    Entries containing ``%r`` are templates meant to be filled with the ``%`` operator.
    """

    # -- connection --
    NoHostPort = "connection configuration must contain 'host' and 'port'."
    HostType = "Type of 'host' must be str."
    PortType = "Type of 'port' must be str or int."
    ConnDiffConf = "Alias of %r already creating connections, but the configure is not the same as passed in."
    AliasType = "Alias should be string, but %r is given."
    ConnLackConf = "You need to pass in the configuration of the connection named %r ."
    ConnectFirst = "should create connect first."
    # -- schema --
    NoSchema = "Should be passed into the schema."
    EmptySchema = "The field of the schema cannot be empty."
    SchemaType = "Schema type must be schema.CollectionSchema."
    SchemaInconsistent = "The collection already exist, but the schema is not the same as the schema passed in."
    # -- auto id --
    AutoIDWithData = "Auto_id is True, primary field should not have data."
    AutoIDType = "Param auto_id must be bool type."
    AutoIDInconsistent = "The auto_id of the collection is inconsistent with the auto_id of the primary key field."
    AutoIDOnlyOnPK = "The auto_id can only be specified on the primary key field"
    # -- data and primary key --
    FieldsNumInconsistent = "The data fields number is not match with schema."
    NoVector = "No vector field is found."
    NoneDataFrame = "Dataframe can not be None."
    DataFrameType = "Data type must be pandas.DataFrame."
    NoPrimaryKey = "Schema must have a primary key field."
    PrimaryKeyNotExist = "Primary field must in dataframe."
    PrimaryKeyOnlyOne = "Primary key field can only be one."
    PrimaryKeyType = "Primary key type must be DataType.INT64."
    IsPrimaryType = "Param is_primary must be bool type."
    DataTypeInconsistent = "The data in the same column must be of the same type."
    DataTypeNotSupport = "Data type is not support."
    DataLengthsInconsistent = "Arrays must all be same length."
    DataFrameInvalid = "Cannot infer schema from empty dataframe."
    NdArrayNotSupport = "Data type not support numpy.ndarray."
    TypeOfDataAndSchemaInconsistent = "The types of schema and data do not match."
    # -- partition, index, collection, fields --
    PartitionAlreadyExist = "Partition already exist."
    PartitionNotExist = "Partition not exist."
    IndexNotExist = "Index doesn't exist."
    CollectionType = "The type of collection must be pymilvus_orm.Collection."
    FieldsType = "The fields of schema must be type list."
    FieldType = "The field of schema type must be FieldSchema."
    FieldDtype = "Field dtype must be of DataType"
    ExprType = "The type of expr must be string ,but %r is given."
# TODO(dragondriver): how could we inherit the docstring elegantly?
class BaseFuture:
    """Thin wrapper around a client future that post-processes its result.

    Subclasses override :meth:`on_response` to convert the raw response into
    an ORM-level object.
    """

    def __init__(self, future):
        """
        :param future: The underlying future; it must provide ``result``,
                       ``cancel`` and ``done``.
        """
        self._f = future

    def result(self, **kwargs):
        """
        Return the result from future object.

        It's a synchronous interface. It will wait executing until
        server respond or timeout occur(if specified).

        :param kwargs: Forwarded unchanged to the wrapped future's ``result``
                       (for example ``timeout``).
        :return: The wrapped future's result after :meth:`on_response`.
        """
        # The keyword arguments were previously dropped here, so a
        # caller-supplied timeout never reached the wrapped future.
        return self.on_response(self._f.result(**kwargs))

    def on_response(self, res):
        """Hook applied to the raw result; the base implementation returns it unchanged."""
        return res

    def cancel(self):
        """
        Cancel the request.

        :return: Whatever the wrapped future's ``cancel`` returns.
        """
        return self._f.cancel()

    def done(self):
        """
        Delegate to the wrapped future's ``done``.

        NOTE(review): the original description read "Wait for request done";
        whether this call blocks depends on the wrapped future, so confirm.
        """
        return self._f.done()


class SearchFuture(BaseFuture):
    """Future whose result is wrapped in a ``SearchResult``."""

    def on_response(self, res):
        return SearchResult(res)


class MutationFuture(BaseFuture):
    """Future whose result is wrapped in a ``MutationResult``."""

    def on_response(self, res):
        return MutationResult(res)
class Index:
    """Handle for an index on one field of a collection."""

    def __init__(self, collection, field_name, index_params, **kwargs):
        """
        Creates index on a specified field according to the index parameters.

        Unless ``construct_only=True`` is passed, the existing index is looked
        up through the collection's connection and ``create_index`` is issued
        when there is none or when its params or field name differ.

        :param collection: The collection in which the index is created
        :type  collection: Collection

        :param field_name: The name of the field to create an index for.
        :type  field_name: str

        :param index_params: Indexing parameters.
        :type  index_params: dict

        :param kwargs: ``construct_only`` (bool, default False) skips all server
                       calls and only builds the local object.

        :raises CollectionNotExistException: If ``collection`` is not a Collection.
        :raises IndexConflictException:
            If an index of the same name but of different param already exists.
            NOTE(review): not raised in this class; presumably comes from the
            connection layer, so confirm.

        :example:
            >>> from pymilvus_orm import *
            >>> from pymilvus_orm.schema import *
            >>> from pymilvus_orm.types import DataType
            >>> connections.connect()

            >>> field1 = FieldSchema("int64", DataType.INT64, is_primary=True)
            >>> field2 = FieldSchema("fvec", DataType.FLOAT_VECTOR, is_primary=False, dim=128)
            >>> schema = CollectionSchema(fields=[field1, field2], description="collection description")
            >>> collection = Collection(name='test_collection', schema=schema)
            >>> # insert some data
            >>> index_params = {"index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 128}}
            >>> index = Index(collection, "fvec", index_params)
            >>> print(index.params)
            {'index_type': 'IVF_FLAT', 'metric_type': 'L2', 'params': {'nlist': 128}}
            >>> print(index.collection_name)
            test_collection
            >>> print(index.field_name)
            fvec
            >>> index.drop()
        """
        # Imported here rather than at module level (likely to avoid an import
        # cycle with the collection module).
        from .collection import Collection
        if not isinstance(collection, Collection):
            raise CollectionNotExistException(0, ExceptionsMessage.CollectionType)
        self._collection = collection
        self._field_name = field_name
        self._index_params = index_params
        self._kwargs = kwargs
        if self._kwargs.pop("construct_only", False):
            return

        conn = self._get_connection()
        index = conn.describe_index(self._collection.name)
        if index is not None:
            # The described index carries its field name alongside the params;
            # take it out so the rest can be compared with ``index_params``.
            tmp_field_name = index.pop("field_name", None)
        if index is None or index != index_params or tmp_field_name != field_name:
            conn.create_index(self._collection.name, self._field_name, self._index_params)

    def _get_connection(self):
        """Return the connection used by the owning collection."""
        return self._collection._get_connection()

    # read-only
    @property
    def params(self) -> dict:
        """
        Returns the index parameters.

        :return dict:
            A deep copy of the index parameters, so callers cannot mutate
            the stored value.
        """
        return copy.deepcopy(self._index_params)

    # read-only
    @property
    def collection_name(self) -> str:
        """
        Returns the corresponding collection name.

        :return str:
            The corresponding collection name
        """
        return self._collection.name

    @property
    def field_name(self) -> str:
        """
        Returns the corresponding field name.

        :return str:
            The corresponding field name.
        """
        return self._field_name

    def __eq__(self, other) -> bool:
        """
        Two indexes are equal when their :meth:`to_dict` forms are equal.

        Anything that is not an Index yields ``NotImplemented`` so Python can
        fall back to its default comparison instead of raising AttributeError.
        """
        if not isinstance(other, Index):
            return NotImplemented
        return self.to_dict() == other.to_dict()

    def to_dict(self):
        """
        Put collection name, field name and index params into dict.
        """
        return {
            "collection": self._collection._name,
            "field": self._field_name,
            "index_param": self.params
        }

    def drop(self, timeout=None, **kwargs):
        """
        Drop an index and its corresponding index files.

        :param timeout: An optional duration of time in seconds to allow for the RPC. When timeout
                        is set to None, client waits until server response or error occur
        :type  timeout: float

        :param kwargs: Forwarded to the connection's ``drop_index``.

        :raises IndexNotExistException: If the specified index does not exist.
        """
        conn = self._get_connection()
        if conn.describe_index(self._collection.name) is None:
            raise IndexNotExistException(0, ExceptionsMessage.IndexNotExist)
        conn.drop_index(self._collection.name, self.field_name, timeout=timeout, **kwargs)
class MutationResult:
    """Read-only view over the counters and primary keys of a mutation response."""

    def __init__(self, mr):
        """
        :param mr: The raw mutation response, or None to keep the empty defaults.
        """
        self._mr = mr
        # Defaults used when ``mr`` is None.
        self._primary_keys = []
        self._insert_cnt = 0
        self._delete_cnt = 0
        self._upsert_cnt = 0
        self._timestamp = 0
        self._pack(mr)

    @property
    def primary_keys(self):
        """Primary keys taken from the response (empty list by default)."""
        return self._primary_keys

    @property
    def insert_count(self):
        """Insert count taken from the response (0 by default)."""
        return self._insert_cnt

    @property
    def delete_count(self):
        """Delete count taken from the response (0 by default)."""
        return self._delete_cnt

    @property
    def upsert_count(self):
        """Upsert count taken from the response (0 by default)."""
        return self._upsert_cnt

    @property
    def timestamp(self):
        """Timestamp taken from the response (0 by default)."""
        return self._timestamp

    def __str__(self):
        """
        Return the information of mutation result

        :return str:
            The information of mutation result.
        """
        template = "(insert count: {}, delete count: {}, upsert count: {}, timestamp: {})"
        return template.format(self._insert_cnt, self._delete_cnt,
                               self._upsert_cnt, self._timestamp)

    __repr__ = __str__

    # TODO
    # def error_code(self):
    #     pass
    #
    # def error_reason(self):
    #     pass

    def _pack(self, mr):
        """Copy the fields of ``mr`` onto this object; a None response is ignored."""
        if mr is not None:
            self._primary_keys = mr.primary_keys
            self._insert_cnt = mr.insert_count
            self._delete_cnt = mr.delete_count
            self._upsert_cnt = mr.upsert_count
            self._timestamp = mr.timestamp
class Prepare:
    """Converts user-supplied insert data into per-field entity dicts."""

    @classmethod
    def prepare_insert_data(cls, data, schema):
        """
        Pair each schema field with its column of values.

        A field that is both primary and ``auto_id`` receives no values and is
        left out of the result.

        :param data: Column-wise data, either a list/tuple with one entry per
                     inserted field (in schema order) or a pandas.DataFrame
                     with one column per field name.
        :param schema: Object exposing ``fields``, ``auto_id`` and
                       ``primary_field``; each field exposes ``name``,
                       ``dtype``, ``is_primary`` and ``auto_id``.

        :return list[dict]: One ``{"name", "type", "values"}`` dict per
                            inserted field.

        :raises DataTypeNotSupportException: If ``data`` is not a list, tuple
            or DataFrame, or a list column is a numpy.ndarray.
        :raises DataNotMatchException: If the number of columns does not fit
            the schema, an auto-id primary column contains values, or the
            columns differ in length.
        """
        if not isinstance(data, (list, tuple, pandas.DataFrame)):
            raise DataTypeNotSupportException(0, ExceptionsMessage.DataTypeNotSupport)

        fields = schema.fields
        entities = []
        raw_lengths = []
        if isinstance(data, pandas.DataFrame):
            if schema.auto_id:
                if schema.primary_field.name in data:
                    # The primary column may be present, but only as all-null.
                    if len(fields) != len(data.columns):
                        raise DataNotMatchException(0, ExceptionsMessage.FieldsNumInconsistent)
                    if not data[schema.primary_field.name].isnull().all():
                        raise DataNotMatchException(0, ExceptionsMessage.AutoIDWithData)
                else:
                    if len(fields) != len(data.columns) + 1:
                        raise DataNotMatchException(0, ExceptionsMessage.FieldsNumInconsistent)
            else:
                if len(fields) != len(data.columns):
                    raise DataNotMatchException(0, ExceptionsMessage.FieldsNumInconsistent)
            for field in fields:
                if field.is_primary and field.auto_id:
                    continue
                column = data[field.name]
                entities.append({"name": field.name,
                                 "type": field.dtype,
                                 "values": list(column)})
                raw_lengths.append(len(column))
        else:
            if schema.auto_id:
                if len(data) + 1 != len(fields):
                    raise DataNotMatchException(0, ExceptionsMessage.FieldsNumInconsistent)

            # Build a filtered list instead of popping from a deep copy while
            # iterating over it: this cannot skip elements and leaves the
            # schema's own field list untouched.
            insert_fields = [field for field in fields
                             if not (field.is_primary and field.auto_id)]
            # Too few columns used to surface as a bare IndexError below.
            if len(data) < len(insert_fields):
                raise DataNotMatchException(0, ExceptionsMessage.FieldsNumInconsistent)

            for field, values in zip(insert_fields, data):
                if isinstance(values, numpy.ndarray):
                    raise DataTypeNotSupportException(0, ExceptionsMessage.NdArrayNotSupport)

                entities.append({
                    "name": field.name,
                    "type": field.dtype,
                    "values": values})
                raw_lengths.append(len(values))

        # Every column must hold the same number of rows.
        if len(set(raw_lengths)) > 1:
            raise DataNotMatchException(0, ExceptionsMessage.DataLengthsInconsistent)

        return entities
49 | # for example like below: 50 | # class Hits(_IterableWrapper): 51 | # __init__.__doc__ = """doc of __init__""" 52 | # __iter__.__doc__ = """doc of __iter__""" 53 | # __next__.__doc__ = """doc of __next__""" 54 | # __getitem__.__doc__ = """doc of __getitem__""" 55 | # __len__.__doc__ = """doc of __len__""" 56 | # 57 | # def on_result(self, res): 58 | # return Hit(res) 59 | 60 | 61 | class DocstringMeta(type): 62 | def __new__(cls, name, bases, attrs): 63 | doc_meta = attrs.pop("docstring", None) 64 | new_cls = super(DocstringMeta, cls).__new__(cls, name, bases, attrs) 65 | if doc_meta: 66 | for member_name, member in attrs.items(): 67 | if member_name in doc_meta: 68 | member.__doc__ = doc_meta[member_name] 69 | return new_cls 70 | 71 | 72 | # for example: 73 | # class Hits(_IterableWrapper, metaclass=DocstringMeta): 74 | # docstring = { 75 | # "__init__": """doc of __init__""", 76 | # "__iter__": """doc of __iter__""", 77 | # "__next__": """doc of __next__""", 78 | # "__getitem__": """doc of __getitem__""", 79 | # "__len__": """doc of __len__""", 80 | # } 81 | # 82 | # def on_result(self, res): 83 | # return Hit(res) 84 | 85 | 86 | class Hit: 87 | def __init__(self, hit): 88 | """ 89 | Construct a Hit object from response. A hit represent a record corresponding to the query. 90 | """ 91 | self._hit = hit 92 | 93 | @property 94 | def id(self) -> int: 95 | """ 96 | Return the id of the hit record. 97 | 98 | :return int: 99 | The id of the hit record. 100 | """ 101 | return self._hit.id 102 | 103 | @property 104 | def entity(self) -> Entity: 105 | """ 106 | Return the Entity of the hit record. 107 | 108 | :return pymilvus Entity object: 109 | The entity content of the hit record. 110 | """ 111 | return self._hit.entity 112 | 113 | @property 114 | def distance(self) -> float: 115 | """ 116 | Return the distance between the hit record and the query. 117 | 118 | :return float: 119 | The distance of the hit record. 
120 | """ 121 | return self._hit.distance 122 | 123 | @property 124 | def score(self) -> float: 125 | """ 126 | Return the calculated score of the hit record, now the score is equal to distance. 127 | 128 | :return float: 129 | The score of the hit record. 130 | """ 131 | return self._hit.score 132 | 133 | def __str__(self): 134 | """ 135 | Return the information of hit record. 136 | 137 | :return str: 138 | The information of hit record. 139 | """ 140 | return "(distance: {}, id: {})".format(self._hit.distance, self._hit.id) 141 | 142 | __repr__ = __str__ 143 | 144 | 145 | class Hits: 146 | def __init__(self, hits): 147 | """ 148 | Construct a Hits object from response. 149 | """ 150 | self._hits = hits 151 | 152 | def __iter__(self): 153 | """ 154 | Iterate the Hits object. Every iteration returns a Hit which represent a record 155 | corresponding to the query. 156 | """ 157 | return self 158 | 159 | def __next__(self): 160 | """ 161 | Iterate the Hits object. Every iteration returns a Hit which represent a record 162 | corresponding to the query. 163 | """ 164 | return Hit(self._hits.__next__()) 165 | 166 | def __getitem__(self, item): 167 | """ 168 | Return the kth Hit corresponding to the query. 169 | 170 | :return Hit: 171 | The kth specified by item Hit corresponding to the query. 172 | """ 173 | s = self._hits.__getitem__(item) 174 | if isinstance(item, slice): 175 | _start = item.start or 0 176 | i_len = self._hits.__len__() 177 | _end = min(item.stop, i_len) if item.stop else i_len 178 | 179 | elements = [] 180 | for i in range(_start, _end): 181 | elements.append(self.on_result(s[i])) 182 | return elements 183 | return s 184 | 185 | def __len__(self) -> int: 186 | """ 187 | Return the number of hit record. 188 | 189 | :return int: 190 | The number of hit record. 
191 | """ 192 | return self._hits.__len__() 193 | 194 | def on_result(self, res): 195 | return Hit(res) 196 | 197 | @property 198 | def ids(self) -> list: 199 | """ 200 | Return the ids of all hit record. 201 | 202 | :return list[int]: 203 | The ids of all hit record. 204 | """ 205 | return self._hits.ids 206 | 207 | @property 208 | def distances(self) -> list: 209 | """ 210 | Return the distances of all hit record. 211 | 212 | :return list[float]: 213 | The distances of all hit record. 214 | """ 215 | return self._hits.distances 216 | 217 | 218 | class SearchResult: 219 | def __init__(self, query_result=None): 220 | """ 221 | Construct a search result from response. 222 | """ 223 | self._qs = query_result 224 | 225 | def __iter__(self): 226 | """ 227 | Iterate the Search Result. Every iteration returns a Hits corresponding to a query. 228 | """ 229 | return self 230 | 231 | def __next__(self): 232 | """ 233 | Iterate the Search Result. Every iteration returns a Hits corresponding to a query. 234 | """ 235 | return self.on_result(self._qs.__next__()) 236 | 237 | def __getitem__(self, item): 238 | """ 239 | Return the Hits corresponding to the nth query. 240 | 241 | :return Hits: 242 | The hits corresponding to the nth(item) query. 243 | """ 244 | s = self._qs.__getitem__(item) 245 | if isinstance(item, slice): 246 | _start = item.start or 0 247 | i_len = self._qs.__len__() 248 | _end = min(item.stop, i_len) if item.stop else i_len 249 | 250 | elements = [] 251 | for i in range(_start, _end): 252 | elements.append(self.on_result(s[i])) 253 | return elements 254 | return s 255 | 256 | def __len__(self) -> int: 257 | """ 258 | Return the number of query of Search Result. 259 | 260 | :return int: 261 | The number of query of search result. 
# Copyright (C) 2019-2021 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.

from enum import IntEnum
import logging
from pandas.api.types import infer_dtype, is_list_like, is_scalar, is_float, is_array_like
import numpy as np

LOGGER = logging.getLogger(__name__)


class DataType(IntEnum):
    """Numeric codes of the field data types used by this package."""

    NONE = 0
    BOOL = 1
    INT8 = 2
    INT16 = 3
    INT32 = 4
    INT64 = 5

    FLOAT = 10
    DOUBLE = 11

    STRING = 20

    BINARY_VECTOR = 100
    FLOAT_VECTOR = 101

    # Returned by the inference helpers below when no type can be determined.
    UNKNOWN = 999


# Maps the type name reported by ``pandas.api.types.infer_dtype`` to a DataType.
dtype_str_map = {
    "string": DataType.STRING,
    "floating": DataType.FLOAT,
    "integer": DataType.INT64,
    "mixed-integer": DataType.INT64,
    "mixed-integer-float": DataType.FLOAT,
    "boolean": DataType.BOOL,
    "mixed": DataType.UNKNOWN,
    "bytes": DataType.UNKNOWN,
}

# Maps ``str(dtype)`` of a numpy dtype to a DataType. Unsigned integers map to
# the signed type of the same width.
numpy_dtype_str_map = {
    "bool_": DataType.BOOL,
    "bool": DataType.BOOL,
    "int": DataType.INT64,
    "int_": DataType.INT64,
    "intc": DataType.INT64,
    "intp": DataType.INT64,
    "int8": DataType.INT8,
    "int16": DataType.INT16,
    "int32": DataType.INT32,
    "int64": DataType.INT64,
    "uint8": DataType.INT8,
    "uint16": DataType.INT16,
    "uint32": DataType.INT32,
    "uint64": DataType.INT64,
    "float": DataType.FLOAT,
    "float_": DataType.FLOAT,
    "float16": DataType.FLOAT,
    "float32": DataType.FLOAT,
    "float64": DataType.DOUBLE,
}


def is_integer_datatype(data_type):
    """Return True when *data_type* is one of INT8, INT16, INT32 or INT64."""
    return data_type in (DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64)


def is_float_datatype(data_type):
    """
    Return True when *data_type* is ``DataType.FLOAT``.

    NOTE(review): ``DataType.DOUBLE`` is not accepted here - confirm that is intended.
    """
    return data_type in (DataType.FLOAT,)


def is_numeric_datatype(data_type):
    """Return True when *data_type* is a float or an integer type as defined above."""
    return is_float_datatype(data_type) or is_integer_datatype(data_type)


# pylint: disable=too-many-return-statements
def infer_dtype_by_scaladata(data):
    """
    Infer the DataType of a single scalar value.

    The checks run in a fixed order: ``bool`` is tested before ``int`` because
    ``bool`` is a subclass of ``int``, and ``float`` is tested first, which also
    catches ``np.float64`` (a subclass of ``float``).

    :param data: The scalar to classify.
    :return DataType:
        The matching type, ``BINARY_VECTOR`` for ``bytes``, or ``UNKNOWN`` when
        nothing matches.
    """
    if isinstance(data, float):
        return DataType.DOUBLE
    if isinstance(data, bool):
        return DataType.BOOL
    if isinstance(data, int):
        return DataType.INT64
    if isinstance(data, str):
        return DataType.STRING
    if isinstance(data, np.float64):
        return DataType.DOUBLE
    if isinstance(data, np.float32):
        return DataType.FLOAT
    if isinstance(data, np.int64):
        return DataType.INT64
    if isinstance(data, np.int32):
        return DataType.INT32
    if isinstance(data, np.int16):
        return DataType.INT16
    if isinstance(data, np.int8):
        return DataType.INT8
    # ``np.bool8`` was only an alias of ``np.bool_`` and no longer exists in
    # NumPy 2.x; looking it up raised AttributeError for every value that
    # reached this point, so only ``np.bool_`` is checked.
    if isinstance(data, np.bool_):
        return DataType.BOOL
    if isinstance(data, bytes):
        return DataType.BINARY_VECTOR
    if is_float(data):
        return DataType.DOUBLE

    return DataType.UNKNOWN


def infer_dtype_bydata(data):
    """
    Infer the DataType of a scalar or of a collection of values.

    * Scalars are classified by ``infer_dtype_by_scaladata``.
    * List-like / array-like data is classified with pandas' ``infer_dtype``:
      numeric content yields ``FLOAT_VECTOR``, anything else ``UNKNOWN``.
    * Any other object is classified from its first element (``data[0]``) and,
      if that gives nothing, from its ``dtype`` attribute.

    :param data: The value to classify.
    :return DataType: The inferred type, ``UNKNOWN`` when inference fails.
    """
    if is_scalar(data):
        return infer_dtype_by_scaladata(data)

    if is_list_like(data) or is_array_like(data):
        try:
            type_str = infer_dtype(data)
        except TypeError:
            return DataType.UNKNOWN
        if is_numeric_datatype(dtype_str_map.get(type_str, DataType.UNKNOWN)):
            return DataType.FLOAT_VECTOR
        return DataType.UNKNOWN

    d_type = DataType.UNKNOWN
    try:
        elem = data[0]
    except Exception:  # pylint: disable=broad-except
        # Best effort: *data* may not be subscriptable or may be empty. Unlike a
        # bare ``except`` this lets KeyboardInterrupt / SystemExit propagate.
        elem = None

    if elem is not None and is_scalar(elem):
        d_type = infer_dtype_by_scaladata(elem)

    if d_type == DataType.UNKNOWN:
        _dtype = getattr(data, "dtype", None)
        if _dtype is not None:
            d_type = map_numpy_dtype_to_datatype(_dtype)

    return d_type


def map_numpy_dtype_to_datatype(d_type):
    """
    Map a numpy dtype (or its name) to a DataType.

    :param d_type: Anything whose ``str()`` is a key of ``numpy_dtype_str_map``.
    :return DataType: The mapped type, ``UNKNOWN`` for names not in the table.
    """
    d_type_str = str(d_type)
    return numpy_dtype_str_map.get(d_type_str, DataType.UNKNOWN)
"""Packaging script for pymilvus-orm."""
import pathlib
import setuptools
import re

HERE = pathlib.Path(__file__).parent

# Read the long description explicitly as UTF-8: without an encoding,
# read_text() uses the locale's default and can fail or garble the text on
# systems whose default is not UTF-8.
README = (HERE / 'README.md').read_text(encoding='utf-8')

# Runtime dependencies; pandas is pinned per interpreter version through
# environment markers.
requirements = [
    "pymilvus==2.0.0rc3.dev16",
    "pandas==1.1.5; python_version<'3.7'",
    "pandas==1.2.4; python_version>'3.6'",
]

# Optional dependency groups, installable as pymilvus-orm[test|dev|doc].
extras_require = {
    'test': [
        'sklearn==0.0',
        'pytest==5.3.4',
        'pytest-cov==2.8.1',
        'pytest-timeout==1.3.4',
    ],
    'dev': [
        'sklearn==0.0',
        'pylint==2.4.4',
    ],
    'doc': [
        'mistune==0.8.4',
        'm2r==0.2.1',
        'Sphinx==2.3.1',
        'sphinx-copybutton==0.3.1',
        'sphinx-rtd-theme==0.4.3',
        'sphinxcontrib-applehelp==1.0.1',
        'sphinxcontrib-devhelp==1.0.1',
        'sphinxcontrib-htmlhelp==1.0.2',
        'sphinxcontrib-jsmath==1.0.1',
        'sphinxcontrib-qthelp==1.0.2',
        'sphinxcontrib-serializinghtml==1.1.3',
        'sphinxcontrib-prettyspecialmethods',
    ],
}

setuptools.setup(
    name="pymilvus-orm",
    author='Milvus Team',
    author_email='milvus-team@zilliz.com',
    # The version is derived from SCM metadata by setuptools_scm.
    setup_requires=['setuptools_scm'],
    use_scm_version={'local_scheme': 'no-local-version'},
    description="Python ORM Sdk for Milvus(>= 2.0)",
    long_description=README,
    long_description_content_type='text/markdown',
    url='https://github.com/milvus-io/pymilvus-orm.git',
    license="Apache-2.0",
    packages=setuptools.find_packages(),
    # NOTE(review): recent pip versions reportedly ignore dependency_links -
    # confirm whether these entries still have any effect.
    dependency_links=[
        'https://test.pypi.org/simple/pymilvus',
        'https://pypi.org/simple/pymilvus',
    ],
    include_package_data=True,
    install_requires=requirements,
    extras_require=extras_require,
    classifiers=[
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
    ],
    python_requires='>=3.6',
)
class MockMilvus:
    """
    In-memory stand-in for the Milvus client used by the unit tests.

    Collections, their partitions and their indexes are kept in plain dicts
    keyed by collection name; no network access takes place.

    NOTE(review): ``BaseException`` below is whatever that name resolves to in
    this module's scope (the module star-imports ``pymilvus``) - presumably the
    client's own exception class; confirm before changing it.
    """

    def __init__(self, host=None, port=None, handler="GRPC", pool="SingletonThread", **kwargs):
        """Create an empty mock; all connection arguments are accepted and ignored."""
        self._collections = dict()
        self._collection_partitions = dict()
        self._collection_indexes = dict()

    def create_collection(self, collection_name, fields, timeout=None, **kwargs):
        """Register a collection with a ``_default`` partition and no index; raise if it exists."""
        if collection_name in self._collections:
            raise BaseException(1, f"Create collection failed: collection {collection_name} exist")
        self._collections[collection_name] = fields
        self._collection_partitions[collection_name] = {'_default'}
        self._collection_indexes[collection_name] = []
        logging.debug(f"create_collection: {collection_name}")

    def drop_collection(self, collection_name, timeout=None):
        """Remove a collection together with its partitions and indexes; raise if unknown."""
        if collection_name not in self._collections:
            raise BaseException(1, f"describe collection failed: can't find collection: {collection_name}")
        self._collections.pop(collection_name)
        self._collection_partitions.pop(collection_name)
        # Also forget the indexes; otherwise describe_index keeps reporting an
        # index for a collection that no longer exists.
        self._collection_indexes.pop(collection_name, None)
        logging.debug(f"drop_collection: {collection_name}")

    def has_collection(self, collection_name, timeout=None):
        """Return True when the collection is registered."""
        logging.debug(f"has_collection: {collection_name}")
        return collection_name in self._collections

    def describe_collection(self, collection_name, timeout=None):
        """Return the ``fields`` value stored at creation; raise if the collection is unknown."""
        if collection_name not in self._collections:
            raise BaseException(1, f"describe collection failed: can't find collection: {collection_name}")
        logging.debug(f"describe_collection: {collection_name}")
        return self._collections[collection_name]

    def load_collection(self, collection_name, timeout=None):
        """Only validate that the collection exists."""
        if collection_name not in self._collections:
            raise BaseException(1, f"describe collection failed: can't find collection: {collection_name}")
        logging.debug(f"load_collection: {collection_name}")

    def release_collection(self, collection_name, timeout=None):
        """Only validate that the collection exists."""
        if collection_name not in self._collections:
            raise BaseException(1, f"describe collection failed: can't find collection: {collection_name}")
        logging.debug(f"release_collection: {collection_name}")

    def get_collection_stats(self, collection_name, timeout=None, **kwargs):
        """Return a fixed ``{'row_count': 0}`` for an existing collection."""
        if collection_name not in self._collections:
            raise BaseException(1, f"describe collection failed: can't find collection: {collection_name}")
        logging.debug(f"get_collection_stats: {collection_name}")
        return {'row_count': 0}

    def list_collections(self, timeout=None):
        """Return the names of all registered collections."""
        logging.debug("list_collections")
        return list(self._collections.keys())

    def create_partition(self, collection_name, partition_tag, timeout=None):
        """Add a partition to a collection; raise if the collection is unknown or the tag exists."""
        if collection_name not in self._collections:
            raise BaseException(1, f"create partition failed: can't find collection: {collection_name}")
        if partition_tag in self._collection_partitions[collection_name]:
            raise BaseException(1, f"create partition failed: partition name = {partition_tag} already exists")
        logging.debug(f"create_partition: {collection_name}, {partition_tag}")
        self._collection_partitions[collection_name].add(partition_tag)

    def drop_partition(self, collection_name, partition_tag, timeout=None):
        """Remove a partition; the ``_default`` partition and unknown names are rejected."""
        if collection_name not in self._collections:
            raise BaseException(1, f"DropPartition failed: can't find collection: {collection_name}")
        if partition_tag not in self._collection_partitions[collection_name]:
            raise BaseException(1, f"DropPartition failed: partition {partition_tag} does not exist")
        if partition_tag == "_default":
            raise BaseException(1, "DropPartition failed: default partition cannot be deleted")
        logging.debug(f"drop_partition: {collection_name}, {partition_tag}")
        self._collection_partitions[collection_name].remove(partition_tag)

    def has_partition(self, collection_name, partition_tag, timeout=None):
        """Return True when the partition exists in the (existing) collection."""
        if collection_name not in self._collections:
            raise BaseException(1, f"HasPartition failed: can't find collection: {collection_name}")
        logging.debug(f"has_partition: {collection_name}, {partition_tag}")
        return partition_tag in self._collection_partitions[collection_name]

    def load_partitions(self, collection_name, partition_names, timeout=None):
        """Only validate that the collection and every named partition exist."""
        if collection_name not in self._collections:
            raise BaseException(1, f"describe collection failed: can't find collection: {collection_name}")
        for partition_name in partition_names:
            if partition_name not in self._collection_partitions[collection_name]:
                raise BaseException(1, f"partitionID of partitionName:{partition_name} can not be find")
        logging.debug(f"load_partition: {collection_name}, {partition_names}")

    def release_partitions(self, collection_name, partition_names, timeout=None):
        """Only validate that the collection and every named partition exist."""
        if collection_name not in self._collections:
            raise BaseException(1, f"describe collection failed: can't find collection: {collection_name}")
        for partition_name in partition_names:
            if partition_name not in self._collection_partitions[collection_name]:
                raise BaseException(1, f"partitionID of partitionName:{partition_name} can not be find")
        logging.debug(f"release_partition: {collection_name}, {partition_names}")

    def get_partition_stats(self, collection_name, partition_name, timeout=None, **kwargs):
        """Return a fixed ``{'row_count': 0}`` for an existing partition."""
        if collection_name not in self._collections:
            raise BaseException(1, f"describe collection failed: can't find collection: {collection_name}")
        if partition_name not in self._collection_partitions[collection_name]:
            raise BaseException(1, f"GetPartitionStatistics failed: partition {partition_name} does not exist")
        logging.debug(f"get_partition_stats: {partition_name}")
        return {'row_count': 0}

    def list_partitions(self, collection_name, timeout=None):
        """Return the partition names of a collection as a list (set order, i.e. unordered)."""
        if collection_name not in self._collections:
            raise BaseException(1, f"can't find collection: {collection_name}")
        logging.debug(f"list_partitions: {collection_name}")
        return list(self._collection_partitions[collection_name])

    def create_index(self, collection_name, field_name, params, timeout=None, **kwargs):
        """
        Record an index description for a collection.

        The collection is not validated: an unknown name raises ``KeyError``.
        """
        logging.debug(f"create_index: {collection_name}, {field_name}, {params}")
        index = {"field_name": field_name, "params": params}
        self._collection_indexes[collection_name].append(index)

    def drop_index(self, collection_name, field_name, timeout=None):
        """Discard every recorded index of the collection; ``field_name`` is only logged."""
        logging.debug(f"drop_index: {collection_name}, {field_name}")
        self._collection_indexes[collection_name] = []

    def describe_index(self, collection_name, index_name="", timeout=None):
        """Return the first recorded index of the collection, or None when there is none."""
        logging.debug(f"describe_index: {collection_name}, {index_name}")
        indexes = self._collection_indexes.get(collection_name)
        if not indexes:
            return None
        return indexes[0]

    def insert(self, collection_name, entities, ids=None, partition_tag=None, timeout=None, **kwargs):
        """Store nothing and return an empty ``MockMutationResult``."""
        return MockMutationResult()

    def flush(self, collection_names=None, timeout=None, **kwargs):
        """No-op."""

    def search(self, collection_name, dsl, partition_tags=None, fields=None, timeout=None, **kwargs):
        """No-op; returns None."""

    def load_collection_progress(self, collection_name, timeout=None, **kwargs):
        """Return fixed progress figures."""
        return {'num_loaded_entities': 3000, 'num_total_entities': 5000}

    def load_partitions_progress(self, collection_name, partition_names, timeout=None, **kwargs):
        """Return fixed progress figures."""
        return {'num_loaded_entities': 3000, 'num_total_entities': 5000}

    def wait_for_loading_collection_complete(self, collection_name, timeout=None, **kwargs):
        """No-op."""

    def wait_for_loading_partitions_complete(self, collection_name, partition_names, timeout=None, **kwargs):
        """No-op."""

    def get_index_build_progress(self, collection_name, index_name, timeout=None, **kwargs):
        """Return fixed progress figures."""
        return {'total_rows': 5000, 'indexed_rows': 3000}

    def wait_for_creating_index(self, collection_name, index_name, timeout=None, **kwargs):
        """Report immediate success as ``(True, "")``."""
        return True, ""

    def close(self):
        """No-op."""


# The tests install this module in place of ``pymilvus``; expose the mock under
# the client's class name.
Milvus = MockMilvus


# --- tests/mock_result.py ---
class MockMutationResult:
    """Empty mutation result: no primary keys, all counters and the timestamp are 0."""

    def __init__(self):
        self._primary_keys = []
        self._insert_cnt = 0
        self._delete_cnt = 0
        self._upsert_cnt = 0
        self._timestamp = 0

    @property
    def primary_keys(self):
        """The list of primary keys (always empty in this mock)."""
        return self._primary_keys

    @property
    def insert_count(self):
        """Number of inserted entities."""
        return self._insert_cnt

    @property
    def delete_count(self):
        """Number of deleted entities."""
        return self._delete_cnt

    @property
    def upsert_count(self):
        """Number of upserted entities."""
        return self._upsert_cnt

    @property
    def timestamp(self):
        """Timestamp of the mutation."""
        return self._timestamp
-------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | log_format = %(asctime)s %(levelname)s %(message)s 3 | log_date_format = %Y-%m-%d %H:%M:%S 4 | log_level = debug 5 | -------------------------------------------------------------------------------- /tests/test_collection.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import numpy 3 | import pytest 4 | from utils import * 5 | from pymilvus_orm import Collection, connections 6 | 7 | LOGGER = logging.getLogger(__name__) 8 | 9 | 10 | class TestCollections: 11 | @pytest.fixture(scope="function",) 12 | def collection(self): 13 | name = gen_collection_name() 14 | schema = gen_schema() 15 | yield Collection(name, schema=schema) 16 | if connections.get_connection().has_collection(name): 17 | connections.get_connection().drop_collection(name) 18 | 19 | def test_collection_by_DataFrame(self): 20 | from pymilvus_orm import Collection, connections 21 | from pymilvus_orm.schema import FieldSchema, CollectionSchema 22 | from pymilvus_orm.types import DataType 23 | fields = [ 24 | FieldSchema("int64", DataType.INT64), 25 | FieldSchema("float", DataType.FLOAT), 26 | FieldSchema("float_vector", DataType.FLOAT_VECTOR, dim=128) 27 | ] 28 | collection_schema = CollectionSchema(fields, primary_field="int64") 29 | connections.connect() 30 | collection = Collection(name=gen_collection_name(), schema=collection_schema) 31 | collection.drop() 32 | 33 | def test_constructor(self, collection): 34 | assert type(collection) is Collection 35 | 36 | def test_construct_from_dataframe(self): 37 | assert type(Collection.construct_from_dataframe(gen_collection_name(), gen_pd_data(default_nb), primary_field="int64")[0]) is Collection 38 | 39 | def test_schema(self, collection): 40 | schema = collection.schema 41 | description = "This is new 
description" 42 | with pytest.raises(AttributeError): 43 | schema.description = description 44 | with pytest.raises(AttributeError): 45 | collection.schema = schema 46 | 47 | def test_description(self, collection): 48 | LOGGER.info(collection.description) 49 | description = "This is new description" 50 | with pytest.raises(AttributeError): 51 | collection.description = description 52 | 53 | def test_name(self, collection): 54 | LOGGER.info(collection.name) 55 | with pytest.raises(AttributeError): 56 | collection.name = gen_collection_name() 57 | 58 | def test_is_empty(self, collection): 59 | assert collection.is_empty is True 60 | 61 | def test_num_entities(self, collection): 62 | assert collection.num_entities == 0 63 | 64 | def test_drop(self, collection): 65 | collection.drop() 66 | 67 | def test_load(self, collection): 68 | collection.load() 69 | 70 | def test_release(self, collection): 71 | collection.release() 72 | 73 | @pytest.mark.xfail 74 | def test_insert(self, collection): 75 | data = gen_list_data(default_nb) 76 | collection.insert(data) 77 | 78 | def test_insert_ret(self, collection): 79 | vectors = gen_vectors(1, default_dim, bool(0)) 80 | data = [ 81 | [1], 82 | [numpy.float32(1.0)], 83 | vectors 84 | ] 85 | result = collection.insert(data) 86 | print(result) 87 | assert "insert count" in str(result) 88 | assert "delete count" in str(result) 89 | assert "upsert count" in str(result) 90 | assert "timestamp" in str(result) 91 | 92 | @pytest.mark.xfail 93 | def test_search(self, collection): 94 | collection.search() 95 | 96 | @pytest.mark.xfail 97 | def test_get(self, collection): 98 | data = gen_list_data(default_nb) 99 | ids = collection.insert(data) 100 | assert len(ids) == default_nb 101 | res = collection.get(ids[0:10]) 102 | 103 | @pytest.mark.xfail 104 | def test_query(self, collection): 105 | data = gen_list_data(default_nb) 106 | ids = collection.insert(data) 107 | assert len(ids) == default_nb 108 | ids_expr = ",".join(str(x) for x in ids) 109 
| expr = "id in [ " + ids_expr + " ]" 110 | res = collection.query(expr) 111 | 112 | def test_partitions(self, collection): 113 | assert len(collection.partitions) == 1 114 | 115 | def test_partition(self, collection): 116 | collection.partition(gen_partition_name()) 117 | 118 | def test_has_partition(self, collection): 119 | assert collection.has_partition("_default") is True 120 | assert collection.has_partition(gen_partition_name()) is False 121 | 122 | @pytest.mark.xfail 123 | def test_drop_partition(self, collection): 124 | collection.drop_partition(gen_partition_name()) 125 | 126 | def test_indexes(self, collection): 127 | assert type(collection.indexes) is list 128 | assert len(collection.indexes) == 0 129 | 130 | @pytest.mark.xfail 131 | def test_index(self, collection): 132 | collection.index() 133 | 134 | @pytest.mark.xfail 135 | def test_create_index(self, collection, defa): 136 | collection.create_index(gen_field_name(), gen_index_name()) 137 | 138 | def test_has_index(self, collection): 139 | assert collection.has_index() is False 140 | 141 | @pytest.mark.xfail 142 | def test_drop_index(self, collection): 143 | collection.drop_index() 144 | 145 | def test_dummy(self): 146 | pass 147 | -------------------------------------------------------------------------------- /tests/test_connections.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import logging 3 | import pytest 4 | import pymilvus 5 | from unittest import mock 6 | 7 | from pymilvus_orm import connections, Connections 8 | from pymilvus_orm.default_config import DefaultConfig 9 | from pymilvus_orm.exceptions import * 10 | 11 | LOGGER = logging.getLogger(__name__) 12 | 13 | 14 | class TestConnections: 15 | @pytest.fixture(scope="function") 16 | def c(self): 17 | return copy.deepcopy(Connections()) 18 | 19 | @pytest.fixture(scope="function") 20 | def configure_params(self): 21 | params = { 22 | "test": {"host": "localhost", "port": "19530"}, 23 
| "dev": {"host": "127.0.0.1", "port": "19530"}, 24 | } 25 | return params 26 | 27 | @pytest.fixture(scope="function") 28 | def host(self): 29 | return "localhost" 30 | 31 | @pytest.fixture(scope="function") 32 | def port(self): 33 | return "19530" 34 | 35 | @pytest.fixture(scope="function") 36 | def params(self, host, port): 37 | d = { 38 | "host": host, 39 | "port": port, 40 | } 41 | return d 42 | 43 | def test_constructor(self, c): 44 | LOGGER.info(type(c)) 45 | 46 | def test_add_connection(self, c, configure_params): 47 | with mock.patch("pymilvus.Milvus.__init__", return_value=None): 48 | c.add_connection(**configure_params) 49 | 50 | for key, _ in configure_params.items(): 51 | c.connect(key) 52 | 53 | conn = c.get_connection(key) 54 | 55 | assert isinstance(conn, pymilvus.Milvus) 56 | 57 | with pytest.raises(ConnectionConfigException): 58 | c.add_connection(**{key: {"host": "192.168.1.1", "port": "13500"}}) 59 | 60 | c.remove_connection(key) 61 | 62 | def test_remove_connection_without_no_connections(self, c): 63 | c.remove_connection("remove") 64 | 65 | def test_remove_connection(self, c, host, port): 66 | with mock.patch("pymilvus.Milvus.__init__", return_value=None): 67 | alias = "default" 68 | 69 | c.connect(alias, host=host, port=port) 70 | c.disconnect(alias) 71 | 72 | assert c.get_connection(alias) is None 73 | c.remove_connection(alias) 74 | 75 | def test_connect_without_param(self, c): 76 | with mock.patch("pymilvus.Milvus.__init__", return_value=None): 77 | alias = "default" 78 | c.connect(alias) 79 | conn_got = c.get_connection(alias) 80 | assert isinstance(conn_got, pymilvus.Milvus) 81 | c.remove_connection(alias) 82 | 83 | def test_connect(self, c, params): 84 | with mock.patch("pymilvus.Milvus.__init__", return_value=None): 85 | alias = "default" 86 | c.connect(alias, **params) 87 | conn_got = c.get_connection(alias) 88 | assert isinstance(conn_got, pymilvus.Milvus) 89 | c.remove_connection(alias) 90 | 91 | def 
test_get_connection_without_no_connections(self, c): 92 | assert c.get_connection("get") is None 93 | 94 | def test_get_connection(self, c, host, port): 95 | with mock.patch("pymilvus.Milvus.__init__", return_value=None): 96 | alias = "default" 97 | 98 | c.connect(alias, host=host, port=port) 99 | 100 | conn_got = c.get_connection(alias) 101 | assert isinstance(conn_got, pymilvus.Milvus) 102 | 103 | c.remove_connection(alias) 104 | 105 | def test_get_connection_without_alias(self, c, host, port): 106 | with mock.patch("pymilvus.Milvus.__init__", return_value=None): 107 | alias = DefaultConfig.DEFAULT_USING 108 | 109 | c.connect(alias, host=host, port=port) 110 | 111 | conn_got = c.get_connection() 112 | assert isinstance(conn_got, pymilvus.Milvus) 113 | 114 | c.remove_connection(alias) 115 | 116 | def test_get_connection_with_configure_without_add(self, c, configure_params): 117 | with mock.patch("pymilvus.Milvus.__init__", return_value=None): 118 | c.add_connection(**configure_params) 119 | for key, _ in configure_params.items(): 120 | c.connect(key) 121 | conn = c.get_connection(key) 122 | assert isinstance(conn, pymilvus.Milvus) 123 | c.remove_connection(key) 124 | 125 | def test_get_connection_addr(self, c, host, port): 126 | alias = DefaultConfig.DEFAULT_USING 127 | 128 | c.connect(alias, host=host, port=port) 129 | 130 | connection_addr = c.get_connection_addr(alias) 131 | 132 | assert connection_addr["host"] == host 133 | assert connection_addr["port"] == port 134 | c.remove_connection(alias) 135 | 136 | def test_list_connections(self, c, host, port): 137 | alias = DefaultConfig.DEFAULT_USING 138 | 139 | c.connect(alias, host=host, port=port) 140 | 141 | conns = c.list_connections() 142 | 143 | assert len(conns) == 1 144 | c.remove_connection(alias) 145 | -------------------------------------------------------------------------------- /tests/test_index.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 
import pytest
from utils import *
from pymilvus_orm import Collection, Index

LOGGER = logging.getLogger(__name__)


class TestIndex:
    """Tests for ``Index`` objects built on a freshly generated collection."""

    @pytest.fixture(scope="function")
    def name(self):
        """Return a randomly generated index name."""
        return gen_index_name()

    @pytest.fixture(scope="function")
    def field_name(self):
        """Return a randomly generated field name."""
        return gen_field_name()

    @pytest.fixture(scope="function")
    def collection_name(self):
        """Return a randomly generated collection name."""
        return gen_collection_name()

    @pytest.fixture(scope="function")
    def schema(self):
        """Return the collection schema produced by ``gen_schema``."""
        return gen_schema()

    @pytest.fixture(scope="function")
    def index_param(self):
        """Return the full list of index parameters from ``gen_index``."""
        return gen_index()

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request):
        """Yield one entry of ``gen_simple_index()`` per parametrized run."""
        return request.param

    @pytest.fixture(scope="function")
    def index(self, name, field_name, collection_name, schema, get_simple_index):
        """Build an ``Index`` on ``field_name`` of a new ``Collection``.

        ``name`` is requested as a fixture but is not passed to ``Index``.
        """
        collection = Collection(collection_name, schema=schema)
        return Index(collection, field_name, get_simple_index)

    def test_params(self, index, get_simple_index):
        """The index exposes the parameters it was constructed with."""
        assert index.params == get_simple_index

    def test_collection_name(self, index, collection_name):
        """The index reports the name of the collection it belongs to."""
        assert index.collection_name == collection_name

    def test_field_name(self, index, field_name):
        """The index reports the field it was created on."""
        assert index.field_name == field_name

    def test_drop(self, index):
        """Dropping the index must not raise."""
        index.drop()

# --------------------------------------------------------------------------------
# /tests/test_partition.py:
# --------------------------------------------------------------------------------
import logging
import unittest
import pytest
from utils import *
from pymilvus_orm import Collection, Partition

LOGGER = logging.getLogger(__name__)


class TestPartition:
    """Tests for ``Partition`` objects attached to a throw-away collection."""

    @pytest.fixture(scope="function")
    def collection_name(self):
        """Return a randomly generated collection name."""
        return gen_collection_name()

    @pytest.fixture(scope="function")
    def schema(self):
        """Return the collection schema produced by ``gen_schema``."""
        return gen_schema()

    @pytest.fixture(scope="function")
    def partition_name(self):
        """Return a randomly generated partition name."""
        return gen_partition_name()

    @pytest.fixture(scope="function")
    def description(self):
        """Return the fixed description given to the partition under test."""
        return "TestPartition_description"

    @pytest.fixture(scope="function")
    def collection(self, collection_name, schema):
        """Yield a new ``Collection`` and drop it once the test finishes."""
        c = Collection(collection_name, schema=schema)
        yield c
        c.drop()

    @pytest.fixture(scope="function")
    def partition(self, collection, partition_name, description):
        """Yield a new ``Partition``; drop it afterwards if it still exists."""
        yield Partition(collection, partition_name, description=description)
        # A test (e.g. test_drop) may already have removed the partition.
        if collection.has_partition(partition_name):
            collection.drop_partition(partition_name)

    def test_constructor(self, partition):
        """The fixture yields a ``Partition`` instance."""
        assert isinstance(partition, Partition)

    def test_description(self, partition, description):
        """The description passed at construction is readable back."""
        assert partition.description == description

    def test_name(self, partition, partition_name):
        """The partition reports the name it was created with."""
        assert partition.name == partition_name

    def test_is_empty(self, partition):
        """A freshly created partition is empty."""
        assert partition.is_empty is True

    def test_num_entities(self, partition):
        """A freshly created partition holds no entities."""
        assert partition.num_entities == 0

    def test_drop(self, collection, partition, partition_name):
        """``drop`` removes the partition from its collection."""
        assert collection.has_partition(partition_name) is True
        partition.drop()
        assert collection.has_partition(partition_name) is False

    def test_load(self, partition):
        """Loading must not raise."""
        # Let any exception propagate so pytest reports the real traceback
        # instead of an uninformative ``assert False``.
        partition.load()

    def test_release(self, partition):
        """Releasing must not raise."""
        # Let any exception propagate so pytest reports the real traceback.
        partition.release()

    def test_insert(self, partition):
        """Inserting list-style data must not raise."""
        data = gen_list_data(default_nb)
        partition.insert(data)

    @pytest.mark.xfail
    def test_get(self, partition):
        """Insert ``default_nb`` rows, then fetch the first ten ids (expected to fail)."""
        data = gen_list_data(default_nb)
        ids = partition.insert(data)
        assert len(ids) == default_nb
        partition.get(ids[:10])

    @pytest.mark.xfail
    def test_query(self, partition):
        """Insert ``default_nb`` rows, then query them by id (expected to fail)."""
        data = gen_list_data(default_nb)
        ids = partition.insert(data)
        assert len(ids) == default_nb
        ids_expr = ",".join(str(x) for x in ids)
        expr = f"id in [ {ids_expr} ]"
        partition.query(expr)

# --------------------------------------------------------------------------------
# /tests/test_schema.py:
# --------------------------------------------------------------------------------
import logging

import numpy
import pytest

from pymilvus_orm.schema import CollectionSchema, FieldSchema, parse_fields_from_dataframe
from utils import *

LOGGER = logging.getLogger(__name__)


class TestCollectionSchema:
    """Tests for building ``CollectionSchema`` from, and dumping it to, a dict."""

    @pytest.fixture(scope="function")
    def raw_dict(self):
        """Return a raw schema dict with two vector fields and an INT64 primary key."""
        fields = [
            {
                "name": "vec1",
                "description": "desc1",
                "type": DataType.FLOAT_VECTOR,
                "params": {"dim": 128},
            },
            {
                "name": "vec2",
                "description": "desc2",
                "type": DataType.BINARY_VECTOR,
                "params": {"dim": 128},
            },
            {
                "name": "ID",
                "description": "ID",
                "type": DataType.INT64,
                "is_primary": True,
                "auto_id": False,
            },
        ]
        return {
            "description": "TestCollectionSchema_description",
            "fields": fields,
        }

    def test_constructor_from_dict(self, raw_dict):
        """``construct_from_dict`` keeps description, field count and primary field."""
        schema = CollectionSchema.construct_from_dict(raw_dict)
        # Compare for equality; ``assert a, b`` would only check truthiness
        # of ``a`` and use ``b`` as the failure message.
        assert schema.description == raw_dict['description']
        assert len(schema.fields) == len(raw_dict['fields'])
        f = schema.primary_field
        assert isinstance(f, FieldSchema)
        # The third entry of the raw field list is the one flagged is_primary.
        assert f.name == raw_dict['fields'][2]['name']

    def test_to_dict(self, raw_dict):
        """``to_dict`` round-trips the raw dict as a new, equal object."""
        schema = CollectionSchema.construct_from_dict(raw_dict)
        target = schema.to_dict()
        # A top-level "auto_id" key is not part of the raw input; ignore it if present.
        target.pop("auto_id", None)
        assert target == raw_dict
        assert target is not raw_dict
class TestFieldSchema:
    """Tests for building ``FieldSchema`` from dicts and from a DataFrame."""

    @pytest.fixture(scope="function")
    def raw_dict_float_vector(self):
        """Return a raw field dict describing a 128-dim FLOAT_VECTOR field."""
        return {
            "name": "TestFieldSchema_name_floatvector",
            "description": "TestFieldSchema_description_floatvector",
            "type": DataType.FLOAT_VECTOR,
            "params": {"dim": 128},
        }

    @pytest.fixture(scope="function")
    def raw_dict_binary_vector(self):
        """Return a raw field dict describing a 128-dim BINARY_VECTOR field."""
        return {
            "name": "TestFieldSchema_name_binary_vector",
            "description": "TestFieldSchema_description_binary_vector",
            "type": DataType.BINARY_VECTOR,
            "params": {"dim": 128},
        }

    @pytest.fixture(scope="function")
    def raw_dict_norm(self):
        """Return a raw field dict describing a plain INT64 field (no params)."""
        return {
            "name": "TestFieldSchema_name_norm",
            "description": "TestFieldSchema_description_norm",
            "type": DataType.INT64,
        }

    @pytest.fixture(scope="function")
    def dataframe1(self):
        """Return a one-row DataFrame with float, int and float-vector columns."""
        import pandas
        data = {
            'float': [1.0],
            'int32': [2],
            'float_vec': [numpy.array([3, 4.0], numpy.float32)],
        }
        return pandas.DataFrame(data)

    def test_constructor_from_float_dict(self, raw_dict_float_vector):
        """A FLOAT_VECTOR dict yields a non-primary field with matching attributes."""
        field = FieldSchema.construct_from_dict(raw_dict_float_vector)
        assert field.dtype == DataType.FLOAT_VECTOR
        assert field.description == raw_dict_float_vector['description']
        assert field.is_primary is False
        assert field.name == raw_dict_float_vector['name']
        assert field.dim == raw_dict_float_vector['params']['dim']

    def test_constructor_from_binary_dict(self, raw_dict_binary_vector):
        """A BINARY_VECTOR dict yields a non-primary field with matching attributes."""
        field = FieldSchema.construct_from_dict(raw_dict_binary_vector)
        assert field.dtype == DataType.BINARY_VECTOR
        assert field.description == raw_dict_binary_vector['description']
        assert field.is_primary is False
        assert field.name == raw_dict_binary_vector['name']
        assert field.dim == raw_dict_binary_vector['params']['dim']

    def test_constructor_from_norm_dict(self, raw_dict_norm):
        """A scalar INT64 dict yields a non-primary field without a dimension."""
        field = FieldSchema.construct_from_dict(raw_dict_norm)
        assert field.dtype == DataType.INT64
        assert field.description == raw_dict_norm['description']
        assert field.is_primary is False
        assert field.name == raw_dict_norm['name']
        assert field.dim is None
        # NOTE(review): expects an attribute that was never set to read as
        # None; presumably FieldSchema has an attribute fallback - confirm.
        assert field.dummy is None

    def test_cmp(self, raw_dict_binary_vector):
        """Fields built from equal dicts compare equal; a renamed one does not."""
        import copy
        field1 = FieldSchema.construct_from_dict(raw_dict_binary_vector)
        field2 = FieldSchema.construct_from_dict(raw_dict_binary_vector)
        assert field1 == field2
        # Work on a deep copy so the fixture dict itself is left untouched.
        dict1 = copy.deepcopy(raw_dict_binary_vector)
        dict1["name"] = dict1["name"] + "_"
        field3 = FieldSchema.construct_from_dict(dict1)
        assert field1 != field3

    def test_to_dict(self, raw_dict_norm, raw_dict_float_vector, raw_dict_binary_vector):
        """``to_dict`` returns a new dict equal to the one the field was built from."""
        dicts = [raw_dict_norm, raw_dict_float_vector, raw_dict_binary_vector]
        fields = [FieldSchema.construct_from_dict(raw) for raw in dicts]

        for field, raw in zip(fields, dicts):
            target = field.to_dict()
            assert target == raw
            assert target is not raw

    def test_parse_fields_from_dataframe(self, dataframe1):
        """One field is parsed per column; vector dim matches the column data."""
        fields = parse_fields_from_dataframe(dataframe1)
        assert len(fields) == len(dataframe1.columns)
        for f in fields:
            if f.dtype == DataType.FLOAT_VECTOR:
                assert f.dim == len(dataframe1['float_vec'].values[0])

# --------------------------------------------------------------------------------
# /tests/test_types.py:
# --------------------------------------------------------------------------------
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License.

from pymilvus_orm.types import *
import pandas as pd
import numpy as np


class TestTypes:
    """Tests for the dtype-mapping helpers in ``pymilvus_orm.types``."""

    def test_map_numpy_dtype_to_datatype(self):
        """Column dtypes of a DataFrame map to the expected ``DataType`` values."""
        data1 = {
            'double': [2.0],
            'float32': [np.float32(1.0)],
            'double2': [np.float64(1.0)],
            'int8': [np.int8(1)],
            'int16': [2],
            'int32': [4],
            'int64': [8],
            'bool': [True],
            'float_vec': [np.array([1.1, 1.2])],
        }

        df = pd.DataFrame(data1)

        # Expected result per column, in the same order as the keys of data1.
        wants1 = [
            DataType.DOUBLE,
            DataType.DOUBLE,
            DataType.DOUBLE,
            DataType.INT64,
            DataType.INT64,
            DataType.INT64,
            DataType.INT64,
            DataType.BOOL,
            DataType.UNKNOWN,
        ]

        ret1 = [map_numpy_dtype_to_datatype(x) for x in df.dtypes]
        assert ret1 == wants1

        # With an explicit dtype the column keeps its exact integer width.
        explicit_cases = (
            (np.int8, DataType.INT8),
            (np.int16, DataType.INT16),
            (np.int32, DataType.INT32),
            (np.int64, DataType.INT64),
        )
        for np_type, expected in explicit_cases:
            df2 = pd.DataFrame(data=[1, 2, 3], columns=['a'], dtype=np_type)
            # ``dtypes`` is indexed by column label, so use ``.iloc`` for
            # positional access rather than the deprecated ``dtypes[0]``.
            assert expected == map_numpy_dtype_to_datatype(df2.dtypes.iloc[0])

    def test_infer_dtype_bydata(self):
        """``infer_dtype_bydata`` returns the expected ``DataType`` per sample."""
        data1 = [
            [1],
            [True],
            [1.0, 2.0],
            ["abc"],
            bytes("abc", encoding='ascii'),
            1,
            True,
            "abc",
            np.int8(1),
            np.int16(1),
            [np.int8(1)],
        ]

        # Expected result for each entry of data1, position by position.
        wants = [
            DataType.FLOAT_VECTOR,
            DataType.UNKNOWN,
            DataType.FLOAT_VECTOR,
            DataType.UNKNOWN,
            DataType.BINARY_VECTOR,
            DataType.INT64,
            DataType.BOOL,
            DataType.STRING,
            DataType.INT8,
            DataType.INT16,
            DataType.FLOAT_VECTOR,
        ]

        actual = [infer_dtype_bydata(d) for d in data1]

        assert actual == wants

# --------------------------------------------------------------------------------
# /tests/test_utility.py:
# --------------------------------------------------------------------------------
import pytest
from utils import *
from pymilvus_orm.utility import *


# NOTE(review): the class name looks copy-pasted from test_schema.py; these
# tests exercise pymilvus_orm.utility. Consider renaming (e.g. TestUtility).
class TestCollectionSchema:
    """Smoke tests for the module-level helpers of ``pymilvus_orm.utility``."""

    def test_loading_progress(self):
        """``loading_progress`` can be called with generated names without raising."""
        loading_progress(gen_collection_name(), [gen_partition_name()])

    def test_wait_for_loading_complete(self):
        """``wait_for_loading_complete`` can be called without raising."""
        wait_for_loading_complete(gen_collection_name(), [gen_partition_name()])

    def test_index_building_progress(self):
        """``index_building_progress`` can be called without raising."""
        index_building_progress(gen_collection_name(), gen_index_name())

    def test_wait_for_index_building_complete(self):
        """``wait_for_index_building_complete`` can be called without raising."""
        wait_for_index_building_complete(gen_collection_name(), gen_index_name())

    def test_has_collection(self):
        """A freshly generated collection name is reported as absent."""
        assert has_collection(gen_collection_name()) is False

    def test_has_partition(self):
        """``has_partition`` raises when called with freshly generated names."""
        # NOTE(review): BaseException is very broad; narrow it once the
        # concrete exception type raised here is confirmed.
        with pytest.raises(BaseException):
            has_partition(gen_collection_name(), gen_partition_name())

# --------------------------------------------------------------------------------
# /tests/utils.py:
# --------------------------------------------------------------------------------
import random
import pandas
from sklearn import preprocessing
from pymilvus_orm.types import DataType

default_dim = 128
default_nb = 1200
default_nq = 10
default_float_vec_field_name = "float_vector"

# Index types covered by the tests. ``default_index_params`` below is aligned
# with this list position by position, so keep both in sync when editing.
all_index_types = [
    "FLAT",
    "IVF_FLAT",
    "IVF_SQ8",
    "IVF_SQ8_HYBRID",
    "IVF_PQ",
    "HNSW",
    # "NSG",
    "ANNOY",
    "RHNSW_PQ",
    "RHNSW_SQ",
    "BIN_FLAT",
    "BIN_IVF_FLAT"
]

default_index_params = [
    {"nlist": 128},
    {"nlist": 128},
    {"nlist": 128},
    {"nlist": 128},
    {"nlist": 128, "m": 16, "nbits": 8},
    {"M": 48, "efConstruction": 500},
    # {"search_length": 50, "out_degree": 40, "candidate_pool_size": 100, "knng": 50},
    {"n_trees": 50},
    {"M": 48, "efConstruction": 500, "PQM": 64},
    {"M": 48, "efConstruction": 500},
    {"nlist": 128},
    {"nlist": 128}
]


def binary_support():
    """Return the index types that ``gen_simple_index`` treats as binary-only."""
    return ["BIN_FLAT", "BIN_IVF_FLAT"]


def gen_collection_name():
    """Return ``'ut-collection-'`` followed by a random six-digit number."""
    return f'ut-collection-{random.randint(100000, 999999)}'


def gen_partition_name():
    """Return ``'ut-partition-'`` followed by a random six-digit number."""
    return f'ut-partition-{random.randint(100000, 999999)}'


def gen_index_name():
    """Return ``'ut-index-'`` followed by a random six-digit number."""
    return f'ut-index-{random.randint(100000, 999999)}'


def gen_field_name():
    """Return ``'ut-field-'`` followed by a random six-digit number."""
    return f'ut-field-{random.randint(100000, 999999)}'


def gen_schema():
    """Return a ``CollectionSchema`` with an INT64 primary key, a FLOAT field
    and a FLOAT_VECTOR field of ``default_dim`` dimensions.

    Field names are randomly generated on every call.
    """
    from pymilvus_orm.schema import CollectionSchema, FieldSchema
    fields = [
        FieldSchema(gen_field_name(), DataType.INT64, is_primary=True, auto_id=False),
        FieldSchema(gen_field_name(), DataType.FLOAT),
        FieldSchema(gen_field_name(), DataType.FLOAT_VECTOR, dim=default_dim)
    ]
    return CollectionSchema(fields)


def gen_vectors(num, dim, is_normal=True):
    """Return ``num`` random vectors of length ``dim`` as a list of lists.

    The vectors are always L2-normalized.
    NOTE(review): ``is_normal`` is accepted but currently ignored; callers
    passing ``False`` still receive normalized vectors.
    """
    vectors = [[random.random() for _ in range(dim)] for _ in range(num)]
    vectors = preprocessing.normalize(vectors, axis=1, norm='l2')
    return vectors.tolist()


def gen_int_attr(row_num):
    """Return ``row_num`` random integers in the inclusive range 0..255."""
    return [random.randint(0, 255) for _ in range(row_num)]


# pandas.DataFrame
def gen_pd_data(nb, is_normal=False):
    """Return a DataFrame of ``nb`` rows with int64, float and vector columns.

    The vector column is named ``default_float_vec_field_name`` and holds
    vectors of ``default_dim`` dimensions from ``gen_vectors``.
    """
    import numpy
    vectors = gen_vectors(nb, default_dim, is_normal)
    datas = {
        "int64": list(range(nb)),
        "float": numpy.arange(nb, dtype=numpy.float32),
        default_float_vec_field_name: vectors
    }
    return pandas.DataFrame(datas)


# list or tuple data
def gen_list_data(nb, is_normal=False):
    """Return column-oriented data: ``[ints, floats, vectors]``, each of length ``nb``."""
    vectors = gen_vectors(nb, default_dim, is_normal)
    return [list(range(nb)), [float(i) for i in range(nb)], vectors]


def gen_index():
    """Return a list of ``{"index_type", "index_param"}`` dicts covering a grid
    of parameter values for every entry of ``all_index_types``.

    Index types without a matching branch below (currently "ANNOY") contribute
    no entries. The "NSG" branch only takes effect if "NSG" is re-enabled in
    ``all_index_types``.
    """
    nlists = [1, 1024, 16384]
    pq_ms = [128, 64, 32, 16, 8, 4]
    Ms = [5, 24, 48]
    efConstructions = [100, 300, 500]
    search_lengths = [10, 100, 300]
    out_degrees = [5, 40, 300]
    candidate_pool_sizes = [50, 100, 300]
    knngs = [5, 100, 300]

    index_params = []
    for index_type in all_index_types:
        if index_type in ("FLAT", "BIN_FLAT", "BIN_IVF_FLAT"):
            index_params.append({"index_type": index_type, "index_param": {"nlist": 1024}})
        elif index_type in ("IVF_FLAT", "IVF_SQ8", "IVF_SQ8_HYBRID"):
            index_params.extend(
                {"index_type": index_type, "index_param": {"nlist": nlist}}
                for nlist in nlists
            )
        elif index_type == "IVF_PQ":
            index_params.extend(
                {"index_type": index_type, "index_param": {"nlist": nlist, "m": m}}
                for nlist in nlists
                for m in pq_ms
            )
        elif index_type in ("HNSW", "RHNSW_SQ", "RHNSW_PQ"):
            index_params.extend(
                {"index_type": index_type,
                 "index_param": {"M": M, "efConstruction": efConstruction}}
                for M in Ms
                for efConstruction in efConstructions
            )
        elif index_type == "NSG":
            index_params.extend(
                {"index_type": index_type,
                 "index_param": {"search_length": search_length, "out_degree": out_degree,
                                 "candidate_pool_size": candidate_pool_size, "knng": knng}}
                for search_length in search_lengths
                for out_degree in out_degrees
                for candidate_pool_size in candidate_pool_sizes
                for knng in knngs
            )

    return index_params


def gen_simple_index():
    """Return one ``{"index_type", "metric_type", "params"}`` dict per
    non-binary index type, using "L2" and the matching default parameters.

    The ``params`` value is the dict object stored in ``default_index_params``
    (not a copy).
    """
    binary_types = binary_support()
    index_params = []
    # The two module-level lists are aligned by position.
    for index_type, params in zip(all_index_types, default_index_params):
        if index_type in binary_types:
            continue
        index_params.append(
            {"index_type": index_type, "metric_type": "L2", "params": params}
        )
    return index_params

# --------------------------------------------------------------------------------