├── .github └── workflows │ ├── checks.yml │ ├── dependencies.yml │ ├── develop.yml │ ├── main.yml │ ├── pr.yaml │ └── release.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── aperturedb ├── BBoxDataCSV.py ├── BlobDataCSV.py ├── BlobNewestDataCSV.py ├── Blobs.py ├── BoundingBoxes.py ├── CSVParser.py ├── CSVWriter.py ├── Clips.py ├── CommonLibrary.py ├── Configuration.py ├── ConnectionDataCSV.py ├── Connector.py ├── ConnectorRest.py ├── Constraints.py ├── DaskManager.py ├── DataModels.py ├── DescriptorDataCSV.py ├── DescriptorSetDataCSV.py ├── Descriptors.py ├── Entities.py ├── EntityDataCSV.py ├── EntityUpdateDataCSV.py ├── ImageDataCSV.py ├── ImageDownloader.py ├── Images.py ├── KaggleData.py ├── NotebookHelpers.py ├── Operations.py ├── ParallelLoader.py ├── ParallelQuery.py ├── ParallelQuerySet.py ├── Parallelizer.py ├── PolygonDataCSV.py ├── Polygons.py ├── PyTorchData.py ├── PyTorchDataset.py ├── Query.py ├── QueryGenerator.py ├── SPARQL.py ├── Sort.py ├── Sources.py ├── SparseAddingDataCSV.py ├── Stats.py ├── Subscriptable.py ├── TensorFlowData.py ├── Utils.py ├── VideoDataCSV.py ├── VideoDownloader.py ├── Videos.py ├── __init__.py ├── cli │ ├── README.md │ ├── __init__.py │ ├── adb.py │ ├── configure.py │ ├── console.py │ ├── ingest.py │ ├── keys.py │ ├── mount_coco.py │ ├── tokens.py │ ├── transact.py │ └── utilities.py ├── queryMessage.py ├── queryMessage3_pb2.py ├── queryMessage4_pb2.py ├── queryMessage5_pb2.py ├── transformers │ ├── __init__.py │ ├── clip.py │ ├── clip_pytorch_embeddings.py │ ├── common_properties.py │ ├── facenet.py │ ├── facenet_pytorch_embeddings.py │ ├── image_properties.py │ └── transformer.py └── types.py ├── ci.sh ├── configure_deployment.sh ├── docker ├── complete │ └── Dockerfile ├── dependencies │ ├── Dockerfile │ └── build.sh ├── notebook │ ├── Dockerfile │ ├── Dockerfile.cpu │ └── scripts │ │ └── start.sh ├── pytorch-gpu │ ├── Dockerfile │ ├── build.sh │ └── scripts │ │ └── start.sh ├── release │ └── Dockerfile ├── tests │ ├── Dockerfile │ └── scripts │ │ └── start.sh └── twine │ └── Dockerfile ├── docs └── README.protobuf ├── examples ├── CelebADataKaggle.py ├── Cifar10DataTensorFlow.py ├── CocoDataPyTorch.py ├── DataWizard │ └── Polygon Regions DataWizard.ipynb ├── Foo.py ├── README.md ├── dask │ ├── ingest_dask.py │ └── ingest_loader.py ├── image_classification │ ├── AlexNetClassifier.py │ ├── CocoDataPytorch.py │ ├── imagenet_classes.txt │ ├── prepare_aperturedb.py │ ├── pytorch_classification.ipynb │ └── pytorch_classification.py ├── loaders_101 │ ├── CocoDataPytorch.py │ └── loaders.ipynb ├── loading_with_models │ ├── add_video_model.py │ ├── find_roi.py │ ├── get_tl_embeddings.py │ ├── models.ipynb │ ├── text_embedding.json │ └── video_clips.json ├── rest_api │ ├── index.html │ ├── rest_api.js │ ├── rest_api.py │ └── songbird.jpg └── similarity_search │ ├── CelebADataKaggle.py │ ├── add_faces.py │ ├── bruce-lee.jpg │ ├── similarity_search.ipynb │ └── taylor-swift.jpg ├── github-release.sh ├── publish.sh ├── pyproject.toml ├── tag.sh ├── test ├── .coveragerc ├── .dockerignore ├── .env ├── __init__.py ├── adb_timing_tests.py ├── conftest.py ├── coverage │ └── Dockerfile ├── dbinfo.py ├── docker-compose.yml ├── download_images.py ├── generateImages.py ├── generateInput.py ├── get_10_faces_with_annotations.json ├── get_10_faces_with_optional_annotations.json ├── get_10_image_uniqueids.json ├── input │ ├── README.md │ ├── sample_gs_urls │ ├── sample_gs_video_urls │ ├── sample_http_urls │ ├── sample_http_video_urls │ ├── 
sample_s3_urls │ ├── sample_s3_video_urls │ └── url_images.adb.csv ├── pytest.ini ├── run_test.sh ├── run_test_container.sh ├── test_CLI.py ├── test_Data.py ├── test_Datawizard.py ├── test_Key.py ├── test_Parallel.py ├── test_ResponseHandler.py ├── test_SPARQL.py ├── test_Server.py ├── test_Session.py ├── test_Stats.py ├── test_Success.py ├── test_UserConvenience.py ├── test_Utils.py ├── test_kaggle.py └── test_torch_connector.py └── version.sh /.github/workflows/checks.yml: -------------------------------------------------------------------------------- 1 | name: syntax-check 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - develop 8 | - main 9 | 10 | jobs: 11 | pre-commit: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | 17 | - uses: actions/setup-python@v3 18 | with: 19 | python-version: '3.10' 20 | 21 | - uses: pre-commit/action@v3.0.1 22 | 23 | - uses: luisremis/find-trailing-whitespace@master 24 | -------------------------------------------------------------------------------- /.github/workflows/dependencies.yml: -------------------------------------------------------------------------------- 1 | name: dependencies 2 | 3 | on: 4 | schedule: 5 | - cron: "0 0 * * *" 6 | 7 | jobs: 8 | build-dependencies: 9 | 10 | runs-on: 11 | - self-hosted 12 | - deployer 13 | 14 | steps: 15 | 16 | - uses: actions/checkout@v3 17 | 18 | - name: Login to DockerHub 19 | uses: docker/login-action@v2 20 | with: 21 | username: ${{ secrets.DOCKER_USER }} 22 | password: ${{ secrets.DOCKER_PASS }} 23 | 24 | - name: Build and Push Dependencies Image 25 | env: 26 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 27 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 28 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 29 | GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 30 | run: BUILD_DEPENDENCIES=true PULL_DEPENDENCIES=false PUSH_DEPENDENCIES=true ./ci.sh 31 | shell: bash 32 | -------------------------------------------------------------------------------- /.github/workflows/develop.yml: -------------------------------------------------------------------------------- 1 | name: develop 2 | 3 | on: 4 | push: 5 | branches: 6 | - develop 7 | 8 | jobs: 9 | build-test: 10 | 11 | runs-on: 12 | - self-hosted 13 | - deployer 14 | 15 | steps: 16 | 17 | - uses: actions/checkout@v3 18 | 19 | - name: Login to DockerHub 20 | uses: docker/login-action@v2 21 | with: 22 | username: ${{ secrets.DOCKER_USER }} 23 | password: ${{ secrets.DOCKER_PASS }} 24 | 25 | - name: Login to Google Cloud 26 | uses: google-github-actions/setup-gcloud@v0 27 | with: 28 | service_account_key: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 29 | project_id: ${{ secrets.GCP_SERVICE_ACCOUNT_PROJECT_ID }} 30 | export_default_credentials: true 31 | 32 | - name: Build and Run Tests 33 | env: 34 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 35 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 36 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 37 | GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 38 | run: RUN_TESTS=true ./ci.sh 39 | shell: bash 40 | 41 | build_and_deploy_docs: 42 | needs: 43 | - build-test 44 | 45 | runs-on: 46 | - self-hosted 47 | - deployer 48 | 49 | steps: 50 | 51 | - uses: actions/checkout@v3 52 | 53 | - name: Login to DockerHub 54 | uses: docker/login-action@v2 55 | with: 56 | username: ${{ secrets.DOCKER_USER }} 57 | password: ${{ secrets.DOCKER_PASS }} 58 | 59 | - name: Build Notebook,docs Docker 60 | env: 61 | 
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 62 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 63 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 64 | RUNNER_NAME: ${{ runner.name }} 65 | ADB_REPO: aperturedata/aperturedb 66 | ADB_TAG: dev 67 | LENZ_REPO: aperturedata/lenz 68 | LENZ_TAG: dev 69 | run: BUILD_COMPLETE=true ./ci.sh 70 | shell: bash 71 | 72 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: main 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build_and_test: 10 | 11 | runs-on: 12 | - self-hosted 13 | - deployer 14 | 15 | steps: 16 | 17 | - uses: actions/checkout@v3 18 | 19 | - name: Login to DockerHub 20 | uses: docker/login-action@v2 21 | with: 22 | username: ${{ secrets.DOCKER_USER }} 23 | password: ${{ secrets.DOCKER_PASS }} 24 | 25 | - name: Login to Google Cloud 26 | uses: google-github-actions/setup-gcloud@v0 27 | with: 28 | service_account_key: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 29 | project_id: ${{ secrets.GCP_SERVICE_ACCOUNT_PROJECT_ID }} 30 | export_default_credentials: true 31 | 32 | - name: Build and Run Tests 33 | env: 34 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 35 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 36 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 37 | GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 38 | run: RUN_TESTS=true ./ci.sh 39 | shell: bash 40 | 41 | build_notebooks_and_publish_pypi: 42 | needs: 43 | - build_and_test 44 | 45 | runs-on: 46 | - self-hosted 47 | - deployer 48 | 49 | steps: 50 | 51 | - uses: actions/checkout@v3 52 | 53 | - name: Login to DockerHub 54 | uses: docker/login-action@v2 55 | with: 56 | username: ${{ secrets.DOCKER_USER }} 57 | password: ${{ secrets.DOCKER_PASS }} 58 | 59 | - name: Build Notebook 60 | env: 61 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 62 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 63 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 64 | RUNNER_NAME: ${{ runner.name }} 65 | ADB_REPO: aperturedata/aperturedb 66 | ADB_TAG: dev 67 | LENZ_REPO: aperturedata/lenz 68 | LENZ_TAG: dev 69 | run: BUILD_COMPLETE=true ./ci.sh 70 | shell: bash 71 | 72 | - name: Publish to PyPi 73 | env: 74 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 75 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 76 | run: | 77 | bash publish.sh 78 | 79 | - name: Tag release version 80 | run: './tag.sh' 81 | shell: bash 82 | 83 | - name: GitHub Release 84 | run: 'TOKEN=${{ secrets.GITHUBPAT }} ./github-release.sh' 85 | shell: bash 86 | 87 | trigger_demos_buils: 88 | runs-on: ubuntu-latest 89 | needs: 90 | - build_notebooks_and_publish_pypi 91 | steps: 92 | - uses: actions/checkout@v3 93 | - name: repository dispatch 94 | run: | 95 | curl -X POST https://api.github.com/repos/aperture-data/demos/actions/workflows/13727133/dispatches \ 96 | -H "Accept: application/vnd.github+json" \ 97 | -H "Authorization: Bearer ${{ secrets.GITHUBPAT }}" \ 98 | -H "X-GitHub-Api-Version: 2022-11-28" \ 99 | -d '{"ref":"master","inputs":{}}' 100 | 101 | shell: bash 102 | 103 | trigger_docs_deploy: 104 | runs-on: ubuntu-latest 105 | needs: 106 | - build_notebooks_and_publish_pypi 107 | steps: 108 | - uses: actions/checkout@v3 109 | - name: repository dispatch 110 | run: | 111 | curl -X POST https://api.github.com/repos/aperture-data/docs/actions/workflows/64451786/dispatches \ 112 | 
-H "Accept: application/vnd.github+json" \ 113 | -H "Authorization: Bearer ${{ secrets.GITHUBPAT }}" \ 114 | -H "X-GitHub-Api-Version: 2022-11-28" \ 115 | -d '{"ref":"main","inputs":{}}' 116 | 117 | shell: bash 118 | -------------------------------------------------------------------------------- /.github/workflows/pr.yaml: -------------------------------------------------------------------------------- 1 | name: pr 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - develop 7 | 8 | jobs: 9 | run_test: 10 | 11 | runs-on: 12 | - self-hosted 13 | - deployer 14 | 15 | steps: 16 | 17 | - uses: actions/checkout@v3 18 | 19 | - name: Login to DockerHub 20 | uses: docker/login-action@v2 21 | with: 22 | username: ${{ secrets.DOCKER_USER }} 23 | password: ${{ secrets.DOCKER_PASS }} 24 | 25 | - name: Login to Google Cloud 26 | uses: google-github-actions/setup-gcloud@v0 27 | with: 28 | service_account_key: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 29 | project_id: ${{ secrets.GCP_SERVICE_ACCOUNT_PROJECT_ID }} 30 | export_default_credentials: true 31 | 32 | - name: Build and Run Tests 33 | env: 34 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 35 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 36 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 37 | GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 38 | RUNNER_NAME: ${{ runner.name }} 39 | RUN_TESTS: true 40 | NO_PUSH: true 41 | BRANCH_NAME: ${{ github.event.pull_request.head.ref }} 42 | TARGET_BRANCH_NAME: ${{ github.event.pull_request.base.ref }} 43 | ADB_REPO: aperturedata/aperturedb 44 | ADB_TAG: dev 45 | LENZ_REPO: aperturedata/lenz 46 | LENZ_TAG: dev 47 | run: ./ci.sh 48 | shell: bash 49 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | branches: 6 | - release* 7 | 8 | jobs: 9 | build-test: 10 | 11 | runs-on: 12 | - self-hosted 13 | - deployer 14 | 15 | steps: 16 | 17 | - uses: actions/checkout@v3 18 | 19 | - name: Login to DockerHub 20 | uses: docker/login-action@v2 21 | with: 22 | username: ${{ secrets.DOCKER_USER }} 23 | password: ${{ secrets.DOCKER_PASS }} 24 | 25 | - name: Login to Google Cloud 26 | uses: google-github-actions/setup-gcloud@v0 27 | with: 28 | service_account_key: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 29 | project_id: ${{ secrets.GCP_SERVICE_ACCOUNT_PROJECT_ID }} 30 | export_default_credentials: true 31 | 32 | - name: Build and Run Tests 33 | env: 34 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 35 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 36 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 37 | GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 38 | run: RUN_TESTS=true NO_PUSH=true UPDATE_BRANCH=true ./ci.sh 39 | shell: bash 40 | 41 | build_and_deploy_docs: 42 | 43 | runs-on: 44 | - self-hosted 45 | - deployer 46 | 47 | steps: 48 | 49 | - uses: actions/checkout@v3 50 | 51 | - name: Login to DockerHub 52 | uses: docker/login-action@v2 53 | with: 54 | username: ${{ secrets.DOCKER_USER }} 55 | password: ${{ secrets.DOCKER_PASS }} 56 | 57 | - name: Build Notebook Docker 58 | run: BUILD_COMPLETE=true NO_PUSH=true ./ci.sh 59 | shell: bash 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 
2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # VSCode 132 | .vscode/ 133 | 134 | #Data files 135 | *.adb.csv 136 | *.jpg 137 | *.npy 138 | test/aperturedb/db*/ 139 | test/input/blobs/ 140 | docs/examples/ 141 | examples/*/coco 142 | examples/*/classification.txt 143 | kaggleds/ 144 | examples/*/kaggleds/ 145 | docs/*/*.svg 146 | test/aperturedb/log* 147 | adb-python/* 148 | docker/notebook/aperturedata/* 149 | docker/tests/aperturedata/* 150 | docker/pytorch-gpu/aperturedata/* 151 | /test/input/ 152 | /test/input/images/ 153 | 154 | .aperturedb 155 | test/data/ 156 | test/aperturedb/certificate/ 157 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autofix_commit_msg: | 3 | [pre-commit.ci] auto fixes from pre-commit.com hooks 4 | 5 | for more information, see https://pre-commit.ci 6 | autofix_prs: true 7 | autoupdate_branch: '' 8 | autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' 9 | autoupdate_schedule: weekly 10 | skip: [] 11 | submodules: false 12 | repos: 13 | - repo: https://github.com/hhatto/autopep8 14 | rev: 8b75604 15 | hooks: 16 | - id: autopep8 17 | exclude: _pb2.py$ 18 | args: ["--ignore", "E251,E241,E221,E402,E265,E275", "-i"] 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | @copyright Copyright (c) 2017 Intel Corporation 4 | @copyright Copyright (c) 2024 ApertureData Inc 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), 8 | to deal in the Software without restriction, 9 | including without limitation the rights to use, copy, modify, 10 | merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 
25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ApertureDB Client Python Module 2 | 3 | This is the Python SDK for building applications with [ApertureDB](https://docs.aperturedata.io/Introduction/WhatIsAperture). 4 | 5 | It comprises utilities to get data in and out of ApertureDB efficiently. 6 | The quick [getting started guide](https://docs.aperturedata.io/Setup/QuickStart) is a useful starting point for building with this SDK. 7 | For more concrete examples, please refer to: 8 | * [Simple examples and concepts](https://docs.aperturedata.io/category/start-with-basics) 9 | * [Advanced usage examples](https://docs.aperturedata.io/category/build-ml-examples) 10 | * [Sample applications](https://docs.aperturedata.io/category/build-applications) 11 | 12 | # Installing in a custom virtual environment 13 | ```bash 14 | pip install aperturedb[complete] 15 | ``` 16 | 17 | or, for an installation with only the core part of the SDK: 18 | ```bash 19 | pip install aperturedb 20 | ``` 21 | 22 | A complete [reference](https://docs.aperturedata.io/category/aperturedb-python-sdk) of this SDK is available on the official [ApertureDB Documentation](https://docs.aperturedata.io). 23 | 24 | 25 | # Development setup 26 | The recommended way is to clone this repo and do an editable install as follows: 27 | ```bash 28 | git clone https://github.com/aperture-data/aperturedb-python.git 29 | cd aperturedb-python 30 | pip install -e .[dev] 31 | ``` 32 | 33 | 34 | # Running tests 35 | The tests are inside the `test` dir and are currently run in a Linux container. Refer to `docker/tests` and `test/run_test_container` for details. The following explanation assumes that the current working directory is `test`. 36 | 37 | The tests bring up a set of components in an isolated network, namely: 38 | - aperturedb-community 39 | - lenz 40 | - nginx 41 | - ca (for initial provisioning of certificates) 42 | - webui 43 | 44 | 45 | To connect to this setup, the ports are exposed to the host as follows: 46 | - 55556 for TCP connection to aperturedb (via lenz). 47 | - 8087 for HTTP connection to aperturedb (via nginx). 48 | 49 | 50 | 51 | The test environment can be brought up manually with: 52 | ```bash 53 | docker compose up -d 54 | ``` 55 | 56 | ## Changes to run the tests in a development environment 57 | Edit the file `test/dbinfo.py` to look like the following: 58 | - DB_TCP_HOST = `localhost` 59 | - DB_REST_HOST = `localhost` 60 | - DB_TCP_PORT = `55556` 61 | - DB_REST_PORT = `8087` 62 | 63 | 64 | All the tests can be run with: 65 | 66 | ```bash 67 | export GCP_SERVICE_ACCOUNT_KEY= 68 | bash run_test.sh 69 | ``` 70 | 71 | Specific tests can be run by invoking pytest directly: 72 | 73 | ```bash 74 | PROJECT=aperturedata KAGGLE_username=ci KAGGLE_key=dummy coverage run -m pytest test_Session.py -v -s --log-cli-level=DEBUG 75 | ``` 76 | 77 | **NOTE: The running environment is assumed to be Linux x86_64. Some changes may be required to run the tests on macOS or Windows Python environments.** 78 | 79 | ## Environment variables that affect the runtime behaviour of the SDK 80 | 81 | These can be used as debugging aids.
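For example, a debugging run of the test suite might export these variables before invoking the tests. The variable names come from the table that follows; the specific values shown here are only illustrative:

```bash
# Illustrative values only; see the table below for what each variable accepts.
export ADB_DEBUGGABLE=true               # register the SIGUSR1 fault handler
export LOG_CONSOLE_LEVEL=DEBUG           # more verbose than the ERROR default on stdout
export ADB_LOG_FILE=/tmp/adb-debug.log   # custom path for the log file (example path)
bash run_test.sh
```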
82 | 83 | | Variable | type | Comments | Default value | 84 | | --- | --- | --- | --- | 85 | |ADB_DEBUGGABLE | boolean | allows the application to register a fault handler that dumps a trace when SIGUSR1 is sent to the process | not set | 86 | |LOG_FILE_LEVEL | log levels | The threshold for emitting log messages into the error.log file | WARN | 87 | |LOG_CONSOLE_LEVEL | log levels | The threshold for emitting log messages into stdout | ERROR | 88 | |ADB_LOG_FILE | string | custom file path for the LOG file | not set| 89 | 90 | 91 | # Reporting bugs 92 | Any error in the functionality / documentation / tests maybe reported by creating a 93 | [github issue](https://github.com/aperture-data/aperturedb-python/issues). 94 | 95 | # Development guidelines 96 | For inclusion of any features, a PR may be created with a patch, 97 | and a brief description of the problem and the fix. 98 | The CI enforces a coding style guideline with autopep8 and 99 | a script to detect trailing white spaces. 100 | 101 | If a PR encounters failures, the log will describe the location of 102 | the offending line with a description of the problem. 103 | -------------------------------------------------------------------------------- /aperturedb/BBoxDataCSV.py: -------------------------------------------------------------------------------- 1 | from aperturedb import CSVParser 2 | 3 | HEADER_X_POS = "x_pos" 4 | HEADER_Y_POS = "y_pos" 5 | HEADER_WIDTH = "width" 6 | HEADER_HEIGHT = "height" 7 | IMG_KEY_PROP = "img_key_prop" 8 | IMG_KEY_VAL = "img_key_value" 9 | 10 | 11 | class BBoxDataCSV(CSVParser.CSVParser): 12 | """ 13 | **ApertureDB BBox Data.** 14 | 15 | This class loads the Bounding Box Data which is present in a CSV file, 16 | and converts it into a series of ApertureDB queries. 17 | 18 | :::note Is backed by a CSV file with the following columns: 19 | ``IMG_KEY``, ``x_pos``, ``y_pos``, ``width``, ``height``, ``BBOX_PROP_NAME_1``, ... ``BBOX_PROP_NAME_N``, ``constraint_BBOX_PROP_NAME_1`` 20 | ::: 21 | 22 | **IMG_KEY**: column has the property name of the image property that 23 | the bounding box will be connected to, and each row has the value 24 | that will be used for finding the image. 25 | 26 | **x_pos, y_pos**: Specify the coordinates of top left of the bounding box. 27 | 28 | **width, height**: Specify the dimensions of the bounding box, as integers (unit is in pixels). 29 | 30 | **BBOX_PROP_NAME_N**: is an arbitrary name of the property of the bounding 31 | box, and each row has the value for that property. 32 | 33 | **constraint_BBOX_PROP_NAME_1**: Constraints against specific property, used for conditionally adding a Bounding Box. 34 | 35 | Example CSV file:: 36 | 37 | img_unique_id,x_pos,y_pos,width,height,type,dataset_id,constraint_dataset_id 38 | d5b25253-9c1e,257,154,84,125,manual,12345,12345 39 | d5b25253-9c1e,7,537,522,282,manual,12346,12346 40 | ... 41 | 42 | Example usage: 43 | 44 | ``` python 45 | 46 | data = BBoxDataCSV("/path/to/BoundingBoxesData.csv") 47 | loader = ParallelLoader(client) 48 | loader.ingest(data) 49 | ``` 50 | 51 | :::info 52 | In the above example, the constraint_dataset_id ensures that a bounding box with the specified 53 | dataset_id would be only inserted if it does not already exist in the database. 
54 | ::: 55 | 56 | """ 57 | 58 | def __init__(self, filename: str, **kwargs): 59 | 60 | super().__init__(filename, **kwargs) 61 | 62 | self.props_keys = [x for x in self.header[5:] 63 | if not x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 64 | self.constraints_keys = [x for x in self.header[5:] 65 | if x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 66 | 67 | self.img_key = self.header[0] 68 | self.command = "AddBoundingBox" 69 | 70 | def get_indices(self): 71 | return { 72 | "entity": { 73 | "_BoundingBox": self.get_indexed_properties() 74 | } 75 | } 76 | 77 | def getitem(self, idx): 78 | q = [] 79 | img_id = self.df.loc[idx, self.img_key] 80 | fi = { 81 | "FindImage": { 82 | "_ref": 1, 83 | "unique": True, 84 | "constraints": { 85 | self.img_key: ["==", img_id], 86 | }, 87 | "blobs": False, 88 | }, 89 | } 90 | q.append(fi) 91 | 92 | box_data_headers = [HEADER_X_POS, 93 | HEADER_Y_POS, HEADER_WIDTH, HEADER_HEIGHT] 94 | box_data = [int(self.df.loc[idx, h]) for h in box_data_headers] 95 | 96 | rect_attrs = ["x", "y", "width", "height"] 97 | custom_fields = { 98 | "image_ref": 1, 99 | "rectangle": { 100 | attr: val for attr, val in zip(rect_attrs, box_data) 101 | }, 102 | } 103 | abb = self._basic_command(idx, custom_fields) 104 | 105 | properties = self.parse_properties(idx) 106 | if properties: 107 | props = properties 108 | if "_label" in props: 109 | abb[self.command]["label"] = props["_label"] 110 | props.pop("_label") 111 | # Check if props is not empty after removing "_label" 112 | if props: 113 | abb[self.command]["properties"] = props 114 | q.append(abb) 115 | 116 | return q, [] 117 | 118 | def validate(self) -> None: 119 | 120 | self.header = list(self.df.columns.values) 121 | 122 | if self.header[1] != HEADER_X_POS: 123 | raise Exception("Error with CSV file field: " + HEADER_X_POS) 124 | if self.header[2] != HEADER_Y_POS: 125 | raise Exception("Error with CSV file field: " + HEADER_Y_POS) 126 | if self.header[3] != HEADER_WIDTH: 127 | raise Exception("Error with CSV file field: " + HEADER_WIDTH) 128 | if self.header[4] != HEADER_HEIGHT: 129 | raise Exception("Error with CSV file field: " + HEADER_HEIGHT) 130 | -------------------------------------------------------------------------------- /aperturedb/BlobDataCSV.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from aperturedb import CSVParser 4 | 5 | PROPERTIES = "properties" 6 | CONSTRAINTS = "constraints" 7 | BLOB_PATH = "filename" 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class BlobDataCSV(CSVParser.CSVParser): 13 | """**ApertureDB Blob Data.** 14 | 15 | This class loads the Blob Data which is present in a CSV file, 16 | and converts it into a series of ApertureDB queries. 17 | 18 | :::note Is backed by a CSV file with the following columns: 19 | ``FILENAME``, ``PROP_NAME_1``, ... ``PROP_NAME_N``, ``constraint_PROP_NAME_1`` 20 | ::: 21 | 22 | **FILENAME**: The path of the blob object on the file system. 23 | 24 | **PROP_NAME_1 ... PROP_NAME_N**: Arbitrary property names associated with this blob. 25 | 26 | **constraint_PROP_NAME_1**: Constraints against specific property, used for conditionally adding a Blob. 27 | 28 | Example CSV file:: 29 | 30 | filename,name,lastname,age,id,constraint_id 31 | /mnt/blob1,John,Salchi,69,321423532,321423532 32 | /mnt/blob2,Johna,Salchi,63,42342522,42342522 33 | ... 
34 | 35 | Example usage: 36 | 37 | ``` python 38 | 39 | data = BlobDataCSV("/path/to/BlobData.csv") 40 | loader = ParallelLoader(client) 41 | loader.ingest(data) 42 | ``` 43 | 44 | 45 | :::info 46 | In the above example, the constraint_id ensures that a blob with the specified 47 | id would be only inserted if it does not already exist in the database. 48 | ::: 49 | """ 50 | 51 | def __init__(self, filename: str, **kwargs): 52 | 53 | super().__init__(filename, **kwargs) 54 | 55 | self.props_keys = [x for x in self.header[1:] 56 | if not x.startswith(CSVParser.CONSTRAINTS_PREFIX) and x != BLOB_PATH] 57 | self.constraints_keys = [x for x in self.header[1:] 58 | if x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 59 | self.command = "AddBlob" 60 | 61 | def get_indices(self): 62 | return { 63 | "entity": { 64 | "_Blob": self.get_indexed_properties() 65 | } 66 | } 67 | 68 | def getitem(self, idx): 69 | filename = os.path.join(self.relative_path_prefix, 70 | self.df.loc[idx, BLOB_PATH]) 71 | blob_ok, blob = self.load_blob(filename) 72 | if not blob_ok: 73 | logger.error("Error loading blob: " + filename) 74 | raise Exception("Error loading blob: " + filename) 75 | 76 | q = [] 77 | ab = self._basic_command(idx) 78 | q.append(ab) 79 | 80 | return q, [blob] 81 | 82 | def load_blob(self, filename): 83 | 84 | try: 85 | fd = open(filename, "rb") 86 | buff = fd.read() 87 | fd.close() 88 | return True, buff 89 | except Exception as e: 90 | logger.exception(e) 91 | 92 | return False, None 93 | 94 | def validate(self): 95 | 96 | self.header = list(self.df.columns.values) 97 | 98 | if self.header[0] != BLOB_PATH: 99 | raise Exception("Error with CSV file field: " + BLOB_PATH) 100 | -------------------------------------------------------------------------------- /aperturedb/Blobs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aperturedb.Entities import Entities 4 | 5 | 6 | class Blobs(Entities): 7 | """ 8 | **The object mapper representation of blobs in ApertureDB.** 9 | 10 | This class is a layer on top of the native query. 11 | It facilitates interactions with blobs in the database in the pythonic way. 12 | """ 13 | db_object = "_Blob" 14 | -------------------------------------------------------------------------------- /aperturedb/BoundingBoxes.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aperturedb.Entities import Entities 4 | 5 | 6 | class BoundingBoxes(Entities): 7 | """ 8 | **The object mapper representation of bounding boxes in ApertureDB.** 9 | 10 | This class is a layer on top of the native query. 11 | It facilitates interactions with bounding boxes in the database in the pythonic way. 12 | """ 13 | db_object = "_BoundingBox" 14 | -------------------------------------------------------------------------------- /aperturedb/Clips.py: -------------------------------------------------------------------------------- 1 | from aperturedb.Entities import Entities 2 | 3 | 4 | class Clips(Entities): 5 | """ 6 | **The object mapper representation of Video Clips in ApertureDB.** 7 | 8 | This class is a layer on top of the native query. 9 | It facilitates interactions with Video clips in the database in the pythonic way. 
10 | """ 11 | db_object = "_Clip" 12 | -------------------------------------------------------------------------------- /aperturedb/Constraints.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from enum import Enum 3 | 4 | 5 | class Conjunction(Enum): 6 | AND = "all" 7 | OR = "any" 8 | 9 | 10 | class Constraints(object): 11 | """ 12 | **Constraints object for the Object mapper API** 13 | """ 14 | 15 | def __init__(self, conjunction: Conjunction = Conjunction.AND): 16 | self._conjunction = conjunction.value 17 | self.constraints = { 18 | conjunction.value: { 19 | } 20 | } 21 | 22 | def equal(self, key, value) -> Constraints: 23 | self.constraints[self._conjunction][key] = ["==", value] 24 | return self 25 | 26 | def notequal(self, key, value) -> Constraints: 27 | self.constraints[self._conjunction][key] = ["!=", value] 28 | return self 29 | 30 | def greaterequal(self, key, value) -> Constraints: 31 | self.constraints[self._conjunction][key] = [">=", value] 32 | return self 33 | 34 | def greater(self, key, value) -> Constraints: 35 | self.constraints[self._conjunction][key] = [">", value] 36 | return self 37 | 38 | def lessequal(self, key, value) -> Constraints: 39 | self.constraints[self._conjunction][key] = ["<=", value] 40 | return self 41 | 42 | def less(self, key, value) -> Constraints: 43 | self.constraints[self._conjunction][key] = ["<", value] 44 | return self 45 | 46 | def is_in(self, key, val_array) -> Constraints: 47 | self.constraints[self._conjunction][key] = ["in", val_array] 48 | return self 49 | 50 | def check(self, entity): 51 | for key, op in self.constraints.items(): 52 | if key not in entity: 53 | return False 54 | if op[0] == "==": 55 | if not entity[key] == op[1]: 56 | return False 57 | elif op[0] == ">=": 58 | if not entity[key] >= op[1]: 59 | return False 60 | elif op[0] == ">": 61 | if not entity[key] > op[1]: 62 | return False 63 | elif op[0] == "<=": 64 | if not entity[key] <= op[1]: 65 | return False 66 | elif op[0] == "<": 67 | if not entity[key] < op[1]: 68 | return False 69 | elif op[0] == "in": 70 | if not entity[key] in op[1]: 71 | return False 72 | else: 73 | raise Exception("invalid constraint operation: " + op[0]) 74 | return True 75 | -------------------------------------------------------------------------------- /aperturedb/DaskManager.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import logging 3 | from threading import Lock 4 | import time 5 | from types import SimpleNamespace 6 | import dask 7 | from dask.distributed import Client, LocalCluster, progress 8 | from aperturedb.Connector import Connector 9 | 10 | import multiprocessing as mp 11 | 12 | from aperturedb.Stats import Stats 13 | 14 | dask.config.set({"dataframe.convert-string": False}) 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class DaskManager: 20 | """ 21 | **Class responsible for setting up a local cluster and assigning parts 22 | of data to each worker** 23 | """ 24 | 25 | def __init__(self, num_workers: int = -1): 26 | self.__num_workers = num_workers 27 | # The -1 magic number is to use as many 90% of the cores (1 worker per core). 28 | # This can be overridden by the user. 29 | # Create a pool of workers. 30 | # TODO: see if the same pool can be reused for multiple tasks. 
31 | workers = self.__num_workers if self.__num_workers != \ 32 | -1 else int(0.9 * mp.cpu_count()) 33 | 34 | self._cluster = LocalCluster(n_workers=workers) 35 | self._cluster.shutdown_on_close = False 36 | self._client = Client(self._cluster) 37 | dask.config.set(scheduler="distributed") 38 | 39 | def __del__(self): 40 | logger.info(".......Shutting cluster.........") 41 | self._client.close() 42 | self._cluster.close() 43 | 44 | def run(self, QueryClass: type[ParallelQuery], client: Connector, generator, batchsize, stats): 45 | def process(df, host, port, use_ssl, session, connnector_type): 46 | metrics = Stats() 47 | # Dask reads data in partitions, and the first partition is of 2 rows, with all 48 | # values as 'foo'. This is for sampling the column names and types. Should not process 49 | # those rows. 50 | if len(df) == 2: 51 | if (df.iloc[0, 0] == "a" and df.isna().iloc[1, 0]) or df.iloc[0, 0] == "foo": 52 | return 53 | count = 0 54 | try: 55 | shared_data = SimpleNamespace() 56 | shared_data.session = session 57 | shared_data.lock = Lock() 58 | client = connnector_type(host=host, port=port, 59 | use_ssl=use_ssl, shared_data=shared_data) 60 | except Exception as e: 61 | logger.exception(e) 62 | #from aperturedb.ParallelLoader import ParallelLoader 63 | loader = QueryClass(client) 64 | for i in range(0, len(df), batchsize): 65 | end = min(i + batchsize, len(df)) 66 | slice = df[i:end] 67 | data = generator.__class__( 68 | filename=generator.filename, 69 | df=slice, 70 | blobs_relative_to_csv=generator.blobs_relative_to_csv) 71 | 72 | loader.query(generator=data, batchsize=len( 73 | slice), numthreads=1, stats=False) 74 | count += 1 75 | metrics.times_arr.extend(loader.times_arr) 76 | metrics.error_counter += loader.error_counter 77 | metrics.succeeded_queries += loader.get_succeeded_queries() 78 | metrics.succeeded_commands += loader.get_succeeded_commands() 79 | 80 | return metrics 81 | 82 | start_time = time.time() 83 | # Connector cannot be serialized across processes, 84 | # so we pass session and host/port information instead. 85 | computation = generator.df.map_partitions( 86 | process, 87 | client.host, 88 | client.port, 89 | client.use_ssl, 90 | client.shared_data.session, 91 | type(client)) 92 | computation = computation.persist() 93 | if stats: 94 | progress(computation) 95 | results = computation.compute() 96 | 97 | return results, time.time() - start_time 98 | -------------------------------------------------------------------------------- /aperturedb/DataModels.py: -------------------------------------------------------------------------------- 1 | """ 2 | **Data Model Classes to support (pydantic) model based ingestiton.** 3 | """ 4 | from __future__ import annotations 5 | from pydantic import BaseModel, Field 6 | from typing_extensions import Annotated, List 7 | from typing import ClassVar, Optional 8 | from uuid import uuid4 9 | from aperturedb.Query import ObjectType, PropertyType, RangeType 10 | 11 | 12 | class IdentityDataModel(BaseModel): 13 | """Base class for all entities in ApertureDB. 14 | Generates a default UUID for the entity. 15 | """ 16 | 17 | id: Annotated[str, Field(default_factory=lambda: uuid4().hex)] 18 | # Change as per the docs for the error 19 | # https://docs.pydantic.dev/dev-v2/usage/errors/#model-field-overridden 20 | type: ClassVar[ObjectType] = ObjectType.ENTITY 21 | 22 | 23 | class BlobDataModel(IdentityDataModel): 24 | """Base class for all blob entities in ApertureDB. 
25 | """ 26 | url: Annotated[str, Field( 27 | title="URL", description="URL to file, http, s3 or gs resource")] 28 | type = ObjectType.BLOB 29 | 30 | 31 | class ImageDataModel(BlobDataModel): 32 | """Base class for all image objects in ApertureDB. 33 | """ 34 | type = ObjectType.IMAGE 35 | 36 | 37 | class ClipDataModel(IdentityDataModel): 38 | """Base class for all clip objects in ApertureDB. 39 | """ 40 | type = ObjectType.CLIP 41 | range_type: Annotated[RangeType, 42 | Field(title="Range Type", description="Range type", 43 | default=RangeType.TIME), 44 | PropertyType.SYSTEM] 45 | start: Annotated[float, Field(title="Start", description="Start point as frame, time(hh:mm:ss.uuuuuu) or fraction"), 46 | PropertyType.SYSTEM] 47 | stop: Annotated[float, Field(title="Stop", description="Stop point as frame, time(hh:mm:ss.uuuuuu) or fraction"), 48 | PropertyType.SYSTEM] 49 | 50 | 51 | class VideoDataModel(BlobDataModel): 52 | """Data model for video objects in ApertureDB. 53 | """ 54 | type = ObjectType.VIDEO 55 | 56 | 57 | class DescriptorDataModel(IdentityDataModel): 58 | """Descriptor (Embedding) data model for ApertureDB. 59 | """ 60 | type = ObjectType.DESCRIPTOR 61 | vector: Annotated[List[float], Field( 62 | title="Vector", description="Vector of floats"), PropertyType.SYSTEM] 63 | set: Annotated[DescriptorSetDataModel, Field( 64 | title="Set", description="Descriptor set"), PropertyType.SYSTEM] 65 | 66 | 67 | class PolygonDataModel(IdentityDataModel): 68 | """Polygon data model for ApertureDB. 69 | """ 70 | type = ObjectType.POLYGON 71 | 72 | 73 | class FrameDataModel(IdentityDataModel): 74 | """Frame data model for ApertureDB. 75 | """ 76 | type = ObjectType.FRAME 77 | 78 | 79 | class DescriptorSetDataModel(IdentityDataModel): 80 | """Descriptor Set data model for ApertureDB. 81 | """ 82 | type = ObjectType.DESCRIPTORSET 83 | name: Annotated[str, Field(title="Name", description="Name of the descriptor set"), 84 | PropertyType.SYSTEM] 85 | dimensions: Annotated[int, Field(title="Dimension", description="Dimension of the descriptor set"), 86 | PropertyType.SYSTEM] 87 | 88 | 89 | class BoundingBoxDataModel(IdentityDataModel): 90 | """Bounding Box data model for ApertureDB. 91 | """ 92 | type = ObjectType.BOUNDING_BOX 93 | -------------------------------------------------------------------------------- /aperturedb/DescriptorSetDataCSV.py: -------------------------------------------------------------------------------- 1 | import ast 2 | from aperturedb import CSVParser 3 | 4 | HEADER_NAME = "name" 5 | HEADER_DIM = "dimensions" 6 | HEADER_ENGINE = "engine" 7 | HEADER_METRIC = "metric" 8 | PROPERTIES = "properties" 9 | CONSTRAINTS = "constraints" 10 | 11 | 12 | class DescriptorSetDataCSV(CSVParser.CSVParser): 13 | """**ApertureDB DescriptorSet Data.** 14 | 15 | This class loads the Descriptor Set Data which is present in a CSV file, 16 | and converts it into a series of aperturedb queries. 17 | 18 | :::note Is backed by a CSV file with the following columns: 19 | ``name``, ``dimensions``, ``engine``, ``metric``, ``PROP_NAME_N``, ``constraint_PROP1`` 20 | ::: 21 | 22 | Example CSV file:: 23 | 24 | name,dimensions,engine,metric 25 | dining_chairs,2048,FaissIVFFlat,L2 26 | chandeliers,2048,FaissIVFFlat,L2 27 | console_tables,2048,FaissIVFFlat,L2 28 | ... 
29 | 30 | Example code to create an instance: 31 | 32 | ``` python 33 | 34 | data = DescriptorSetDataCSV("/path/to/DescriptorSetData.csv") 35 | loader = ParallelLoader(client) 36 | loader.ingest(data) 37 | ``` 38 | 39 | 40 | :::info 41 | In the above example, the first row implies to create a Descriptor set called dining_chairs. 42 | The Descriptors in that set would be expected to be an array of float64, of length 2048. 43 | When performing a search on this set, FaissIVFFlat engine would be used and the metric to compute 44 | the distance would be L2. 45 | ::: 46 | """ 47 | 48 | def __init__(self, filename: str, **kwargs): 49 | 50 | super().__init__(filename, **kwargs) 51 | 52 | self.props_keys = [x for x in self.header[4:] 53 | if not x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 54 | self.constraints_keys = [x for x in self.header[4:] 55 | if x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 56 | self.command = "AddDescriptorSet" 57 | 58 | def get_indices(self): 59 | return { 60 | "entity": { 61 | "_DescriptorSet": self.get_indexed_properties() 62 | } 63 | } 64 | 65 | def getitem(self, idx): 66 | 67 | # Metrics/Engine can be of the form: 68 | # "IP", or 69 | # ["IP" ...] 70 | idx = self.df.index.start + idx 71 | metrics = self.df.loc[idx, HEADER_METRIC] 72 | metrics = metrics if "[" not in metrics else ast.literal_eval(metrics) 73 | engines = self.df.loc[idx, HEADER_ENGINE] 74 | engines = engines if "[" not in engines else ast.literal_eval(engines) 75 | 76 | data = { 77 | "name": self.df.loc[idx, HEADER_NAME], 78 | "dimensions": self.df.loc[idx, HEADER_DIM], 79 | "engine": engines, 80 | "metric": metrics, 81 | } 82 | 83 | q = [] 84 | ads = self._basic_command(idx, custom_fields=data) 85 | q.append(ads) 86 | 87 | return q, [] 88 | 89 | def validate(self): 90 | 91 | self.header = list(self.df.columns.values) 92 | 93 | if self.header[0] != HEADER_NAME: 94 | raise Exception("Error with CSV file field: " + HEADER_NAME) 95 | if self.header[1] != HEADER_DIM: 96 | raise Exception("Error with CSV file field: " + HEADER_DIM) 97 | if self.header[2] != HEADER_ENGINE: 98 | raise Exception("Error with CSV file field: " + HEADER_ENGINE) 99 | if self.header[3] != HEADER_METRIC: 100 | raise Exception("Error with CSV file field: " + HEADER_METRIC) 101 | -------------------------------------------------------------------------------- /aperturedb/EntityDataCSV.py: -------------------------------------------------------------------------------- 1 | from aperturedb import CSVParser 2 | import logging 3 | 4 | logger = logging.getLogger(__name__) 5 | ENTITY_CLASS = "EntityClass" 6 | PROPERTIES = "properties" 7 | CONSTRAINTS = "constraints" 8 | 9 | 10 | class EntityDataCSV(CSVParser.CSVParser): 11 | """**ApertureDB Entity Data.** 12 | 13 | This class loads the Entity Data which is present in a CSV file, 14 | and converts it into a series of ApertureDB queries. 15 | 16 | :::note Is backed by a CSV file with the following columns: 17 | ``EntityClass``, ``PROP_NAME_1``, ... ``PROP_NAME_N``, ``constraint_PROP1`` 18 | ::: 19 | 20 | Example CSV file:: 21 | 22 | EntityClass,name,lastname,age,id,constraint_id 23 | Person,John,Salchi,69,321423532,321423532 24 | Person,Johna,Salchi,63,42342522,42342522 25 | ... 
26 | 27 | Example usage: 28 | 29 | ``` python 30 | 31 | data = EntityDataCSV("/path/to/EntityData.csv") 32 | loader = ParallelLoader(client) 33 | loader.ingest(data) 34 | ``` 35 | 36 | 37 | :::info 38 | In the above example, the constraint_id ensures that a Entity with the specified 39 | id would be only inserted if it does not already exist in the database. 40 | ::: 41 | 42 | """ 43 | 44 | def __init__(self, filename: str, **kwargs): 45 | super().__init__(filename, **kwargs) 46 | 47 | self.props_keys = [x for x in self.header[1:] 48 | if not x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 49 | self.constraints_keys = [x for x in self.header[1:] 50 | if x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 51 | self.command = "AddEntity" 52 | 53 | def get_indices(self): 54 | return { 55 | "entity": { 56 | cls: self.get_indexed_properties() for cls in self.df[ENTITY_CLASS].unique() 57 | } 58 | } 59 | 60 | def getitem(self, idx): 61 | idx = self.df.index.start + idx 62 | eclass = self.df.loc[idx, ENTITY_CLASS] 63 | q = [] 64 | ae = self._basic_command(idx, 65 | custom_fields={ 66 | "class": eclass 67 | }) 68 | 69 | q.append(ae) 70 | return q, [] 71 | 72 | def validate(self): 73 | if self.header[0] != ENTITY_CLASS: 74 | raise Exception("Error with CSV file field: " + ENTITY_CLASS) 75 | 76 | # Used when a csv has a single entity type that needs to be deleted 77 | 78 | 79 | class EntityDeleteDataCSV(CSVParser.CSVParser): 80 | """**ApertureDB Entity Delete Data.** 81 | 82 | This class loads the Entity Data which is present in a CSV file, 83 | and converts it into a series of ApertureDB deletes. 84 | 85 | :::note 86 | Expects a CSV file with the following columns: 87 | 88 | ``constraint_PROP1`` 89 | ::: 90 | 91 | Example CSV file:: 92 | 93 | constraint_id 94 | 321423532 95 | 42342522 96 | ... 97 | 98 | Example usage: 99 | 100 | ```python 101 | 102 | data = ImageDeleteDataCSV("/path/to/UnusedImages.csv") 103 | loader = ParallelQuery(client) 104 | loader.query(data) 105 | ``` 106 | 107 | 108 | :::info 109 | In the above example, the constraint_id ensures that a Entity with the specified 110 | id would be only deleted. 111 | 112 | Note that you can take a csv with normal prop data and this will ignore it, so you 113 | could use input to a loader to this. 114 | ::: 115 | 116 | 117 | """ 118 | 119 | def __init__(self, entity_class, filename, df=None, use_dask=False): 120 | super().__init__(filename, df=df, use_dask=use_dask) 121 | self.command = "Delete" + entity_class 122 | self.constraint_keyword = "constraints" 123 | if not use_dask: 124 | self.constraint_keys = [x for x in self.header[0:]] 125 | 126 | def getitem(self, idx): 127 | idx = self.df.index.start + idx 128 | q = [] 129 | entity_delete = self._basic_command(idx) 130 | 131 | q.append(entity_delete) 132 | return q, [] 133 | 134 | def validate(self): 135 | # all we require is a valid csv with 1 or more columns. 
136 | return True 137 | 138 | 139 | class ImageDeleteDataCSV(EntityDeleteDataCSV): 140 | """ 141 | **ApertureData CSV Loader class for deleting images** 142 | 143 | Usage details in EntityDeleteDataCSV 144 | """ 145 | 146 | def __init__(self, filename, df=None, use_dask=False): 147 | super().__init__("Image", filename, df=df, use_dask=use_dask) 148 | -------------------------------------------------------------------------------- /aperturedb/KaggleData.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import List, Tuple 3 | import os 4 | import pandas as pd 5 | from kaggle.api.kaggle_api_extended import KaggleApi 6 | import zipfile 7 | from aperturedb.Subscriptable import Subscriptable 8 | 9 | 10 | class KaggleData(Subscriptable): 11 | """ 12 | **Class to wrap around a Dataset retrieved from kaggle** 13 | 14 | A DataSet downloaded from kaggle does not implement a standard mechanism to iterate over its values 15 | This class intends to provide an abstraction like that of a pytorch dataset 16 | where the iteration over Dataset elements yields an atomic record. 17 | 18 | :::note 19 | This class should be subclassed with specific implementations of generate_index and generate_query. 20 | ::: 21 | 22 | Example subclass: [CelebADataKaggle](https://github.com/aperture-data/aperturedb-python/blob/develop/examples/CelebADataKaggle.py) 23 | 24 | Args: 25 | dataset_ref (str): URL of kaggle dataset, for example https://www.kaggle.com/datasets/jessicali9530/celeba-dataset 26 | records_count (int): number of records to provide to generate. 27 | 28 | """ 29 | 30 | def __init__( 31 | self, 32 | dataset_ref: str, 33 | records_count: int = -1) -> None: 34 | self._collection = None 35 | self.records_count = records_count 36 | kaggle = KaggleApi() 37 | kaggle.authenticate() 38 | if "datasets/" in dataset_ref: 39 | dataset_ref = dataset_ref[dataset_ref.index( 40 | "datasets/") + len("datasets/"):] 41 | 42 | workdir = os.path.join("kaggleds", dataset_ref) 43 | 44 | files = kaggle.dataset_list_files(dataset_ref) 45 | 46 | # do not unzip from kaggle's API as it deletes the archive and 47 | # a subsequent run results in a redownload. 48 | x = kaggle.dataset_download_files( 49 | dataset=dataset_ref, 50 | path=workdir, 51 | quiet=False, 52 | unzip=False) 53 | 54 | archive = None 55 | for _, subdirs, dfiles in os.walk(workdir): 56 | if len(dfiles) == 1 and len(subdirs) == 0: 57 | archive = os.path.join(workdir, dfiles[0]) 58 | 59 | with zipfile.ZipFile(archive, 'r') as zip_ref: 60 | zip_ref.extractall(workdir) 61 | 62 | break 63 | self.workdir = workdir 64 | self.collection = self.generate_index( 65 | workdir, self.records_count).to_dict('records') 66 | 67 | def getitem(self, subscript): 68 | return self.generate_query(subscript) 69 | 70 | def __len__(self): 71 | return len(self.collection) 72 | 73 | def generate_index(self, root: str, records_count: int = -1) -> pd.DataFrame: 74 | """**Generate a way to access each record downloaded at the root** 75 | 76 | Args: 77 | root (str): Path to wich kaggle downloads a Dataset. 78 | 79 | Returns: 80 | pd.DataFrame: The Data loaded in a dataframe. 81 | """ 82 | raise Exception("To be implemented by subclass") 83 | 84 | def generate_query(self, idx: int) -> Tuple[List[dict], List[bytes]]: 85 | """ 86 | **Takes information from one atomic record from the Data and converts it to Query for apertureDB** 87 | 88 | Args: 89 | idx (int): index of the record in collection. 
90 | 91 | Raises: 92 | Exception: _description_ 93 | 94 | Returns: 95 | Tuple[List[dict], List[bytes]]: A pair of list of commands and optional list of blobs to go with them. 96 | """ 97 | raise Exception("To be implemented by subclass") 98 | -------------------------------------------------------------------------------- /aperturedb/Operations.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | 4 | class Operations(object): 5 | """ 6 | **Operations that can be performed on the fly on any retrieved images** 7 | 8 | [Supported operations](/query_language/Reference/shared_command_parameters/operations) 9 | """ 10 | 11 | def __init__(self): 12 | 13 | self.operations_arr = [] 14 | 15 | def get_operations_arr(self): 16 | return self.operations_arr 17 | 18 | def resize(self, width: int, height: int) -> Operations: 19 | 20 | op = { 21 | "type": "resize", 22 | "width": width, 23 | "height": height, 24 | } 25 | 26 | self.operations_arr.append(op) 27 | return self 28 | 29 | def rotate(self, angle: int, resize=False) -> Operations: 30 | 31 | op = { 32 | "type": "rotate", 33 | "angle": angle, 34 | "resize": resize, 35 | } 36 | 37 | self.operations_arr.append(op) 38 | return self 39 | 40 | def flip(self, code: str) -> Operations: 41 | 42 | op = { 43 | "type": "flip", 44 | "code": code, 45 | } 46 | 47 | self.operations_arr.append(op) 48 | return self 49 | 50 | def crop(self, x: int, y: int, width: int, height: int) -> Operations: 51 | 52 | op = { 53 | "type": "crop", 54 | "x": x, 55 | "y": y, 56 | "width": width, 57 | "height": height, 58 | } 59 | 60 | self.operations_arr.append(op) 61 | return self 62 | 63 | def interval(self, start: int, stop: int, step: int) -> Operations: 64 | 65 | op = { 66 | "type": "interval", 67 | "start": start, 68 | "stop": stop, 69 | "step": step 70 | } 71 | 72 | self.operations_arr.append(op) 73 | return self 74 | -------------------------------------------------------------------------------- /aperturedb/Parallelizer.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | import threading 4 | 5 | from threading import Thread 6 | from tqdm import tqdm as tqdm 7 | 8 | 9 | class Parallelizer: 10 | """**Generic Parallelizer** 11 | 12 | A parallelizer converts a series of operations to be executed and partitions it into 13 | batches, to be executed by multiple threads of execution. 
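    Example (a sketch, not a prescribed usage of this class): subclasses such as
    `ParallelQuery` expose the batch size and thread count through their `query`
    entry point. This assumes `client` is a connected `Connector` and the CSV file
    exists; import paths follow this repo's module layout.

    ```python
    # Illustrative only: batchsize/numthreads control the partitioning into
    # batches and the number of worker threads that execute them.
    from aperturedb.EntityDataCSV import EntityDataCSV
    from aperturedb.ParallelQuery import ParallelQuery

    data = EntityDataCSV("/path/to/EntityData.csv")
    querier = ParallelQuery(client)
    querier.query(generator=data, batchsize=100, numthreads=4, stats=True)
    ```

    The diagram below shows how those batches are interleaved across worker threads: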
14 | ```mermaid 15 | gantt 16 | title Parallel execution 17 | dateFormat HH:mm:ss 18 | section Worker1 19 | Batch1 :w1, 00:00:00, 10s 20 | Batch3 :w3, after w1, 10s 21 | Batch5 :after w3, 10s 22 | 23 | section Worker2 24 | Batch2 :w2, 00:00:00, 10s 25 | Batch4 :w4, after w2, 10s 26 | Batch6 :w6, after w4, 10s 27 | 28 | ``` 29 | """ 30 | 31 | def __init__(self): 32 | self._reset() 33 | 34 | def _reset(self, batchsize: int = 1, numthreads: int = 1): 35 | 36 | # Default Values 37 | self.batchsize = batchsize 38 | self.numthreads = numthreads 39 | 40 | self.total_actions = 0 41 | self.times_arr = [] 42 | self.total_actions_time = 0 43 | self.error_counter = 0 44 | self.actual_stats = [] 45 | 46 | def get_times(self): 47 | 48 | return self.times_arr 49 | 50 | def batched_run(self, generator, batchsize: int, numthreads: int, stats: bool): 51 | run_event = threading.Event() 52 | run_event.set() 53 | self._reset(batchsize, numthreads) 54 | self.stats = stats 55 | self.generator = generator 56 | if hasattr(generator, "sample_count"): 57 | print("sample_count", generator.sample_count) 58 | self.total_actions = generator.sample_count 59 | else: 60 | self.total_actions = len(generator) 61 | self.pb = tqdm(total=self.total_actions, desc="Progress", 62 | unit="items", unit_scale=True, dynamic_ncols=True) 63 | start_time = time.time() 64 | 65 | if self.total_actions < batchsize: 66 | elements_per_thread = self.total_actions 67 | self.numthreads = 1 68 | else: 69 | elements_per_thread = math.ceil( 70 | self.total_actions / self.numthreads) 71 | 72 | thread_arr = [] 73 | for i in range(self.numthreads): 74 | idx_start = i * elements_per_thread 75 | idx_end = min(idx_start + elements_per_thread, 76 | self.total_actions) 77 | 78 | thread_add = Thread(target=self.worker, 79 | args=(i, generator, idx_start, idx_end, run_event)) 80 | thread_arr.append(thread_add) 81 | 82 | a = [th.start() for th in thread_arr] 83 | try: 84 | while run_event.is_set() and any([th.is_alive() for th in thread_arr]): 85 | time.sleep(1) 86 | except KeyboardInterrupt: 87 | print("Interrupted ... Shutting down workers") 88 | finally: 89 | run_event.clear() 90 | a = [th.join() for th in thread_arr] 91 | 92 | # Update progress bar to completion 93 | if self.stats: 94 | self.pb.close() 95 | 96 | self.total_actions_time = time.time() - start_time 97 | 98 | if self.stats: 99 | self.print_stats() 100 | 101 | def print_stats(self): 102 | """ 103 | Must be implemented by child class 104 | """ 105 | pass 106 | -------------------------------------------------------------------------------- /aperturedb/PolygonDataCSV.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from aperturedb import CSVParser 4 | 5 | HEADER_POLYGONS = "polygons" 6 | IMG_KEY_PROP = "img_key_prop" 7 | IMG_KEY_VAL = "img_key_value" 8 | POLYGON_FIELDS = { 9 | "_label": "label", 10 | } 11 | 12 | 13 | class PolygonDataCSV(CSVParser.CSVParser): 14 | """ 15 | **ApertureDB Polygon Data.** 16 | 17 | This class loads the Polygon Data which is present in a CSV file, 18 | and converts it into a series of ApertureDB queries. 19 | 20 | :::note Is backed by a CSV file with the following columns: 21 | ``IMG_KEY``, [``POLYGON_PROPERTY_1``, ... ``POLYGON_PROPERTY_N``,] [``constraint_POLYGON_PROPERTY_1``, ... ``constraint_POLYGON_PROPERTY_N``,] [``_label``,] ``polygons`` 22 | ::: 23 | 24 | **IMG_KEY**: identifies the name of the image property that will identify the 25 | image with which to associate each polygon object. 
This property should reliably 26 | identify at most a single image, like a unique id. The value in each row will be 27 | used to look up the image to which the polygon will attach. 28 | 29 | **POLYGON_PROPERTY_I**: declares the name of a property that will be assigned to all polygon objects. Any number of properties can be declared in this way. 30 | 31 | **constraint_POLYGON_PROPERTY_I**: declares that POLYGON_PROPERTY_I should be unique, and that a new polygon will not be added if there already exists one with the same value for this property. For each row, the value in this column should match the value in column POLYGON_PROPERTY_I. 32 | 33 | **_label**: optionally applies a label to the polygon objects. 34 | 35 | **polygons**: a JSON array of polygon regions. Each polygon region is itself an array of [x,y] vertices that describe the boundary of a single contiguous polygon. See also [Polygon API parameter](/query_language/Reference/shared_command_parameters/polygons). 36 | 37 | Example CSV file:: 38 | 39 | image_id,polygon_id,constraint_polygon_id,category_id,_label,polygons 40 | 397133,82445,82445,44,bottle,"[[[224.24, 297.18], [228.29, 297.18], ...]]" 41 | 397133,119568,119568,67,dining table,"[[[292.37, 425.1], [340.6, 373.86], ...]]" 42 | ... 43 | 44 | Example usage: 45 | 46 | ``` python 47 | 48 | data = PolygonDataCSV("/path/to/PolygonData.csv") 49 | loader = ParallelLoader(client) 50 | loader.ingest(data) 51 | ``` 52 | 53 | """ 54 | 55 | def __init__(self, filename: str, **kwargs): 56 | 57 | super().__init__(filename, kwargs=kwargs) 58 | 59 | self.props_keys = [] 60 | self.constraints_keys = [] 61 | self.polygon_keys = [] 62 | for key in self.header[1:-1]: 63 | if key in POLYGON_FIELDS.keys(): 64 | self.polygon_keys.append(key) 65 | elif key.startswith(CSVParser.CONSTRAINTS_PREFIX): 66 | self.constraints_keys.append(key) 67 | else: 68 | self.props_keys.append(key) 69 | 70 | self.img_key = self.header[0] 71 | self.command = "AddPolygon" 72 | 73 | def get_indices(self): 74 | return { 75 | "entity": { 76 | "_Polygon": self.get_indexed_properties() 77 | } 78 | } 79 | 80 | def getitem(self, idx): 81 | idx = self.df.index.start + idx 82 | 83 | q = [] 84 | 85 | img_id = self.df.loc[idx, self.img_key] 86 | 87 | fi = { 88 | "FindImage": { 89 | "_ref": 1, 90 | "constraints": { 91 | self.img_key: ["==", img_id], 92 | }, 93 | "blobs": False, 94 | }, 95 | } 96 | q.append(fi) 97 | 98 | polygon_fields = { 99 | "image_ref": 1, 100 | "polygons": json.loads(self.df.loc[idx, HEADER_POLYGONS]) 101 | } 102 | for key in self.polygon_keys: 103 | polygon_fields[POLYGON_FIELDS[key]] = self.df.loc[idx, key] 104 | 105 | ap = self._basic_command(idx, polygon_fields) 106 | q.append(ap) 107 | 108 | return q, [] 109 | 110 | def validate(self): 111 | 112 | self.header = list(self.df.columns.values) 113 | 114 | if len(self.header) < 2: 115 | raise Exception( 116 | "Error with CSV file: must have at least two columns") 117 | if self.header[-1] != HEADER_POLYGONS: 118 | raise Exception("Error with CSV file field: " + HEADER_POLYGONS) 119 | -------------------------------------------------------------------------------- /aperturedb/Polygons.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from aperturedb.Entities import Entities 3 | from aperturedb.CommonLibrary import execute_query 4 | 5 | 6 | class Polygons(Entities): 7 | db_object = "_Polygon" 8 | 9 | def intersection(self, other: Polygons, threshold: float) -> Polygons: 10 | """ 11 | 
Find a set of polygons that intersect with another set of polygons. 12 | The threshold is user specified and is used to determine if two polygons 13 | sufficiently overlap to be considered intersecting. 14 | 15 | Args: 16 | other (Polygons): Set of polygons to intersect with. 17 | threshold (float): The threshold for determining if two polygons are sufficiently intersecting. 18 | 19 | Returns: 20 | Polygons: unique set of polygons that intersect with the other set of polygons. 21 | """ 22 | result = set() 23 | for p1 in self: 24 | for p2 in other: 25 | query = [ 26 | { 27 | "FindEntity": { 28 | "_ref": 1, 29 | "unique": True, 30 | "constraints": { 31 | "_uniqueid": ["==", p1["_uniqueid"]] 32 | } 33 | } 34 | }, { 35 | "FindEntity": { 36 | "_ref": 2, 37 | "unique": True, 38 | "constraints": { 39 | "_uniqueid": ["==", p2["_uniqueid"]] 40 | } 41 | } 42 | }, { 43 | "RegionIoU": { 44 | "roi_1": 1, 45 | "roi_2": 2, 46 | } 47 | } 48 | ] 49 | res, r, b = execute_query(self.client, query, []) 50 | if r[2]["RegionIoU"]["IoU"][0][0] > threshold: 51 | result.add(int(p1["ann_id"])) 52 | result.add(int(p2["ann_id"])) 53 | return list(result) 54 | -------------------------------------------------------------------------------- /aperturedb/PyTorchData.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from torch.utils.data import Dataset 3 | from aperturedb.Subscriptable import Subscriptable 4 | 5 | 6 | class PyTorchData(Subscriptable): 7 | """ 8 | **Class to wrap around a Dataset retrieved from [PyTorch datasets](https://pytorch.org/vision/0.15/datasets.html)** 9 | 10 | The dataset in this case can be iterated over. 11 | So the only thing that needs to be implemented is generate_query, 12 | which takes an index and returns a query. 13 | 14 | :::note 15 | This class should be subclassed with a specific (custom) implementation of generate_query(). 16 | ::: 17 | 18 | Example subclass: [CocoDataPyTorch](https://github.com/aperture-data/aperturedb-python/blob/develop/examples/CocoDataPyTorch.py) 19 | 20 | """ 21 | 22 | def __init__(self, dataset: Dataset) -> None: 23 | self.loaded_dataset = [t for t in dataset] 24 | 25 | def getitem(self, idx: int): 26 | return self.generate_query(idx) 27 | 28 | def __len__(self): 29 | return len(self.loaded_dataset) 30 | 31 | def generate_query(self, idx: int) -> Tuple[List[dict], List[bytes]]: 32 | """ 33 | **Takes information from one atomic record from the Data and converts it to Query for apertureDB** 34 | 35 | Args: 36 | idx (int): index of the record in collection. 37 | 38 | Raises: 39 | Exception: _description_ 40 | 41 | Returns: 42 | Tuple[List[dict], List[bytes]]: A pair of list of commands and optional list of blobs to go with them. 43 | """ 44 | raise Exception("To be implemented by subclass") 45 | -------------------------------------------------------------------------------- /aperturedb/QueryGenerator.py: -------------------------------------------------------------------------------- 1 | from aperturedb import Subscriptable 2 | 3 | 4 | class QueryGenerator(Subscriptable.Subscriptable): 5 | """ 6 | The base class to use for Query Generators. 
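A subclass implements ``getitem()`` (returning a ``(commands, blobs)`` pair) and ``__len__``, and can then be fed to the parallel query/ingest machinery like any other generator. A minimal sketch, using a hypothetical ``CaptionQueryGenerator``:

```python
from aperturedb.QueryGenerator import QueryGenerator

class CaptionQueryGenerator(QueryGenerator):
    # Hypothetical: emits one AddEntity query per caption string.
    def __init__(self, captions):
        self.captions = captions

    def __len__(self):
        return len(self.captions)

    def getitem(self, idx):
        query = [{
            "AddEntity": {
                "class": "Caption",
                "properties": {"text": self.captions[idx]},
            }
        }]
        return query, []   # no blobs for this generator
```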
7 | """ 8 | 9 | def getitem(self, subscript): 10 | raise Exception("To be implemented in subclass") 11 | -------------------------------------------------------------------------------- /aperturedb/Sort.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class Order(Enum): 5 | ASCENDING = "ascending" 6 | DESCENDING = "descending" 7 | 8 | 9 | class Sort(): 10 | """ 11 | **Specification of the sort order** 12 | """ 13 | 14 | def __init__(self, key: str, order: Order) -> None: 15 | self._sort = { 16 | "key": key, 17 | "order": order.value 18 | } 19 | -------------------------------------------------------------------------------- /aperturedb/Sources.py: -------------------------------------------------------------------------------- 1 | import time 2 | import requests 3 | import logging 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | class Sources(): 9 | """ 10 | **Load data from various resources** 11 | """ 12 | 13 | def __init__(self, n_download_retries, **kwargs): 14 | 15 | self.n_download_retries = n_download_retries 16 | 17 | # Use custom clients if specified 18 | self.s3 = None if "s3_client" not in kwargs else kwargs["s3_client"] 19 | self.http_client = requests.Session( 20 | ) if "http_client" not in kwargs else kwargs["http_client"] 21 | 22 | def load_from_file(self, filename): 23 | """ 24 | Load data from a file. 25 | """ 26 | try: 27 | fd = open(filename, "rb") 28 | buff = fd.read() 29 | fd.close() 30 | return True, buff 31 | except Exception as e: 32 | logger.error(f"VALIDATION ERROR: {filename}") 33 | logger.exception(e) 34 | finally: 35 | if not fd.closed: 36 | fd.close() 37 | return False, None 38 | 39 | def load_from_http_url(self, url, validator): 40 | """ 41 | Load data from a http url. 
42 | """ 43 | import numpy as np 44 | 45 | retries = 0 46 | while True: 47 | imgdata = self.http_client.get(url) 48 | if imgdata.ok and ("Content-Length" not in imgdata.headers or int(imgdata.headers["Content-Length"]) == imgdata.raw._fp_bytes_read): 49 | imgbuffer = np.frombuffer(imgdata.content, dtype='uint8') 50 | if not validator(imgbuffer): 51 | logger.error(f"VALIDATION ERROR: {url}") 52 | return False, None 53 | 54 | return imgdata.ok, imgdata.content 55 | else: 56 | if retries >= self.n_download_retries: 57 | break 58 | logger.warning(f"Retrying object: {url}") 59 | retries += 1 60 | time.sleep(2) 61 | 62 | return False, None 63 | 64 | def load_from_s3_url(self, s3_url, validator): 65 | import numpy as np 66 | 67 | retries = 0 68 | while True: 69 | try: 70 | bucket_name = s3_url.split("/")[2] 71 | object_name = s3_url.split("s3://" + bucket_name + "/")[-1] 72 | s3_response_object = self.s3.get_object( 73 | Bucket=bucket_name, Key=object_name) 74 | img = s3_response_object['Body'].read() 75 | imgbuffer = np.frombuffer(img, dtype='uint8') 76 | if not validator(imgbuffer): 77 | logger.error(f"VALIDATION ERROR: {s3_url}") 78 | return False, None 79 | 80 | return True, img 81 | except Exception as e: 82 | if retries >= self.n_download_retries: 83 | break 84 | logger.warning(f"Retrying object: {s3_url}", exc_info=True) 85 | retries += 1 86 | time.sleep(2) 87 | 88 | logger.error(f"S3 ERROR: {s3_url}") 89 | return False, None 90 | 91 | def load_from_gs_url(self, gs_url, validator): 92 | import numpy as np 93 | from google.cloud import storage 94 | 95 | retries = 0 96 | client = storage.Client() 97 | while True: 98 | try: 99 | bucket_name = gs_url.split("/")[2] 100 | object_name = gs_url.split("gs://" + bucket_name + "/")[-1] 101 | 102 | blob = client.bucket(bucket_name).blob( 103 | object_name).download_as_bytes() 104 | imgbuffer = np.frombuffer(blob, dtype='uint8') 105 | if not validator(imgbuffer): 106 | logger.warning(f"VALIDATION ERROR: {gs_url}") 107 | return False, None 108 | return True, blob 109 | except: 110 | if retries >= self.n_download_retries: 111 | break 112 | logger.warning("Retrying object: {gs_url}", exc_info=True) 113 | retries += 1 114 | time.sleep(2) 115 | 116 | logger.error(f"GS ERROR: {gs_url}") 117 | return False, None 118 | -------------------------------------------------------------------------------- /aperturedb/SparseAddingDataCSV.py: -------------------------------------------------------------------------------- 1 | from aperturedb import CSVParser 2 | import logging 3 | 4 | logger = logging.getLogger(__name__) 5 | # SparseAddingDataCSV 6 | # Check for item existance using constraints before adding 7 | # Useful when adding larger resources where a portion already exist 8 | 9 | 10 | class SparseAddingDataCSV(CSVParser.CSVParser): 11 | """ 12 | **ApertureDB General CSV Parser for Loading Blob data where a large amount of the blobs already exist. 13 | 14 | This is a blob loader where the entity is searched for first, before the blob data is passed to the server. 15 | This can be useful speedup if blob data is large in comparison to the amount of data actually causing loads 16 | 17 | This is an abstract class, ImageSparseAddDataCSV loads Images. 
18 | 19 | """ 20 | 21 | def __init__(self, entity_class: str, filename: str, **kwargs): 22 | self.entity = entity_class 23 | self.keys_set = False 24 | super().__init__(filename, **kwargs) 25 | self.blobs_per_query = [0, 1] 26 | self.commands_per_query = [1, 1] 27 | self._setupkeys() 28 | 29 | def _setupkeys(self): 30 | if not self.keys_set: 31 | self.keys_set = True 32 | self.props_keys = [x for x in self.header[1:] 33 | if not x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 34 | self.constraints_keys = [x for x in self.header[1:] 35 | if x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 36 | 37 | def getitem(self, idx): 38 | idx = self.df.index.start + idx 39 | query_set = [] 40 | 41 | hold_props_keys = self.props_keys 42 | self.props_keys = [] 43 | self.command = "Find" + self.entity 44 | self.constraint_keyword = "constraints" 45 | entity_find = self._basic_command( 46 | idx, custom_fields={"results": {"count": True}}) 47 | # proceed to second command if count == 0 48 | condition_find_failed = {"results": {0: {"count": ["==", 0]}}} 49 | self.props_keys = hold_props_keys 50 | self.command = "Add" + self.entity 51 | self.constraint_keyword = "if_not_found" 52 | entity_add = self._basic_command(idx) 53 | query_set.append(entity_find) 54 | query_set.append([condition_find_failed, entity_add]) 55 | 56 | if hasattr(self, "modify_item") and callable(self.modify_item): 57 | query_set = self.modify_item(query_set, idx) 58 | 59 | return [query_set], [] 60 | 61 | def validate(self): 62 | self._setupkeys() 63 | valid = True 64 | if not self.use_dask: 65 | if len(self.constraints_keys) < 1: 66 | logger.error("Cannot add/update " + 67 | self.entity + "; no constraint keys") 68 | valid = False 69 | return valid 70 | -------------------------------------------------------------------------------- /aperturedb/Stats.py: -------------------------------------------------------------------------------- 1 | class Stats: 2 | total_actions = 0 3 | times_arr = [] 4 | total_actions_time = 0 5 | error_counter = 0 6 | objects_existed = 0 7 | succeeded_queries = 0 8 | succeeded_commands = 0 9 | 10 | def __init__(self): 11 | self.total_actions = 0 12 | self.times_arr = [] 13 | self.total_actions_time = 0 14 | self.error_counter = 0 15 | self.objects_existed = 0 16 | self.succeeded_queries = 0 17 | self.succeeded_commands = 0 18 | -------------------------------------------------------------------------------- /aperturedb/Subscriptable.py: -------------------------------------------------------------------------------- 1 | class Wrapper(): 2 | """ 3 | This is needed because slicing in Subscriptable returns a list. 4 | The response handler also needs to be accounted for as 5 | that will be a part of generator. 6 | """ 7 | 8 | def __init__(self, list, response_handler, strict_response_validation, blobs_relative_to_csv): 9 | self.list = list 10 | self.response_handler = response_handler 11 | self.strict_response_validation = strict_response_validation 12 | self.blobs_relative_to_csv = blobs_relative_to_csv 13 | 14 | def __len__(self): 15 | return len(self.list) 16 | 17 | def __getitem__(self, i): 18 | return self.list[i] 19 | 20 | 21 | class Subscriptable(): 22 | """ 23 | The base class to use for Data/Generators and such collection types. 
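Subclasses provide ``getitem()`` and ``__len__``; indexing, slicing, and iteration then come for free. A minimal sketch with a hypothetical ``Squares`` collection:

```python
from aperturedb.Subscriptable import Subscriptable

class Squares(Subscriptable):
    # Hypothetical collection yielding the first n square numbers.
    def __init__(self, n):
        self.n = n

    def __len__(self):
        return self.n

    def getitem(self, idx):
        return idx * idx

squares = Squares(5)
print(squares[2])          # 4
print(list(squares[1:4]))  # [1, 4, 9] -- slicing returns a Wrapper
print(list(squares))       # [0, 1, 4, 9, 16]
```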
24 | """ 25 | 26 | def __getitem__(self, subscript): 27 | if isinstance(subscript, slice): 28 | start = subscript.start if subscript.start else 0 29 | start = len(self) + start if start < 0 else start 30 | stop = subscript.stop if subscript.stop else len(self) 31 | step = subscript.step if subscript.step else 1 32 | wrapper = Wrapper( 33 | [self.getitem(i) for i in range(start, stop, step)], 34 | self.response_handler if hasattr( 35 | self, "response_handler") else None, 36 | self.strict_response_validation if hasattr( 37 | self, "strict_response_validation") else None, 38 | self.blobs_relative_to_csv if hasattr( 39 | self, "blobs_relative_to_csv") else False 40 | ) 41 | return wrapper 42 | 43 | else: 44 | if subscript < len(self): 45 | return self.getitem(subscript) 46 | else: 47 | raise StopIteration() 48 | 49 | def getitem(self, subscript): 50 | raise Exception("To be implemented in subclass") 51 | 52 | def __iter__(self): 53 | self.ind = 0 54 | return self 55 | 56 | def __next__(self): 57 | if self.ind >= len(self): 58 | raise StopIteration 59 | else: 60 | r = self.getitem(self.ind) 61 | self.ind += 1 62 | return r 63 | -------------------------------------------------------------------------------- /aperturedb/TensorFlowData.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from aperturedb.Subscriptable import Subscriptable 3 | import tensorflow as tf 4 | 5 | 6 | class TensorFlowData(Subscriptable): 7 | """ 8 | **Class to wrap around a Dataset retrieved from [Tensorflow datasets](https://www.tensorflow.org/datasets)** 9 | 10 | The dataset in this case can be iterated over. 11 | So the only thing that needs to be implemented is __init__ and generate_query, 12 | which takes an index and returns a query. 13 | 14 | :::note 15 | This class should be subclassed with a specific (custom) implementation of generate_query(), 16 | and __init__ should be called with the dataset to be wrapped. 17 | ::: 18 | 19 | Example subclass: [Cifar10DataTensorflow](https://github.com/aperture-data/aperturedb-python/blob/develop/examples/Cifar10DataTensorflow.py) 20 | 21 | """ 22 | 23 | def __init__(self, dataset: tf.data.Dataset) -> None: 24 | raise Exception("To be implemented by subclass") 25 | 26 | def getitem(self, idx: int): 27 | return self.generate_query(idx) 28 | 29 | def __len__(self): 30 | raise Exception("To be implemented by subclass") 31 | 32 | def generate_query(self, idx: int) -> Tuple[List[dict], List[bytes]]: 33 | """ 34 | **Takes information from one atomic record from the Data and converts it to Query for ApertureDB** 35 | 36 | Args: 37 | idx (int): index of the record in collection. 38 | 39 | Raises: 40 | Exception: _description_ 41 | 42 | Returns: 43 | Tuple[List[dict], List[bytes]]: A pair of list of commands and optional list of blobs to go with them. 
44 | """ 45 | raise Exception("To be implemented by subclass") 46 | -------------------------------------------------------------------------------- /aperturedb/VideoDownloader.py: -------------------------------------------------------------------------------- 1 | import time 2 | import requests 3 | import os 4 | 5 | import cv2 6 | import numpy as np 7 | 8 | from aperturedb import Parallelizer 9 | from aperturedb import CSVParser 10 | 11 | HEADER_PATH = "filename" 12 | HEADER_URL = "url" 13 | 14 | 15 | class VideoDownloaderCSV(CSVParser.CSVParser): 16 | """ 17 | **ApertureDB Video Downloader.** 18 | 19 | :::info 20 | Expects a CSV file with AT LEAST a ``url`` column, and 21 | optionally a ``filename`` field. 22 | If ``filename`` is not present, it is taken from the URL. 23 | ::: 24 | """ 25 | 26 | def __init__(self, filename, check_video=True): 27 | 28 | self.has_filename = False 29 | self.check_video = check_video 30 | 31 | super().__init__(filename) 32 | 33 | def __getitem__(self, idx): 34 | 35 | url = self.df.loc[idx, HEADER_URL] 36 | 37 | if self.has_filename: 38 | filename = self.df.loc[idx, HEADER_PATH] 39 | else: 40 | filename = self.url_to_filename(url) 41 | 42 | return url, filename 43 | 44 | def url_to_filename(self, url): 45 | 46 | filename = url.split("/")[-1] 47 | folder = "/tmp/videos/" 48 | 49 | return folder + filename 50 | 51 | def validate(self): 52 | 53 | self.header = list(self.df.columns.values) 54 | 55 | if HEADER_URL not in self.header: 56 | raise Exception("Error with CSV file field: url. Must be a field") 57 | 58 | if HEADER_PATH in self.header: 59 | self.has_filename = True 60 | 61 | 62 | class VideoDownloader(Parallelizer.Parallelizer): 63 | 64 | def __init__(self, ): 65 | 66 | super().__init__() 67 | 68 | self.type = "video" 69 | 70 | self.check_video = False 71 | 72 | def check_if_video_is_ok(self, filename, url): 73 | 74 | if not os.path.exists(filename): 75 | return False 76 | 77 | try: 78 | a = cv2.VideoCapture(filename) 79 | if a.isOpened() == False: 80 | print("Video present but error reading it:", url) 81 | return False 82 | except BaseException: 83 | print("Video present but error decoding:", url) 84 | return False 85 | 86 | return True 87 | 88 | def download_video(self, url, filename): 89 | 90 | start = time.time() 91 | 92 | if self.check_video and self.check_if_video_is_ok(filename, url): 93 | return 94 | 95 | folder = os.path.dirname(filename) 96 | if not os.path.exists(folder): 97 | os.makedirs(folder, exist_ok=True) 98 | 99 | videodata = requests.get(url) 100 | if videodata.ok: 101 | fd = open(filename, "wb") 102 | fd.write(videodata.content) 103 | fd.close() 104 | 105 | try: 106 | a = cv2.VideoCapture(filename) 107 | if a.isOpened() == False: 108 | print("Downloaded Video size error:", url) 109 | os.remove(filename) 110 | self.error_counter += 1 111 | except BaseException: 112 | print("Downloaded Video cannot be decoded:", url) 113 | os.remove(filename) 114 | self.error_counter += 1 115 | else: 116 | print("URL not found:", url) 117 | self.error_counter += 1 118 | 119 | self.times_arr.append(time.time() - start) 120 | 121 | def worker(self, thid, generator, start, end): 122 | 123 | for i in range(start, end): 124 | 125 | url, filename = generator[i] 126 | 127 | self.download_video(url, filename) 128 | 129 | if thid == 0 and self.stats: 130 | self.pb.update((i - start) / (end - start)) 131 | 132 | def print_stats(self): 133 | 134 | print("====== ApertureDB VideoDownloader Stats ======") 135 | 136 | times = np.array(self.times_arr) 137 | print("Avg 
Video download time(s):", np.mean(times)) 138 | print("Img download time std:", np.std(times)) 139 | print("Avg download throughput (videos/s)):", 140 | 1 / np.mean(times) * self.numthreads) 141 | 142 | print("Total time(s):", self.total_actions_time) 143 | print("Overall throughput (videos/s):", 144 | self.total_actions / self.total_actions_time) 145 | print("Total errors encountered:", self.error_counter) 146 | print("=============================================") 147 | -------------------------------------------------------------------------------- /aperturedb/Videos.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from typing import Any 3 | 4 | from aperturedb.Entities import Entities 5 | from IPython.display import HTML, display 6 | from aperturedb.NotebookHelpers import display_annotated_video 7 | from ipywidgets import widgets 8 | 9 | 10 | class Videos(Entities): 11 | """ 12 | **The object mapper representation of videos in ApertureDB.** 13 | 14 | This class is a layer on top of the native query. 15 | It facilitates interactions with videos in the database in the pythonic way. 16 | """ 17 | db_object = "_Video" 18 | 19 | def getitem(self, idx): 20 | item = super().getitem(idx) 21 | if self.blobs: 22 | if 'preview' not in item: 23 | item['preview'] = self.get_blob(item) 24 | return item 25 | 26 | def inspect(self, show_preview: bool = True, meta = None) -> Any: 27 | if meta == None: 28 | def meta(x): return [] 29 | df = super().inspect() 30 | if show_preview == True: 31 | op = widgets.Output() 32 | with op: 33 | df['preview'] = df.apply(lambda x: display_annotated_video( 34 | x["preview"], bboxes=meta(x)), axis=1) 35 | display(HTML( 36 | "
" + 37 | df.to_html(escape=False) 38 | + "
" 39 | )) 40 | return op 41 | else: 42 | return df 43 | -------------------------------------------------------------------------------- /aperturedb/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | import datetime 4 | import os 5 | import json 6 | import requests 7 | from string import Template 8 | import platform 9 | import faulthandler 10 | import signal 11 | import sys 12 | 13 | __version__ = "0.4.47" 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | # https://docs.python.org/3/library/faulthandler.html 18 | # Register SIGUSR1 to dump the stack trace 19 | # Good for debugging a running process 20 | 21 | if os.getenv("ADB_DEBUGGABLE", None) != None: 22 | if sys.platform == "win32": 23 | logger.warn("Unable to configure debugging support for win32") 24 | else: 25 | faulthandler.register(signal.SIGUSR1.value) 26 | 27 | # set log level 28 | formatter = logging.Formatter( 29 | "%(asctime)s : %(levelname)s : %(name)s : %(thread)d : %(lineno)d : %(message)s") 30 | 31 | log_file_level = logging.getLevelName(os.getenv("LOG_FILE_LEVEL", "WARN")) 32 | log_console_level = logging.getLevelName( 33 | os.getenv("LOG_CONSOLE_LEVEL", "ERROR")) 34 | 35 | # Set the logger filter to the minimum (more chatty) of the two handler levels 36 | # This reduces problems if the environment adds a root handler (e.g. Google Colab) 37 | logger_level = min(log_file_level, log_console_level) 38 | if any(log_control in os.environ 39 | for log_control in ["LOG_CONSOLE_LEVEL", "LOG_FILE_LEVEL"]): 40 | logger.setLevel(logger_level) 41 | 42 | # define file handler and set formatter 43 | error_file_name = "error.${now}.log" 44 | 45 | if "ADB_LOG_FILE" in os.environ: 46 | error_file_name = None if len( 47 | os.environ["ADB_LOG_FILE"]) == 0 else os.environ["ADB_LOG_FILE"] 48 | 49 | if error_file_name is not None: 50 | error_file_tmpl = Template(error_file_name) 51 | template_items = { 52 | # python isodate has ':', not valid in files in windows. 53 | "now": str(datetime.datetime.now().isoformat()).replace(':', ''), 54 | "node": str(platform.node()) 55 | } 56 | error_file_handler = logging.FileHandler(error_file_tmpl.safe_substitute( 57 | **template_items), delay=True) 58 | error_file_handler.setFormatter(formatter) 59 | error_file_handler.setLevel(log_file_level) 60 | logger.addHandler(error_file_handler) 61 | 62 | error_console_handler = logging.StreamHandler() 63 | error_console_handler.setLevel(log_console_level) 64 | error_console_handler.setFormatter(formatter) 65 | logger.addHandler(error_console_handler) 66 | 67 | try: 68 | latest_version = json.loads(requests.get( 69 | "https://pypi.org/pypi/aperturedb/json").text)["info"]["version"] 70 | except Exception as e: 71 | logger.warning( 72 | f"Failed to get latest version: {e}. You are using version {__version__}") 73 | latest_version = None 74 | if __version__ != latest_version: 75 | logger.warning( 76 | f"The latest version of aperturedb is {latest_version}. You are using version {__version__}. It is recommended to upgrade.") 77 | -------------------------------------------------------------------------------- /aperturedb/cli/README.md: -------------------------------------------------------------------------------- 1 | # adb : Commad line utility. 2 | 3 | adb is a command line utility to have a well defined way of doing routine tasks with AperturDB instance. 
4 | It's based on [typer](https://typer.tiangolo.com/) 5 | 6 | It has subcommands with their parameters defined under the cli directory. 7 | 8 | Some key points to consider: 9 | - Against conventions of importing different classes at module level, the functions in adb should tend to import them lazily (even at the risk of repeating). This is because the recursive imports bog the startup down, which makes for a bad user experience. 10 | 11 | ## Notes about improving the load times. 12 | execute the command to be tested with PYTHONPROFILEIMPORTTIME set as 1 13 | ``` 14 | pip install tuna 15 | PYTHONPROFILEIMPORTTIME=1 adb config ls 2>&1 | tee check_times 16 | tuna check_times 17 | ``` -------------------------------------------------------------------------------- /aperturedb/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aperture-data/aperturedb-python/a58fe1eab4c78ae92a6b03ab6d5c9e5ed1f0d62d/aperturedb/cli/__init__.py -------------------------------------------------------------------------------- /aperturedb/cli/adb.py: -------------------------------------------------------------------------------- 1 | import typer 2 | 3 | from aperturedb.cli import configure, ingest, utilities, transact 4 | 5 | app = typer.Typer(pretty_exceptions_show_locals=False) 6 | 7 | app.add_typer(ingest.app, name="ingest", help="Ingest data into ApertureDB.") 8 | app.add_typer(configure.app, name="config", 9 | help="Configure ApertureDB client.") 10 | app.add_typer(utilities.app, name="utils", help="Utilities") 11 | app.add_typer(transact.app, name="transact", 12 | help="Run a transaction against ApertureDB.") 13 | 14 | 15 | @app.callback() 16 | def check_context(ctx: typer.Context): 17 | if ctx.invoked_subcommand != "config" and not \ 18 | configure.has_environment_configuration(): 19 | configure.check_configured(as_global=False) or \ 20 | configure.check_configured(as_global=True, show_error=True) 21 | 22 | 23 | if __name__ == "__main__": 24 | app() 25 | -------------------------------------------------------------------------------- /aperturedb/cli/console.py: -------------------------------------------------------------------------------- 1 | from rich.console import Console 2 | 3 | console = Console() 4 | -------------------------------------------------------------------------------- /aperturedb/cli/keys.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | import typer 4 | 5 | from aperturedb.cli.console import console 6 | from aperturedb.Configuration import Configuration 7 | from aperturedb.Connector import Connector 8 | 9 | app = typer.Typer() 10 | 11 | 12 | @app.command(help="Create Key for a user") 13 | def generate(user: Annotated[str, typer.Argument(help="The user to generate a key for")]): 14 | from aperturedb.CommonLibrary import create_connector 15 | conn = create_connector() 16 | key = generate_user_key(conn, user) 17 | console.log(f"Key for {user} is", key, highlight=False) 18 | 19 | 20 | def generate_user_key(conn: Connector, user: str): 21 | from aperturedb.Utils import Utils 22 | u = Utils(conn) 23 | token = u.generate_token() 24 | u.assign_token(user, token) 25 | key = Configuration.create_aperturedb_key( 26 | conn.config.host, conn.config.port, token, conn.config.use_rest, 27 | conn.config.use_ssl) 28 | return key 29 | -------------------------------------------------------------------------------- /aperturedb/cli/tokens.py: 
-------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Annotated 3 | 4 | import typer 5 | 6 | from aperturedb.cli.console import console 7 | from aperturedb.CommonLibrary import create_connector, execute_query 8 | from aperturedb.Utils import Utils 9 | 10 | app = typer.Typer() 11 | 12 | 13 | @app.command(help="List User Authentication Tokens") 14 | def list(user: Annotated[str, typer.Argument(help="The user the display tokens for")]): 15 | token_list_query = [{"GetUserDetails": {"username": user}}] 16 | client = create_connector() 17 | result, response, blobs = execute_query( 18 | client=client, 19 | query=token_list_query, 20 | blobs=[]) 21 | utokens = response[0]['GetUserDetails']['tokens'] 22 | if len(utokens) == 0: 23 | console.log(f"No Tokens for {user}") 24 | else: 25 | console.log(utokens) 26 | 27 | 28 | @app.command(help="Generate an Authentication token for a user") 29 | def generate(): 30 | conn = create_connector() 31 | u = Utils(conn) 32 | token = u.generate_token() 33 | print(f"{token}") 34 | return token 35 | 36 | 37 | @app.command(help="Assign an Authentication token to a user") 38 | def assign(user: Annotated[str, typer.Argument(help="user to assign the token to")], 39 | token: Annotated[str, typer.Argument(help="Token to be assigned")]): 40 | conn = create_connector() 41 | u = Utils(conn) 42 | try: 43 | u.assign_token(user, token) 44 | console.log(f"Assigned token to {user}") 45 | except Exception as e: 46 | console.log(f"Failed to assign token: {e}", style="red") 47 | 48 | 49 | @app.command(help="Remove an Authentication token from a user") 50 | def remove(user: Annotated[str, typer.Argument(help="User to remove a token from")], 51 | token: Annotated[str, typer.Argument(help="Token to be removed")]): 52 | conn = create_connector() 53 | u = Utils(conn) 54 | try: 55 | u.remove_token(user, token) 56 | console.log("Action complete") 57 | except Exception as e: 58 | console.log(f"Failed to remove token: {e}", style="red") 59 | -------------------------------------------------------------------------------- /aperturedb/cli/transact.py: -------------------------------------------------------------------------------- 1 | import json 2 | from enum import Enum 3 | import sys 4 | import traceback 5 | 6 | import typer 7 | from typing_extensions import Annotated 8 | 9 | from aperturedb.cli.console import console 10 | 11 | from aperturedb.Connector import Connector 12 | import logging 13 | 14 | logger = logging.getLogger(__file__) 15 | 16 | FUSE_AVAIALBLE = False 17 | 18 | 19 | def load_fuse(): 20 | global FUSE_AVAIALBLE 21 | try: 22 | from aperturedb.cli.mount_coco import mount_images_from_aperturedb 23 | FUSE_AVAIALBLE = True 24 | except ImportError as e: 25 | logger.warning( 26 | "fuse not found for this env. This is not critical for adb to continue.") 27 | 28 | 29 | app = typer.Typer(callback=load_fuse) 30 | 31 | 32 | class OutputTypes(str, Enum): 33 | STDOUT = "stdout" 34 | MOUNT_COCO = "mount_coco" 35 | RAW_JSON = "raw_json" 36 | 37 | 38 | def dump_as_raw_json(client: Connector, transaction: dict, **kwargs): 39 | """ 40 | Function to pass the result of a transaction as raw json to stdout. 41 | Does not handle blobs. 42 | 43 | Args: 44 | client (Connector): The client to the database 45 | transaction (dict): Query to be executed. 
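Returns:
    None: the response is printed to stdout as indented JSON; returned blobs are discarded.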
46 | """ 47 | from aperturedb.CommonLibrary import execute_query 48 | 49 | result, response, blobs = execute_query( 50 | client=client, 51 | query=transaction, 52 | blobs=[]) 53 | print(json.dumps(response, indent=2)) 54 | 55 | 56 | def dump_to_stdout(client: Connector, transaction: dict, **kwargs): 57 | from aperturedb.CommonLibrary import execute_query 58 | 59 | result, response, blobs = execute_query( 60 | client=client, 61 | query=transaction, 62 | blobs=[]) 63 | console.log(result) 64 | console.log(response) 65 | for i, blob in enumerate(blobs): 66 | console.log(f"len(blob[{i}]) = {len(blob[i])}") 67 | 68 | 69 | def mount_as_coco_ds(client: Connector, transaction: dict, **kwargs): 70 | from aperturedb.Images import Images 71 | from aperturedb.CommonLibrary import execute_query 72 | 73 | result, response, blobs = execute_query( 74 | client=client, 75 | query=transaction, 76 | blobs=[]) 77 | if result == 0: 78 | image_entities = [] 79 | for i, cr in enumerate(response): 80 | if "FindImage" in cr: 81 | if "entities" in cr["FindImage"]: 82 | image_entities.extend(cr["FindImage"]["entities"]) 83 | else: 84 | console.log(f"No entities found in FindImage {i} response") 85 | try: 86 | from aperturedb.cli.mount_coco import mount_images_from_aperturedb 87 | images = Images(client, response=image_entities) 88 | console.log(f"Found {len(images)} images") 89 | mount_images_from_aperturedb(images) 90 | except Exception as e: 91 | console.log(traceback.format_exc()) 92 | else: 93 | console.log(response) 94 | 95 | 96 | @app.command() 97 | def from_json_file( 98 | filepath: Annotated[str, typer.Argument(help="Path to query in json format")], 99 | output_type: Annotated[OutputTypes, typer.Option( 100 | help="Type of output")] = "stdout", 101 | output_path: Annotated[str, typer.Option( 102 | help="Path to output (only for mount as output)")] = None 103 | ): 104 | from aperturedb.CommonLibrary import create_connector 105 | 106 | client = create_connector() 107 | 108 | output_types = { 109 | OutputTypes.STDOUT: dump_to_stdout, 110 | OutputTypes.RAW_JSON: dump_as_raw_json 111 | } 112 | global FUSE_AVAIALBLE 113 | if FUSE_AVAIALBLE: 114 | output_types[OutputTypes.MOUNT_COCO] = mount_as_coco_ds 115 | 116 | with open(filepath) as inputstream: 117 | transaction = json.loads(inputstream.read()) 118 | old_argv = sys.argv[1:] 119 | sys.argv[1:] = [output_path] 120 | output_types[output_type](client, transaction, output_path=output_path) 121 | sys.argv[1:] = old_argv 122 | -------------------------------------------------------------------------------- /aperturedb/cli/utilities.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Annotated 3 | 4 | import typer 5 | 6 | from aperturedb.cli.console import console 7 | 8 | app = typer.Typer() 9 | 10 | import aperturedb.cli.keys as keys 11 | import aperturedb.cli.tokens as tokens 12 | app.add_typer(keys.app, name="keys", 13 | help="Manage Aperturedb keys") 14 | app.add_typer(tokens.app, name="tokens", 15 | help="Manage database authentication tokens") 16 | 17 | 18 | class CommandTypes(str, Enum): 19 | STATUS = "status" 20 | SUMMARY = "summary" 21 | REMOVE_ALL = "remove_all" 22 | REMOVE_INDEXES = "remove_indexes" 23 | 24 | 25 | def confirm(command: CommandTypes, force: bool): 26 | if force: 27 | return True 28 | console.print("Danger", style="bold red") 29 | console.log(f"This will execute {command}.") 30 | response = typer.prompt("Are you sure you want to continue? 
[y/N]") 31 | if response.lower() != "y": 32 | typer.echo("Aborting...") 33 | raise typer.Abort() 34 | return True 35 | 36 | 37 | @app.command(help="Execute a command on the database") 38 | def execute(command: CommandTypes, 39 | force: Annotated[bool, typer.Option(help="Do not confirm")] = False): 40 | 41 | from aperturedb.Utils import Utils 42 | from aperturedb.CommonLibrary import create_connector 43 | 44 | utils = Utils(create_connector()) 45 | available_commands = { 46 | CommandTypes.STATUS: lambda: print(utils.status()), 47 | CommandTypes.SUMMARY: utils.summary, 48 | CommandTypes.REMOVE_ALL: lambda: confirm( 49 | CommandTypes.REMOVE_ALL, force) and utils.remove_all_objects(), 50 | CommandTypes.REMOVE_INDEXES: lambda: confirm( 51 | CommandTypes.REMOVE_INDEXES, force) and utils.remove_all_indexes(), 52 | } 53 | 54 | available_commands[command]() 55 | 56 | 57 | class LogLevel(str, Enum): 58 | INFO = "INFO" 59 | WARNING = "WARNING" 60 | ERROR = "ERROR" 61 | 62 | 63 | @app.command() 64 | def log( 65 | message: Annotated[str, typer.Argument(help="The message to log")], 66 | level: LogLevel = LogLevel.INFO 67 | ): 68 | """ 69 | Log a message to the user log. 70 | 71 | This is useful because it can later be seen in Grafana, not only as log entries in the 72 | ApertureDB Logging dashboard, but also as event markers in the ApertureDB Status dashboard. 73 | """ 74 | from aperturedb.Utils import Utils 75 | from aperturedb.CommonLibrary import create_connector 76 | 77 | utils = Utils(create_connector()) 78 | utils.user_log_message(message, level=level.value) 79 | 80 | 81 | @app.command() 82 | def visualize_schema( 83 | filename: str = "schema", 84 | format: str = "png" 85 | ): 86 | """ 87 | Visualize the schema of the database. 88 | 89 | This will create a file with the schema of the database in the specified format. 90 | 91 | Relies on graphviz to be installed. 92 | """ 93 | from aperturedb.Utils import Utils 94 | from aperturedb.CommonLibrary import create_connector 95 | 96 | utils = Utils(create_connector()) 97 | s = utils.visualize_schema() 98 | result = s.render(filename, format=format) 99 | print(result) 100 | -------------------------------------------------------------------------------- /aperturedb/queryMessage.py: -------------------------------------------------------------------------------- 1 | # queryMessage.py - wraps protobuf versions 2 | import google.protobuf 3 | 4 | if google.protobuf.__version__.split(".")[0] == "3": 5 | from . import queryMessage3_pb2 6 | 7 | def queryMessage(): 8 | return queryMessage3_pb2.queryMessage() 9 | 10 | def ParseFromString(msg, data): 11 | return msg.ParseFromString(data) 12 | elif google.protobuf.__version__.split(".")[0] == "4": 13 | from . import queryMessage4_pb2 14 | 15 | def queryMessage(): 16 | return queryMessage4_pb2.queryMessage() 17 | 18 | def ParseFromString(msg, data): 19 | # because of https://github.com/protocolbuffers/protobuf/issues/10774 20 | return msg.ParseFromString(memoryview(data).tobytes()) 21 | elif google.protobuf.__version__.split(".")[0] == "5": 22 | from . 
import queryMessage5_pb2 23 | 24 | def queryMessage(): 25 | return queryMessage5_pb2.queryMessage() 26 | 27 | def ParseFromString(msg, data): 28 | return msg.ParseFromString(data) 29 | else: 30 | raise Exception( 31 | f"aperturedb not compatible with {google.protobuf.__version__}") 32 | -------------------------------------------------------------------------------- /aperturedb/queryMessage3_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 3 | # source: queryMessage3.proto 4 | 5 | from google.protobuf import descriptor as _descriptor 6 | from google.protobuf import message as _message 7 | from google.protobuf import reflection as _reflection 8 | from google.protobuf import symbol_database as _symbol_database 9 | # @@protoc_insertion_point(imports) 10 | 11 | _sym_db = _symbol_database.Default() 12 | 13 | 14 | 15 | 16 | DESCRIPTOR = _descriptor.FileDescriptor( 17 | name='queryMessage3.proto', 18 | package='VDMS.protobufs', 19 | syntax='proto3', 20 | serialized_options=None, 21 | create_key=_descriptor._internal_create_key, 22 | serialized_pb=b'\n\x13queryMessage3.proto\x12\x0eVDMS.protobufs\":\n\x0cqueryMessage\x12\x0c\n\x04json\x18\x01 \x01(\t\x12\r\n\x05\x62lobs\x18\x02 \x03(\x0c\x12\r\n\x05token\x18\x03 \x01(\tb\x06proto3' 23 | ) 24 | 25 | 26 | 27 | 28 | _QUERYMESSAGE = _descriptor.Descriptor( 29 | name='queryMessage', 30 | full_name='VDMS.protobufs.queryMessage', 31 | filename=None, 32 | file=DESCRIPTOR, 33 | containing_type=None, 34 | create_key=_descriptor._internal_create_key, 35 | fields=[ 36 | _descriptor.FieldDescriptor( 37 | name='json', full_name='VDMS.protobufs.queryMessage.json', index=0, 38 | number=1, type=9, cpp_type=9, label=1, 39 | has_default_value=False, default_value=b"".decode('utf-8'), 40 | message_type=None, enum_type=None, containing_type=None, 41 | is_extension=False, extension_scope=None, 42 | serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), 43 | _descriptor.FieldDescriptor( 44 | name='blobs', full_name='VDMS.protobufs.queryMessage.blobs', index=1, 45 | number=2, type=12, cpp_type=9, label=3, 46 | has_default_value=False, default_value=[], 47 | message_type=None, enum_type=None, containing_type=None, 48 | is_extension=False, extension_scope=None, 49 | serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), 50 | _descriptor.FieldDescriptor( 51 | name='token', full_name='VDMS.protobufs.queryMessage.token', index=2, 52 | number=3, type=9, cpp_type=9, label=1, 53 | has_default_value=False, default_value=b"".decode('utf-8'), 54 | message_type=None, enum_type=None, containing_type=None, 55 | is_extension=False, extension_scope=None, 56 | serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), 57 | ], 58 | extensions=[ 59 | ], 60 | nested_types=[], 61 | enum_types=[ 62 | ], 63 | serialized_options=None, 64 | is_extendable=False, 65 | syntax='proto3', 66 | extension_ranges=[], 67 | oneofs=[ 68 | ], 69 | serialized_start=39, 70 | serialized_end=97, 71 | ) 72 | 73 | DESCRIPTOR.message_types_by_name['queryMessage'] = _QUERYMESSAGE 74 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 75 | 76 | queryMessage = _reflection.GeneratedProtocolMessageType('queryMessage', (_message.Message,), { 77 | 'DESCRIPTOR' : _QUERYMESSAGE, 78 | '__module__' : 'queryMessage3_pb2' 79 | # @@protoc_insertion_point(class_scope:VDMS.protobufs.queryMessage) 80 | }) 81 | 
_sym_db.RegisterMessage(queryMessage) 82 | 83 | 84 | # @@protoc_insertion_point(module_scope) 85 | -------------------------------------------------------------------------------- /aperturedb/queryMessage4_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 3 | # source: queryMessage4.proto 4 | # Protobuf Python Version: 4.25.3 5 | """Generated protocol buffer code.""" 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import descriptor_pool as _descriptor_pool 8 | from google.protobuf import symbol_database as _symbol_database 9 | from google.protobuf.internal import builder as _builder 10 | # @@protoc_insertion_point(imports) 11 | 12 | _sym_db = _symbol_database.Default() 13 | 14 | 15 | 16 | 17 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13queryMessage4.proto\x12\x0eVDMS.protobufs\":\n\x0cqueryMessage\x12\x0c\n\x04json\x18\x01 \x01(\t\x12\r\n\x05\x62lobs\x18\x02 \x03(\x0c\x12\r\n\x05token\x18\x03 \x01(\tb\x06proto3') 18 | 19 | _globals = globals() 20 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) 21 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'queryMessage4_pb2', _globals) 22 | if _descriptor._USE_C_DESCRIPTORS == False: 23 | DESCRIPTOR._options = None 24 | _globals['_QUERYMESSAGE']._serialized_start=39 25 | _globals['_QUERYMESSAGE']._serialized_end=97 26 | # @@protoc_insertion_point(module_scope) 27 | -------------------------------------------------------------------------------- /aperturedb/queryMessage5_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
3 | # NO CHECKED-IN PROTOBUF GENCODE 4 | # source: queryMessage5.proto 5 | # Protobuf Python Version: 5.29.0 6 | """Generated protocol buffer code.""" 7 | from google.protobuf import descriptor as _descriptor 8 | from google.protobuf import descriptor_pool as _descriptor_pool 9 | from google.protobuf import runtime_version as _runtime_version 10 | from google.protobuf import symbol_database as _symbol_database 11 | from google.protobuf.internal import builder as _builder 12 | _runtime_version.ValidateProtobufRuntimeVersion( 13 | _runtime_version.Domain.PUBLIC, 14 | 5, 15 | 29, 16 | 0, 17 | '', 18 | 'queryMessage5.proto' 19 | ) 20 | # @@protoc_insertion_point(imports) 21 | 22 | _sym_db = _symbol_database.Default() 23 | 24 | 25 | 26 | 27 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13queryMessage5.proto\x12\x0eVDMS.protobufs\":\n\x0cqueryMessage\x12\x0c\n\x04json\x18\x01 \x01(\t\x12\r\n\x05\x62lobs\x18\x02 \x03(\x0c\x12\r\n\x05token\x18\x03 \x01(\tb\x06proto3') 28 | 29 | _globals = globals() 30 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) 31 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'queryMessage5_pb2', _globals) 32 | if not _descriptor._USE_C_DESCRIPTORS: 33 | DESCRIPTOR._loaded_options = None 34 | _globals['_QUERYMESSAGE']._serialized_start=39 35 | _globals['_QUERYMESSAGE']._serialized_end=97 36 | # @@protoc_insertion_point(module_scope) 37 | -------------------------------------------------------------------------------- /aperturedb/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aperture-data/aperturedb-python/a58fe1eab4c78ae92a6b03ab6d5c9e5ed1f0d62d/aperturedb/transformers/__init__.py -------------------------------------------------------------------------------- /aperturedb/transformers/clip.py: -------------------------------------------------------------------------------- 1 | 2 | import logging 3 | import numpy as np 4 | from PIL import Image 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | error_message = """ 9 | CLIP transformer requires git+https://github.com/openai/CLIP.git and torch 10 | Install with: pip install aperturedb[complete], followed by explicit install of CLIP. 11 | Can be done with : "pip install git+https://github.com/openai/CLIP.git" in the same 12 | venv as aperturedb. 
13 | """ 14 | 15 | try: 16 | import clip 17 | import torch 18 | import cv2 19 | except ImportError: 20 | logger.critical(error_message) 21 | exit(1) 22 | 23 | descriptor_set = "ViT-B/16" 24 | device = "cuda" if torch.cuda.is_available() else "cpu" 25 | model, preprocess = clip.load(descriptor_set, device=device) 26 | 27 | 28 | def generate_embedding(blob): 29 | global errors 30 | 31 | nparr = np.fromstring(blob, np.uint8) 32 | image = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 33 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 34 | image = preprocess(Image.fromarray(image)).unsqueeze(0).to(device) 35 | 36 | image_features = model.encode_image(image) 37 | embedding = None 38 | if device == "cuda": 39 | image_features = image_features.float() 40 | embedding = image_features.detach().cpu().numpy().tobytes() 41 | else: 42 | embedding = image_features.detach().numpy().tobytes() 43 | 44 | return embedding 45 | -------------------------------------------------------------------------------- /aperturedb/transformers/clip_pytorch_embeddings.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from aperturedb.Subscriptable import Subscriptable 3 | from aperturedb.transformers.transformer import Transformer 4 | from .clip import generate_embedding, descriptor_set 5 | 6 | 7 | class CLIPPyTorchEmbeddings(Transformer): 8 | """ 9 | Generates the embeddings for the images using the CLIP Pytorch model. 10 | https://github.com/openai/CLIP 11 | """ 12 | 13 | def __init__(self, data: Subscriptable, **kwargs) -> None: 14 | """ 15 | Args: 16 | data: Subscriptable object 17 | search_set_name: Name of the [descriptorset](/query_language/Reference/descriptor_commands/desc_commands/AddDescriptor) to use for the search. 18 | """ 19 | self.search_set_name = kwargs.pop( 20 | "search_set_name", descriptor_set) 21 | super().__init__(data, **kwargs) 22 | 23 | # Let's sample some data to figure out the descriptorset we need. 24 | if len(self._add_image_index) > 0: 25 | sample = generate_embedding(self.data[0][1][0]) 26 | utils = self.get_utils() 27 | utils.add_descriptorset( 28 | self.search_set_name, dim=len(sample) // 4, metric=["CS"]) 29 | 30 | def getitem(self, subscript): 31 | x = self.data[subscript] 32 | 33 | for ic in self._add_image_index: 34 | serialized = generate_embedding(x[1][ic]) 35 | # If the image already has an image_sha256, we use it. 36 | image_sha256 = x[0][ic]["AddImage"].get("properties", {}).get( 37 | "adb_image_sha256", None) 38 | if not image_sha256: 39 | image_sha256 = hashlib.sha256(x[1][ic]).hexdigest() 40 | x[1].append(serialized) 41 | x[0].append( 42 | { 43 | "AddDescriptor": { 44 | "set": self.search_set_name, 45 | "properties": { 46 | "image_sha256": image_sha256, 47 | }, 48 | "if_not_found": { 49 | "image_sha256": ["==", image_sha256], 50 | }, 51 | "connect": { 52 | "ref": x[0][ic]["AddImage"]["_ref"] 53 | } 54 | } 55 | }) 56 | return x 57 | -------------------------------------------------------------------------------- /aperturedb/transformers/common_properties.py: -------------------------------------------------------------------------------- 1 | from aperturedb.Subscriptable import Subscriptable 2 | from aperturedb.transformers.transformer import Transformer 3 | import logging 4 | 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class CommonProperties(Transformer): 10 | """ 11 | This applies some common properties to the data. 
12 | """ 13 | 14 | def __init__(self, data: Subscriptable, **kwargs) -> None: 15 | """ 16 | Args: 17 | data: Subscriptable object 18 | adb_data_source: Data source for the data 19 | adb_timestamp: Timestamp for the data 20 | adb_main_object: Main object for the data 21 | """ 22 | super().__init__(data, **kwargs) 23 | 24 | # Statically set some properties, these are not in the data 25 | self.adb_data_source = kwargs.get("adb_data_source", None) 26 | self.adb_timestamp = kwargs.get("adb_timestamp", None) 27 | self.adb_main_object = kwargs.get("adb_main_object", None) 28 | 29 | def getitem(self, subscript): 30 | x = self.data[subscript] 31 | try: 32 | # x is a transaction that has an add_image command and a blob 33 | for ic in self._add_image_index: 34 | src_properties = x[0][ic]["AddImage"]["properties"] 35 | # Set the static properties, if explicitly set 36 | if self.adb_data_source: 37 | src_properties["adb_data_source"] = self.adb_data_source 38 | if self.adb_timestamp: 39 | src_properties["adb_timestamp"] = self.adb_timestamp 40 | if self.adb_main_object: 41 | src_properties["adb_main_object"] = self.adb_main_object 42 | except Exception as e: 43 | logger.exception(e.with_traceback(), stack_info=True) 44 | 45 | return x 46 | -------------------------------------------------------------------------------- /aperturedb/transformers/facenet.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logger = logging.getLogger(__name__) 4 | 5 | error_message = """ 6 | Facenet transformer requires facenet-pytorch and torch 7 | Install with: pip install aperturedb[complete] 8 | Alternatively, install with: "pip install facenet-pytorch torch" in the same 9 | venv as aperturedb. 10 | """ 11 | 12 | try: 13 | from facenet_pytorch import MTCNN, InceptionResnetV1 14 | import torch 15 | except ImportError: 16 | logger.critical(error_message) 17 | exit(1) 18 | 19 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 20 | 21 | # If required, create a face detection pipeline using MTCNN: 22 | mtcnn = MTCNN(image_size=96, margin=0, device=device) 23 | 24 | # Create an inception resnet (in eval mode): 25 | resnet = InceptionResnetV1(pretrained='vggface2', device=device).eval() 26 | 27 | errors = 0 28 | 29 | 30 | def generate_embedding(img): 31 | global errors 32 | # Get cropped and prewhitened image tensor 33 | img_cropped = mtcnn(img) 34 | if img_cropped is not None: 35 | # Calculate embedding (unsqueeze to add batch dimension) 36 | img_embedding = resnet(img_cropped.unsqueeze(0).to(device)) 37 | else: 38 | img_embedding = torch.zeros(1, 512).to(device) 39 | errors += 1 40 | 41 | return img_embedding 42 | -------------------------------------------------------------------------------- /aperturedb/transformers/facenet_pytorch_embeddings.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from aperturedb.Subscriptable import Subscriptable 3 | from aperturedb.transformers.transformer import Transformer 4 | from PIL import Image 5 | import io 6 | import time 7 | from .facenet import generate_embedding 8 | 9 | 10 | class FacenetPyTorchEmbeddings(Transformer): 11 | """ 12 | Generates the embeddings for the images using the Facenet Pytorch model. 
13 | """ 14 | 15 | def __init__(self, data: Subscriptable, **kwargs) -> None: 16 | """ 17 | Args: 18 | data: Subscriptable object 19 | search_set_name: Name of the [descriptorset](/query_language/Reference/descriptor_commands/desc_commands/AddDescriptor) to use for the search. 20 | """ 21 | self.search_set_name = kwargs.pop( 22 | "search_set_name", "facenet_pytorch_embeddings") 23 | super().__init__(data, **kwargs) 24 | 25 | # Let's sample some data to figure out the descriptorset we need. 26 | if len(self._add_image_index) > 0: 27 | sample = self._get_embedding_from_blob(self.data[0][1][0]) 28 | utils = self.get_utils() 29 | utils.add_descriptorset(self.search_set_name, dim=len(sample) // 4) 30 | 31 | def _get_embedding_from_blob(self, image_blob: bytes): 32 | pil_image = Image.open(io.BytesIO(image_blob)) 33 | embedding = generate_embedding(pil_image) 34 | serialized = embedding.cpu().detach().numpy().tobytes() 35 | return serialized 36 | 37 | def getitem(self, subscript): 38 | start = time.time() 39 | self.ncalls += 1 40 | x = self.data[subscript] 41 | 42 | for ic in self._add_image_index: 43 | serialized = self._get_embedding_from_blob( 44 | x[1][self._add_image_index.index(ic)]) 45 | # If the image already has an image_sha256, we use it. 46 | image_sha256 = x[0][ic]["AddImage"].get("properties", {}).get( 47 | "adb_image_sha256", None) 48 | if not image_sha256: 49 | image_sha256 = hashlib.sha256(x[1][ic]).hexdigest() 50 | x[1].append(serialized) 51 | x[0].append( 52 | { 53 | "AddDescriptor": { 54 | "set": self.search_set_name, 55 | "properties": { 56 | "image_sha256": image_sha256, 57 | }, 58 | "if_not_found": { 59 | "image_sha256": ["==", image_sha256], 60 | }, 61 | "connect": { 62 | "ref": x[0][ic]["AddImage"]["_ref"] 63 | } 64 | } 65 | }) 66 | self.cumulative_time += time.time() - start 67 | return x 68 | -------------------------------------------------------------------------------- /aperturedb/transformers/image_properties.py: -------------------------------------------------------------------------------- 1 | from aperturedb.transformers.transformer import Transformer 2 | from aperturedb.Subscriptable import Subscriptable 3 | 4 | from PIL import Image 5 | import io 6 | import logging 7 | import uuid 8 | import hashlib 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class ImageProperties(Transformer): 14 | """ 15 | This computes some image properties and adds them to the metadata. 16 | """ 17 | 18 | def __init__(self, data: Subscriptable, **kwargs) -> None: 19 | super().__init__(data, **kwargs) 20 | utils = self.get_utils() 21 | 22 | if "adb_data_source" not in utils.get_indexed_props("_Image"): 23 | utils.create_entity_index("_Image", "adb_data_source") 24 | 25 | def getitem(self, subscript): 26 | x = self.data[subscript] 27 | try: 28 | # x is a transaction that has an add_image command and a blob 29 | for ic in self._add_image_index: 30 | blob_index = self._add_image_index.index(ic) 31 | src_properties = x[0][ic]["AddImage"]["properties"] 32 | # Compute the dynamic properties and apply them to metadata 33 | src_properties["adb_image_size"] = len(x[1][blob_index]) 34 | src_properties["adb_image_sha256"] = hashlib.sha256( 35 | x[1][blob_index]).hexdigest() 36 | 37 | # Compute the image dimensions. 
38 | pil_image = Image.open(io.BytesIO(x[1][blob_index])) 39 | src_properties["adb_image_width"] = pil_image.width 40 | src_properties["adb_image_height"] = pil_image.height 41 | src_properties["adb_image_id"] = str( 42 | src_properties["id"] if "id" in src_properties else uuid.uuid4().hex) 43 | 44 | except Exception as e: 45 | # Importantly, do not raise an exception here, since it will kill ingestion. 46 | # Create a log message instead, for post-mortem analysis. 47 | logger.exception(e.with_traceback(None), stack_info=True) 48 | 49 | return x 50 | -------------------------------------------------------------------------------- /aperturedb/transformers/transformer.py: -------------------------------------------------------------------------------- 1 | from aperturedb.Subscriptable import Subscriptable 2 | from aperturedb.CommonLibrary import create_connector 3 | from aperturedb.Utils import Utils 4 | import logging 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class Transformer(Subscriptable): 10 | """ 11 | Transformer is an abstract class that can be used to transform 12 | data before ingestion into aperturedb. 13 | 14 | :::info 15 | **Some build in transformers:** 16 | - CommonProperties: Add common properties to the data 17 | - ImageProperties: Add image properties to the data 18 | - Facenet: Add facenet embeddings to the data 19 | ::: 20 | 21 | 22 | [Example](https://github.com/aperture-data/aperturedb-python/blob/develop/examples/similarity_search/add_faces.py) of how to use transformers: 23 | ```python 24 | from CelebADataKaggle import CelebADataKaggle 25 | from aperturedb.transformers.facenet_pytorch_embeddings import FacenetPyTorchEmbeddings 26 | from aperturedb.transformers.common_properties import CommonProperties 27 | from aperturedb.transformers.image_properties import ImageProperties 28 | 29 | . 30 | . 31 | . 32 | 33 | dataset = CelebADataKaggle() 34 | 35 | # Here's a pipeline that adds extra properties to the celebA dataset 36 | dataset = CommonProperties( 37 | dataset, 38 | adb_data_source="kaggle-celebA", 39 | adb_main_object="Face", 40 | adb_annoted=True) 41 | 42 | # some useful properties for the images 43 | dataset = ImageProperties(dataset) 44 | 45 | # Add the embeddings generated through facenet. 
46 | dataset = FacenetPyTorchEmbeddings(dataset) 47 | 48 | ``` 49 | 50 | """ 51 | 52 | def __init__(self, data: Subscriptable, client=None, **kwargs) -> None: 53 | self.data = data 54 | 55 | # Inspect the first element to get the number of queries and blobs 56 | x = self.data[0] 57 | self._queries = len(x[0]) 58 | self._blobs = len(x[1]) 59 | self._blob_index = [] 60 | self._add_image_index = [] 61 | self._client = client 62 | 63 | bc = 0 64 | for i, c in enumerate(x[0]): 65 | command = list(c.keys())[0] 66 | if command in ["AddImage", "AddDescriptor", "AddVideo", "AddBlob"]: 67 | self._blob_index.append(i) 68 | if command == "AddImage": 69 | self._add_image_index.append(i) 70 | bc += 1 71 | logger.info(f"Found {bc} blobs in the data") 72 | logger.info( 73 | f"Found {len(self._add_image_index)} AddImage commands in the data") 74 | 75 | self.ncalls = 0 76 | self.cumulative_time = 0 77 | 78 | def getitem(self, subscript): 79 | raise NotImplementedError("Needs to be subclassed") 80 | 81 | def __len__(self): 82 | return len(self.data) 83 | 84 | def get_client(self): 85 | if self._client is None: 86 | self._client = create_connector() 87 | return self._client 88 | 89 | def get_utils(self): 90 | return Utils(self.get_client()) 91 | -------------------------------------------------------------------------------- /aperturedb/types.py: -------------------------------------------------------------------------------- 1 | # This file only exists to support readable type hints 2 | 3 | from typing import List, Dict, Any 4 | 5 | Command = Dict[str, Any] 6 | Blob = bytes 7 | Commands = List[Command] # aka Query, but that's also a class name 8 | Blobs = List[Blob] 9 | CommandResponses = List[Dict] 10 | 11 | Image = bytes 12 | Video = bytes 13 | Descriptor = bytes 14 | -------------------------------------------------------------------------------- /configure_deployment.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | source $(dirname "$0")/version.sh 4 | 5 | read_version 6 | 7 | echo "Configuring deployment with: $BUILD_VERSION" 8 | 9 | 10 | find deploy/ -type f -name "*.yaml" -exec sed -i "s/\$VERSION/v$BUILD_VERSION/g" {} \; 11 | 12 | -------------------------------------------------------------------------------- /docker/complete/Dockerfile: -------------------------------------------------------------------------------- 1 | # Pull base image. 2 | FROM aperturedata/aperturedb-notebook:dependencies 3 | 4 | RUN mkdir /aperturedata 5 | ADD docker/complete/aperturedata /aperturedata 6 | 7 | RUN cd /aperturedata && pip install -e ".[complete]" 8 | -------------------------------------------------------------------------------- /docker/dependencies/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | # Updated as per the newest release. 
4 | ENV OPENCV_VERSION=4.7.0 5 | 6 | RUN apt-get update \ 7 | && apt-get upgrade -y \ 8 | && apt-get install -y --no-install-recommends \ 9 | python3-venv 10 | 11 | ENV DEBIAN_FRONTEND=noninteractive 12 | ENV VIRTUAL_ENV=/opt/venv 13 | ENV NODEJS_LTS=v20.12.2 14 | RUN python3 -m venv $VIRTUAL_ENV 15 | ENV PATH="$VIRTUAL_ENV/bin:$PATH" 16 | 17 | RUN apt-get -y install build-essential git cmake python3.10-venv\ 18 | libx264-* libx265-* libavcodec-dev libavformat-dev\ 19 | pkg-config\ 20 | libavutil-dev libswscale-dev python3-venv\ 21 | libavcodec-extra libavcodec-dev python3-dev\ 22 | ffmpeg h264enc wget fuse libfuse-dev 23 | 24 | #The version of nodejs in ubuntu is very old 25 | #Installing the LTS as on Feb 23 26 | RUN cd /opt && wget https://nodejs.org/dist/${NODEJS_LTS}/node-${NODEJS_LTS}-linux-x64.tar.xz && tar xf node-${NODEJS_LTS}-linux-x64.tar.xz 27 | ENV PATH="/opt/node-${NODEJS_LTS}-linux-x64/bin:$PATH" 28 | 29 | # Cmake determines the correct path for site packages by looking at 30 | # numpy, and results in following output in configure: 31 | # -- Python 3: 32 | # -- Interpreter: /opt/venv/bin/python3 (ver 3.10.6) 33 | # -- Libraries: /usr/lib/x86_64-linux-gnu/libpython3.10.so (ver 3.10.6) 34 | # -- numpy: /opt/venv/lib/python3.10/site-packages/numpy/core/include (ver 1.24.1) 35 | # -- install path: lib/python3.10/site-packages/cv2/python-3.10 36 | RUN pip install "numpy<2" 37 | 38 | RUN wget -q https://github.com/opencv/opencv/archive/$OPENCV_VERSION.tar.gz && \ 39 | tar xf $OPENCV_VERSION.tar.gz && rm $OPENCV_VERSION.tar.gz && \ 40 | cd opencv-$OPENCV_VERSION && mkdir build && cd build && \ 41 | cmake \ 42 | -D CMAKE_BUILD_TYPE=Release \ 43 | -D WITH_TBB=OFF -D WITH_OPENMP=ON -D WITH_IPP=ON \ 44 | -D CPU_DISPATCH=SSE4_2,AVX,AVX2 \ 45 | -D BUILD_EXAMPLES=OFF \ 46 | -D BUILD_DOCS=OFF \ 47 | -D BUILD_PERF_TESTS=OFF \ 48 | -D BUILD_TESTS=OFF \ 49 | -D BUILD_opencv_apps=OFF \ 50 | -D WITH_FFMPEG=ON \ 51 | -D CMAKE_INSTALL_PREFIX=/usr/local \ 52 | -D OPENCV_PYTHON3_INSTALL_PATH=/opt/venv/lib/python3.10/site-packages .. && \ 53 | make -j6 && make install 54 | 55 | RUN pip install jupyterlab jupyterlab-dash dash-cytoscape plotly jupyter-dash numpy 56 | RUN jupyter lab build 57 | RUN jupyter labextension disable "@jupyterlab/apputils-extension:announcements" 58 | -------------------------------------------------------------------------------- /docker/dependencies/build.sh: -------------------------------------------------------------------------------- 1 | docker build -f Dockerfile -t aperturedata/aperturedb-notebook:dependencies . 2 | -------------------------------------------------------------------------------- /docker/notebook/Dockerfile: -------------------------------------------------------------------------------- 1 | # Pull base image. 2 | FROM aperturedata/aperturedb-notebook:dependencies 3 | 4 | RUN mkdir /aperturedata 5 | ADD docker/notebook/aperturedata /aperturedata 6 | 7 | COPY docker/notebook/scripts/start.sh /start.sh 8 | RUN chmod 755 /start.sh 9 | 10 | # Add Tini. Tini operates as a process subreaper for jupyter. 11 | # This prevents kernel crashes. 
12 | # ENV TINI_VERSION v0.6.0 13 | # ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /usr/bin/tini 14 | # RUN chmod +x /usr/bin/tini 15 | # ENTRYPOINT ["/usr/bin/tini", "--"] 16 | RUN cd /aperturedata && pip install -e ".[dev]" 17 | RUN echo "adb --install-completion" | bash 18 | 19 | # Install useful JupyterLab extensions 20 | RUN pip install jupyter-resource-usage 21 | 22 | # Suppress the annoying announcements popup 23 | RUN jupyter labextension disable "@jupyterlab/apputils-extension:announcements" 24 | 25 | # Install CLIP (for running transformers) 26 | RUN pip install git+https://github.com/openai/CLIP.git 27 | 28 | RUN apt update && apt install -y curl && apt clean 29 | 30 | EXPOSE 8888 31 | CMD ["/start.sh"] 32 | -------------------------------------------------------------------------------- /docker/notebook/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | # Pull base image. 2 | FROM aperturedata/aperturedb-notebook:dependencies 3 | 4 | RUN mkdir /aperturedata 5 | ADD docker/notebook/aperturedata /aperturedata 6 | 7 | COPY docker/notebook/scripts/start.sh /start.sh 8 | RUN chmod 755 /start.sh 9 | 10 | # Add Tini. Tini operates as a process subreaper for jupyter. 11 | # This prevents kernel crashes. 12 | # ENV TINI_VERSION v0.6.0 13 | # ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /usr/bin/tini 14 | # RUN chmod +x /usr/bin/tini 15 | # ENTRYPOINT ["/usr/bin/tini", "--"] 16 | RUN cd /aperturedata && pip install -e "." 17 | RUN echo "adb --install-completion" | bash 18 | 19 | # Install useful JupyterLab extensions 20 | RUN pip install jupyter-resource-usage 21 | 22 | # Suppress the annoying announcements popup 23 | RUN jupyter labextension disable "@jupyterlab/apputils-extension:announcements" 24 | 25 | # Install torch and torchvision cpu version 26 | RUN pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu 27 | RUN pip install facenet-pytorch --no-deps 28 | 29 | # Install CLIP (for running transformers) 30 | RUN pip install git+https://github.com/openai/CLIP.git 31 | 32 | RUN apt update && apt install -y curl && apt clean 33 | 34 | EXPOSE 8888 35 | CMD ["/start.sh"] 36 | -------------------------------------------------------------------------------- /docker/notebook/scripts/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Configure the Juypter Notebook password 4 | jupyter lab --generate-config 5 | 6 | PASSWORD=${PASSWORD:-test} 7 | PASS_HASH=$(python3 -c "from jupyter_server.auth import passwd; print(passwd('${PASSWORD}'))") 8 | echo "c.NotebookApp.password='${PASS_HASH}'">> /root/.jupyter/jupyter_lab_config.py 9 | 10 | BASE_URL=${BASE_URL:-/} 11 | echo "c.ServerApp.base_url='${BASE_URL}'">> /root/.jupyter/jupyter_lab_config.py 12 | 13 | NOTEBOOK_DIR=${NOTEBOOK_DIR:-/notebooks} 14 | mkdir -p ${NOTEBOOK_DIR} 15 | echo "c.NotebookApp.notebook_dir='${NOTEBOOK_DIR}'">> /root/.jupyter/jupyter_lab_config.py 16 | 17 | cd ${HOME} 18 | jupyter-lab --port=8888 --no-browser --allow-root --ip=0.0.0.0 19 | -------------------------------------------------------------------------------- /docker/pytorch-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/deeplearning-platform-release/pytorch-gpu.1-13.py37 2 | 3 | RUN mkdir /aperturedata 4 | ADD docker/pytorch-gpu/aperturedata /aperturedata 5 | 6 | RUN pip install awscli 7 | RUN apt-get 
update && apt-get install -y libopencv-dev python3-opencv fuse libfuse-dev 8 | RUN cd /aperturedata && pip install -e ".[dev]" 9 | 10 | COPY docker/pytorch-gpu/scripts/start.sh /start.sh 11 | RUN chmod 755 /start.sh 12 | CMD ["/start.sh"] -------------------------------------------------------------------------------- /docker/pytorch-gpu/build.sh: -------------------------------------------------------------------------------- 1 | docker build -f docker/pytorch-gpu/Dockerfile -t aperturedata/aperturedb-pytorch-gpu:latest . -------------------------------------------------------------------------------- /docker/pytorch-gpu/scripts/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /aperturedata/test && bash run_test.sh -------------------------------------------------------------------------------- /docker/release/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | ENV DEBIAN_FRONTEND=noninteractive 3 | RUN apt update && apt upgrade -y && apt install -y --no-install-recommends \ 4 | python3-dev \ 5 | python3-pip 6 | ARG VERSION 7 | COPY dist/aperturedb-${VERSION}-py3-none-any.whl /tmp 8 | RUN pip3 install /tmp/aperturedb-${VERSION}-py3-none-any.whl 9 | -------------------------------------------------------------------------------- /docker/tests/Dockerfile: -------------------------------------------------------------------------------- 1 | # Pull base image. 2 | FROM aperturedata/aperturedb-notebook:dependencies 3 | 4 | RUN mkdir /aperturedata 5 | ADD docker/tests/aperturedata /aperturedata 6 | 7 | RUN pip install awscli 8 | RUN cd /aperturedata && pip install -e ".[dev]" 9 | RUN pip install git+https://github.com/openai/CLIP.git 10 | COPY docker/tests/scripts/start.sh /start.sh 11 | RUN chmod 755 /start.sh 12 | CMD ["/start.sh"] -------------------------------------------------------------------------------- /docker/tests/scripts/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /aperturedata/test && bash run_test.sh -------------------------------------------------------------------------------- /docker/twine/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | RUN pip install twine build 4 | -------------------------------------------------------------------------------- /docs/README.protobuf: -------------------------------------------------------------------------------- 1 | The ApertureDB Python library supports versions 3 and 4 of protobuf. 2 | 3 | These are incompatible implementations: the Python code that protoc 4 | generates from the proto file will not work across versions. 5 | 6 | To solve this, we have added a thin wrapper which selects the backend 7 | implementation based on the installed protobuf version. 8 | 9 | This is done because customers use packages alongside aperturedb which require 10 | Python protobuf packages from both the 3.x line and the 4.x line. 11 | 12 | To regenerate the files, take the queryMessage.proto file, make copies whose 13 | names append the target version, and compile each copy with the matching 14 | protoc. Finally, place the generated modules in this repo.
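A minimal sketch of that regeneration flow, for illustration only: the
protoc-3.x / protoc-4.x binary names below are placeholders for whichever
matching protoc releases you have installed, and the location of
queryMessage.proto is assumed, since the proto file itself is not part of
this listing.

  # Make per-version copies of the proto definition.
  cp queryMessage.proto queryMessage3.proto
  cp queryMessage.proto queryMessage4.proto

  # Compile each copy with the protoc release that matches that protobuf line
  # (check with `protoc --version` which binary is which).
  protoc-3.x --python_out=. queryMessage3.proto
  protoc-4.x --python_out=. queryMessage4.proto

  # Place the generated modules in the package, next to queryMessage.py.
  mv queryMessage3_pb2.py queryMessage4_pb2.py aperturedb/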
15 | -------------------------------------------------------------------------------- /examples/CelebADataKaggle.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from aperturedb.KaggleData import KaggleData 3 | import pandas as pd 4 | import os 5 | import logging 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class CelebADataKaggle(KaggleData): 11 | """ 12 | **ApertureDB ingestable Dataset based off 13 | [CelebA on kaggle](https://www.kaggle.com/datasets/jessicali9530/celeba-dataset)** 14 | """ 15 | 16 | def __init__(self, **kwargs) -> None: 17 | self.records_count = -1 18 | super().__init__(dataset_ref = "jessicali9530/celeba-dataset", 19 | records_count=self.records_count) 20 | 21 | def generate_index(self, root: str, records_count=-1) -> pd.DataFrame: 22 | attr_index = pd.read_csv( 23 | os.path.join(root, "list_attr_celeba.csv")) 24 | bbox_index = pd.read_csv( 25 | os.path.join(root, "list_bbox_celeba.csv")) 26 | landmarks_index = pd.read_csv(os.path.join( 27 | root, "list_landmarks_align_celeba.csv")) 28 | partition_index = pd.read_csv( 29 | os.path.join(root, "list_eval_partition.csv")) 30 | rows = attr_index.combine_first(bbox_index).combine_first( 31 | landmarks_index).combine_first(partition_index) 32 | original_size = len(rows) 33 | records_count = records_count if records_count > 0 else original_size 34 | 35 | rows = rows[:records_count] 36 | 37 | logger.info( 38 | f"Created {len(rows)} items from {original_size} in the original dataset.") 39 | return rows 40 | 41 | def generate_query(self, idx: int) -> Tuple[List[dict], List[bytes]]: 42 | record = self.collection[idx] 43 | p = record 44 | q = [ 45 | { 46 | "AddImage": { 47 | "_ref": 1, 48 | "properties": { 49 | c: p[c] for c in p.keys() 50 | }, 51 | } 52 | }, { 53 | "AddBoundingBox": { 54 | "_ref": 2, 55 | "image_ref": 1, 56 | "rectangle": { 57 | "x": p["x_1"], 58 | "y": p["y_1"], 59 | "width": p["width"] if p["width"] > 0 else 1, 60 | "height": p["height"] if p["height"] > 0 else 1, 61 | } 62 | } 63 | } 64 | ] 65 | q[0]["AddImage"]["properties"]["keypoints"] = f"10 {p['lefteye_x']} {p['lefteye_y']} {p['righteye_x']} {p['righteye_y']} {p['nose_x']} {p['nose_y']} {p['leftmouth_x']} {p['leftmouth_y']} {p['rightmouth_x']} {p['rightmouth_y']}" 66 | 67 | image_file_name = os.path.join( 68 | self.workdir, 69 | 'img_align_celeba/img_align_celeba', 70 | p["image_id"]) 71 | blob = open(image_file_name, "rb").read() 72 | return q, [blob] 73 | -------------------------------------------------------------------------------- /examples/Cifar10DataTensorFlow.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from aperturedb.TensorFlowData import TensorFlowData 3 | from typing import List, Tuple 4 | from aperturedb.Images import np_arr_img_to_bytes 5 | 6 | 7 | class Cifar10DataTensorFlow(TensorFlowData): 8 | """ 9 | **ApertureDB ingestable Dataset, which is sourced from 10 | [Cifar10 (tensorflow.datasets)](https://www.tensorflow.org/datasets/catalog/cifar10)** 11 | """ 12 | 13 | def __init__(self): 14 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data() 15 | self.x = tf.concat([x_train, x_test], axis=0) 16 | self.y = tf.concat([tf.squeeze(y_train), tf.squeeze(y_test)], axis=0) 17 | self.train_len = x_train.shape[0] 18 | 19 | def __len__(self): 20 | return self.x.shape[0] 21 | 22 | def generate_query(self, idx: int) -> Tuple[List[dict], List[bytes]]: 23 | x, y = 
self.x[idx], self.y[idx] 24 | q = [{ 25 | "AddImage": { 26 | "_ref": 1 27 | } 28 | }] 29 | q[0]["AddImage"]["properties"] = { 30 | "label": str(y.numpy()), 31 | "train": True if idx < self.train_len else False 32 | } 33 | 34 | return q, [np_arr_img_to_bytes(x.numpy())] 35 | -------------------------------------------------------------------------------- /examples/Foo.py: -------------------------------------------------------------------------------- 1 | from aperturedb.transformers.transformer import Transformer 2 | 3 | 4 | class Foo(Transformer): 5 | """ 6 | An example of a non-packaged transformer. 7 | Example usage in adb (the CLI argument is --user-transformer): 8 | adb from-generator examples/CelebADataKaggle.py --sample-count 1 --user-transformer examples/Foo.py 9 | """ 10 | 11 | def getitem(self, subscript): 12 | x = self.data[subscript] 13 | for ic in self._add_image_index: 14 | x[0][ic]["AddImage"]["properties"]["foo"] = "bar" 15 | 16 | return x 17 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Code examples with aperturedb. 2 | 3 | The following is a table of contents for this folder and its subfolders, 4 | along with instructions for running each script. 5 | 6 | Part of the COCO validation set needs to be downloaded. 7 | This is a prerequisite for running some of the scripts below. 8 | 9 | ``` 10 | mkdir coco && cd coco && wget http://images.cocodataset.org/zips/val2017.zip && unzip val2017.zip && wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip && unzip annotations_trainval2017.zip 11 | ``` 12 | 13 | ## Example 1: ApertureDB Loaders 101 14 | 15 | The following files are under *loaders_101* 16 | | File | Description | instructions | 17 | | -----| ------------| -----| 18 | | loaders.ipynb | A notebook with some sample code for aperturedb | Also available to read at [Aperturedb documentation](https://docs.aperturedata.io/HowToGuides/Advanced/loaders)| 19 | 20 | ## Example 2: Image classification using a pretrained model 21 | The following files are under *image_classification* 22 | 23 | | File | Description | instructions | 24 | | -----| ------------| -----| 25 | | AlexNetClassifier.py | Helper code to transform images before classifying them with a pretrained AlexNet model | Is not invoked directly | 26 | | imagenet_classes.txt | The class labels for the outputs from AlexNet | Used by pytorch_classification.py | 27 | | prepare_aperturedb.py | Helper to download images from the COCO dataset and load them into aperturedb | ``python prepare_aperturedb.py -images_count 100`` | 28 | | pytorch_classification.py | Pulls all images from aperturedb with a property set by the prepare_aperturedb.py script, and classifies them using AlexNet | ``python pytorch_classification.py`` | 29 | | pytorch_classification.ipynb | Does the same operation as ``pytorch_classification.py``, and also displays the classified images | Also available to read at [Aperturedb python documentation](https://docs.aperturedata.io/HowToGuides/Basic/pytorch_classification) | 30 | 31 | ## Example 3: Similarity search using ApertureDB 32 | 33 | This needs a bit of extra setup. 34 | - Install the dependent packages using the command shown below, from the top-level path of this repo. 35 | ``` 36 | pip install ".[complete]" 37 | 38 | ``` 39 | - Set up a Kaggle account and the API token as per the official [kaggle api guide](https://github.com/Kaggle/kaggle-api); see the sketch below.
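A minimal sketch of that token setup, assuming the `kaggle.json` API token has already been downloaded from your Kaggle account page into `~/Downloads`:

```
# Put the token where the kaggle client looks for it, and restrict its permissions.
mkdir -p ~/.kaggle
mv ~/Downloads/kaggle.json ~/.kaggle/kaggle.json
chmod 600 ~/.kaggle/kaggle.json

# Sanity check: list datasets matching "celeba" (downloads nothing).
kaggle datasets list -s celeba
```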
40 | 41 | The following files are under *similarity_search* 42 | 43 | | File | Description | instructions | 44 | | -----| ------------| -----| 45 | | similarity_search.ipynb | A notebook with some sample code for describing similarity search using aperturedb | Also available to read at [Aperturedb documentation](https://docs.aperturedata.io/HowToGuides/Advanced/similarity_search)| 46 | | facenet.py | Face Recognition using facenet and pytorch | Is invoked indirectly | 47 | | add_faces.py | A Script to load celebA dataset into aperturedb | ``python add_faces.py``| 48 | 49 | ## Example 4: REST interface to apertureDB. 50 | 51 | The following files are under *rest_api* 52 | 53 | | File | Description | instructions | 54 | | -----| ------------| -----| 55 | | rest_api.py | Interactions with aperturedb using python's requests | ``python rest_api.py``| 56 | | rest_api.js | Interactions with aperturedb using javascript with axios | Is included in index.html | 57 | | index.html | A web page that renders from responses from aperturedb | Tested on chrome | 58 | 59 | ## Example 5: Adding Data to aperturedb with User defined models 60 | 61 | The following files are under *loading_with_models* 62 | 63 | | File | Description | instructions | 64 | | -----| ------------| -----| 65 | | models.ipynb | A notebook with some sample code to add data using models | Also available to read at [Aperturedb model example](https://docs.aperturedata.io/HowToGuides/Advanced/models)| 66 | -------------------------------------------------------------------------------- /examples/dask/ingest_dask.py: -------------------------------------------------------------------------------- 1 | import dask 2 | from dask import dataframe 3 | import os 4 | from aperturedb import EntityDataCSV 5 | from dask.distributed import Client, LocalCluster 6 | 7 | 8 | if __name__ == '__main__': 9 | batchsize = 2000 10 | numthreads = 8 11 | 12 | cluster = LocalCluster(n_workers=numthreads) 13 | client = Client(cluster) 14 | dask.config.set(scheduler="distributed") 15 | 16 | FILENAME = os.path.join(os.path.dirname(__file__), 'see2.out') 17 | 18 | ratings = dataframe.read_csv( 19 | FILENAME, blocksize=os.path.getsize(FILENAME) // numthreads) 20 | 21 | def process(df): 22 | from aperturedb.CommonLibrary import create_connector 23 | from aperturedb.ParallelLoader import ParallelLoader 24 | client = create_connector() 25 | loader = ParallelLoader(client) 26 | count = 0 27 | 28 | for i in range(0, len(df), batchsize): 29 | end = min(i + batchsize, len(df)) 30 | batch = df[i:end] 31 | data = EntityDataCSV.EntityDataCSV(filename="", df=batch) 32 | loader.ingest(data, batchsize=len(batch), numthreads=1) 33 | count += 1 34 | 35 | print(f"len(df) = {len(df)}, count = {count}") 36 | 37 | ratings.map_partitions(process).compute() 38 | -------------------------------------------------------------------------------- /examples/dask/ingest_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from aperturedb.EntityDataCSV import EntityDataCSV 3 | from aperturedb.ParallelLoader import ParallelLoader 4 | from aperturedb.CommonLibrary import create_connector 5 | import typer 6 | 7 | 8 | app = typer.Typer() 9 | 10 | 11 | @app.command() 12 | def main(use_dask: bool = False, csv_path: str = "data.csv"): 13 | client = create_connector() 14 | 15 | data = EntityDataCSV(filename=os.path.join( 16 | os.path.dirname(__file__), csv_path), use_dask=use_dask) 17 | loader = ParallelLoader(client=client) 18 | 
loader.ingest(generator=data, batchsize=2000, numthreads=8, stats=True) 19 | 20 | 21 | if __name__ == "__main__": 22 | app() 23 | -------------------------------------------------------------------------------- /examples/image_classification/AlexNetClassifier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import transforms 3 | from torchvision import models 4 | from PIL import Image 5 | 6 | 7 | class AlexNetClassifier(object): 8 | 9 | def __init__(self): 10 | 11 | self.alexnet = models.alexnet(pretrained=True) 12 | 13 | self.transform = transforms.Compose([ 14 | # transforms.Resize(256), # Resize done by ApertureDB 15 | transforms.CenterCrop(224), 16 | transforms.ToTensor(), 17 | transforms.Normalize( 18 | mean=[0.485, 0.456, 0.406], 19 | std =[0.229, 0.224, 0.225] 20 | )]) 21 | 22 | with open('imagenet_classes.txt') as f: 23 | self.classes = [line.strip() for line in f.readlines()] 24 | 25 | def classify(self, image): 26 | img = Image.fromarray(image.astype('uint8'), 'RGB') 27 | 28 | img_t = self.transform(img) 29 | batch_t = torch.unsqueeze(img_t, 0) 30 | self.alexnet.eval() 31 | out = self.alexnet(batch_t) 32 | _, index = torch.max(out, 1) 33 | percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100 34 | 35 | label = self.classes[index[0]] 36 | confidence = percentage[index[0]].item() 37 | 38 | return label, confidence 39 | 40 | def print_model(self): 41 | # dir(models) 42 | print(self.alexnet) 43 | -------------------------------------------------------------------------------- /examples/image_classification/CocoDataPytorch.py: -------------------------------------------------------------------------------- 1 | ../CocoDataPytorch.py -------------------------------------------------------------------------------- /examples/image_classification/prepare_aperturedb.py: -------------------------------------------------------------------------------- 1 | import io 2 | 3 | from aperturedb.ParallelLoader import ParallelLoader 4 | from aperturedb.CommonLibrary import create_connector 5 | from PIL import Image 6 | from CocoDataPyTorch import CocoDataPyTorch 7 | import argparse 8 | 9 | 10 | def main(params): 11 | # Define a helper function to convert PIL.image to a bytes array. 
12 | def image_to_byte_array(image: Image) -> bytes: 13 | imgByteArr = io.BytesIO() 14 | image.save(imgByteArr, format="JPEG") 15 | imgByteArr = imgByteArr.getvalue() 16 | return imgByteArr 17 | 18 | coco_detection = CocoDataPyTorch("prepare_aperturedb") 19 | 20 | # Lets use some images from the coco which are annotated for the purpose of the demo 21 | images = [] 22 | for t in coco_detection: 23 | X, y = t 24 | if len(y) > 0: 25 | images.append(t) 26 | if len(images) == params.images_count: 27 | break 28 | 29 | loader = ParallelLoader(create_connector()) 30 | loader.ingest(generator = images, stats=True) 31 | print(f"Inserted {params.images_count} images to aperturedb") 32 | 33 | 34 | def get_args(): 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument('-images_count', type=int, required=True, 37 | help="The number of images to ingest into aperturedb") 38 | return parser.parse_args() 39 | 40 | 41 | if __name__ == "__main__": 42 | main(get_args()) 43 | -------------------------------------------------------------------------------- /examples/image_classification/pytorch_classification.py: -------------------------------------------------------------------------------- 1 | import time 2 | import AlexNetClassifier as alexnet 3 | from aperturedb import PyTorchDataset 4 | from aperturedb.CommonLibrary import create_connector 5 | 6 | client = create_connector() 7 | 8 | out_file_name = "classification.txt" 9 | query = [{ 10 | "FindImage": { 11 | "constraints": { 12 | "dataset_name": ["==", "prepare_aperturedb"] 13 | }, 14 | "operations": [ 15 | { 16 | "type": "resize", 17 | "width": 256, 18 | "height": 256 19 | } 20 | ], 21 | "results": { 22 | "list": ["image_id"], 23 | } 24 | } 25 | }] 26 | 27 | 28 | classifier = alexnet.AlexNetClassifier() 29 | with open(out_file_name, 'w') as classification: 30 | dataset = PyTorchDataset.ApertureDBDataset( 31 | client=client, query=query, label_prop='image_id') 32 | start = time.time() 33 | for item in dataset: 34 | image, id = item 35 | label, conf = classifier.classify(image) 36 | classification.write(f"{id}: {label}, confidence = {conf}\n") 37 | print("\rRetrieval performance (imgs/s):", 38 | len(dataset) / (time.time() - start), end="") 39 | 40 | print(f"\nWritten classification results into {out_file_name}") 41 | -------------------------------------------------------------------------------- /examples/loaders_101/CocoDataPytorch.py: -------------------------------------------------------------------------------- 1 | ../CocoDataPytorch.py -------------------------------------------------------------------------------- /examples/loading_with_models/add_video_model.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from aperturedb.DataModels import VideoDataModel, ClipDataModel, DescriptorDataModel, DescriptorSetDataModel 3 | from aperturedb.CommonLibrary import create_connector, execute_query 4 | from aperturedb.Query import generate_add_query 5 | from aperturedb.Query import RangeType 6 | import json 7 | 8 | # In aperturedb we have Videos, Video Clips and Embeddings(aka Descriptors) 9 | #recognized as first class objects. 10 | # Note : Video has multiple Clips, and each Clip has an embedding. 11 | 12 | # In aperturedb.datamodel, we already define datamodels for Videos, Clips and Embeddings 13 | # Now Define the data models for the "association" of Video, Video Clips, and Embeddings 14 | 15 | # Video clip -> Embedding. 
16 | 17 | 18 | class ClipEmbeddingModel(ClipDataModel): 19 | embedding: DescriptorDataModel 20 | 21 | # Video -> Video Clips 22 | 23 | 24 | class VideoClipsModel(VideoDataModel): 25 | title: str 26 | description: str 27 | clips: List[ClipEmbeddingModel] = [] 28 | 29 | 30 | # Function to create a connected Video object model. 31 | def save_video_details_to_aperturedb(URL: str, clips, collection): 32 | video = VideoClipsModel(url=URL, title="Ecommerce v2.5", 33 | description="Ecommerce v2.5 video with clips by Marengo26") 34 | # Use the embeddings to create the video clips, and add them to the video object 35 | for clip in clips: 36 | video.clips.append(ClipEmbeddingModel( 37 | range_type=RangeType.TIME, 38 | start=clip['start_offset_sec'], 39 | stop=clip['end_offset_sec'], 40 | embedding=DescriptorDataModel( 41 | # The corresponding descriptor to the Video Clip. 42 | vector=clip['embedding'], set=collection) 43 | )) 44 | return video 45 | 46 | 47 | video_url = "https://storage.googleapis.com/ad-demos-datasets/videos/Ecommerce%20v2.5.mp4" 48 | 49 | clips = None 50 | with open("video_clips.json", "r") as f: 51 | clips = json.load(f) 52 | 53 | client = create_connector() 54 | 55 | # Create a descriptor set 56 | # DS is a search space for descriptors added to it (some times called collections) 57 | # https://docs.aperturedata.io/HowToGuides/Advanced/similarity_search#descriptorsets-and-descriptors 58 | collection = DescriptorSetDataModel( 59 | name="marengo26", dimensions=len(clips[0]['embedding'])) 60 | q, blobs, c = generate_add_query(collection) 61 | result, response, blobs = execute_query(query=q, blobs=blobs, client=client) 62 | print(f"{result=}, {response=}") 63 | 64 | # Create a video object, with clips, and embeddings 65 | video = save_video_details_to_aperturedb(video_url, clips, collection) 66 | q, blobs, c = generate_add_query(video) 67 | result, response, blobs = execute_query(query=q, blobs=blobs, client=client) 68 | print(f"{result=}, {response=}") 69 | -------------------------------------------------------------------------------- /examples/loading_with_models/find_roi.py: -------------------------------------------------------------------------------- 1 | import json 2 | from aperturedb.Descriptors import Descriptors 3 | from aperturedb.CommonLibrary import create_connector 4 | from aperturedb.Query import ObjectType 5 | 6 | client = create_connector() 7 | 8 | with open("text_embedding.json", "r") as f: 9 | # Load the embeddings from the json file. Look at get_tl_embedding.py for more details 10 | # on how it was generated. 11 | embeddings = json.load(f) 12 | 13 | # We will search from a set of descriptors in the DB called "marengo26". 14 | descriptorset = "marengo26" 15 | 16 | # Find similar descriptors to the text_embedding in the descriptorset. 17 | descriptors = Descriptors(client) 18 | descriptors.find_similar( 19 | descriptorset, 20 | embeddings["text_embedding"], 21 | k_neighbors=3, 22 | distances=True) 23 | 24 | # Find connected clips to the descriptors. 25 | clip_descriptors = descriptors.get_connected_entities(ObjectType.CLIP) 26 | 27 | # Show the metadata of the clips. 
28 | for clips in clip_descriptors: 29 | for clip in clips: 30 | print(clip) 31 | print("-----") 32 | -------------------------------------------------------------------------------- /examples/loading_with_models/get_tl_embeddings.py: -------------------------------------------------------------------------------- 1 | import json 2 | from twelvelabs import TwelveLabs 3 | from twelvelabs.models.embed import EmbeddingsTask 4 | 5 | # Initialize the Twelve Labs client 6 | twelvelabs_client = TwelveLabs(api_key=TL_API_KEY) 7 | 8 | 9 | def generate_embedding(video_url): 10 | # Create an embedding task 11 | task = twelvelabs_client.embed.task.create( 12 | engine_name="Marengo-retrieval-2.6", 13 | video_url=video_url 14 | ) 15 | print( 16 | f"Created task: id={task.id} engine_name={task.engine_name} status={task.status}") 17 | 18 | # Define a callback function to monitor task progress 19 | def on_task_update(task: EmbeddingsTask): 20 | print(f" Status={task.status}") 21 | 22 | # Wait for the task to complete 23 | status = task.wait_for_done( 24 | sleep_interval=2, 25 | callback=on_task_update 26 | ) 27 | print(f"Embedding done: {status}") 28 | 29 | # Retrieve the task result 30 | task_result = twelvelabs_client.embed.task.retrieve(task.id) 31 | 32 | # Extract and return the embeddings 33 | embeddings = [] 34 | for v in task_result.video_embeddings: 35 | embeddings.append({ 36 | 'embedding': v.embedding.float, 37 | 'start_offset_sec': v.start_offset_sec, 38 | 'end_offset_sec': v.end_offset_sec, 39 | 'embedding_scope': v.embedding_scope 40 | }) 41 | 42 | return embeddings, task_result 43 | 44 | 45 | def generate_text_embeddings(text: str): 46 | text_embedding = twelvelabs_client.embed.create( 47 | engine_name="Marengo-retrieval-2.6", 48 | text=text, 49 | text_truncate="none") 50 | 51 | return text_embedding 52 | 53 | 54 | # Example usage 55 | video_url = "https://storage.googleapis.com/ad-demos-datasets/videos/Ecommerce%20v2.5.mp4" 56 | 57 | # Generate embeddings for the video 58 | embeddings, task_result = generate_embedding(video_url) 59 | 60 | print(f"Generated {len(embeddings)} embeddings for the video") 61 | for i, emb in enumerate(embeddings): 62 | print(f"Embedding {i+1}:") 63 | print(f" Scope: {emb['embedding_scope']}") 64 | print( 65 | f" Time range: {emb['start_offset_sec']} - {emb['end_offset_sec']} seconds") 66 | print(f" Embedding vector (first 5 values): {emb['embedding'][:5]}") 67 | print() 68 | 69 | 70 | with open("embeddings.txt", "w") as f: 71 | f.write(json.dumps(embeddings, indent=2)) 72 | 73 | text_embedding, result = generate_text_embeddings( 74 | "Show me the part which has lot of outfits being displayed" 75 | ) 76 | 77 | with open("text_embedding.json", "w") as f: 78 | f.write(json.dumps(str(text_embedding), indent=2)) 79 | -------------------------------------------------------------------------------- /examples/rest_api/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |

 8 |         
AddImage. Select a file to continue
9 |
10 | 11 | 12 | 13 |
14 | 15 | 16 | -------------------------------------------------------------------------------- /examples/rest_api/rest_api.js: -------------------------------------------------------------------------------- 1 | const request = (query, blobs, handler, sessionToken) => { 2 | apiURL = "https://coco.datasets.aperturedata.io/api" 3 | const formData = new FormData(); 4 | formData.append('query', JSON.stringify(query)); 5 | displayContent(query, response=false); 6 | 7 | blobs.forEach(element => { 8 | formData.append('blobs', element); 9 | }); 10 | 11 | let headers = null; 12 | if (sessionToken != null){ 13 | console.log(`setting session token ${sessionToken}`); 14 | headers = { 15 | "Authorization": `Bearer ${sessionToken}` 16 | } 17 | } 18 | 19 | axios.post( 20 | url=apiURL, 21 | data=formData, { 22 | headers: headers 23 | }).then((response)=>{ 24 | handler(response.data) 25 | }) 26 | } 27 | 28 | const displayContent = (payload, response=true) => { 29 | var tag = document.createElement("p"); 30 | var text = JSON.stringify(payload, undefined, 4); 31 | var element = document.getElementById("output"); 32 | var br = document.createElement("hr"); 33 | prefix = response ? "<<<<<<< Response" : "Request >>>>>>>"; 34 | tag.innerHTML = `${prefix}\r\n${text}`; 35 | element.appendChild(tag); 36 | element.appendChild(br); 37 | } 38 | 39 | run_requests = () => { 40 | //Get a refresh token. 41 | auth = [{ 42 | "Authenticate": { 43 | "username": "admin", 44 | "password": "admin" 45 | } 46 | }] 47 | request(query = auth, blobs = [], handler = (data)=>{ 48 | authData = data["json"]; 49 | // console.log(authData[0]); 50 | displayContent(authData); 51 | sessionToken = authData[0].Authenticate.session_token; 52 | 53 | 54 | //List images 55 | listQuery = [{ 56 | "FindImage": { 57 | "blobs": false, 58 | "uniqueids": true, 59 | "results" : { 60 | "limit": 10 61 | } 62 | } 63 | }] 64 | request(query = listQuery, blobs = [], handler = (data) => { 65 | response = data["json"]; 66 | displayContent(response); 67 | 68 | //Find an image 69 | findQuery = [{ 70 | "FindImage": { 71 | "constraints": { 72 | "_uniqueid": ["==", response[0].FindImage.entities[0]._uniqueid] 73 | }, 74 | "results": { 75 | "all_properties": true 76 | } 77 | } 78 | }] 79 | request(query = findQuery, blobs = [], handler = (data) => { 80 | response = data["json"]; 81 | console.log(data); 82 | displayContent(response); 83 | const url = `data:image/jpeg;base64,${data["blobs"][0]}`; 84 | fetch(url) 85 | .then(res=>res.blob()) 86 | .then(blob=>{ 87 | var image = document.createElement('img'); 88 | console.log(blob); 89 | image.src = window.webkitURL.createObjectURL(blob); 90 | var element = document.getElementById("output"); 91 | element.appendChild(image); 92 | }); 93 | }, sessionToken = sessionToken) 94 | 95 | 96 | }, sessionToken=sessionToken) 97 | 98 | sessionStorage.setItem("session_token", sessionToken); 99 | }) 100 | 101 | 102 | 103 | } 104 | 105 | const addImage = (event) => { 106 | event.preventDefault(); 107 | query = [{ 108 | "AddImage": { 109 | "properties": { 110 | "rest_api_example_id": 123456789 111 | } 112 | } 113 | }]; 114 | const file = document.getElementById("fileupload").files[0] 115 | request(query = query, blobs = [file], (data)=>{ 116 | response = data["json"]; 117 | displayContent(response); 118 | }, sessionToken = sessionStorage.getItem("session_token")); 119 | 120 | } 121 | 122 | window.addEventListener("load", (event)=>{ 123 | console.log("hello world"); 124 | const form = document.getElementById("addimage"); 125 | 
form.addEventListener('submit', addImage); 126 | }) 127 | -------------------------------------------------------------------------------- /examples/rest_api/rest_api.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import argparse 3 | import json 4 | import os 5 | from aperturedb.CommonLibrary import create_connector 6 | from aperturedb.Connector import Connector 7 | 8 | client: Connector = create_connector() 9 | 10 | URL = "https://" + client.config.host + '/api' 11 | 12 | VERIFY_SSL = True 13 | 14 | 15 | def parse_auth(res): 16 | 17 | res = json.loads(res)["json"] 18 | print(json.dumps(res, indent=4, sort_keys=False)) 19 | 20 | session_token = res[0]["Authenticate"]["session_token"] 21 | refresh_token = res[0]["Authenticate"]["refresh_token"] 22 | return session_token, refresh_token 23 | 24 | 25 | def auth(): 26 | 27 | query = [{ 28 | "Authenticate": { 29 | "username": client.config.username, 30 | "password": client.config.password, 31 | } 32 | }] 33 | 34 | # Authenticate 35 | response = requests.post(URL, 36 | files = [('query', (None, json.dumps(query)))], 37 | verify = VERIFY_SSL) 38 | 39 | # print(response.status_code) 40 | # print(response.text) 41 | 42 | return parse_auth(response.text) 43 | 44 | 45 | def query_api(query, st, files_upload=[]): 46 | 47 | files = [ 48 | ('query', (None, json.dumps(query))), 49 | ] 50 | 51 | for file in files_upload: 52 | instream = open(file, 'rb') 53 | files.append( 54 | ('blobs', (os.path.basename(file), instream, 'image/jpeg'))) 55 | 56 | response = requests.post(URL, 57 | headers = {'Authorization': "Bearer " + st}, 58 | files = files, 59 | verify = VERIFY_SSL) 60 | 61 | # Parse response: 62 | try: 63 | json_response = json.loads(response.text) 64 | response = json_response["json"] 65 | blobs = json_response["blobs"] 66 | except: 67 | print("Error with response:") 68 | print(response.status_code) 69 | print(response.text) 70 | response = "error!" 
71 | blobs = [] 72 | 73 | return response, blobs 74 | 75 | 76 | def get_status(st): 77 | 78 | query = [{ 79 | "GetStatus": {} 80 | }] 81 | 82 | return query_api(query, st) 83 | 84 | 85 | def add_image_by_id(st, id): 86 | 87 | query = [{ 88 | "AddImage": { 89 | "properties": { 90 | "rest_api_example_id": id 91 | } 92 | } 93 | }] 94 | 95 | return query_api(query, st, files_upload=["songbird.jpg"]) 96 | 97 | 98 | def get_image_by_id(st, id): 99 | 100 | query = [{ 101 | "FindImage": { 102 | "constraints": { 103 | "_uniqueid": ["==", id] 104 | }, 105 | "results": { 106 | "all_properties": True 107 | } 108 | } 109 | }] 110 | 111 | return query_api(query, st) 112 | 113 | 114 | def list_images(st): 115 | 116 | query = [{ 117 | "FindImage": { 118 | "blobs": False, 119 | "uniqueids": True 120 | } 121 | }] 122 | 123 | return query_api(query, st) 124 | 125 | 126 | def main(params): 127 | 128 | VERIFY_SSL = params.verify_ssl 129 | 130 | print("-" * 80) 131 | print("Authentication:") 132 | session_token, refresh_token = auth() 133 | 134 | # Print DB Status 135 | # get_status(session_token) 136 | 137 | # ---------------------- 138 | print("-" * 80) 139 | print("List Images:") 140 | r, blobs = list_images(session_token) 141 | print("Response:") 142 | print(json.dumps(r, indent=4, sort_keys=False)) 143 | img_id = r[0]["FindImage"]["entities"][0]["_uniqueid"] 144 | 145 | # ---------------------- 146 | print("-" * 80) 147 | print("Find image by id:") 148 | r, blobs = get_image_by_id(session_token, img_id) 149 | 150 | print("Response:") 151 | print(json.dumps(r, indent=4, sort_keys=False)) 152 | 153 | print("Returned images: {}".format(len(blobs))) 154 | 155 | # Base 64 encoded images 156 | for img in blobs: 157 | 158 | print("Image size (base64 enconded): {}".format(len(img))) 159 | 160 | # ---------------------- 161 | print("-" * 80) 162 | print("Add image by id:") 163 | r, blobs = add_image_by_id(session_token, 123456789) 164 | 165 | print("Response:") 166 | print(json.dumps(r, indent=4, sort_keys=False)) 167 | 168 | # ---------------------- 169 | 170 | 171 | def get_args(): 172 | obj = argparse.ArgumentParser() 173 | 174 | obj.add_argument('-verify_ssl', type=bool, default=True) 175 | 176 | params = obj.parse_args() 177 | 178 | return params 179 | 180 | 181 | if __name__ == "__main__": 182 | args = get_args() 183 | main(args) 184 | -------------------------------------------------------------------------------- /examples/rest_api/songbird.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aperture-data/aperturedb-python/a58fe1eab4c78ae92a6b03ab6d5c9e5ed1f0d62d/examples/rest_api/songbird.jpg -------------------------------------------------------------------------------- /examples/similarity_search/CelebADataKaggle.py: -------------------------------------------------------------------------------- 1 | ../CelebADataKaggle.py -------------------------------------------------------------------------------- /examples/similarity_search/add_faces.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from aperturedb.ParallelLoader import ParallelLoader 3 | from CelebADataKaggle import CelebADataKaggle 4 | from aperturedb.transformers.facenet_pytorch_embeddings import FacenetPyTorchEmbeddings 5 | from aperturedb.transformers.common_properties import CommonProperties 6 | from aperturedb.transformers.image_properties import ImageProperties 7 | from aperturedb.CommonLibrary import 
create_connector 8 | from aperturedb.Utils import Utils 9 | 10 | search_set_name = "similar_celebreties" 11 | 12 | 13 | def main(params): 14 | utils = Utils(create_connector()) 15 | utils.remove_descriptorset(search_set_name) 16 | 17 | dataset = CelebADataKaggle() 18 | 19 | # Here's a pipeline that adds extra properties to the celebA dataset 20 | dataset = CommonProperties( 21 | dataset, 22 | adb_data_source="kaggle-celebA", 23 | adb_main_object="Face") 24 | 25 | # some useful properties for the images 26 | dataset = ImageProperties(dataset) 27 | 28 | # Add the embeddings generated through facenet. 29 | dataset = FacenetPyTorchEmbeddings(dataset) 30 | 31 | # Limit the number of images to ingest 32 | dataset = dataset[:params.images_count] 33 | print(len(dataset)) 34 | 35 | loader = ParallelLoader(create_connector()) 36 | loader.ingest(dataset, stats=True) 37 | 38 | 39 | def get_args(): 40 | parser = argparse.ArgumentParser() 41 | parser.add_argument('-images_count', type=int, required=True, 42 | help="The number of images to ingest into aperturedb") 43 | return parser.parse_args() 44 | 45 | 46 | if __name__ == "__main__": 47 | main(get_args()) 48 | -------------------------------------------------------------------------------- /examples/similarity_search/bruce-lee.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aperture-data/aperturedb-python/a58fe1eab4c78ae92a6b03ab6d5c9e5ed1f0d62d/examples/similarity_search/bruce-lee.jpg -------------------------------------------------------------------------------- /examples/similarity_search/taylor-swift.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aperture-data/aperturedb-python/a58fe1eab4c78ae92a6b03ab6d5c9e5ed1f0d62d/examples/similarity_search/taylor-swift.jpg -------------------------------------------------------------------------------- /github-release.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | source $(dirname "$0")/version.sh 4 | 5 | # Set default version to develop 6 | BUILD_VERSION=develop 7 | 8 | # Trigger read version 9 | read_version 10 | echo "Build version: $BUILD_VERSION" 11 | 12 | create_release() { 13 | user="aperture-data" 14 | repo="aperturedb-python" 15 | token=$TOKEN 16 | tag="v$BUILD_VERSION" 17 | 18 | command="curl -s -o release.json -w '%{http_code}' \ 19 | --request POST \ 20 | --header 'Accept: application/vnd.github+json' \ 21 | --header 'Authorization: Bearer ${token}' \ 22 | --header 'X-GitHub-Api-Version: 2022-11-28' \ 23 | --data '{\"tag_name\": \"${tag}\", \"name\": \"${tag}\", \"body\":\"Release ${tag}\"}' \ 24 | https://api.github.com/repos/$user/$repo/releases" 25 | http_code=`eval $command` 26 | if [ $http_code == "201" ]; then 27 | echo "created release:" 28 | cat release.json 29 | else 30 | echo "create release failed with code '$http_code':" 31 | cat release.json 32 | echo "command:" 33 | echo $command 34 | return 1 35 | fi 36 | } 37 | 38 | create_release -------------------------------------------------------------------------------- /publish.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | echo "Building aperturedb" 4 | rm -rf build/ dist/ vdms.egg-info/ 5 | 6 | docker build --no-cache -t CI/twine -f docker/twine/Dockerfile . 
7 | echo "Uploading aperturedb" 8 | 9 | docker rm -f publisher || true 10 | docker run --rm --name publisher \ 11 | -e "TWINE_USERNAME=${TWINE_USERNAME}" \ 12 | -e "TWINE_PASSWORD=${TWINE_PASSWORD}" \ 13 | -v ./:/publish \ 14 | CI/twine bash -c "cd /publish && python -m build && twine upload --skip-existing --verbose dist/*" 15 | 16 | RELEASE_IMAGE="aperturedata/aperturedb-python:latest" 17 | source version.sh && read_version 18 | echo "Building image ${RELEASE_IMAGE}" 19 | docker build --no-cache -t ${RELEASE_IMAGE} \ 20 | --build-arg="VERSION=${BUILD_VERSION}" -f docker/release/Dockerfile . 21 | docker push ${RELEASE_IMAGE} 22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "aperturedb" 3 | dynamic = ["version"] 4 | description = "ApertureDB Python SDK" 5 | 6 | readme = "README.md" 7 | requires-python = ">=3.8" 8 | license = {file = "LICENSE"} 9 | keywords = ["aperturedb", "graph", "database", 10 | "image", "video", "metadata", "search", "indexing"] 11 | 12 | authors = [ 13 | {name = "ApertureData Engineering", email = "team@aperturedata.io"} 14 | ] 15 | 16 | dependencies = [ 17 | # Pin to the bridge version. 18 | # https://github.com/tensorflow/tensorflow/issues/60320 19 | 'protobuf >=3.20.3,<6.0.0', 20 | #Folllowing is needed parallel loaders, and basic things for 21 | # making the notebooks. 22 | 'requests', 'boto3', 23 | # https://github.com/Kaggle/kaggle-api/issues/611 24 | 'numpy<2; python_version<"3.9.0"', 'numpy', 'distributed', 25 | 'matplotlib', 'pandas', 'kaggle!=1.6.15', 'google-cloud-storage', 26 | 'ipython', 'dask[complete]', 'ipywidgets', 'pydantic>=2.6.0', 'devtools', 'typer', 27 | "opencv-python-headless", 28 | # Pinning this to resolve test errors temporarily 29 | 'ipywidgets==8.0.4', 30 | 'keepalive-socket==0.0.1', 31 | 'graphviz==0.20.2', 32 | "python-dotenv", 33 | ] 34 | 35 | [tool.setuptools.package-dir] 36 | aperturedb = "aperturedb" 37 | 38 | [project.urls] 39 | "Homepage" = "https://github.com/aperture-data/aperturedb-python" 40 | "Bug Reports" = "https://github.com/aperture-data/aperturedb-python/issues" 41 | 42 | [project.optional-dependencies] 43 | # This is used when we build the docker image for notebook 44 | notebook = [ 45 | "torch", 46 | "torchvision", 47 | "tensorflow", 48 | "facenet-pytorch", 49 | ] 50 | # User install requirements, guaranteed to be pip installable 51 | complete = [ 52 | "torch", 53 | "torchvision", 54 | "tensorflow", 55 | "facenet-pytorch", 56 | ] 57 | # Dev install requirements, bleeding edge, will break CI. 58 | dev = [ 59 | "torch", 60 | "torchvision", 61 | "tensorflow", 62 | "facenet-pytorch", 63 | "coverage", 64 | "autopep8", 65 | "pre-commit", 66 | "pytest", 67 | "build", 68 | "fuse-python ; platform_system == 'Linux'", 69 | "rdflib", 70 | ] 71 | 72 | # The following would provide a command line executable called `sample` 73 | # which executes the function `main` from this package when invoked. 
74 | [project.scripts] # Optional 75 | adb = "aperturedb.cli.adb:app" 76 | 77 | [build-system] 78 | # These are the assumed default build requirements from pip: 79 | # https://pip.pypa.io/en/stable/reference/pip/#pep-517-and-518-support 80 | requires = ["setuptools>=61.0", "wheel"] 81 | build-backend = "setuptools.build_meta" 82 | 83 | [tool.setuptools.dynamic] 84 | version = {attr = "aperturedb.__version__"} 85 | -------------------------------------------------------------------------------- /tag.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD) 6 | if [ -z "$BRANCH_NAME" ] 7 | then 8 | echo "This is on a merge branch. Will not continue" 9 | exit 0 10 | fi 11 | 12 | source $(dirname "$0")/version.sh 13 | 14 | # Trigger read version 15 | read_version 16 | echo "Build version: $BUILD_VERSION" 17 | 18 | git config --local user.name "github-actions[bot]" 19 | git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" 20 | git tag "v$BUILD_VERSION" $TAG_BASE 21 | git push origin "v$BUILD_VERSION" 22 | -------------------------------------------------------------------------------- /test/.coveragerc: -------------------------------------------------------------------------------- 1 | # .coveragerc to control coverage.py 2 | [run] 3 | branch = True 4 | source = ../aperturedb 5 | 6 | -------------------------------------------------------------------------------- /test/.dockerignore: -------------------------------------------------------------------------------- 1 | aperturedb/ 2 | input/ 3 | notebooks/ 4 | kaggleds/ 5 | __pycache__/ 6 | .pytest_cache/ 7 | -------------------------------------------------------------------------------- /test/.env: -------------------------------------------------------------------------------- 1 | ADB_REPO=${ADB_REPO:-aperturedata/aperturedb-community} 2 | ADB_TAG=${ADB_TAG:-latest} 3 | LENZ_REPO=${LENZ_REPO:-aperturedata/lenz} 4 | LENZ_TAG=${LENZ_TAG:-latest} 5 | RUNNER_NAME=${RUNNER_NAME:-default} 6 | GATEWAY=${GATEWAY:-0.0.0.0} 7 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aperture-data/aperturedb-python/a58fe1eab4c78ae92a6b03ab6d5c9e5ed1f0d62d/test/__init__.py -------------------------------------------------------------------------------- /test/adb_timing_tests.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | if __name__ == '__main__': 4 | for command in [ 5 | "adb config create aperturedb1 --host aperturedb --port 5555 --no-interactive --overwrite", 6 | "adb config create aperturedb2 --host aperturedb --port 5555 --no-interactive --overwrite", 7 | "adb config ls", 8 | "adb config activate aperturedb2", 9 | 10 | ]: 11 | print(command) 12 | start = datetime.now() 13 | os.system(command) 14 | diff = datetime.now() - start 15 | print(diff) 16 | assert diff.total_seconds() <= 0.9, f"Command {command} took too long" 17 | -------------------------------------------------------------------------------- /test/coverage/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx:alpine 2 | COPY output/ /usr/share/nginx/html 3 | -------------------------------------------------------------------------------- 
/test/dbinfo.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # This file contains information on how to access the server 4 | 5 | GATEWAY = os.getenv("GATEWAY", "localhost") 6 | 7 | DB_TCP_HOST = GATEWAY 8 | DB_REST_HOST = GATEWAY 9 | DB_TCP_PORT = 55556 10 | DB_REST_PORT = 8087 11 | DB_USER = "admin" 12 | DB_PASSWORD = "admin" 13 | -------------------------------------------------------------------------------- /test/docker-compose.yml: -------------------------------------------------------------------------------- 1 | name: $RUNNER_NAME 2 | 3 | services: 4 | ca: 5 | image: alpine/openssl 6 | restart: on-failure 7 | command: req -x509 -newkey rsa:4096 -days 3650 -nodes -keyout /cert/tls.key -out /cert/tls.crt -subj "/C=US/O=ApertureData Inc./CN=localhost" 8 | volumes: 9 | - ./aperturedb/certificate:/cert 10 | 11 | lenz: 12 | depends_on: 13 | ca: 14 | condition: service_completed_successfully 15 | aperturedb: 16 | condition: service_started 17 | image: $LENZ_REPO:$LENZ_TAG 18 | ports: 19 | - $GATEWAY:55556:55551 20 | restart: always 21 | environment: 22 | LNZ_HEALTH_PORT: 58085 23 | LNZ_TCP_PORT: 55551 24 | LNZ_HTTP_PORT: 8080 25 | LNZ_ADB_BACKENDS: '["aperturedb:55553"]' 26 | LNZ_REPLICAS: 1 27 | LNZ_ADB_MAX_CONCURRENCY: 48 28 | LNZ_FORCE_SSL: false 29 | LNZ_CERTIFICATE_PATH: /etc/lenz/certificate/tls.crt 30 | LNZ_PRIVATE_KEY_PATH: /etc/lenz/certificate/tls.key 31 | volumes: 32 | - ./aperturedb/certificate:/etc/lenz/certificate 33 | 34 | aperturedb: 35 | image: $ADB_REPO:$ADB_TAG 36 | volumes: 37 | - ./aperturedb/db_$RUNNER_NAME:/aperturedb/db 38 | - ./aperturedb/logs:/aperturedb/logs 39 | restart: always 40 | environment: 41 | ADB_KVGD_DB_SIZE: "204800" 42 | ADB_LOG_PATH: "logs" 43 | ADB_ENABLE_DEBUG: 1 44 | ADB_MASTER_KEY: "admin" 45 | ADB_PORT: 55553 46 | ADB_FORCE_SSL: false 47 | 48 | webui: 49 | image: aperturedata/aperturedata-platform-web-private:latest 50 | restart: always 51 | 52 | nginx: 53 | depends_on: 54 | ca: 55 | condition: service_completed_successfully 56 | image: nginx 57 | restart: always 58 | ports: 59 | - $GATEWAY:8087:80 60 | - $GATEWAY:8443:443 61 | configs: 62 | - source: nginx.conf 63 | target: /etc/nginx/conf.d/default.conf 64 | volumes: 65 | - ./aperturedb/certificate:/etc/nginx/certificate 66 | 67 | configs: 68 | nginx.conf: 69 | content: | 70 | server { 71 | listen 80; 72 | listen 443 ssl; 73 | client_max_body_size 256m; 74 | ssl_certificate /etc/nginx/certificate/tls.crt; 75 | ssl_certificate_key /etc/nginx/certificate/tls.key; 76 | location / { 77 | proxy_pass http://webui; 78 | } 79 | location /api/ { 80 | proxy_pass http://lenz:8080; 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /test/download_images.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | from aperturedb import ImageDownloader 4 | 5 | 6 | def main(params): 7 | 8 | downloader = ImageDownloader.ImageDownloader( 9 | check_if_present=True, n_download_retries=2) 10 | downloader.batched_run(ImageDownloader.ImageDownloaderCSV(params.in_file), 11 | numthreads=32, 12 | batchsize=1, 13 | stats=True) 14 | return downloader.error_counter == 0 15 | 16 | 17 | def get_args(): 18 | obj = argparse.ArgumentParser() 19 | 20 | # Run Config 21 | obj.add_argument('-in_file', type=str, default="input/url_images.adb.csv") 22 | 23 | params = obj.parse_args() 24 | 25 | return params 26 | 27 | 28 | if __name__ == "__main__": 29 | args =
get_args() 30 | sys.exit(0 if main(args) else 1) 31 | -------------------------------------------------------------------------------- /test/get_10_faces_with_annotations.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "FindImage" : { 3 | "blobs": false, 4 | "constraints": { 5 | "Bald": ["==", null] 6 | }, 7 | "results" : { 8 | "list": ["_uniqueid"], 9 | "limit": 10 10 | } 11 | }} 12 | ] 13 | -------------------------------------------------------------------------------- /test/get_10_faces_with_optional_annotations.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "FindImage" : { 3 | "blobs": false, 4 | "constraints": { 5 | "any": { 6 | "Bald": ["!=", null] 7 | } 8 | }, 9 | "results" : { 10 | "list": ["_uniqueid"], 11 | "limit": 10 12 | } 13 | }}, 14 | { "FindImage" : { 15 | "blobs": false, 16 | "constraints": { 17 | "any": { 18 | "Bald": ["==", null] 19 | } 20 | }, 21 | "results" : { 22 | "list": ["_uniqueid"], 23 | "limit": 10 24 | } 25 | }} 26 | ] 27 | -------------------------------------------------------------------------------- /test/get_10_image_uniqueids.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "FindImage" : { 3 | "blobs": false, 4 | "results" : { 5 | "list": ["_uniqueid"], 6 | "limit": 10 7 | } 8 | }} 9 | ] 10 | -------------------------------------------------------------------------------- /test/input/README.md: -------------------------------------------------------------------------------- 1 | Here goes input data for testing 2 | -------------------------------------------------------------------------------- /test/input/sample_gs_urls: -------------------------------------------------------------------------------- 1 | gs://aperturedb-testing/sample_images/1002318269_97db6e0975.jpg 2 | gs://aperturedb-testing/sample_images/10201275523_3e6ea67c7f.jpg 3 | gs://aperturedb-testing/sample_images/2297552664_1ee0e8855d.jpg 4 | gs://aperturedb-testing/sample_images/4140939180_07aeded917.jpg 5 | gs://aperturedb-testing/sample_images/4436463882_b96a3d9df9.jpg 6 | gs://aperturedb-testing/sample_images/4572998878_658b45226f.jpg 7 | gs://aperturedb-testing/sample_images/6985418911_df7747990d.jpg 8 | gs://aperturedb-testing/sample_images/7289030198_1f1ba44113.jpg 9 | gs://aperturedb-testing/sample_images/9329902958_0bc80ce58a.jpg 10 | gs://aperturedb-testing/sample_images/9506922316_c19019e38f.jpg 11 | -------------------------------------------------------------------------------- /test/input/sample_gs_video_urls: -------------------------------------------------------------------------------- 1 | gs://aperturedb-testing/sample_videos/109b799c2ec09f526dea6caabaefc53.mp4 2 | gs://aperturedb-testing/sample_videos/1dd248793c90a3e07f5ea825df27a0d7.mp4 3 | gs://aperturedb-testing/sample_videos/2e7a3ea46f4b0c12b81348bde1d45.mp4 4 | gs://aperturedb-testing/sample_videos/4ca3fb2f50eb773480acd3b8d7decee.mp4 5 | gs://aperturedb-testing/sample_videos/4ec3609ad2d97661146fd536957df14.mp4 6 | gs://aperturedb-testing/sample_videos/4fb04ae53497501c91c1f91c45dad1c7.mp4 7 | gs://aperturedb-testing/sample_videos/5116a64cc2159daebfb161d0cc3f6945.mp4 8 | gs://aperturedb-testing/sample_videos/51b71187b6cd1d6dd2aa32aa107a2f.mp4 9 | gs://aperturedb-testing/sample_videos/5573f4dd80f6c427e9f1c3d16751ad8.mp4 10 | gs://aperturedb-testing/sample_videos/5952518f7358d3d5d1c1b31d745aae3.mp4 11 | 
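The `get_10_*.json` files above are plain query arrays in the ApertureDB wire format, so they can be replayed as-is through the SDK connector. Below is a minimal sketch, intended to be run from the `test/` directory; the host, port, credentials and `use_ssl` flag are assumptions mirroring the defaults in `test/dbinfo.py` and the connection pattern in `test/test_Server.py`, and should be adapted to your deployment.

import json
from aperturedb.Connector import Connector

# Connection parameters mirror test/dbinfo.py defaults (lenz TCP port 55556);
# adjust host, port, credentials and use_ssl for your own deployment.
db = Connector(host="localhost", port=55556,
               user="admin", password="admin", use_ssl=True)

# Load one of the canned queries and send it unchanged.
with open("get_10_image_uniqueids.json") as f:
    query = json.load(f)

# Print whatever the connector returns for the FindImage command.
print(db.query(query))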
-------------------------------------------------------------------------------- /test/input/sample_http_urls: -------------------------------------------------------------------------------- 1 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/1002318269_97db6e0975.jpg 2 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/10201275523_3e6ea67c7f.jpg 3 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/2297552664_1ee0e8855d.jpg 4 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/4140939180_07aeded917.jpg 5 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/4436463882_b96a3d9df9.jpg 6 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/4572998878_658b45226f.jpg 7 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/6985418911_df7747990d.jpg 8 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/7289030198_1f1ba44113.jpg 9 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/9329902958_0bc80ce58a.jpg 10 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/9506922316_c19019e38f.jpg 11 | -------------------------------------------------------------------------------- /test/input/sample_http_video_urls: -------------------------------------------------------------------------------- 1 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/109b799c2ec09f526dea6caabaefc53.mp4 2 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/1dd248793c90a3e07f5ea825df27a0d7.mp4 3 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/2e7a3ea46f4b0c12b81348bde1d45.mp4 4 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/4ca3fb2f50eb773480acd3b8d7decee.mp4 5 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/4ec3609ad2d97661146fd536957df14.mp4 6 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/4fb04ae53497501c91c1f91c45dad1c7.mp4 7 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/5116a64cc2159daebfb161d0cc3f6945.mp4 8 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/51b71187b6cd1d6dd2aa32aa107a2f.mp4 9 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/5573f4dd80f6c427e9f1c3d16751ad8.mp4 10 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/5952518f7358d3d5d1c1b31d745aae3.mp4 11 | -------------------------------------------------------------------------------- /test/input/sample_s3_urls: -------------------------------------------------------------------------------- 1 | s3://aperturedata-public/sample_images/4436463882_b96a3d9df9.jpg 2 | s3://aperturedata-public/sample_images/9329902958_0bc80ce58a.jpg 3 | s3://aperturedata-public/sample_images/2297552664_1ee0e8855d.jpg 4 | s3://aperturedata-public/sample_images/4140939180_07aeded917.jpg 5 | s3://aperturedata-public/sample_images/6985418911_df7747990d.jpg 6 | s3://aperturedata-public/sample_images/10201275523_3e6ea67c7f.jpg 7 | s3://aperturedata-public/sample_images/7289030198_1f1ba44113.jpg 8 | s3://aperturedata-public/sample_images/1002318269_97db6e0975.jpg 9 | s3://aperturedata-public/sample_images/9506922316_c19019e38f.jpg 10 | s3://aperturedata-public/sample_images/4572998878_658b45226f.jpg 11 | -------------------------------------------------------------------------------- /test/input/sample_s3_video_urls: 
-------------------------------------------------------------------------------- 1 | s3://aperturedata-public/sample_videos/109b799c2ec09f526dea6caabaefc53.mp4 2 | s3://aperturedata-public/sample_videos/1dd248793c90a3e07f5ea825df27a0d7.mp4 3 | s3://aperturedata-public/sample_videos/2e7a3ea46f4b0c12b81348bde1d45.mp4 4 | s3://aperturedata-public/sample_videos/4ca3fb2f50eb773480acd3b8d7decee.mp4 5 | s3://aperturedata-public/sample_videos/4ec3609ad2d97661146fd536957df14.mp4 6 | s3://aperturedata-public/sample_videos/4fb04ae53497501c91c1f91c45dad1c7.mp4 7 | s3://aperturedata-public/sample_videos/5116a64cc2159daebfb161d0cc3f6945.mp4 8 | s3://aperturedata-public/sample_videos/51b71187b6cd1d6dd2aa32aa107a2f.mp4 9 | s3://aperturedata-public/sample_videos/5573f4dd80f6c427e9f1c3d16751ad8.mp4 10 | s3://aperturedata-public/sample_videos/5952518f7358d3d5d1c1b31d745aae3.mp4 11 | -------------------------------------------------------------------------------- /test/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | log_format = %(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s 3 | log_date_format = %Y-%m-%d %H:%M:%S 4 | pythonpath = . 5 | markers = 6 | slow: slow running test 7 | external_network: mark a test using external network 8 | remote_credentials: mark a test as requiring remote authentication ( that isn't included in checkout ) 9 | kaggle: uses kaggle 10 | http: uses HTTP/HTTPS interface 11 | tcp: uses TCP/REST interface 12 | dask: uses DASK multi-processing library 13 | -------------------------------------------------------------------------------- /test/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -u 4 | set -e 5 | 6 | mkdir -p output 7 | rm -rf output/* 8 | mkdir -p input/blobs 9 | 10 | echo "Downloading images..." 11 | python3 download_images.py 12 | RESULT=$? 13 | if [[ $RESULT != 0 ]]; then 14 | echo "Download failed." 15 | exit 1 16 | fi 17 | echo "Done downloading images." 18 | 19 | echo "Generating input files..." 20 | python3 generateInput.py 21 | echo "Done generating input files." 22 | 23 | echo "Running tests..." 24 | CREDENTIALS_FILE='/tmp/key.json' 25 | echo $GCP_SERVICE_ACCOUNT_KEY > $CREDENTIALS_FILE 26 | export GOOGLE_APPLICATION_CREDENTIALS=$CREDENTIALS_FILE 27 | # capture errors 28 | set +e 29 | CLIENT_PATH="${APERTUREDB_LOG_PATH}/../client/${FILTER}" 30 | CLIENT_PATH=${CLIENT_PATH// /_} 31 | mkdir -p ${CLIENT_PATH} 32 | PROJECT=aperturedata KAGGLE_username=ci KAGGLE_key=dummy coverage run -m pytest -m "$FILTER" test_*.py -v | tee ${CLIENT_PATH}/test.log 33 | RESULT=$? 34 | cp error*.log -v ${CLIENT_PATH} 35 | 36 | if [[ $RESULT != 0 ]]; then 37 | echo "Test failed; outputting db log:" 38 | if [[ "${APERTUREDB_LOG_PATH}" != "" ]]; then 39 | 40 | BUCKET=python-ci-runs 41 | NOW=$(date -Iseconds) 42 | ARCHIVE_NAME=logs.tar.gz 43 | DESTINATION="s3://${BUCKET}/aperturedb-${NOW}-${FILTER}.tgz" 44 | tar czf ${ARCHIVE_NAME} ${APERTUREDB_LOG_PATH}/.. 45 | aws s3 cp ${ARCHIVE_NAME} $DESTINATION 46 | echo "Log output to $DESTINATION" 47 | else 48 | echo "Unable to output log, APERTUREDB_LOG_PATH not set." 49 | fi 50 | exit 1 51 | else 52 | echo "Generating coverage..." 
53 | coverage html -i --directory=output 54 | python adb_timing_tests.py 55 | fi 56 | 57 | -------------------------------------------------------------------------------- /test/run_test_container.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -u 4 | set -e 5 | 6 | function check_containers_networks(){ 7 | echo "Running containers and networks cleanup" 8 | docker ps 9 | echo "Existing networks" 10 | docker network ls 11 | } 12 | 13 | function run_aperturedb_instance(){ 14 | set -e 15 | TAG=$1 16 | #Ensure clean environment (as much as possible) 17 | RUNNER_NAME=$TAG docker compose -f docker-compose.yml down --remove-orphans 18 | docker network rm ${TAG}_host_default || true 19 | 20 | # ensure latest db 21 | docker compose pull 22 | 23 | rm -rf output 24 | mkdir -m 777 output 25 | 26 | docker network create ${TAG}_host_default 27 | GATEWAY=$(docker network inspect ${TAG}_host_default | jq -r .[0].IPAM.Config[0].Gateway) 28 | GATEWAY=$GATEWAY RUNNER_NAME=$TAG docker compose -f docker-compose.yml up -d 29 | echo "$GATEWAY" 30 | } 31 | 32 | IP_REGEX='[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}' 33 | 34 | GATEWAY_HTTP=$(run_aperturedb_instance "${RUNNER_NAME}_http" | grep $IP_REGEX ) 35 | GATEWAY_NON_HTTP=$(run_aperturedb_instance "${RUNNER_NAME}_non_http" | grep $IP_REGEX ) 36 | 37 | # The LOG_PATH and RUNNER_INFO_PATH are set to the current working directory 38 | LOG_PATH="$(pwd)/aperturedb/logs" 39 | TESTING_LOG_PATH="/aperturedb/test/server_logs" 40 | RUNNER_INFO_PATH="$(pwd)/aperturedb/logs/runner_state" 41 | 42 | check_containers_networks | tee "$RUNNER_INFO_PATH"/runner_state.log 43 | 44 | REPOSITORY="aperturedata/aperturedb-python-tests" 45 | if ! [ -z ${1+x} ] 46 | then 47 | REPOSITORY="$1" 48 | fi 49 | echo "running tests on docker image $REPOSITORY with $GATEWAY_HTTP and $GATEWAY_NON_HTTP" 50 | docker run \ 51 | -v $(pwd)/output:/aperturedata/test/output \ 52 | -v "$LOG_PATH":"${TESTING_LOG_PATH}" \ 53 | -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ 54 | -e AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION \ 55 | -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ 56 | -e GCP_SERVICE_ACCOUNT_KEY="$GCP_SERVICE_ACCOUNT_KEY" \ 57 | -e APERTUREDB_LOG_PATH="${TESTING_LOG_PATH}" \ 58 | -e GATEWAY="${GATEWAY_HTTP}" \ 59 | -e FILTER="http" \ 60 | $REPOSITORY & 61 | 62 | pid1=$! 63 | docker run \ 64 | -v $(pwd)/output:/aperturedata/test/output \ 65 | -v "$LOG_PATH":"${TESTING_LOG_PATH}" \ 66 | -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ 67 | -e AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION \ 68 | -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ 69 | -e GCP_SERVICE_ACCOUNT_KEY="$GCP_SERVICE_ACCOUNT_KEY" \ 70 | -e APERTUREDB_LOG_PATH="${TESTING_LOG_PATH}" \ 71 | -e GATEWAY="${GATEWAY_NON_HTTP}" \ 72 | -e FILTER="not http" \ 73 | $REPOSITORY & 74 | 75 | pid2=$! 76 | wait $pid1 77 | exit_code1=$? 78 | wait $pid2 79 | exit_code2=$? 
80 | 81 | if [ $exit_code1 -ne 0 ]; then 82 | echo "Tests failed for HTTP" 83 | exit $exit_code1 84 | fi 85 | if [ $exit_code2 -ne 0 ]; then 86 | echo "Tests failed for NON_HTTP" 87 | exit $exit_code2 88 | fi 89 | 90 | echo "Tests completed successfully" 91 | echo " --- Runner name: ${RUNNER_NAME} ---" 92 | check_containers_networks 93 | -------------------------------------------------------------------------------- /test/test_Key.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typer 3 | 4 | from aperturedb.Configuration import Configuration 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | key_pairs = { 9 | "WzEsMSwibG9jYWxob3N0IiwiYWRtaW4iLCJhZG1pbiJd": 10 | [1, 1, "localhost", "admin", "admin"], 11 | "WzEsMCwiMTI3LjAuMC4xOjU1NTU0IiwiWVFadVZVV2Zab0FkWjJrUUVMeFB5RnptZHJ3WXd0cjBBRGEiXQ==": 12 | [1, 0, "127.0.0.1:55554", "YQZuVUWfZoAdZ2kQELxPyFzmdrwYwtr0ADa"], 13 | "WzEsNywid29ya2Zsb3ctbG9hZGVkLWZvMWphdTN0LjAiLCJhZG1pbiIsIjEyMzRCVFFMUF8lMnR0Il0=": 14 | [1, 7, "workflow-loaded-fo1jau3t.farm0000.cloud.aperturedata.io", 15 | "admin", "1234BTQLP_%2tt"], 16 | "WzEsNSwidGVzdC0zcWpxdDZrcy40IiwiWVFadVZVV2Zab0FkWjJrUUVMeFB5RnptZHJ3WXd0cjBBRGEiXQ==": 17 | [1, 5, "test-3qjqt6ks.farm0004.cloud.aperturedata.io", 18 | "YQZuVUWfZoAdZ2kQELxPyFzmdrwYwtr0ADa"], 19 | "WzEsMiwiMTkyLjE2OC40LjEyOjU1NTU1IiwiYWRtaW4iLCJhZG1pbiJd": 20 | [1, 2, "192.168.4.12:55555", "admin", "admin"], 21 | "WzEsMywiYXBlcnR1cmVkYi5iaWdjb3JwLmlvOjE5MTgiLCJZUVp1VlVXZlpvQWRaMmtRRUx4UHlGem1kcndZd3RyMEFEYSJd": 22 | [1, 3, "aperturedb.bigcorp.io:1918", "YQZuVUWfZoAdZ2kQELxPyFzmdrwYwtr0ADa"], 23 | "WzEsNCwidGNwLTU1N2Vwbm4zLjkwOToxOTE4IiwiYWRtaW4iLCI4OTBFcE1uKyElMiRfIl0=": 24 | [1, 4, "tcp-557epnn3.farm0909.cloud.aperturedata.io:1918", 25 | "admin", "890EpMn+!%2$_"], 26 | "WzEsNiwiaHR0cC05MGpnM3pwcy4xMjo0NDMiLCJZUVp1VlVXZlpvQWRaMmtRRUx4UHlGem1kcndZd3RyMEFEYSJd": 27 | [1, 6, "http-90jg3zps.farm0012.cloud.aperturedata.io:443", 28 | "YQZuVUWfZoAdZ2kQELxPyFzmdrwYwtr0ADa"] 29 | } 30 | 31 | 32 | class TestApertureDBKey(): 33 | 34 | def test_encode_keys(self): 35 | for key, data in key_pairs.items(): 36 | logger.info(f"Testing encoding of {key}") 37 | config_type = data[1] 38 | host = data[2] 39 | username = password = token = None 40 | comp, rest, ssl = Configuration.key_type_to_config(config_type) 41 | if host.rfind(':') != -1: 42 | port = int(host.split(':')[1]) 43 | host = host.split(':')[0] 44 | else: 45 | port = Configuration.config_default_port(rest, ssl) 46 | if len(data) == 4: 47 | token = data[3] 48 | else: 49 | username = data[3] 50 | password = data[4] 51 | c = Configuration(host, port, username, password, 52 | "encoding test", use_rest=rest, use_ssl=ssl, token=token) 53 | deflated = c.deflate() 54 | assert deflated == key 55 | 56 | def test_decode_keys(self): 57 | for key, data in key_pairs.items(): 58 | logger.info(f"Testing decoding of {key}") 59 | config = Configuration.reinflate(key) 60 | config_type = data[1] 61 | host = data[2] 62 | if config_type == 0 or config_type == 4: 63 | assert not config.use_rest and not config.use_ssl 64 | if config_type == 1 or config_type == 5: 65 | assert not config.use_rest and config.use_ssl 66 | if config_type == 2 or config_type == 6: 67 | assert config.use_rest and not config.use_ssl 68 | if config_type == 3 or config_type == 7: 69 | assert config.use_rest and config.use_ssl 70 | 71 | if host.rfind(':') != -1: 72 | port = int(host.split(':')[1]) 73 | host = host.split(':')[0] 74 | assert config.port == port 75 | 76 | if 
len(data) == 4: 77 | assert config.token == "adbp_" + data[3] 78 | else: 79 | assert config.username == data[3] and config.password == data[4] 80 | 81 | assert(config.host == host) 82 | -------------------------------------------------------------------------------- /test/test_Parallel.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import random 3 | 4 | from aperturedb.Connector import Connector 5 | from aperturedb.ParallelQuery import ParallelQuery 6 | from aperturedb.Subscriptable import Subscriptable 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | # Tests for parallel which don't involve data. 11 | 12 | 13 | class GeneratorWithErrors(Subscriptable): 14 | def __init__(self, commands_per_query=1, elements=100, error_pct=.5) -> None: 15 | super().__init__() 16 | self.commands_per_query = commands_per_query 17 | self.elements = elements 18 | self.error_pct = error_pct 19 | 20 | def __len__(self): 21 | return self.elements 22 | 23 | def getitem(self, subscript): 24 | query = [] 25 | blobs = [] 26 | for i in range(self.commands_per_query): 27 | if random.randint(0, 100) <= (self.error_pct * 100): 28 | query.append({ 29 | "BadCommand": { 30 | } 31 | }) 32 | else: 33 | query.append({ 34 | "FindEntity": { 35 | "results": { 36 | "count": True 37 | } 38 | } 39 | }) 40 | 41 | return query, blobs 42 | 43 | 44 | class TestParallel(): 45 | """ 46 | These check operation of ParallelQuery 47 | """ 48 | 49 | def test_someBadQueries(self, db: Connector): 50 | """ 51 | Verifies that it handles some queries returning errors 52 | """ 53 | try: 54 | elements = 100 55 | generator = GeneratorWithErrors(elements=elements) 56 | querier = ParallelQuery(db, dry_run=False) 57 | querier.query(generator, batchsize=2, 58 | numthreads=8, 59 | stats=True) 60 | logger.info(querier.get_succeeded_commands()) 61 | assert querier.get_succeeded_commands() < elements 62 | except Exception as e: 63 | print(e) 64 | print("Failed to renew Session") 65 | assert False 66 | 67 | def test_allBadQueries(self, db: Connector): 68 | """ 69 | Verifies that it handles all queries returning errors 70 | """ 71 | try: 72 | elements = 100 73 | generator = GeneratorWithErrors(elements=elements, error_pct=1) 74 | querier = ParallelQuery(db, dry_run=False) 75 | querier.query(generator, batchsize=2, 76 | numthreads=8, 77 | stats=True) 78 | logger.info(querier.get_succeeded_commands()) 79 | assert querier.get_succeeded_commands() == 0 80 | except Exception as e: 81 | print(e) 82 | print("Failed to renew Session") 83 | assert False 84 | -------------------------------------------------------------------------------- /test/test_SPARQL.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | import subprocess 4 | import runpy 5 | import requests 6 | import shutil 7 | import pytest 8 | import numpy as np 9 | import pandas as pd 10 | import os.path as osp 11 | import tempfile 12 | from aperturedb.Query import QueryBuilder, Query 13 | from aperturedb.Entities import Entities 14 | from aperturedb.Constraints import Constraints 15 | from aperturedb.Images import Images 16 | from aperturedb.Utils import Utils 17 | from aperturedb.SPARQL import SPARQL 18 | from aperturedb.cli.ingest import from_csv, TransformerType, IngestType 19 | from aperturedb.ImageDataCSV import ImageDataCSV 20 | from aperturedb.EntityDataCSV import EntityDataCSV 21 | from aperturedb.ConnectionDataCSV import ConnectionDataCSV 22 | from 
aperturedb.DescriptorDataCSV import DescriptorDataCSV 23 | from aperturedb.ParallelLoader import ParallelLoader 24 | from aperturedb.transformers.common_properties import CommonProperties 25 | from aperturedb.transformers.image_properties import ImageProperties 26 | from aperturedb.transformers.clip_pytorch_embeddings import CLIPPyTorchEmbeddings 27 | from aperturedb.transformers.facenet_pytorch_embeddings import FacenetPyTorchEmbeddings 28 | 29 | import logging 30 | logger = logging.getLogger(__name__) 31 | 32 | 33 | @pytest.fixture 34 | def load_cookbook(utils: Utils, db): 35 | utils.remove_all_indexes() 36 | utils.remove_all_objects() 37 | 38 | temp_dir = tempfile.mkdtemp() 39 | # temp_path = Path(temp_dir) 40 | original_dir = os.getcwd() 41 | os.chdir(temp_dir) 42 | 43 | # Define the URL and file path for the script 44 | file_url = "https://raw.githubusercontent.com/aperture-data/Cookbook/refs/heads/main/scripts/convert_ingredients_adb_csv.py" 45 | file_path = Path("convert_ingredients_adb_csv.py") 46 | 47 | try: 48 | # Download the script file 49 | response = requests.get(file_url) 50 | file_path.write_text(response.text) 51 | 52 | runpy.run_path(str(file_path), run_name="__main__") 53 | 54 | data = ImageDataCSV("dishes.adb.csv") 55 | data = CLIPPyTorchEmbeddings(data, client=db) 56 | data = ImageProperties(data, client=db) 57 | data = CommonProperties(data, client=db) 58 | loader = ParallelLoader(db) 59 | loader.ingest(data, batchsize=100, stats=True) 60 | 61 | data = EntityDataCSV("ingredients.adb.csv") 62 | loader = ParallelLoader(db) 63 | loader.ingest(data, batchsize=100, stats=True) 64 | 65 | data = ConnectionDataCSV("dish_ingredients.adb.csv") 66 | loader = ParallelLoader(db) 67 | loader.ingest(data, batchsize=100, stats=True) 68 | finally: 69 | os.chdir(original_dir) 70 | 71 | 72 | # Tag the test functions that depend on the setup as external_network 73 | def pytest_collection_modifyitems(items): 74 | for item in items: 75 | if "load_cookbook" in getattr(item, "fixturenames", ()): 76 | item.add_marker("external_network") 77 | 78 | # Test functions that depends on the setup 79 | 80 | 81 | @pytest.fixture 82 | def sparql(db): 83 | sparql = SPARQL(db) 84 | print(sparql.schema) 85 | assert sparql.connections, f"No connections {sparql.schema}" 86 | assert sparql.properties, f"No properties {sparql.schema}" 87 | return sparql 88 | 89 | 90 | @pytest.mark.parametrize("description,query", [ 91 | ('Find all images with chicken and butter as ingredients', 92 | """ 93 | SELECT ?s ?caption { 94 | ?s c:HasIngredient [p:name "chicken"] , [p:name "butter"] ; 95 | p:caption ?caption . 96 | } LIMIT 10 97 | """), 98 | ('Find all images with chicken or butter as ingredients', 99 | """ 100 | SELECT ?s ?caption WHERE { 101 | VALUES ?ingredient { "chicken" "butter" } 102 | ?s c:HasIngredient [p:name ?ingredient] ; 103 | p:caption ?caption . 104 | } LIMIT 10 105 | """), 106 | ('Find the top 10 ingredients', 107 | """ 108 | SELECT (COUNT(*) AS ?count) ?ingredient WHERE { 109 | ?s c:HasIngredient [p:name ?ingredient] . 110 | } GROUP BY ?ingredient ORDER BY DESC(?count) LIMIT 10 111 | """), 112 | ('Do a descriptor search for a random image', 113 | f""" 114 | SELECT ?i ?distance ?d ?caption WHERE {{ 115 | ?d knn:similarTo [ 116 | knn:set 'ViT-B/16' ; 117 | knn:k_neighbors 20 ; 118 | knn:vector "{SPARQL.encode_descriptor(np.random.rand(512))}" ; 119 | knn:distance ?distance 120 | ] ; 121 | c:ANY ?i . # Use fake connection because we can't say c:_DescriptorConnection 122 | ?i p:caption ?caption . 
123 | }} 124 | """)]) 125 | def test_sparql(load_cookbook, sparql, query, description): 126 | results = sparql.query(query) 127 | assert results, f"No results for {description}" 128 | -------------------------------------------------------------------------------- /test/test_Server.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from aperturedb.Connector import Connector 3 | from aperturedb.ConnectorRest import ConnectorRest 4 | from aperturedb.ParallelLoader import ParallelLoader 5 | import dbinfo 6 | import pandas as pd 7 | 8 | import logging 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class TestBadResponses(): 13 | 14 | def test_Error_code_2(self, db: Connector, insert_data_from_csv, monkeypatch): 15 | count = 0 16 | original_q = db._query 17 | 18 | def test_response_half_exist(a: Connector, query, blobs): 19 | nonlocal count 20 | if "AddImage" not in query[0]: 21 | count += 1 22 | resp = original_q(query, blobs) 23 | return resp 24 | response = [] 25 | for i in range(len(query)): 26 | result = {"info": "Object Exists!", 27 | "status": 2} if i % 2 == 0 else {"status": 0} 28 | response.append({"AddImage": result}) 29 | 30 | return (response, []) 31 | monkeypatch.setattr(Connector, "_query", test_response_half_exist) 32 | monkeypatch.setattr(ParallelLoader, "get_existing_indices", lambda x: { 33 | "entity": {"_Image": {"id"}}}) 34 | data, loader = insert_data_from_csv( 35 | in_csv_file = "./input/images.adb.csv") 36 | assert loader.error_counter == 0 37 | assert loader.get_succeeded_queries() == len(data) 38 | assert loader.get_succeeded_commands() == len(data) 39 | 40 | def test_Error_code_3(self, db: Connector, insert_data_from_csv, monkeypatch): 41 | count = 0 42 | original_q = db._query 43 | 44 | def test_response_half_non_unique(a: Connector, query, blobs): 45 | nonlocal count 46 | if "AddImage" not in query[0]: 47 | count += 1 48 | resp = original_q(query, blobs) 49 | return resp 50 | response = None 51 | for i in range(len(query)): 52 | result = { 53 | 'info': 'JSON Command 1: expecting 1 but got 2', 'status': 3} 54 | response = result 55 | break 56 | 57 | return (response, []) 58 | monkeypatch.setattr(Connector, "_query", test_response_half_non_unique) 59 | monkeypatch.setattr(ConnectorRest, "_query", 60 | test_response_half_non_unique) 61 | monkeypatch.setattr(ParallelLoader, "get_existing_indices", lambda x: { 62 | "entity": {"_Image": {"id"}}}) 63 | input_data = pd.read_csv("./input/images.adb.csv") 64 | data, loader = insert_data_from_csv( 65 | in_csv_file = "./input/images.adb.csv", expected_error_count = len(input_data)) 66 | assert loader.error_counter == 0, f"Error counter: {loader.error_counter=}" 67 | assert loader.get_succeeded_queries( 68 | ) == 0, f"Queries: {loader.get_succeeded_queries()=}" 69 | assert loader.get_succeeded_commands( 70 | ) == 0, f"Commands: {loader.get_succeeded_commands()=}" 71 | 72 | def test_AuthFailure(self, monkeypatch): 73 | 74 | def failed_auth_query(conn_obj, ignored_query): 75 | # generate a response from the server which is not the expected Auth result. 76 | # _query returns the server response json and an array of blobs. 
77 | return ({"info": "Internal Server Error 42", "status": -1, "ignored": ignored_query}, []) 78 | 79 | monkeypatch.setattr(Connector, "_query", failed_auth_query) 80 | 81 | with pytest.raises(Exception) as conn_exception: 82 | db = Connector( 83 | host = dbinfo.DB_TCP_HOST, 84 | port = dbinfo.DB_TCP_PORT, 85 | user = dbinfo.DB_USER, 86 | password = dbinfo.DB_PASSWORD, 87 | use_ssl = True) 88 | db.query([{ 89 | "FindImage": { 90 | "results": { 91 | "limit": 5 92 | } 93 | } 94 | }]) 95 | 96 | assert "Unexpected response" in str(conn_exception.value) 97 | -------------------------------------------------------------------------------- /test/test_Stats.py: -------------------------------------------------------------------------------- 1 | from aperturedb.DescriptorDataCSV import DescriptorDataCSV 2 | from aperturedb.ParallelLoader import ParallelLoader 3 | from io import BytesIO, TextIOWrapper 4 | import sys 5 | 6 | # stats had some issues with displaying of computed data. 7 | # These tests reproduce the issue. 8 | # Run it with: (to avoid pytest's output) 9 | # pytest test_Stats.py -s --no-summary 10 | 11 | 12 | class TestStats(): 13 | def ingest_with_capture(self, data, db): 14 | loader = ParallelLoader(db) 15 | # setup the environment 16 | old_stdout = sys.stdout 17 | sys.stdout = TextIOWrapper(BytesIO(), sys.stdout.encoding) 18 | 19 | loader.ingest(data, batchsize=99, numthreads=31, stats=True) 20 | sys.stdout.seek(0) # jump to the start 21 | out = sys.stdout.read() # read output 22 | 23 | sys.stdout = old_stdout 24 | return out 25 | 26 | def validate_stats(self, out, assertions): 27 | for line in out.splitlines(): 28 | if ":" in line: 29 | stats = line.split(":") 30 | if len(stats) == 2: 31 | first, second = line.split(":") 32 | print(first, second) 33 | if first in assertions: 34 | assert assertions[first.strip()](second.strip()) == True, \ 35 | f"Assertion failed for '{first}' with value {second}" 36 | 37 | def test_stats_all_errors_non_equal_last_batch(self, db, utils): 38 | utils.remove_all_objects() 39 | # Try to ingest descriptors, with no descriptor set, so all queries fail 40 | data = DescriptorDataCSV( 41 | "./input/setA.adb.csv", blobs_relative_to_csv=True) 42 | out = self.ingest_with_capture(data, db) 43 | assertions = { 44 | "Total inserted elements": lambda x: float(x) == 0, 45 | "Overall insertion throughput (element/s)": lambda x: x == "NaN", 46 | } 47 | self.validate_stats(out, assertions) 48 | -------------------------------------------------------------------------------- /test/test_Success.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from aperturedb.ParallelQuery import ParallelQuery 3 | from aperturedb.Query import QueryBuilder 4 | 5 | import logging 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class TestLoaderSuccess(): 10 | def assertEqual(self, expected, actual): 11 | if expected != actual: 12 | raise AssertionError( 13 | "Expected {}, got {}".format(expected, actual)) 14 | 15 | def test_Loader(self, utils, insert_data_from_csv): 16 | # Assert that we have a clean slate to begin with. 
17 | assert utils.remove_all_indexes() 18 | assert utils.remove_all_objects() == True 19 | # initial load 20 | data, _ = insert_data_from_csv( 21 | in_csv_file = "./input/persons-exist-base.adb.csv") 22 | self.assertEqual(len(data), utils.count_entities("Person")) 23 | 24 | # default configuration does not consider object exists to be a query failure 25 | def assert_partial(loader, test_data): 26 | self.assertEqual(len(data) + len(test_data) - loader.get_objects_existed(), 27 | utils.count_entities("Person")) 28 | data, _ = insert_data_from_csv(in_csv_file = "./input/persons-some-exist.adb.csv", 29 | loader_result_lambda = assert_partial) 30 | 31 | # change to disallow object exist to qualify as success. 32 | old_status = ParallelQuery.getSuccessStatus() 33 | ParallelQuery.setSuccessStatus([0]) 34 | 35 | # Assert that we have a clean slate to begin with. 36 | assert utils.remove_all_indexes() 37 | assert utils.remove_all_objects() == True 38 | # initial load 39 | data, _ = insert_data_from_csv( 40 | in_csv_file = "./input/persons-exist-base.adb.csv") 41 | # default configuration does not consider object exists to be a query 42 | # failure 43 | data, _ = insert_data_from_csv( 44 | in_csv_file = "./input/persons-some-exist.adb.csv", 45 | expected_error_count = 3, 46 | loader_result_lambda=assert_partial) 47 | 48 | # reset success status to default 49 | ParallelQuery.setSuccessStatus(old_status) 50 | -------------------------------------------------------------------------------- /test/test_UserConvenience.py: -------------------------------------------------------------------------------- 1 | import json 2 | from types import SimpleNamespace 3 | from aperturedb.ConnectorRest import ConnectorRest 4 | from requests.sessions import Session 5 | 6 | 7 | class TestUserConvenience(): 8 | """ 9 | This class tests some undocumented features of the Python SDK. 10 | This cannot rely on dbinfo, or connect as dbinfo and common lib rely on explicit 11 | arguments. 12 | """ 13 | 14 | def test_ConnectorRest_handlesNonePort(self): 15 | """ 16 | Test that ConnectorRest can handle a None port, 17 | and will default to the correct port. 
18 | """ 19 | client = ConnectorRest(host="dummy", user="admin", password="password") 20 | assert "443" in client.url 21 | posts = 0 22 | 23 | def mock_post(self, url, headers, files, verify): 24 | nonlocal posts 25 | assert "443" in url 26 | response1 = { 27 | "json": [{"Authenticate": { 28 | "status": 0, 29 | "session_token": "x", 30 | "refresh_token": "2", 31 | "session_token_expires_in": 3600, 32 | "refresh_token_expires_in": 3600 33 | }}], 34 | "blobs": [] 35 | } 36 | 37 | r = SimpleNamespace(status_code=200, text=json.dumps(response1)) 38 | posts += 1 39 | return r 40 | old_post = Session.post 41 | Session.post = mock_post 42 | client.query("[{\"FindEntity\": {\"_ref\": 1}}]") 43 | # Ensure that the mock post was called, 1 time to authenticate, 1 time to query 44 | assert posts == 2 45 | Session.post = old_post 46 | -------------------------------------------------------------------------------- /test/test_Utils.py: -------------------------------------------------------------------------------- 1 | class TestUtils(): 2 | 3 | def test_remove_all_objects(self, utils): 4 | assert utils.remove_all_objects() == True, \ 5 | "Failed to remove all objects" 6 | 7 | def test_remove_all_indexes(self, utils): 8 | assert utils.remove_all_indexes() == True, \ 9 | "Failed to remove all indexes" 10 | 11 | def test_get_descriptorset_list(self, utils): 12 | assert utils.get_descriptorset_list() == [] 13 | -------------------------------------------------------------------------------- /test/test_torch_connector.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | import logging 4 | from typing import Union 5 | 6 | import torch 7 | import torch.distributed as dist 8 | from aperturedb import Images 9 | from aperturedb import PyTorchDataset 10 | from torch.utils.data.dataloader import DataLoader 11 | from torch.utils.data.dataset import Dataset 12 | 13 | from aperturedb.ConnectorRest import ConnectorRest 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class TestTorchDatasets(): 19 | def validate_dataset(self, dataset: Union[DataLoader, Dataset], expected_length): 20 | start = time.time() 21 | 22 | count = 0 23 | # Iterate over dataset. 24 | for img in dataset: 25 | if len(img[0]) < 0: 26 | logger.error("Empty image?") 27 | assert True == False 28 | count += len(img[1]) if isinstance(dataset, DataLoader) else 1 29 | assert count == expected_length 30 | 31 | time_taken = time.time() - start 32 | if time_taken != 0: 33 | logger.info(f"Throughput (imgs/s): {len(dataset) / time_taken}") 34 | 35 | def test_nativeContraints(self, db, utils, images): 36 | assert len(images) > 0 37 | # This is a hack against a bug in batch API. 38 | dim = 224 if isinstance(db, ConnectorRest) else 225 39 | query = [{ 40 | "FindImage": { 41 | "constraints": { 42 | "age": [">=", 0] 43 | }, 44 | "operations": [ 45 | { 46 | "type": "resize", 47 | "width": dim, 48 | "height": dim 49 | } 50 | ], 51 | "results": { 52 | "list": ["license"] 53 | } 54 | } 55 | }] 56 | 57 | dataset = PyTorchDataset.ApertureDBDataset( 58 | db, query, label_prop="license") 59 | 60 | self.validate_dataset(dataset, utils.count_images()) 61 | 62 | def test_datasetWithMultiprocessing(self, db, utils, images): 63 | len_limit = utils.count_images() 64 | # This is a hack against a bug in batch API. 
65 | # TODO Fixme 66 | dim = 224 if isinstance(db, ConnectorRest) else 225 67 | query = [{ 68 | "FindImage": { 69 | "constraints": { 70 | "age": [">=", 0] 71 | }, 72 | "operations": [ 73 | { 74 | "type": "resize", 75 | "width": dim, 76 | "height": dim 77 | } 78 | ], 79 | "results": { 80 | "list": ["license"], 81 | "limit": len_limit 82 | } 83 | } 84 | }] 85 | 86 | dataset = PyTorchDataset.ApertureDBDataset( 87 | db, query, label_prop="license") 88 | 89 | self.validate_dataset(dataset, len_limit) 90 | 91 | # Distributed Data Loader Setup 92 | 93 | # Needed for init_process_group 94 | os.environ['MASTER_ADDR'] = 'localhost' 95 | os.environ['MASTER_PORT'] = '12355' 96 | 97 | dist.init_process_group("gloo", rank=0, world_size=1) 98 | 99 | # === Distributed Data Loader Sequential 100 | batch_size = 10 101 | data_loader = DataLoader( 102 | dataset, 103 | batch_size=batch_size, # pick random values here to test 104 | num_workers=4, # num_workers > 1 to test multiprocessing works 105 | pin_memory=True, 106 | drop_last=True, 107 | ) 108 | 109 | self.validate_dataset(data_loader, len_limit) 110 | # === Distributed Data Loader Shuffler 111 | 112 | # This will generate a random sampler, which will make the use 113 | # of batching wasteful 114 | sampler = torch.utils.data.DistributedSampler( 115 | dataset, shuffle=True) 116 | 117 | data_loader = DataLoader( 118 | dataset, 119 | sampler=sampler, 120 | batch_size=batch_size, # pick random values here to test 121 | num_workers=4, # num_workers > 1 to test multiprocessing works 122 | pin_memory=True, 123 | drop_last=True, 124 | ) 125 | 126 | self.validate_dataset(data_loader, len_limit) 127 | dist.destroy_process_group() 128 | -------------------------------------------------------------------------------- /version.sh: -------------------------------------------------------------------------------- 1 | # Read version from python code 2 | read_version() { 3 | BUILD_VERSION=$(awk '$1=="__version__" && $2=="=" {print $3}' aperturedb/__init__.py | tr -d '"') 4 | } --------------------------------------------------------------------------------
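The `read_version` helper above pulls `__version__` out of `aperturedb/__init__.py` with awk; this is the same attribute that `pyproject.toml` exposes through `[tool.setuptools.dynamic]`, so the version that publish.sh and tag.sh use can also be checked from Python. A minimal sketch, assuming the package is installed and importable:

# Prints the same string that read_version stores in BUILD_VERSION and that
# setuptools publishes as the dynamic project version.
import aperturedb
print(aperturedb.__version__)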