├── .github └── workflows │ ├── checks.yml │ ├── dependencies.yml │ ├── develop.yml │ ├── main.yml │ ├── pr.yaml │ └── release.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── aperturedb ├── BBoxDataCSV.py ├── BlobDataCSV.py ├── BlobNewestDataCSV.py ├── Blobs.py ├── BoundingBoxes.py ├── CSVParser.py ├── CSVWriter.py ├── Clips.py ├── CommonLibrary.py ├── Configuration.py ├── ConnectionDataCSV.py ├── Connector.py ├── ConnectorRest.py ├── Constraints.py ├── DaskManager.py ├── DataModels.py ├── DescriptorDataCSV.py ├── DescriptorSetDataCSV.py ├── Descriptors.py ├── Entities.py ├── EntityDataCSV.py ├── EntityUpdateDataCSV.py ├── ImageDataCSV.py ├── ImageDownloader.py ├── Images.py ├── KaggleData.py ├── NotebookHelpers.py ├── Operations.py ├── ParallelLoader.py ├── ParallelQuery.py ├── ParallelQuerySet.py ├── Parallelizer.py ├── PolygonDataCSV.py ├── Polygons.py ├── PyTorchData.py ├── PyTorchDataset.py ├── Query.py ├── QueryGenerator.py ├── SPARQL.py ├── Sort.py ├── Sources.py ├── SparseAddingDataCSV.py ├── Stats.py ├── Subscriptable.py ├── TensorFlowData.py ├── Utils.py ├── VideoDataCSV.py ├── VideoDownloader.py ├── Videos.py ├── __init__.py ├── cli │ ├── README.md │ ├── __init__.py │ ├── adb.py │ ├── configure.py │ ├── console.py │ ├── ingest.py │ ├── keys.py │ ├── mount_coco.py │ ├── tokens.py │ ├── transact.py │ └── utilities.py ├── queryMessage.py ├── queryMessage3_pb2.py ├── queryMessage4_pb2.py ├── queryMessage5_pb2.py ├── transformers │ ├── __init__.py │ ├── clip.py │ ├── clip_pytorch_embeddings.py │ ├── common_properties.py │ ├── facenet.py │ ├── facenet_pytorch_embeddings.py │ ├── image_properties.py │ └── transformer.py └── types.py ├── ci.sh ├── configure_deployment.sh ├── docker ├── complete │ └── Dockerfile ├── dependencies │ ├── Dockerfile │ └── build.sh ├── notebook │ ├── Dockerfile │ ├── Dockerfile.cpu │ └── scripts │ │ └── start.sh ├── pytorch-gpu │ ├── Dockerfile │ ├── build.sh │ └── scripts │ │ └── start.sh ├── release │ └── Dockerfile ├── tests │ ├── Dockerfile │ └── scripts │ │ └── start.sh └── twine │ └── Dockerfile ├── docs └── README.protobuf ├── examples ├── CelebADataKaggle.py ├── Cifar10DataTensorFlow.py ├── CocoDataPyTorch.py ├── DataWizard │ └── Polygon Regions DataWizard.ipynb ├── Foo.py ├── README.md ├── dask │ ├── ingest_dask.py │ └── ingest_loader.py ├── image_classification │ ├── AlexNetClassifier.py │ ├── CocoDataPytorch.py │ ├── imagenet_classes.txt │ ├── prepare_aperturedb.py │ ├── pytorch_classification.ipynb │ └── pytorch_classification.py ├── loaders_101 │ ├── CocoDataPytorch.py │ └── loaders.ipynb ├── loading_with_models │ ├── add_video_model.py │ ├── find_roi.py │ ├── get_tl_embeddings.py │ ├── models.ipynb │ ├── text_embedding.json │ └── video_clips.json ├── rest_api │ ├── index.html │ ├── rest_api.js │ ├── rest_api.py │ └── songbird.jpg └── similarity_search │ ├── CelebADataKaggle.py │ ├── add_faces.py │ ├── bruce-lee.jpg │ ├── similarity_search.ipynb │ └── taylor-swift.jpg ├── github-release.sh ├── publish.sh ├── pyproject.toml ├── tag.sh ├── test ├── .coveragerc ├── .dockerignore ├── .env ├── __init__.py ├── adb_timing_tests.py ├── conftest.py ├── coverage │ └── Dockerfile ├── dbinfo.py ├── docker-compose.yml ├── download_images.py ├── generateImages.py ├── generateInput.py ├── get_10_faces_with_annotations.json ├── get_10_faces_with_optional_annotations.json ├── get_10_image_uniqueids.json ├── input │ ├── README.md │ ├── sample_gs_urls │ ├── sample_gs_video_urls │ ├── sample_http_urls │ ├── sample_http_video_urls │ ├── 
sample_s3_urls │ ├── sample_s3_video_urls │ └── url_images.adb.csv ├── pytest.ini ├── run_test.sh ├── run_test_container.sh ├── test_CLI.py ├── test_Data.py ├── test_Datawizard.py ├── test_Key.py ├── test_Parallel.py ├── test_ResponseHandler.py ├── test_SPARQL.py ├── test_Server.py ├── test_Session.py ├── test_Stats.py ├── test_Success.py ├── test_UserConvenience.py ├── test_Utils.py ├── test_kaggle.py └── test_torch_connector.py └── version.sh /.github/workflows/checks.yml: -------------------------------------------------------------------------------- 1 | name: syntax-check 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - develop 8 | - main 9 | 10 | jobs: 11 | pre-commit: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | 17 | - uses: actions/setup-python@v3 18 | with: 19 | python-version: '3.10' 20 | 21 | - uses: pre-commit/action@v3.0.1 22 | 23 | - uses: luisremis/find-trailing-whitespace@master 24 | -------------------------------------------------------------------------------- /.github/workflows/dependencies.yml: -------------------------------------------------------------------------------- 1 | name: dependencies 2 | 3 | on: 4 | schedule: 5 | - cron: "0 0 * * *" 6 | 7 | jobs: 8 | build-dependencies: 9 | 10 | runs-on: 11 | - self-hosted 12 | - deployer 13 | 14 | steps: 15 | 16 | - uses: actions/checkout@v3 17 | 18 | - name: Login to DockerHub 19 | uses: docker/login-action@v2 20 | with: 21 | username: ${{ secrets.DOCKER_USER }} 22 | password: ${{ secrets.DOCKER_PASS }} 23 | 24 | - name: Build and Push Dependencies Image 25 | env: 26 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 27 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 28 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 29 | GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 30 | run: BUILD_DEPENDENCIES=true PULL_DEPENDENCIES=false PUSH_DEPENDENCIES=true ./ci.sh 31 | shell: bash 32 | -------------------------------------------------------------------------------- /.github/workflows/develop.yml: -------------------------------------------------------------------------------- 1 | name: develop 2 | 3 | on: 4 | push: 5 | branches: 6 | - develop 7 | 8 | jobs: 9 | build-test: 10 | 11 | runs-on: 12 | - self-hosted 13 | - deployer 14 | 15 | steps: 16 | 17 | - uses: actions/checkout@v3 18 | 19 | - name: Login to DockerHub 20 | uses: docker/login-action@v2 21 | with: 22 | username: ${{ secrets.DOCKER_USER }} 23 | password: ${{ secrets.DOCKER_PASS }} 24 | 25 | - name: Login to Google Cloud 26 | uses: google-github-actions/setup-gcloud@v0 27 | with: 28 | service_account_key: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 29 | project_id: ${{ secrets.GCP_SERVICE_ACCOUNT_PROJECT_ID }} 30 | export_default_credentials: true 31 | 32 | - name: Build and Run Tests 33 | env: 34 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 35 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 36 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 37 | GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 38 | run: RUN_TESTS=true ./ci.sh 39 | shell: bash 40 | 41 | build_and_deploy_docs: 42 | needs: 43 | - build-test 44 | 45 | runs-on: 46 | - self-hosted 47 | - deployer 48 | 49 | steps: 50 | 51 | - uses: actions/checkout@v3 52 | 53 | - name: Login to DockerHub 54 | uses: docker/login-action@v2 55 | with: 56 | username: ${{ secrets.DOCKER_USER }} 57 | password: ${{ secrets.DOCKER_PASS }} 58 | 59 | - name: Build Notebook,docs Docker 60 | env: 61 | 
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 62 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 63 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 64 | RUNNER_NAME: ${{ runner.name }} 65 | ADB_REPO: aperturedata/aperturedb 66 | ADB_TAG: dev 67 | LENZ_REPO: aperturedata/lenz 68 | LENZ_TAG: dev 69 | run: BUILD_COMPLETE=true ./ci.sh 70 | shell: bash 71 | 72 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: main 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build_and_test: 10 | 11 | runs-on: 12 | - self-hosted 13 | - deployer 14 | 15 | steps: 16 | 17 | - uses: actions/checkout@v3 18 | 19 | - name: Login to DockerHub 20 | uses: docker/login-action@v2 21 | with: 22 | username: ${{ secrets.DOCKER_USER }} 23 | password: ${{ secrets.DOCKER_PASS }} 24 | 25 | - name: Login to Google Cloud 26 | uses: google-github-actions/setup-gcloud@v0 27 | with: 28 | service_account_key: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 29 | project_id: ${{ secrets.GCP_SERVICE_ACCOUNT_PROJECT_ID }} 30 | export_default_credentials: true 31 | 32 | - name: Build and Run Tests 33 | env: 34 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 35 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 36 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 37 | GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 38 | run: RUN_TESTS=true ./ci.sh 39 | shell: bash 40 | 41 | build_notebooks_and_publish_pypi: 42 | needs: 43 | - build_and_test 44 | 45 | runs-on: 46 | - self-hosted 47 | - deployer 48 | 49 | steps: 50 | 51 | - uses: actions/checkout@v3 52 | 53 | - name: Login to DockerHub 54 | uses: docker/login-action@v2 55 | with: 56 | username: ${{ secrets.DOCKER_USER }} 57 | password: ${{ secrets.DOCKER_PASS }} 58 | 59 | - name: Build Notebook 60 | env: 61 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 62 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 63 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 64 | RUNNER_NAME: ${{ runner.name }} 65 | ADB_REPO: aperturedata/aperturedb 66 | ADB_TAG: dev 67 | LENZ_REPO: aperturedata/lenz 68 | LENZ_TAG: dev 69 | run: BUILD_COMPLETE=true ./ci.sh 70 | shell: bash 71 | 72 | - name: Publish to PyPi 73 | env: 74 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 75 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 76 | run: | 77 | bash publish.sh 78 | 79 | - name: Tag release version 80 | run: './tag.sh' 81 | shell: bash 82 | 83 | - name: GitHub Release 84 | run: 'TOKEN=${{ secrets.GITHUBPAT }} ./github-release.sh' 85 | shell: bash 86 | 87 | trigger_demos_buils: 88 | runs-on: ubuntu-latest 89 | needs: 90 | - build_notebooks_and_publish_pypi 91 | steps: 92 | - uses: actions/checkout@v3 93 | - name: repository dispatch 94 | run: | 95 | curl -X POST https://api.github.com/repos/aperture-data/demos/actions/workflows/13727133/dispatches \ 96 | -H "Accept: application/vnd.github+json" \ 97 | -H "Authorization: Bearer ${{ secrets.GITHUBPAT }}" \ 98 | -H "X-GitHub-Api-Version: 2022-11-28" \ 99 | -d '{"ref":"master","inputs":{}}' 100 | 101 | shell: bash 102 | 103 | trigger_docs_deploy: 104 | runs-on: ubuntu-latest 105 | needs: 106 | - build_notebooks_and_publish_pypi 107 | steps: 108 | - uses: actions/checkout@v3 109 | - name: repository dispatch 110 | run: | 111 | curl -X POST https://api.github.com/repos/aperture-data/docs/actions/workflows/64451786/dispatches \ 112 | 
-H "Accept: application/vnd.github+json" \ 113 | -H "Authorization: Bearer ${{ secrets.GITHUBPAT }}" \ 114 | -H "X-GitHub-Api-Version: 2022-11-28" \ 115 | -d '{"ref":"main","inputs":{}}' 116 | 117 | shell: bash 118 | -------------------------------------------------------------------------------- /.github/workflows/pr.yaml: -------------------------------------------------------------------------------- 1 | name: pr 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - develop 7 | 8 | jobs: 9 | run_test: 10 | 11 | runs-on: 12 | - self-hosted 13 | - deployer 14 | 15 | steps: 16 | 17 | - uses: actions/checkout@v3 18 | 19 | - name: Login to DockerHub 20 | uses: docker/login-action@v2 21 | with: 22 | username: ${{ secrets.DOCKER_USER }} 23 | password: ${{ secrets.DOCKER_PASS }} 24 | 25 | - name: Login to Google Cloud 26 | uses: google-github-actions/setup-gcloud@v0 27 | with: 28 | service_account_key: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 29 | project_id: ${{ secrets.GCP_SERVICE_ACCOUNT_PROJECT_ID }} 30 | export_default_credentials: true 31 | 32 | - name: Build and Run Tests 33 | env: 34 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 35 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 36 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 37 | GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 38 | RUNNER_NAME: ${{ runner.name }} 39 | RUN_TESTS: true 40 | NO_PUSH: true 41 | BRANCH_NAME: ${{ github.event.pull_request.head.ref }} 42 | TARGET_BRANCH_NAME: ${{ github.event.pull_request.base.ref }} 43 | ADB_REPO: aperturedata/aperturedb 44 | ADB_TAG: dev 45 | LENZ_REPO: aperturedata/lenz 46 | LENZ_TAG: dev 47 | run: ./ci.sh 48 | shell: bash 49 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | branches: 6 | - release* 7 | 8 | jobs: 9 | build-test: 10 | 11 | runs-on: 12 | - self-hosted 13 | - deployer 14 | 15 | steps: 16 | 17 | - uses: actions/checkout@v3 18 | 19 | - name: Login to DockerHub 20 | uses: docker/login-action@v2 21 | with: 22 | username: ${{ secrets.DOCKER_USER }} 23 | password: ${{ secrets.DOCKER_PASS }} 24 | 25 | - name: Login to Google Cloud 26 | uses: google-github-actions/setup-gcloud@v0 27 | with: 28 | service_account_key: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 29 | project_id: ${{ secrets.GCP_SERVICE_ACCOUNT_PROJECT_ID }} 30 | export_default_credentials: true 31 | 32 | - name: Build and Run Tests 33 | env: 34 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 35 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 36 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 37 | GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }} 38 | run: RUN_TESTS=true NO_PUSH=true UPDATE_BRANCH=true ./ci.sh 39 | shell: bash 40 | 41 | build_and_deploy_docs: 42 | 43 | runs-on: 44 | - self-hosted 45 | - deployer 46 | 47 | steps: 48 | 49 | - uses: actions/checkout@v3 50 | 51 | - name: Login to DockerHub 52 | uses: docker/login-action@v2 53 | with: 54 | username: ${{ secrets.DOCKER_USER }} 55 | password: ${{ secrets.DOCKER_PASS }} 56 | 57 | - name: Build Notebook Docker 58 | run: BUILD_COMPLETE=true NO_PUSH=true ./ci.sh 59 | shell: bash 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 
2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # VSCode 132 | .vscode/ 133 | 134 | #Data files 135 | *.adb.csv 136 | *.jpg 137 | *.npy 138 | test/aperturedb/db*/ 139 | test/input/blobs/ 140 | docs/examples/ 141 | examples/*/coco 142 | examples/*/classification.txt 143 | kaggleds/ 144 | examples/*/kaggleds/ 145 | docs/*/*.svg 146 | test/aperturedb/log* 147 | adb-python/* 148 | docker/notebook/aperturedata/* 149 | docker/tests/aperturedata/* 150 | docker/pytorch-gpu/aperturedata/* 151 | /test/input/ 152 | /test/input/images/ 153 | 154 | .aperturedb 155 | test/data/ 156 | test/aperturedb/certificate/ 157 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autofix_commit_msg: | 3 | [pre-commit.ci] auto fixes from pre-commit.com hooks 4 | 5 | for more information, see https://pre-commit.ci 6 | autofix_prs: true 7 | autoupdate_branch: '' 8 | autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' 9 | autoupdate_schedule: weekly 10 | skip: [] 11 | submodules: false 12 | repos: 13 | - repo: https://github.com/hhatto/autopep8 14 | rev: 8b75604 15 | hooks: 16 | - id: autopep8 17 | exclude: _pb2.py$ 18 | args: ["--ignore", "E251,E241,E221,E402,E265,E275", "-i"] 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | @copyright Copyright (c) 2017 Intel Corporation 4 | @copyright Copyright (c) 2024 ApertureData Inc 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), 8 | to deal in the Software without restriction, 9 | including without limitation the rights to use, copy, modify, 10 | merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 
25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ApertureDB Client Python Module 2 | 3 | This is the Python SDK for building applications with [ApertureDB](https://docs.aperturedata.io/Introduction/WhatIsAperture). 4 | 5 | It comprises utilities to get data in and out of ApertureDB efficiently. 6 | The quick [getting started guide](https://docs.aperturedata.io/Setup/QuickStart) is a useful starting point for building with this SDK. 7 | For more concrete examples, please refer to: 8 | * [Simple examples and concepts](https://docs.aperturedata.io/category/start-with-basics) 9 | * [Advanced usage examples](https://docs.aperturedata.io/category/build-ml-examples) 10 | * [Sample applications](https://docs.aperturedata.io/category/build-applications) 11 | 12 | # Installing in a custom virtual environment 13 | ```bash 14 | pip install aperturedb[complete] 15 | ``` 16 | 17 | or, for an installation with only the core part of the SDK: 18 | ```bash 19 | pip install aperturedb 20 | ``` 21 | 22 | A complete [reference](https://docs.aperturedata.io/category/aperturedb-python-sdk) of this SDK is available on the official [ApertureDB Documentation](https://docs.aperturedata.io). 23 | 24 | 25 | # Development setup 26 | The recommended way is to clone this repo and do an editable install as follows: 27 | ```bash 28 | git clone https://github.com/aperture-data/aperturedb-python.git 29 | cd aperturedb-python 30 | pip install -e .[dev] 31 | ``` 32 | 33 | 34 | # Running tests 35 | The tests are inside the `test` dir and are currently run in a Linux container. Refer to `docker/tests` and `test/run_test_container` for details. The following explanation assumes that the current working directory is `test`. 36 | 37 | The tests bring up a set of components in an isolated network, namely: 38 | - aperturedb-community 39 | - lenz 40 | - nginx 41 | - ca (for initial provisioning of certificates) 42 | - webui 43 | 44 | 45 | To connect to this setup, the ports are exposed to the host as follows: 46 | - 55556 for TCP connection to aperturedb (via lenz). 47 | - 8087 for HTTP connection to aperturedb (via nginx). 48 | 49 | 50 | 51 | The test environment can be brought up manually with: 52 | ```bash 53 | docker compose up -d 54 | ``` 55 | 56 | ## Changes to run the tests in a development environment 57 | Edit the file `test/dbinfo.py` to look like the following: 58 | - DB_TCP_HOST = `localhost` 59 | - DB_REST_HOST = `localhost` 60 | - DB_TCP_PORT = `55556` 61 | - DB_REST_PORT = `8087` 62 | 63 | 64 | All the tests can be run with: 65 | 66 | ```bash 67 | export GCP_SERVICE_ACCOUNT_KEY= 68 | bash run_test.sh 69 | ``` 70 | 71 | Specific tests can be run by invoking pytest directly: 72 | 73 | ```bash 74 | PROJECT=aperturedata KAGGLE_username=ci KAGGLE_key=dummy coverage run -m pytest test_Session.py -v -s --log-cli-level=DEBUG 75 | ``` 76 | 77 | **NOTE: The running environment is assumed to be Linux x86_64. Some changes may be required to run the tests on macOS or Windows Python environments.** 78 | 79 | ## Environment variables that affect the runtime behaviour of the SDK 80 | 81 | These can be used as debugging aids.
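For example, a debugging run of the test suite might export these variables before invoking the tests. The variable names come from the table that follows; the specific values shown here are only illustrative:

```bash
# Illustrative values only; see the table below for what each variable accepts.
export ADB_DEBUGGABLE=true               # register the SIGUSR1 fault handler
export LOG_CONSOLE_LEVEL=DEBUG           # more verbose than the ERROR default on stdout
export ADB_LOG_FILE=/tmp/adb-debug.log   # custom path for the log file (example path)
bash run_test.sh
```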
82 | 83 | | Variable | type | Comments | Default value | 84 | | --- | --- | --- | --- | 85 | |ADB_DEBUGGABLE | boolean | allows the application to register a fault handler that dumps a trace when SIGUSR1 is sent to the process | not set | 86 | |LOG_FILE_LEVEL | log levels | The threshold for emitting log messages into the error.log file | WARN | 87 | |LOG_CONSOLE_LEVEL | log levels | The threshold for emitting log messages into stdout | ERROR | 88 | |ADB_LOG_FILE | string | custom file path for the LOG file | not set| 89 | 90 | 91 | # Reporting bugs 92 | Any error in the functionality / documentation / tests maybe reported by creating a 93 | [github issue](https://github.com/aperture-data/aperturedb-python/issues). 94 | 95 | # Development guidelines 96 | For inclusion of any features, a PR may be created with a patch, 97 | and a brief description of the problem and the fix. 98 | The CI enforces a coding style guideline with autopep8 and 99 | a script to detect trailing white spaces. 100 | 101 | If a PR encounters failures, the log will describe the location of 102 | the offending line with a description of the problem. 103 | -------------------------------------------------------------------------------- /aperturedb/BBoxDataCSV.py: -------------------------------------------------------------------------------- 1 | from aperturedb import CSVParser 2 | 3 | HEADER_X_POS = "x_pos" 4 | HEADER_Y_POS = "y_pos" 5 | HEADER_WIDTH = "width" 6 | HEADER_HEIGHT = "height" 7 | IMG_KEY_PROP = "img_key_prop" 8 | IMG_KEY_VAL = "img_key_value" 9 | 10 | 11 | class BBoxDataCSV(CSVParser.CSVParser): 12 | """ 13 | **ApertureDB BBox Data.** 14 | 15 | This class loads the Bounding Box Data which is present in a CSV file, 16 | and converts it into a series of ApertureDB queries. 17 | 18 | :::note Is backed by a CSV file with the following columns: 19 | ``IMG_KEY``, ``x_pos``, ``y_pos``, ``width``, ``height``, ``BBOX_PROP_NAME_1``, ... ``BBOX_PROP_NAME_N``, ``constraint_BBOX_PROP_NAME_1`` 20 | ::: 21 | 22 | **IMG_KEY**: column has the property name of the image property that 23 | the bounding box will be connected to, and each row has the value 24 | that will be used for finding the image. 25 | 26 | **x_pos, y_pos**: Specify the coordinates of top left of the bounding box. 27 | 28 | **width, height**: Specify the dimensions of the bounding box, as integers (unit is in pixels). 29 | 30 | **BBOX_PROP_NAME_N**: is an arbitrary name of the property of the bounding 31 | box, and each row has the value for that property. 32 | 33 | **constraint_BBOX_PROP_NAME_1**: Constraints against specific property, used for conditionally adding a Bounding Box. 34 | 35 | Example CSV file:: 36 | 37 | img_unique_id,x_pos,y_pos,width,height,type,dataset_id,constraint_dataset_id 38 | d5b25253-9c1e,257,154,84,125,manual,12345,12345 39 | d5b25253-9c1e,7,537,522,282,manual,12346,12346 40 | ... 41 | 42 | Example usage: 43 | 44 | ``` python 45 | 46 | data = BBoxDataCSV("/path/to/BoundingBoxesData.csv") 47 | loader = ParallelLoader(client) 48 | loader.ingest(data) 49 | ``` 50 | 51 | :::info 52 | In the above example, the constraint_dataset_id ensures that a bounding box with the specified 53 | dataset_id would be only inserted if it does not already exist in the database. 
54 | ::: 55 | 56 | """ 57 | 58 | def __init__(self, filename: str, **kwargs): 59 | 60 | super().__init__(filename, **kwargs) 61 | 62 | self.props_keys = [x for x in self.header[5:] 63 | if not x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 64 | self.constraints_keys = [x for x in self.header[5:] 65 | if x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 66 | 67 | self.img_key = self.header[0] 68 | self.command = "AddBoundingBox" 69 | 70 | def get_indices(self): 71 | return { 72 | "entity": { 73 | "_BoundingBox": self.get_indexed_properties() 74 | } 75 | } 76 | 77 | def getitem(self, idx): 78 | q = [] 79 | img_id = self.df.loc[idx, self.img_key] 80 | fi = { 81 | "FindImage": { 82 | "_ref": 1, 83 | "unique": True, 84 | "constraints": { 85 | self.img_key: ["==", img_id], 86 | }, 87 | "blobs": False, 88 | }, 89 | } 90 | q.append(fi) 91 | 92 | box_data_headers = [HEADER_X_POS, 93 | HEADER_Y_POS, HEADER_WIDTH, HEADER_HEIGHT] 94 | box_data = [int(self.df.loc[idx, h]) for h in box_data_headers] 95 | 96 | rect_attrs = ["x", "y", "width", "height"] 97 | custom_fields = { 98 | "image_ref": 1, 99 | "rectangle": { 100 | attr: val for attr, val in zip(rect_attrs, box_data) 101 | }, 102 | } 103 | abb = self._basic_command(idx, custom_fields) 104 | 105 | properties = self.parse_properties(idx) 106 | if properties: 107 | props = properties 108 | if "_label" in props: 109 | abb[self.command]["label"] = props["_label"] 110 | props.pop("_label") 111 | # Check if props is not empty after removing "_label" 112 | if props: 113 | abb[self.command]["properties"] = props 114 | q.append(abb) 115 | 116 | return q, [] 117 | 118 | def validate(self) -> None: 119 | 120 | self.header = list(self.df.columns.values) 121 | 122 | if self.header[1] != HEADER_X_POS: 123 | raise Exception("Error with CSV file field: " + HEADER_X_POS) 124 | if self.header[2] != HEADER_Y_POS: 125 | raise Exception("Error with CSV file field: " + HEADER_Y_POS) 126 | if self.header[3] != HEADER_WIDTH: 127 | raise Exception("Error with CSV file field: " + HEADER_WIDTH) 128 | if self.header[4] != HEADER_HEIGHT: 129 | raise Exception("Error with CSV file field: " + HEADER_HEIGHT) 130 | -------------------------------------------------------------------------------- /aperturedb/BlobDataCSV.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from aperturedb import CSVParser 4 | 5 | PROPERTIES = "properties" 6 | CONSTRAINTS = "constraints" 7 | BLOB_PATH = "filename" 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class BlobDataCSV(CSVParser.CSVParser): 13 | """**ApertureDB Blob Data.** 14 | 15 | This class loads the Blob Data which is present in a CSV file, 16 | and converts it into a series of ApertureDB queries. 17 | 18 | :::note Is backed by a CSV file with the following columns: 19 | ``FILENAME``, ``PROP_NAME_1``, ... ``PROP_NAME_N``, ``constraint_PROP_NAME_1`` 20 | ::: 21 | 22 | **FILENAME**: The path of the blob object on the file system. 23 | 24 | **PROP_NAME_1 ... PROP_NAME_N**: Arbitrary property names associated with this blob. 25 | 26 | **constraint_PROP_NAME_1**: Constraints against specific property, used for conditionally adding a Blob. 27 | 28 | Example CSV file:: 29 | 30 | filename,name,lastname,age,id,constraint_id 31 | /mnt/blob1,John,Salchi,69,321423532,321423532 32 | /mnt/blob2,Johna,Salchi,63,42342522,42342522 33 | ... 
34 | 35 | Example usage: 36 | 37 | ``` python 38 | 39 | data = BlobDataCSV("/path/to/BlobData.csv") 40 | loader = ParallelLoader(client) 41 | loader.ingest(data) 42 | ``` 43 | 44 | 45 | :::info 46 | In the above example, the constraint_id ensures that a blob with the specified 47 | id would be only inserted if it does not already exist in the database. 48 | ::: 49 | """ 50 | 51 | def __init__(self, filename: str, **kwargs): 52 | 53 | super().__init__(filename, **kwargs) 54 | 55 | self.props_keys = [x for x in self.header[1:] 56 | if not x.startswith(CSVParser.CONSTRAINTS_PREFIX) and x != BLOB_PATH] 57 | self.constraints_keys = [x for x in self.header[1:] 58 | if x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 59 | self.command = "AddBlob" 60 | 61 | def get_indices(self): 62 | return { 63 | "entity": { 64 | "_Blob": self.get_indexed_properties() 65 | } 66 | } 67 | 68 | def getitem(self, idx): 69 | filename = os.path.join(self.relative_path_prefix, 70 | self.df.loc[idx, BLOB_PATH]) 71 | blob_ok, blob = self.load_blob(filename) 72 | if not blob_ok: 73 | logger.error("Error loading blob: " + filename) 74 | raise Exception("Error loading blob: " + filename) 75 | 76 | q = [] 77 | ab = self._basic_command(idx) 78 | q.append(ab) 79 | 80 | return q, [blob] 81 | 82 | def load_blob(self, filename): 83 | 84 | try: 85 | fd = open(filename, "rb") 86 | buff = fd.read() 87 | fd.close() 88 | return True, buff 89 | except Exception as e: 90 | logger.exception(e) 91 | 92 | return False, None 93 | 94 | def validate(self): 95 | 96 | self.header = list(self.df.columns.values) 97 | 98 | if self.header[0] != BLOB_PATH: 99 | raise Exception("Error with CSV file field: " + BLOB_PATH) 100 | -------------------------------------------------------------------------------- /aperturedb/Blobs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aperturedb.Entities import Entities 4 | 5 | 6 | class Blobs(Entities): 7 | """ 8 | **The object mapper representation of blobs in ApertureDB.** 9 | 10 | This class is a layer on top of the native query. 11 | It facilitates interactions with blobs in the database in the pythonic way. 12 | """ 13 | db_object = "_Blob" 14 | -------------------------------------------------------------------------------- /aperturedb/BoundingBoxes.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aperturedb.Entities import Entities 4 | 5 | 6 | class BoundingBoxes(Entities): 7 | """ 8 | **The object mapper representation of bounding boxes in ApertureDB.** 9 | 10 | This class is a layer on top of the native query. 11 | It facilitates interactions with bounding boxes in the database in the pythonic way. 12 | """ 13 | db_object = "_BoundingBox" 14 | -------------------------------------------------------------------------------- /aperturedb/Clips.py: -------------------------------------------------------------------------------- 1 | from aperturedb.Entities import Entities 2 | 3 | 4 | class Clips(Entities): 5 | """ 6 | **The object mapper representation of Video Clips in ApertureDB.** 7 | 8 | This class is a layer on top of the native query. 9 | It facilitates interactions with Video clips in the database in the pythonic way. 
10 | """ 11 | db_object = "_Clip" 12 | -------------------------------------------------------------------------------- /aperturedb/Constraints.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from enum import Enum 3 | 4 | 5 | class Conjunction(Enum): 6 | AND = "all" 7 | OR = "any" 8 | 9 | 10 | class Constraints(object): 11 | """ 12 | **Constraints object for the Object mapper API** 13 | """ 14 | 15 | def __init__(self, conjunction: Conjunction = Conjunction.AND): 16 | self._conjunction = conjunction.value 17 | self.constraints = { 18 | conjunction.value: { 19 | } 20 | } 21 | 22 | def equal(self, key, value) -> Constraints: 23 | self.constraints[self._conjunction][key] = ["==", value] 24 | return self 25 | 26 | def notequal(self, key, value) -> Constraints: 27 | self.constraints[self._conjunction][key] = ["!=", value] 28 | return self 29 | 30 | def greaterequal(self, key, value) -> Constraints: 31 | self.constraints[self._conjunction][key] = [">=", value] 32 | return self 33 | 34 | def greater(self, key, value) -> Constraints: 35 | self.constraints[self._conjunction][key] = [">", value] 36 | return self 37 | 38 | def lessequal(self, key, value) -> Constraints: 39 | self.constraints[self._conjunction][key] = ["<=", value] 40 | return self 41 | 42 | def less(self, key, value) -> Constraints: 43 | self.constraints[self._conjunction][key] = ["<", value] 44 | return self 45 | 46 | def is_in(self, key, val_array) -> Constraints: 47 | self.constraints[self._conjunction][key] = ["in", val_array] 48 | return self 49 | 50 | def check(self, entity): 51 | for key, op in self.constraints.items(): 52 | if key not in entity: 53 | return False 54 | if op[0] == "==": 55 | if not entity[key] == op[1]: 56 | return False 57 | elif op[0] == ">=": 58 | if not entity[key] >= op[1]: 59 | return False 60 | elif op[0] == ">": 61 | if not entity[key] > op[1]: 62 | return False 63 | elif op[0] == "<=": 64 | if not entity[key] <= op[1]: 65 | return False 66 | elif op[0] == "<": 67 | if not entity[key] < op[1]: 68 | return False 69 | elif op[0] == "in": 70 | if not entity[key] in op[1]: 71 | return False 72 | else: 73 | raise Exception("invalid constraint operation: " + op[0]) 74 | return True 75 | -------------------------------------------------------------------------------- /aperturedb/DaskManager.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import logging 3 | from threading import Lock 4 | import time 5 | from types import SimpleNamespace 6 | import dask 7 | from dask.distributed import Client, LocalCluster, progress 8 | from aperturedb.Connector import Connector 9 | 10 | import multiprocessing as mp 11 | 12 | from aperturedb.Stats import Stats 13 | 14 | dask.config.set({"dataframe.convert-string": False}) 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class DaskManager: 20 | """ 21 | **Class responsible for setting up a local cluster and assigning parts 22 | of data to each worker** 23 | """ 24 | 25 | def __init__(self, num_workers: int = -1): 26 | self.__num_workers = num_workers 27 | # The -1 magic number is to use as many 90% of the cores (1 worker per core). 28 | # This can be overridden by the user. 29 | # Create a pool of workers. 30 | # TODO: see if the same pool can be reused for multiple tasks. 
31 | workers = self.__num_workers if self.__num_workers != \ 32 | -1 else int(0.9 * mp.cpu_count()) 33 | 34 | self._cluster = LocalCluster(n_workers=workers) 35 | self._cluster.shutdown_on_close = False 36 | self._client = Client(self._cluster) 37 | dask.config.set(scheduler="distributed") 38 | 39 | def __del__(self): 40 | logger.info(".......Shutting cluster.........") 41 | self._client.close() 42 | self._cluster.close() 43 | 44 | def run(self, QueryClass: type[ParallelQuery], client: Connector, generator, batchsize, stats): 45 | def process(df, host, port, use_ssl, session, connnector_type): 46 | metrics = Stats() 47 | # Dask reads data in partitions, and the first partition is of 2 rows, with all 48 | # values as 'foo'. This is for sampling the column names and types. Should not process 49 | # those rows. 50 | if len(df) == 2: 51 | if (df.iloc[0, 0] == "a" and df.isna().iloc[1, 0]) or df.iloc[0, 0] == "foo": 52 | return 53 | count = 0 54 | try: 55 | shared_data = SimpleNamespace() 56 | shared_data.session = session 57 | shared_data.lock = Lock() 58 | client = connnector_type(host=host, port=port, 59 | use_ssl=use_ssl, shared_data=shared_data) 60 | except Exception as e: 61 | logger.exception(e) 62 | #from aperturedb.ParallelLoader import ParallelLoader 63 | loader = QueryClass(client) 64 | for i in range(0, len(df), batchsize): 65 | end = min(i + batchsize, len(df)) 66 | slice = df[i:end] 67 | data = generator.__class__( 68 | filename=generator.filename, 69 | df=slice, 70 | blobs_relative_to_csv=generator.blobs_relative_to_csv) 71 | 72 | loader.query(generator=data, batchsize=len( 73 | slice), numthreads=1, stats=False) 74 | count += 1 75 | metrics.times_arr.extend(loader.times_arr) 76 | metrics.error_counter += loader.error_counter 77 | metrics.succeeded_queries += loader.get_succeeded_queries() 78 | metrics.succeeded_commands += loader.get_succeeded_commands() 79 | 80 | return metrics 81 | 82 | start_time = time.time() 83 | # Connector cannot be serialized across processes, 84 | # so we pass session and host/port information instead. 85 | computation = generator.df.map_partitions( 86 | process, 87 | client.host, 88 | client.port, 89 | client.use_ssl, 90 | client.shared_data.session, 91 | type(client)) 92 | computation = computation.persist() 93 | if stats: 94 | progress(computation) 95 | results = computation.compute() 96 | 97 | return results, time.time() - start_time 98 | -------------------------------------------------------------------------------- /aperturedb/DataModels.py: -------------------------------------------------------------------------------- 1 | """ 2 | **Data Model Classes to support (pydantic) model based ingestiton.** 3 | """ 4 | from __future__ import annotations 5 | from pydantic import BaseModel, Field 6 | from typing_extensions import Annotated, List 7 | from typing import ClassVar, Optional 8 | from uuid import uuid4 9 | from aperturedb.Query import ObjectType, PropertyType, RangeType 10 | 11 | 12 | class IdentityDataModel(BaseModel): 13 | """Base class for all entities in ApertureDB. 14 | Generates a default UUID for the entity. 15 | """ 16 | 17 | id: Annotated[str, Field(default_factory=lambda: uuid4().hex)] 18 | # Change as per the docs for the error 19 | # https://docs.pydantic.dev/dev-v2/usage/errors/#model-field-overridden 20 | type: ClassVar[ObjectType] = ObjectType.ENTITY 21 | 22 | 23 | class BlobDataModel(IdentityDataModel): 24 | """Base class for all blob entities in ApertureDB. 
25 | """ 26 | url: Annotated[str, Field( 27 | title="URL", description="URL to file, http, s3 or gs resource")] 28 | type = ObjectType.BLOB 29 | 30 | 31 | class ImageDataModel(BlobDataModel): 32 | """Base class for all image objects in ApertureDB. 33 | """ 34 | type = ObjectType.IMAGE 35 | 36 | 37 | class ClipDataModel(IdentityDataModel): 38 | """Base class for all clip objects in ApertureDB. 39 | """ 40 | type = ObjectType.CLIP 41 | range_type: Annotated[RangeType, 42 | Field(title="Range Type", description="Range type", 43 | default=RangeType.TIME), 44 | PropertyType.SYSTEM] 45 | start: Annotated[float, Field(title="Start", description="Start point as frame, time(hh:mm:ss.uuuuuu) or fraction"), 46 | PropertyType.SYSTEM] 47 | stop: Annotated[float, Field(title="Stop", description="Stop point as frame, time(hh:mm:ss.uuuuuu) or fraction"), 48 | PropertyType.SYSTEM] 49 | 50 | 51 | class VideoDataModel(BlobDataModel): 52 | """Data model for video objects in ApertureDB. 53 | """ 54 | type = ObjectType.VIDEO 55 | 56 | 57 | class DescriptorDataModel(IdentityDataModel): 58 | """Descriptor (Embedding) data model for ApertureDB. 59 | """ 60 | type = ObjectType.DESCRIPTOR 61 | vector: Annotated[List[float], Field( 62 | title="Vector", description="Vector of floats"), PropertyType.SYSTEM] 63 | set: Annotated[DescriptorSetDataModel, Field( 64 | title="Set", description="Descriptor set"), PropertyType.SYSTEM] 65 | 66 | 67 | class PolygonDataModel(IdentityDataModel): 68 | """Polygon data model for ApertureDB. 69 | """ 70 | type = ObjectType.POLYGON 71 | 72 | 73 | class FrameDataModel(IdentityDataModel): 74 | """Frame data model for ApertureDB. 75 | """ 76 | type = ObjectType.FRAME 77 | 78 | 79 | class DescriptorSetDataModel(IdentityDataModel): 80 | """Descriptor Set data model for ApertureDB. 81 | """ 82 | type = ObjectType.DESCRIPTORSET 83 | name: Annotated[str, Field(title="Name", description="Name of the descriptor set"), 84 | PropertyType.SYSTEM] 85 | dimensions: Annotated[int, Field(title="Dimension", description="Dimension of the descriptor set"), 86 | PropertyType.SYSTEM] 87 | 88 | 89 | class BoundingBoxDataModel(IdentityDataModel): 90 | """Bounding Box data model for ApertureDB. 91 | """ 92 | type = ObjectType.BOUNDING_BOX 93 | -------------------------------------------------------------------------------- /aperturedb/DescriptorSetDataCSV.py: -------------------------------------------------------------------------------- 1 | import ast 2 | from aperturedb import CSVParser 3 | 4 | HEADER_NAME = "name" 5 | HEADER_DIM = "dimensions" 6 | HEADER_ENGINE = "engine" 7 | HEADER_METRIC = "metric" 8 | PROPERTIES = "properties" 9 | CONSTRAINTS = "constraints" 10 | 11 | 12 | class DescriptorSetDataCSV(CSVParser.CSVParser): 13 | """**ApertureDB DescriptorSet Data.** 14 | 15 | This class loads the Descriptor Set Data which is present in a CSV file, 16 | and converts it into a series of aperturedb queries. 17 | 18 | :::note Is backed by a CSV file with the following columns: 19 | ``name``, ``dimensions``, ``engine``, ``metric``, ``PROP_NAME_N``, ``constraint_PROP1`` 20 | ::: 21 | 22 | Example CSV file:: 23 | 24 | name,dimensions,engine,metric 25 | dining_chairs,2048,FaissIVFFlat,L2 26 | chandeliers,2048,FaissIVFFlat,L2 27 | console_tables,2048,FaissIVFFlat,L2 28 | ... 
29 | 30 | Example code to create an instance: 31 | 32 | ``` python 33 | 34 | data = DescriptorSetDataCSV("/path/to/DescriptorSetData.csv") 35 | loader = ParallelLoader(client) 36 | loader.ingest(data) 37 | ``` 38 | 39 | 40 | :::info 41 | In the above example, the first row implies to create a Descriptor set called dining_chairs. 42 | The Descriptors in that set would be expected to be an array of float64, of length 2048. 43 | When performing a search on this set, FaissIVFFlat engine would be used and the metric to compute 44 | the distance would be L2. 45 | ::: 46 | """ 47 | 48 | def __init__(self, filename: str, **kwargs): 49 | 50 | super().__init__(filename, **kwargs) 51 | 52 | self.props_keys = [x for x in self.header[4:] 53 | if not x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 54 | self.constraints_keys = [x for x in self.header[4:] 55 | if x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 56 | self.command = "AddDescriptorSet" 57 | 58 | def get_indices(self): 59 | return { 60 | "entity": { 61 | "_DescriptorSet": self.get_indexed_properties() 62 | } 63 | } 64 | 65 | def getitem(self, idx): 66 | 67 | # Metrics/Engine can be of the form: 68 | # "IP", or 69 | # ["IP" ...] 70 | idx = self.df.index.start + idx 71 | metrics = self.df.loc[idx, HEADER_METRIC] 72 | metrics = metrics if "[" not in metrics else ast.literal_eval(metrics) 73 | engines = self.df.loc[idx, HEADER_ENGINE] 74 | engines = engines if "[" not in engines else ast.literal_eval(engines) 75 | 76 | data = { 77 | "name": self.df.loc[idx, HEADER_NAME], 78 | "dimensions": self.df.loc[idx, HEADER_DIM], 79 | "engine": engines, 80 | "metric": metrics, 81 | } 82 | 83 | q = [] 84 | ads = self._basic_command(idx, custom_fields=data) 85 | q.append(ads) 86 | 87 | return q, [] 88 | 89 | def validate(self): 90 | 91 | self.header = list(self.df.columns.values) 92 | 93 | if self.header[0] != HEADER_NAME: 94 | raise Exception("Error with CSV file field: " + HEADER_NAME) 95 | if self.header[1] != HEADER_DIM: 96 | raise Exception("Error with CSV file field: " + HEADER_DIM) 97 | if self.header[2] != HEADER_ENGINE: 98 | raise Exception("Error with CSV file field: " + HEADER_ENGINE) 99 | if self.header[3] != HEADER_METRIC: 100 | raise Exception("Error with CSV file field: " + HEADER_METRIC) 101 | -------------------------------------------------------------------------------- /aperturedb/EntityDataCSV.py: -------------------------------------------------------------------------------- 1 | from aperturedb import CSVParser 2 | import logging 3 | 4 | logger = logging.getLogger(__name__) 5 | ENTITY_CLASS = "EntityClass" 6 | PROPERTIES = "properties" 7 | CONSTRAINTS = "constraints" 8 | 9 | 10 | class EntityDataCSV(CSVParser.CSVParser): 11 | """**ApertureDB Entity Data.** 12 | 13 | This class loads the Entity Data which is present in a CSV file, 14 | and converts it into a series of ApertureDB queries. 15 | 16 | :::note Is backed by a CSV file with the following columns: 17 | ``EntityClass``, ``PROP_NAME_1``, ... ``PROP_NAME_N``, ``constraint_PROP1`` 18 | ::: 19 | 20 | Example CSV file:: 21 | 22 | EntityClass,name,lastname,age,id,constraint_id 23 | Person,John,Salchi,69,321423532,321423532 24 | Person,Johna,Salchi,63,42342522,42342522 25 | ... 
26 | 27 | Example usage: 28 | 29 | ``` python 30 | 31 | data = EntityDataCSV("/path/to/EntityData.csv") 32 | loader = ParallelLoader(client) 33 | loader.ingest(data) 34 | ``` 35 | 36 | 37 | :::info 38 | In the above example, the constraint_id ensures that a Entity with the specified 39 | id would be only inserted if it does not already exist in the database. 40 | ::: 41 | 42 | """ 43 | 44 | def __init__(self, filename: str, **kwargs): 45 | super().__init__(filename, **kwargs) 46 | 47 | self.props_keys = [x for x in self.header[1:] 48 | if not x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 49 | self.constraints_keys = [x for x in self.header[1:] 50 | if x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 51 | self.command = "AddEntity" 52 | 53 | def get_indices(self): 54 | return { 55 | "entity": { 56 | cls: self.get_indexed_properties() for cls in self.df[ENTITY_CLASS].unique() 57 | } 58 | } 59 | 60 | def getitem(self, idx): 61 | idx = self.df.index.start + idx 62 | eclass = self.df.loc[idx, ENTITY_CLASS] 63 | q = [] 64 | ae = self._basic_command(idx, 65 | custom_fields={ 66 | "class": eclass 67 | }) 68 | 69 | q.append(ae) 70 | return q, [] 71 | 72 | def validate(self): 73 | if self.header[0] != ENTITY_CLASS: 74 | raise Exception("Error with CSV file field: " + ENTITY_CLASS) 75 | 76 | # Used when a csv has a single entity type that needs to be deleted 77 | 78 | 79 | class EntityDeleteDataCSV(CSVParser.CSVParser): 80 | """**ApertureDB Entity Delete Data.** 81 | 82 | This class loads the Entity Data which is present in a CSV file, 83 | and converts it into a series of ApertureDB deletes. 84 | 85 | :::note 86 | Expects a CSV file with the following columns: 87 | 88 | ``constraint_PROP1`` 89 | ::: 90 | 91 | Example CSV file:: 92 | 93 | constraint_id 94 | 321423532 95 | 42342522 96 | ... 97 | 98 | Example usage: 99 | 100 | ```python 101 | 102 | data = ImageDeleteDataCSV("/path/to/UnusedImages.csv") 103 | loader = ParallelQuery(client) 104 | loader.query(data) 105 | ``` 106 | 107 | 108 | :::info 109 | In the above example, the constraint_id ensures that a Entity with the specified 110 | id would be only deleted. 111 | 112 | Note that you can take a csv with normal prop data and this will ignore it, so you 113 | could use input to a loader to this. 114 | ::: 115 | 116 | 117 | """ 118 | 119 | def __init__(self, entity_class, filename, df=None, use_dask=False): 120 | super().__init__(filename, df=df, use_dask=use_dask) 121 | self.command = "Delete" + entity_class 122 | self.constraint_keyword = "constraints" 123 | if not use_dask: 124 | self.constraint_keys = [x for x in self.header[0:]] 125 | 126 | def getitem(self, idx): 127 | idx = self.df.index.start + idx 128 | q = [] 129 | entity_delete = self._basic_command(idx) 130 | 131 | q.append(entity_delete) 132 | return q, [] 133 | 134 | def validate(self): 135 | # all we require is a valid csv with 1 or more columns. 
136 | return True 137 | 138 | 139 | class ImageDeleteDataCSV(EntityDeleteDataCSV): 140 | """ 141 | **ApertureData CSV Loader class for deleting images** 142 | 143 | Usage details in EntityDeleteDataCSV 144 | """ 145 | 146 | def __init__(self, filename, df=None, use_dask=False): 147 | super().__init__("Image", filename, df=df, use_dask=use_dask) 148 | -------------------------------------------------------------------------------- /aperturedb/KaggleData.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import List, Tuple 3 | import os 4 | import pandas as pd 5 | from kaggle.api.kaggle_api_extended import KaggleApi 6 | import zipfile 7 | from aperturedb.Subscriptable import Subscriptable 8 | 9 | 10 | class KaggleData(Subscriptable): 11 | """ 12 | **Class to wrap around a Dataset retrieved from kaggle** 13 | 14 | A DataSet downloaded from kaggle does not implement a standard mechanism to iterate over its values 15 | This class intends to provide an abstraction like that of a pytorch dataset 16 | where the iteration over Dataset elements yields an atomic record. 17 | 18 | :::note 19 | This class should be subclassed with specific implementations of generate_index and generate_query. 20 | ::: 21 | 22 | Example subclass: [CelebADataKaggle](https://github.com/aperture-data/aperturedb-python/blob/develop/examples/CelebADataKaggle.py) 23 | 24 | Args: 25 | dataset_ref (str): URL of kaggle dataset, for example https://www.kaggle.com/datasets/jessicali9530/celeba-dataset 26 | records_count (int): number of records to provide to generate. 27 | 28 | """ 29 | 30 | def __init__( 31 | self, 32 | dataset_ref: str, 33 | records_count: int = -1) -> None: 34 | self._collection = None 35 | self.records_count = records_count 36 | kaggle = KaggleApi() 37 | kaggle.authenticate() 38 | if "datasets/" in dataset_ref: 39 | dataset_ref = dataset_ref[dataset_ref.index( 40 | "datasets/") + len("datasets/"):] 41 | 42 | workdir = os.path.join("kaggleds", dataset_ref) 43 | 44 | files = kaggle.dataset_list_files(dataset_ref) 45 | 46 | # do not unzip from kaggle's API as it deletes the archive and 47 | # a subsequent run results in a redownload. 48 | x = kaggle.dataset_download_files( 49 | dataset=dataset_ref, 50 | path=workdir, 51 | quiet=False, 52 | unzip=False) 53 | 54 | archive = None 55 | for _, subdirs, dfiles in os.walk(workdir): 56 | if len(dfiles) == 1 and len(subdirs) == 0: 57 | archive = os.path.join(workdir, dfiles[0]) 58 | 59 | with zipfile.ZipFile(archive, 'r') as zip_ref: 60 | zip_ref.extractall(workdir) 61 | 62 | break 63 | self.workdir = workdir 64 | self.collection = self.generate_index( 65 | workdir, self.records_count).to_dict('records') 66 | 67 | def getitem(self, subscript): 68 | return self.generate_query(subscript) 69 | 70 | def __len__(self): 71 | return len(self.collection) 72 | 73 | def generate_index(self, root: str, records_count: int = -1) -> pd.DataFrame: 74 | """**Generate a way to access each record downloaded at the root** 75 | 76 | Args: 77 | root (str): Path to wich kaggle downloads a Dataset. 78 | 79 | Returns: 80 | pd.DataFrame: The Data loaded in a dataframe. 81 | """ 82 | raise Exception("To be implemented by subclass") 83 | 84 | def generate_query(self, idx: int) -> Tuple[List[dict], List[bytes]]: 85 | """ 86 | **Takes information from one atomic record from the Data and converts it to Query for apertureDB** 87 | 88 | Args: 89 | idx (int): index of the record in collection. 
90 | 91 | Raises: 92 | Exception: _description_ 93 | 94 | Returns: 95 | Tuple[List[dict], List[bytes]]: A pair of list of commands and optional list of blobs to go with them. 96 | """ 97 | raise Exception("To be implemented by subclass") 98 | -------------------------------------------------------------------------------- /aperturedb/Operations.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | 4 | class Operations(object): 5 | """ 6 | **Operations that can be performed on the fly on any retrieved images** 7 | 8 | [Supported operations](/query_language/Reference/shared_command_parameters/operations) 9 | """ 10 | 11 | def __init__(self): 12 | 13 | self.operations_arr = [] 14 | 15 | def get_operations_arr(self): 16 | return self.operations_arr 17 | 18 | def resize(self, width: int, height: int) -> Operations: 19 | 20 | op = { 21 | "type": "resize", 22 | "width": width, 23 | "height": height, 24 | } 25 | 26 | self.operations_arr.append(op) 27 | return self 28 | 29 | def rotate(self, angle: int, resize=False) -> Operations: 30 | 31 | op = { 32 | "type": "rotate", 33 | "angle": angle, 34 | "resize": resize, 35 | } 36 | 37 | self.operations_arr.append(op) 38 | return self 39 | 40 | def flip(self, code: str) -> Operations: 41 | 42 | op = { 43 | "type": "flip", 44 | "code": code, 45 | } 46 | 47 | self.operations_arr.append(op) 48 | return self 49 | 50 | def crop(self, x: int, y: int, width: int, height: int) -> Operations: 51 | 52 | op = { 53 | "type": "crop", 54 | "x": x, 55 | "y": y, 56 | "width": width, 57 | "height": height, 58 | } 59 | 60 | self.operations_arr.append(op) 61 | return self 62 | 63 | def interval(self, start: int, stop: int, step: int) -> Operations: 64 | 65 | op = { 66 | "type": "interval", 67 | "start": start, 68 | "stop": stop, 69 | "step": step 70 | } 71 | 72 | self.operations_arr.append(op) 73 | return self 74 | -------------------------------------------------------------------------------- /aperturedb/Parallelizer.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | import threading 4 | 5 | from threading import Thread 6 | from tqdm import tqdm as tqdm 7 | 8 | 9 | class Parallelizer: 10 | """**Generic Parallelizer** 11 | 12 | A parallelizer converts a series of operations to be executed and partitions it into 13 | batches, to be executed by multiple threads of execution. 
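    Example (a sketch, not a prescribed usage of this class): subclasses such as
    `ParallelQuery` expose the batch size and thread count through their `query`
    entry point. This assumes `client` is a connected `Connector` and the CSV file
    exists; import paths follow this repo's module layout.

    ```python
    # Illustrative only: batchsize/numthreads control the partitioning into
    # batches and the number of worker threads that execute them.
    from aperturedb.EntityDataCSV import EntityDataCSV
    from aperturedb.ParallelQuery import ParallelQuery

    data = EntityDataCSV("/path/to/EntityData.csv")
    querier = ParallelQuery(client)
    querier.query(generator=data, batchsize=100, numthreads=4, stats=True)
    ```

    The diagram below shows how those batches are interleaved across worker threads: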
14 | ```mermaid 15 | gantt 16 | title Parallel execution 17 | dateFormat HH:mm:ss 18 | section Worker1 19 | Batch1 :w1, 00:00:00, 10s 20 | Batch3 :w3, after w1, 10s 21 | Batch5 :after w3, 10s 22 | 23 | section Worker2 24 | Batch2 :w2, 00:00:00, 10s 25 | Batch4 :w4, after w2, 10s 26 | Batch6 :w6, after w4, 10s 27 | 28 | ``` 29 | """ 30 | 31 | def __init__(self): 32 | self._reset() 33 | 34 | def _reset(self, batchsize: int = 1, numthreads: int = 1): 35 | 36 | # Default Values 37 | self.batchsize = batchsize 38 | self.numthreads = numthreads 39 | 40 | self.total_actions = 0 41 | self.times_arr = [] 42 | self.total_actions_time = 0 43 | self.error_counter = 0 44 | self.actual_stats = [] 45 | 46 | def get_times(self): 47 | 48 | return self.times_arr 49 | 50 | def batched_run(self, generator, batchsize: int, numthreads: int, stats: bool): 51 | run_event = threading.Event() 52 | run_event.set() 53 | self._reset(batchsize, numthreads) 54 | self.stats = stats 55 | self.generator = generator 56 | if hasattr(generator, "sample_count"): 57 | print("sample_count", generator.sample_count) 58 | self.total_actions = generator.sample_count 59 | else: 60 | self.total_actions = len(generator) 61 | self.pb = tqdm(total=self.total_actions, desc="Progress", 62 | unit="items", unit_scale=True, dynamic_ncols=True) 63 | start_time = time.time() 64 | 65 | if self.total_actions < batchsize: 66 | elements_per_thread = self.total_actions 67 | self.numthreads = 1 68 | else: 69 | elements_per_thread = math.ceil( 70 | self.total_actions / self.numthreads) 71 | 72 | thread_arr = [] 73 | for i in range(self.numthreads): 74 | idx_start = i * elements_per_thread 75 | idx_end = min(idx_start + elements_per_thread, 76 | self.total_actions) 77 | 78 | thread_add = Thread(target=self.worker, 79 | args=(i, generator, idx_start, idx_end, run_event)) 80 | thread_arr.append(thread_add) 81 | 82 | a = [th.start() for th in thread_arr] 83 | try: 84 | while run_event.is_set() and any([th.is_alive() for th in thread_arr]): 85 | time.sleep(1) 86 | except KeyboardInterrupt: 87 | print("Interrupted ... Shutting down workers") 88 | finally: 89 | run_event.clear() 90 | a = [th.join() for th in thread_arr] 91 | 92 | # Update progress bar to completion 93 | if self.stats: 94 | self.pb.close() 95 | 96 | self.total_actions_time = time.time() - start_time 97 | 98 | if self.stats: 99 | self.print_stats() 100 | 101 | def print_stats(self): 102 | """ 103 | Must be implemented by child class 104 | """ 105 | pass 106 | -------------------------------------------------------------------------------- /aperturedb/PolygonDataCSV.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from aperturedb import CSVParser 4 | 5 | HEADER_POLYGONS = "polygons" 6 | IMG_KEY_PROP = "img_key_prop" 7 | IMG_KEY_VAL = "img_key_value" 8 | POLYGON_FIELDS = { 9 | "_label": "label", 10 | } 11 | 12 | 13 | class PolygonDataCSV(CSVParser.CSVParser): 14 | """ 15 | **ApertureDB Polygon Data.** 16 | 17 | This class loads the Polygon Data which is present in a CSV file, 18 | and converts it into a series of ApertureDB queries. 19 | 20 | :::note Is backed by a CSV file with the following columns: 21 | ``IMG_KEY``, [``POLYGON_PROPERTY_1``, ... ``POLYGON_PROPERTY_N``,] [``constraint_POLYGON_PROPERTY_1``, ... ``constraint_POLYGON_PROPERTY_N``,] [``_label``,] ``polygons`` 22 | ::: 23 | 24 | **IMG_KEY**: identifies the name of the image property that will identify the 25 | image with which to associate each polygon object. 
This property should reliably 26 | identify at most a single image, like a unique id. The value in each row will be 27 | used to look up the image to which the polygon will attach. 28 | 29 | **POLYGON_PROPERTY_I**: declares the name of a property that will be assigned to all polygon objects. Any number of properties can be declared in this way. 30 | 31 | **constraint_POLYGON_PROPERTY_I**: declares that POLYGON_PROPERTY_I should be unique, and that a new polygon will not be added if there already exists one with the same value for this property. For each row, the value in this column should match the value in column POLYGON_PROPERTY_I. 32 | 33 | **_label**: optionally applies a label to the polygon objects. 34 | 35 | **polygons**: a JSON array of polygon regions. Each polygon region is itself an array of [x,y] vertices that describe the boundary of a single contiguous polygon. See also [Polygon API parameter](/query_language/Reference/shared_command_parameters/polygons). 36 | 37 | Example CSV file:: 38 | 39 | image_id,polygon_id,constraint_polygon_id,category_id,_label,polygons 40 | 397133,82445,82445,44,bottle,"[[[224.24, 297.18], [228.29, 297.18], ...]]" 41 | 397133,119568,119568,67,dining table,"[[[292.37, 425.1], [340.6, 373.86], ...]]" 42 | ... 43 | 44 | Example usage: 45 | 46 | ``` python 47 | 48 | data = PolygonDataCSV("/path/to/PolygonData.csv") 49 | loader = ParallelLoader(client) 50 | loader.ingest(data) 51 | ``` 52 | 53 | """ 54 | 55 | def __init__(self, filename: str, **kwargs): 56 | 57 | super().__init__(filename, kwargs=kwargs) 58 | 59 | self.props_keys = [] 60 | self.constraints_keys = [] 61 | self.polygon_keys = [] 62 | for key in self.header[1:-1]: 63 | if key in POLYGON_FIELDS.keys(): 64 | self.polygon_keys.append(key) 65 | elif key.startswith(CSVParser.CONSTRAINTS_PREFIX): 66 | self.constraints_keys.append(key) 67 | else: 68 | self.props_keys.append(key) 69 | 70 | self.img_key = self.header[0] 71 | self.command = "AddPolygon" 72 | 73 | def get_indices(self): 74 | return { 75 | "entity": { 76 | "_Polygon": self.get_indexed_properties() 77 | } 78 | } 79 | 80 | def getitem(self, idx): 81 | idx = self.df.index.start + idx 82 | 83 | q = [] 84 | 85 | img_id = self.df.loc[idx, self.img_key] 86 | 87 | fi = { 88 | "FindImage": { 89 | "_ref": 1, 90 | "constraints": { 91 | self.img_key: ["==", img_id], 92 | }, 93 | "blobs": False, 94 | }, 95 | } 96 | q.append(fi) 97 | 98 | polygon_fields = { 99 | "image_ref": 1, 100 | "polygons": json.loads(self.df.loc[idx, HEADER_POLYGONS]) 101 | } 102 | for key in self.polygon_keys: 103 | polygon_fields[POLYGON_FIELDS[key]] = self.df.loc[idx, key] 104 | 105 | ap = self._basic_command(idx, polygon_fields) 106 | q.append(ap) 107 | 108 | return q, [] 109 | 110 | def validate(self): 111 | 112 | self.header = list(self.df.columns.values) 113 | 114 | if len(self.header) < 2: 115 | raise Exception( 116 | "Error with CSV file: must have at least two columns") 117 | if self.header[-1] != HEADER_POLYGONS: 118 | raise Exception("Error with CSV file field: " + HEADER_POLYGONS) 119 | -------------------------------------------------------------------------------- /aperturedb/Polygons.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from aperturedb.Entities import Entities 3 | from aperturedb.CommonLibrary import execute_query 4 | 5 | 6 | class Polygons(Entities): 7 | db_object = "_Polygon" 8 | 9 | def intersection(self, other: Polygons, threshold: float) -> Polygons: 10 | """ 11 | 
Find a set of polygons that intersect with another set of polygons. 12 | The threshold is user specified and is used to determine if two polygons 13 | sufficiently overlap to be considered intersecting. 14 | 15 | Args: 16 | other (Polygons): Set of polygons to intersect with. 17 | threshold (float): The threshold for determining if two polygons are sufficiently intersecting. 18 | 19 | Returns: 20 | Polygons: unique set of polygons that intersect with the other set of polygons. 21 | """ 22 | result = set() 23 | for p1 in self: 24 | for p2 in other: 25 | query = [ 26 | { 27 | "FindEntity": { 28 | "_ref": 1, 29 | "unique": True, 30 | "constraints": { 31 | "_uniqueid": ["==", p1["_uniqueid"]] 32 | } 33 | } 34 | }, { 35 | "FindEntity": { 36 | "_ref": 2, 37 | "unique": True, 38 | "constraints": { 39 | "_uniqueid": ["==", p2["_uniqueid"]] 40 | } 41 | } 42 | }, { 43 | "RegionIoU": { 44 | "roi_1": 1, 45 | "roi_2": 2, 46 | } 47 | } 48 | ] 49 | res, r, b = execute_query(self.client, query, []) 50 | if r[2]["RegionIoU"]["IoU"][0][0] > threshold: 51 | result.add(int(p1["ann_id"])) 52 | result.add(int(p2["ann_id"])) 53 | return list(result) 54 | -------------------------------------------------------------------------------- /aperturedb/PyTorchData.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from torch.utils.data import Dataset 3 | from aperturedb.Subscriptable import Subscriptable 4 | 5 | 6 | class PyTorchData(Subscriptable): 7 | """ 8 | **Class to wrap around a Dataset retrieved from [PyTorch datasets](https://pytorch.org/vision/0.15/datasets.html)** 9 | 10 | The dataset in this case can be iterated over. 11 | So the only thing that needs to be implemented is generate_query, 12 | which takes an index and returns a query. 13 | 14 | :::note 15 | This class should be subclassed with a specific (custom) implementation of generate_query(). 16 | ::: 17 | 18 | Example subclass: [CocoDataPyTorch](https://github.com/aperture-data/aperturedb-python/blob/develop/examples/CocoDataPyTorch.py) 19 | 20 | """ 21 | 22 | def __init__(self, dataset: Dataset) -> None: 23 | self.loaded_dataset = [t for t in dataset] 24 | 25 | def getitem(self, idx: int): 26 | return self.generate_query(idx) 27 | 28 | def __len__(self): 29 | return len(self.loaded_dataset) 30 | 31 | def generate_query(self, idx: int) -> Tuple[List[dict], List[bytes]]: 32 | """ 33 | **Takes information from one atomic record from the Data and converts it to Query for apertureDB** 34 | 35 | Args: 36 | idx (int): index of the record in collection. 37 | 38 | Raises: 39 | Exception: _description_ 40 | 41 | Returns: 42 | Tuple[List[dict], List[bytes]]: A pair of list of commands and optional list of blobs to go with them. 43 | """ 44 | raise Exception("To be implemented by subclass") 45 | -------------------------------------------------------------------------------- /aperturedb/QueryGenerator.py: -------------------------------------------------------------------------------- 1 | from aperturedb import Subscriptable 2 | 3 | 4 | class QueryGenerator(Subscriptable.Subscriptable): 5 | """ 6 | The base class to use for Query Generators. 
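A subclass implements ``getitem()`` (returning a ``(commands, blobs)`` pair) and ``__len__``, and can then be fed to the parallel query/ingest machinery like any other generator. A minimal sketch, using a hypothetical ``CaptionQueryGenerator``:

```python
from aperturedb.QueryGenerator import QueryGenerator

class CaptionQueryGenerator(QueryGenerator):
    # Hypothetical: emits one AddEntity query per caption string.
    def __init__(self, captions):
        self.captions = captions

    def __len__(self):
        return len(self.captions)

    def getitem(self, idx):
        query = [{
            "AddEntity": {
                "class": "Caption",
                "properties": {"text": self.captions[idx]},
            }
        }]
        return query, []   # no blobs for this generator
```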
7 | """ 8 | 9 | def getitem(self, subscript): 10 | raise Exception("To be implemented in subclass") 11 | -------------------------------------------------------------------------------- /aperturedb/Sort.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class Order(Enum): 5 | ASCENDING = "ascending" 6 | DESCENDING = "descending" 7 | 8 | 9 | class Sort(): 10 | """ 11 | **Specification of the sort order** 12 | """ 13 | 14 | def __init__(self, key: str, order: Order) -> None: 15 | self._sort = { 16 | "key": key, 17 | "order": order.value 18 | } 19 | -------------------------------------------------------------------------------- /aperturedb/Sources.py: -------------------------------------------------------------------------------- 1 | import time 2 | import requests 3 | import logging 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | class Sources(): 9 | """ 10 | **Load data from various resources** 11 | """ 12 | 13 | def __init__(self, n_download_retries, **kwargs): 14 | 15 | self.n_download_retries = n_download_retries 16 | 17 | # Use custom clients if specified 18 | self.s3 = None if "s3_client" not in kwargs else kwargs["s3_client"] 19 | self.http_client = requests.Session( 20 | ) if "http_client" not in kwargs else kwargs["http_client"] 21 | 22 | def load_from_file(self, filename): 23 | """ 24 | Load data from a file. 25 | """ 26 | try: 27 | fd = open(filename, "rb") 28 | buff = fd.read() 29 | fd.close() 30 | return True, buff 31 | except Exception as e: 32 | logger.error(f"VALIDATION ERROR: {filename}") 33 | logger.exception(e) 34 | finally: 35 | if not fd.closed: 36 | fd.close() 37 | return False, None 38 | 39 | def load_from_http_url(self, url, validator): 40 | """ 41 | Load data from a http url. 
42 | """ 43 | import numpy as np 44 | 45 | retries = 0 46 | while True: 47 | imgdata = self.http_client.get(url) 48 | if imgdata.ok and ("Content-Length" not in imgdata.headers or int(imgdata.headers["Content-Length"]) == imgdata.raw._fp_bytes_read): 49 | imgbuffer = np.frombuffer(imgdata.content, dtype='uint8') 50 | if not validator(imgbuffer): 51 | logger.error(f"VALIDATION ERROR: {url}") 52 | return False, None 53 | 54 | return imgdata.ok, imgdata.content 55 | else: 56 | if retries >= self.n_download_retries: 57 | break 58 | logger.warning(f"Retrying object: {url}") 59 | retries += 1 60 | time.sleep(2) 61 | 62 | return False, None 63 | 64 | def load_from_s3_url(self, s3_url, validator): 65 | import numpy as np 66 | 67 | retries = 0 68 | while True: 69 | try: 70 | bucket_name = s3_url.split("/")[2] 71 | object_name = s3_url.split("s3://" + bucket_name + "/")[-1] 72 | s3_response_object = self.s3.get_object( 73 | Bucket=bucket_name, Key=object_name) 74 | img = s3_response_object['Body'].read() 75 | imgbuffer = np.frombuffer(img, dtype='uint8') 76 | if not validator(imgbuffer): 77 | logger.error(f"VALIDATION ERROR: {s3_url}") 78 | return False, None 79 | 80 | return True, img 81 | except Exception as e: 82 | if retries >= self.n_download_retries: 83 | break 84 | logger.warning(f"Retrying object: {s3_url}", exc_info=True) 85 | retries += 1 86 | time.sleep(2) 87 | 88 | logger.error(f"S3 ERROR: {s3_url}") 89 | return False, None 90 | 91 | def load_from_gs_url(self, gs_url, validator): 92 | import numpy as np 93 | from google.cloud import storage 94 | 95 | retries = 0 96 | client = storage.Client() 97 | while True: 98 | try: 99 | bucket_name = gs_url.split("/")[2] 100 | object_name = gs_url.split("gs://" + bucket_name + "/")[-1] 101 | 102 | blob = client.bucket(bucket_name).blob( 103 | object_name).download_as_bytes() 104 | imgbuffer = np.frombuffer(blob, dtype='uint8') 105 | if not validator(imgbuffer): 106 | logger.warning(f"VALIDATION ERROR: {gs_url}") 107 | return False, None 108 | return True, blob 109 | except: 110 | if retries >= self.n_download_retries: 111 | break 112 | logger.warning("Retrying object: {gs_url}", exc_info=True) 113 | retries += 1 114 | time.sleep(2) 115 | 116 | logger.error(f"GS ERROR: {gs_url}") 117 | return False, None 118 | -------------------------------------------------------------------------------- /aperturedb/SparseAddingDataCSV.py: -------------------------------------------------------------------------------- 1 | from aperturedb import CSVParser 2 | import logging 3 | 4 | logger = logging.getLogger(__name__) 5 | # SparseAddingDataCSV 6 | # Check for item existance using constraints before adding 7 | # Useful when adding larger resources where a portion already exist 8 | 9 | 10 | class SparseAddingDataCSV(CSVParser.CSVParser): 11 | """ 12 | **ApertureDB General CSV Parser for Loading Blob data where a large amount of the blobs already exist. 13 | 14 | This is a blob loader where the entity is searched for first, before the blob data is passed to the server. 15 | This can be useful speedup if blob data is large in comparison to the amount of data actually causing loads 16 | 17 | This is an abstract class, ImageSparseAddDataCSV loads Images. 
18 | 19 | """ 20 | 21 | def __init__(self, entity_class: str, filename: str, **kwargs): 22 | self.entity = entity_class 23 | self.keys_set = False 24 | super().__init__(filename, **kwargs) 25 | self.blobs_per_query = [0, 1] 26 | self.commands_per_query = [1, 1] 27 | self._setupkeys() 28 | 29 | def _setupkeys(self): 30 | if not self.keys_set: 31 | self.keys_set = True 32 | self.props_keys = [x for x in self.header[1:] 33 | if not x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 34 | self.constraints_keys = [x for x in self.header[1:] 35 | if x.startswith(CSVParser.CONSTRAINTS_PREFIX)] 36 | 37 | def getitem(self, idx): 38 | idx = self.df.index.start + idx 39 | query_set = [] 40 | 41 | hold_props_keys = self.props_keys 42 | self.props_keys = [] 43 | self.command = "Find" + self.entity 44 | self.constraint_keyword = "constraints" 45 | entity_find = self._basic_command( 46 | idx, custom_fields={"results": {"count": True}}) 47 | # proceed to second command if count == 0 48 | condition_find_failed = {"results": {0: {"count": ["==", 0]}}} 49 | self.props_keys = hold_props_keys 50 | self.command = "Add" + self.entity 51 | self.constraint_keyword = "if_not_found" 52 | entity_add = self._basic_command(idx) 53 | query_set.append(entity_find) 54 | query_set.append([condition_find_failed, entity_add]) 55 | 56 | if hasattr(self, "modify_item") and callable(self.modify_item): 57 | query_set = self.modify_item(query_set, idx) 58 | 59 | return [query_set], [] 60 | 61 | def validate(self): 62 | self._setupkeys() 63 | valid = True 64 | if not self.use_dask: 65 | if len(self.constraints_keys) < 1: 66 | logger.error("Cannot add/update " + 67 | self.entity + "; no constraint keys") 68 | valid = False 69 | return valid 70 | -------------------------------------------------------------------------------- /aperturedb/Stats.py: -------------------------------------------------------------------------------- 1 | class Stats: 2 | total_actions = 0 3 | times_arr = [] 4 | total_actions_time = 0 5 | error_counter = 0 6 | objects_existed = 0 7 | succeeded_queries = 0 8 | succeeded_commands = 0 9 | 10 | def __init__(self): 11 | self.total_actions = 0 12 | self.times_arr = [] 13 | self.total_actions_time = 0 14 | self.error_counter = 0 15 | self.objects_existed = 0 16 | self.succeeded_queries = 0 17 | self.succeeded_commands = 0 18 | -------------------------------------------------------------------------------- /aperturedb/Subscriptable.py: -------------------------------------------------------------------------------- 1 | class Wrapper(): 2 | """ 3 | This is needed because slicing in Subscriptable returns a list. 4 | The response handler also needs to be accounted for as 5 | that will be a part of generator. 6 | """ 7 | 8 | def __init__(self, list, response_handler, strict_response_validation, blobs_relative_to_csv): 9 | self.list = list 10 | self.response_handler = response_handler 11 | self.strict_response_validation = strict_response_validation 12 | self.blobs_relative_to_csv = blobs_relative_to_csv 13 | 14 | def __len__(self): 15 | return len(self.list) 16 | 17 | def __getitem__(self, i): 18 | return self.list[i] 19 | 20 | 21 | class Subscriptable(): 22 | """ 23 | The base class to use for Data/Generators and such collection types. 
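Subclasses provide ``getitem()`` and ``__len__``; indexing, slicing, and iteration then come for free. A minimal sketch with a hypothetical ``Squares`` collection:

```python
from aperturedb.Subscriptable import Subscriptable

class Squares(Subscriptable):
    # Hypothetical collection yielding the first n square numbers.
    def __init__(self, n):
        self.n = n

    def __len__(self):
        return self.n

    def getitem(self, idx):
        return idx * idx

squares = Squares(5)
print(squares[2])          # 4
print(list(squares[1:4]))  # [1, 4, 9] -- slicing returns a Wrapper
print(list(squares))       # [0, 1, 4, 9, 16]
```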
24 | """ 25 | 26 | def __getitem__(self, subscript): 27 | if isinstance(subscript, slice): 28 | start = subscript.start if subscript.start else 0 29 | start = len(self) + start if start < 0 else start 30 | stop = subscript.stop if subscript.stop else len(self) 31 | step = subscript.step if subscript.step else 1 32 | wrapper = Wrapper( 33 | [self.getitem(i) for i in range(start, stop, step)], 34 | self.response_handler if hasattr( 35 | self, "response_handler") else None, 36 | self.strict_response_validation if hasattr( 37 | self, "strict_response_validation") else None, 38 | self.blobs_relative_to_csv if hasattr( 39 | self, "blobs_relative_to_csv") else False 40 | ) 41 | return wrapper 42 | 43 | else: 44 | if subscript < len(self): 45 | return self.getitem(subscript) 46 | else: 47 | raise StopIteration() 48 | 49 | def getitem(self, subscript): 50 | raise Exception("To be implemented in subclass") 51 | 52 | def __iter__(self): 53 | self.ind = 0 54 | return self 55 | 56 | def __next__(self): 57 | if self.ind >= len(self): 58 | raise StopIteration 59 | else: 60 | r = self.getitem(self.ind) 61 | self.ind += 1 62 | return r 63 | -------------------------------------------------------------------------------- /aperturedb/TensorFlowData.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from aperturedb.Subscriptable import Subscriptable 3 | import tensorflow as tf 4 | 5 | 6 | class TensorFlowData(Subscriptable): 7 | """ 8 | **Class to wrap around a Dataset retrieved from [Tensorflow datasets](https://www.tensorflow.org/datasets)** 9 | 10 | The dataset in this case can be iterated over. 11 | So the only thing that needs to be implemented is __init__ and generate_query, 12 | which takes an index and returns a query. 13 | 14 | :::note 15 | This class should be subclassed with a specific (custom) implementation of generate_query(), 16 | and __init__ should be called with the dataset to be wrapped. 17 | ::: 18 | 19 | Example subclass: [Cifar10DataTensorflow](https://github.com/aperture-data/aperturedb-python/blob/develop/examples/Cifar10DataTensorflow.py) 20 | 21 | """ 22 | 23 | def __init__(self, dataset: tf.data.Dataset) -> None: 24 | raise Exception("To be implemented by subclass") 25 | 26 | def getitem(self, idx: int): 27 | return self.generate_query(idx) 28 | 29 | def __len__(self): 30 | raise Exception("To be implemented by subclass") 31 | 32 | def generate_query(self, idx: int) -> Tuple[List[dict], List[bytes]]: 33 | """ 34 | **Takes information from one atomic record from the Data and converts it to Query for ApertureDB** 35 | 36 | Args: 37 | idx (int): index of the record in collection. 38 | 39 | Raises: 40 | Exception: _description_ 41 | 42 | Returns: 43 | Tuple[List[dict], List[bytes]]: A pair of list of commands and optional list of blobs to go with them. 
44 | """ 45 | raise Exception("To be implemented by subclass") 46 | -------------------------------------------------------------------------------- /aperturedb/VideoDownloader.py: -------------------------------------------------------------------------------- 1 | import time 2 | import requests 3 | import os 4 | 5 | import cv2 6 | import numpy as np 7 | 8 | from aperturedb import Parallelizer 9 | from aperturedb import CSVParser 10 | 11 | HEADER_PATH = "filename" 12 | HEADER_URL = "url" 13 | 14 | 15 | class VideoDownloaderCSV(CSVParser.CSVParser): 16 | """ 17 | **ApertureDB Video Downloader.** 18 | 19 | :::info 20 | Expects a CSV file with AT LEAST a ``url`` column, and 21 | optionally a ``filename`` field. 22 | If ``filename`` is not present, it is taken from the URL. 23 | ::: 24 | """ 25 | 26 | def __init__(self, filename, check_video=True): 27 | 28 | self.has_filename = False 29 | self.check_video = check_video 30 | 31 | super().__init__(filename) 32 | 33 | def __getitem__(self, idx): 34 | 35 | url = self.df.loc[idx, HEADER_URL] 36 | 37 | if self.has_filename: 38 | filename = self.df.loc[idx, HEADER_PATH] 39 | else: 40 | filename = self.url_to_filename(url) 41 | 42 | return url, filename 43 | 44 | def url_to_filename(self, url): 45 | 46 | filename = url.split("/")[-1] 47 | folder = "/tmp/videos/" 48 | 49 | return folder + filename 50 | 51 | def validate(self): 52 | 53 | self.header = list(self.df.columns.values) 54 | 55 | if HEADER_URL not in self.header: 56 | raise Exception("Error with CSV file field: url. Must be a field") 57 | 58 | if HEADER_PATH in self.header: 59 | self.has_filename = True 60 | 61 | 62 | class VideoDownloader(Parallelizer.Parallelizer): 63 | 64 | def __init__(self, ): 65 | 66 | super().__init__() 67 | 68 | self.type = "video" 69 | 70 | self.check_video = False 71 | 72 | def check_if_video_is_ok(self, filename, url): 73 | 74 | if not os.path.exists(filename): 75 | return False 76 | 77 | try: 78 | a = cv2.VideoCapture(filename) 79 | if a.isOpened() == False: 80 | print("Video present but error reading it:", url) 81 | return False 82 | except BaseException: 83 | print("Video present but error decoding:", url) 84 | return False 85 | 86 | return True 87 | 88 | def download_video(self, url, filename): 89 | 90 | start = time.time() 91 | 92 | if self.check_video and self.check_if_video_is_ok(filename, url): 93 | return 94 | 95 | folder = os.path.dirname(filename) 96 | if not os.path.exists(folder): 97 | os.makedirs(folder, exist_ok=True) 98 | 99 | videodata = requests.get(url) 100 | if videodata.ok: 101 | fd = open(filename, "wb") 102 | fd.write(videodata.content) 103 | fd.close() 104 | 105 | try: 106 | a = cv2.VideoCapture(filename) 107 | if a.isOpened() == False: 108 | print("Downloaded Video size error:", url) 109 | os.remove(filename) 110 | self.error_counter += 1 111 | except BaseException: 112 | print("Downloaded Video cannot be decoded:", url) 113 | os.remove(filename) 114 | self.error_counter += 1 115 | else: 116 | print("URL not found:", url) 117 | self.error_counter += 1 118 | 119 | self.times_arr.append(time.time() - start) 120 | 121 | def worker(self, thid, generator, start, end): 122 | 123 | for i in range(start, end): 124 | 125 | url, filename = generator[i] 126 | 127 | self.download_video(url, filename) 128 | 129 | if thid == 0 and self.stats: 130 | self.pb.update((i - start) / (end - start)) 131 | 132 | def print_stats(self): 133 | 134 | print("====== ApertureDB VideoDownloader Stats ======") 135 | 136 | times = np.array(self.times_arr) 137 | print("Avg 
Video download time(s):", np.mean(times)) 138 | print("Img download time std:", np.std(times)) 139 | print("Avg download throughput (videos/s)):", 140 | 1 / np.mean(times) * self.numthreads) 141 | 142 | print("Total time(s):", self.total_actions_time) 143 | print("Overall throughput (videos/s):", 144 | self.total_actions / self.total_actions_time) 145 | print("Total errors encountered:", self.error_counter) 146 | print("=============================================") 147 | -------------------------------------------------------------------------------- /aperturedb/Videos.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from typing import Any 3 | 4 | from aperturedb.Entities import Entities 5 | from IPython.display import HTML, display 6 | from aperturedb.NotebookHelpers import display_annotated_video 7 | from ipywidgets import widgets 8 | 9 | 10 | class Videos(Entities): 11 | """ 12 | **The object mapper representation of videos in ApertureDB.** 13 | 14 | This class is a layer on top of the native query. 15 | It facilitates interactions with videos in the database in the pythonic way. 16 | """ 17 | db_object = "_Video" 18 | 19 | def getitem(self, idx): 20 | item = super().getitem(idx) 21 | if self.blobs: 22 | if 'preview' not in item: 23 | item['preview'] = self.get_blob(item) 24 | return item 25 | 26 | def inspect(self, show_preview: bool = True, meta = None) -> Any: 27 | if meta == None: 28 | def meta(x): return [] 29 | df = super().inspect() 30 | if show_preview == True: 31 | op = widgets.Output() 32 | with op: 33 | df['preview'] = df.apply(lambda x: display_annotated_video( 34 | x["preview"], bboxes=meta(x)), axis=1) 35 | display(HTML( 36 | "
" + 37 | df.to_html(escape=False) 38 | + "
" 39 | )) 40 | return op 41 | else: 42 | return df 43 | -------------------------------------------------------------------------------- /aperturedb/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | import datetime 4 | import os 5 | import json 6 | import requests 7 | from string import Template 8 | import platform 9 | import faulthandler 10 | import signal 11 | import sys 12 | 13 | __version__ = "0.4.47" 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | # https://docs.python.org/3/library/faulthandler.html 18 | # Register SIGUSR1 to dump the stack trace 19 | # Good for debugging a running process 20 | 21 | if os.getenv("ADB_DEBUGGABLE", None) != None: 22 | if sys.platform == "win32": 23 | logger.warn("Unable to configure debugging support for win32") 24 | else: 25 | faulthandler.register(signal.SIGUSR1.value) 26 | 27 | # set log level 28 | formatter = logging.Formatter( 29 | "%(asctime)s : %(levelname)s : %(name)s : %(thread)d : %(lineno)d : %(message)s") 30 | 31 | log_file_level = logging.getLevelName(os.getenv("LOG_FILE_LEVEL", "WARN")) 32 | log_console_level = logging.getLevelName( 33 | os.getenv("LOG_CONSOLE_LEVEL", "ERROR")) 34 | 35 | # Set the logger filter to the minimum (more chatty) of the two handler levels 36 | # This reduces problems if the environment adds a root handler (e.g. Google Colab) 37 | logger_level = min(log_file_level, log_console_level) 38 | if any(log_control in os.environ 39 | for log_control in ["LOG_CONSOLE_LEVEL", "LOG_FILE_LEVEL"]): 40 | logger.setLevel(logger_level) 41 | 42 | # define file handler and set formatter 43 | error_file_name = "error.${now}.log" 44 | 45 | if "ADB_LOG_FILE" in os.environ: 46 | error_file_name = None if len( 47 | os.environ["ADB_LOG_FILE"]) == 0 else os.environ["ADB_LOG_FILE"] 48 | 49 | if error_file_name is not None: 50 | error_file_tmpl = Template(error_file_name) 51 | template_items = { 52 | # python isodate has ':', not valid in files in windows. 53 | "now": str(datetime.datetime.now().isoformat()).replace(':', ''), 54 | "node": str(platform.node()) 55 | } 56 | error_file_handler = logging.FileHandler(error_file_tmpl.safe_substitute( 57 | **template_items), delay=True) 58 | error_file_handler.setFormatter(formatter) 59 | error_file_handler.setLevel(log_file_level) 60 | logger.addHandler(error_file_handler) 61 | 62 | error_console_handler = logging.StreamHandler() 63 | error_console_handler.setLevel(log_console_level) 64 | error_console_handler.setFormatter(formatter) 65 | logger.addHandler(error_console_handler) 66 | 67 | try: 68 | latest_version = json.loads(requests.get( 69 | "https://pypi.org/pypi/aperturedb/json").text)["info"]["version"] 70 | except Exception as e: 71 | logger.warning( 72 | f"Failed to get latest version: {e}. You are using version {__version__}") 73 | latest_version = None 74 | if __version__ != latest_version: 75 | logger.warning( 76 | f"The latest version of aperturedb is {latest_version}. You are using version {__version__}. It is recommended to upgrade.") 77 | -------------------------------------------------------------------------------- /aperturedb/cli/README.md: -------------------------------------------------------------------------------- 1 | # adb : Commad line utility. 2 | 3 | adb is a command line utility to have a well defined way of doing routine tasks with AperturDB instance. 
4 | It's based on [typer](https://typer.tiangolo.com/) 5 | 6 | It has subcommands with their parameters defined under the cli directory. 7 | 8 | Some key points to consider: 9 | - Against conventions of importing different classes at module level, the functions in adb should tend to import them lazily (even at the risk of repeating). This is because the recursive imports bog the startup down, which makes for a bad user experience. 10 | 11 | ## Notes about improving the load times. 12 | execute the command to be tested with PYTHONPROFILEIMPORTTIME set as 1 13 | ``` 14 | pip install tuna 15 | PYTHONPROFILEIMPORTTIME=1 adb config ls 2>&1 | tee check_times 16 | tuna check_times 17 | ``` -------------------------------------------------------------------------------- /aperturedb/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aperture-data/aperturedb-python/a58fe1eab4c78ae92a6b03ab6d5c9e5ed1f0d62d/aperturedb/cli/__init__.py -------------------------------------------------------------------------------- /aperturedb/cli/adb.py: -------------------------------------------------------------------------------- 1 | import typer 2 | 3 | from aperturedb.cli import configure, ingest, utilities, transact 4 | 5 | app = typer.Typer(pretty_exceptions_show_locals=False) 6 | 7 | app.add_typer(ingest.app, name="ingest", help="Ingest data into ApertureDB.") 8 | app.add_typer(configure.app, name="config", 9 | help="Configure ApertureDB client.") 10 | app.add_typer(utilities.app, name="utils", help="Utilities") 11 | app.add_typer(transact.app, name="transact", 12 | help="Run a transaction against ApertureDB.") 13 | 14 | 15 | @app.callback() 16 | def check_context(ctx: typer.Context): 17 | if ctx.invoked_subcommand != "config" and not \ 18 | configure.has_environment_configuration(): 19 | configure.check_configured(as_global=False) or \ 20 | configure.check_configured(as_global=True, show_error=True) 21 | 22 | 23 | if __name__ == "__main__": 24 | app() 25 | -------------------------------------------------------------------------------- /aperturedb/cli/console.py: -------------------------------------------------------------------------------- 1 | from rich.console import Console 2 | 3 | console = Console() 4 | -------------------------------------------------------------------------------- /aperturedb/cli/keys.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | import typer 4 | 5 | from aperturedb.cli.console import console 6 | from aperturedb.Configuration import Configuration 7 | from aperturedb.Connector import Connector 8 | 9 | app = typer.Typer() 10 | 11 | 12 | @app.command(help="Create Key for a user") 13 | def generate(user: Annotated[str, typer.Argument(help="The user to generate a key for")]): 14 | from aperturedb.CommonLibrary import create_connector 15 | conn = create_connector() 16 | key = generate_user_key(conn, user) 17 | console.log(f"Key for {user} is", key, highlight=False) 18 | 19 | 20 | def generate_user_key(conn: Connector, user: str): 21 | from aperturedb.Utils import Utils 22 | u = Utils(conn) 23 | token = u.generate_token() 24 | u.assign_token(user, token) 25 | key = Configuration.create_aperturedb_key( 26 | conn.config.host, conn.config.port, token, conn.config.use_rest, 27 | conn.config.use_ssl) 28 | return key 29 | -------------------------------------------------------------------------------- /aperturedb/cli/tokens.py: 
-------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Annotated 3 | 4 | import typer 5 | 6 | from aperturedb.cli.console import console 7 | from aperturedb.CommonLibrary import create_connector, execute_query 8 | from aperturedb.Utils import Utils 9 | 10 | app = typer.Typer() 11 | 12 | 13 | @app.command(help="List User Authentication Tokens") 14 | def list(user: Annotated[str, typer.Argument(help="The user the display tokens for")]): 15 | token_list_query = [{"GetUserDetails": {"username": user}}] 16 | client = create_connector() 17 | result, response, blobs = execute_query( 18 | client=client, 19 | query=token_list_query, 20 | blobs=[]) 21 | utokens = response[0]['GetUserDetails']['tokens'] 22 | if len(utokens) == 0: 23 | console.log(f"No Tokens for {user}") 24 | else: 25 | console.log(utokens) 26 | 27 | 28 | @app.command(help="Generate an Authentication token for a user") 29 | def generate(): 30 | conn = create_connector() 31 | u = Utils(conn) 32 | token = u.generate_token() 33 | print(f"{token}") 34 | return token 35 | 36 | 37 | @app.command(help="Assign an Authentication token to a user") 38 | def assign(user: Annotated[str, typer.Argument(help="user to assign the token to")], 39 | token: Annotated[str, typer.Argument(help="Token to be assigned")]): 40 | conn = create_connector() 41 | u = Utils(conn) 42 | try: 43 | u.assign_token(user, token) 44 | console.log(f"Assigned token to {user}") 45 | except Exception as e: 46 | console.log(f"Failed to assign token: {e}", style="red") 47 | 48 | 49 | @app.command(help="Remove an Authentication token from a user") 50 | def remove(user: Annotated[str, typer.Argument(help="User to remove a token from")], 51 | token: Annotated[str, typer.Argument(help="Token to be removed")]): 52 | conn = create_connector() 53 | u = Utils(conn) 54 | try: 55 | u.remove_token(user, token) 56 | console.log("Action complete") 57 | except Exception as e: 58 | console.log(f"Failed to remove token: {e}", style="red") 59 | -------------------------------------------------------------------------------- /aperturedb/cli/transact.py: -------------------------------------------------------------------------------- 1 | import json 2 | from enum import Enum 3 | import sys 4 | import traceback 5 | 6 | import typer 7 | from typing_extensions import Annotated 8 | 9 | from aperturedb.cli.console import console 10 | 11 | from aperturedb.Connector import Connector 12 | import logging 13 | 14 | logger = logging.getLogger(__file__) 15 | 16 | FUSE_AVAIALBLE = False 17 | 18 | 19 | def load_fuse(): 20 | global FUSE_AVAIALBLE 21 | try: 22 | from aperturedb.cli.mount_coco import mount_images_from_aperturedb 23 | FUSE_AVAIALBLE = True 24 | except ImportError as e: 25 | logger.warning( 26 | "fuse not found for this env. This is not critical for adb to continue.") 27 | 28 | 29 | app = typer.Typer(callback=load_fuse) 30 | 31 | 32 | class OutputTypes(str, Enum): 33 | STDOUT = "stdout" 34 | MOUNT_COCO = "mount_coco" 35 | RAW_JSON = "raw_json" 36 | 37 | 38 | def dump_as_raw_json(client: Connector, transaction: dict, **kwargs): 39 | """ 40 | Function to pass the result of a transaction as raw json to stdout. 41 | Does not handle blobs. 42 | 43 | Args: 44 | client (Connector): The client to the database 45 | transaction (dict): Query to be executed. 
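Returns:
    None: the response is printed to stdout as indented JSON; returned blobs are discarded.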
46 | """ 47 | from aperturedb.CommonLibrary import execute_query 48 | 49 | result, response, blobs = execute_query( 50 | client=client, 51 | query=transaction, 52 | blobs=[]) 53 | print(json.dumps(response, indent=2)) 54 | 55 | 56 | def dump_to_stdout(client: Connector, transaction: dict, **kwargs): 57 | from aperturedb.CommonLibrary import execute_query 58 | 59 | result, response, blobs = execute_query( 60 | client=client, 61 | query=transaction, 62 | blobs=[]) 63 | console.log(result) 64 | console.log(response) 65 | for i, blob in enumerate(blobs): 66 | console.log(f"len(blob[{i}]) = {len(blob[i])}") 67 | 68 | 69 | def mount_as_coco_ds(client: Connector, transaction: dict, **kwargs): 70 | from aperturedb.Images import Images 71 | from aperturedb.CommonLibrary import execute_query 72 | 73 | result, response, blobs = execute_query( 74 | client=client, 75 | query=transaction, 76 | blobs=[]) 77 | if result == 0: 78 | image_entities = [] 79 | for i, cr in enumerate(response): 80 | if "FindImage" in cr: 81 | if "entities" in cr["FindImage"]: 82 | image_entities.extend(cr["FindImage"]["entities"]) 83 | else: 84 | console.log(f"No entities found in FindImage {i} response") 85 | try: 86 | from aperturedb.cli.mount_coco import mount_images_from_aperturedb 87 | images = Images(client, response=image_entities) 88 | console.log(f"Found {len(images)} images") 89 | mount_images_from_aperturedb(images) 90 | except Exception as e: 91 | console.log(traceback.format_exc()) 92 | else: 93 | console.log(response) 94 | 95 | 96 | @app.command() 97 | def from_json_file( 98 | filepath: Annotated[str, typer.Argument(help="Path to query in json format")], 99 | output_type: Annotated[OutputTypes, typer.Option( 100 | help="Type of output")] = "stdout", 101 | output_path: Annotated[str, typer.Option( 102 | help="Path to output (only for mount as output)")] = None 103 | ): 104 | from aperturedb.CommonLibrary import create_connector 105 | 106 | client = create_connector() 107 | 108 | output_types = { 109 | OutputTypes.STDOUT: dump_to_stdout, 110 | OutputTypes.RAW_JSON: dump_as_raw_json 111 | } 112 | global FUSE_AVAIALBLE 113 | if FUSE_AVAIALBLE: 114 | output_types[OutputTypes.MOUNT_COCO] = mount_as_coco_ds 115 | 116 | with open(filepath) as inputstream: 117 | transaction = json.loads(inputstream.read()) 118 | old_argv = sys.argv[1:] 119 | sys.argv[1:] = [output_path] 120 | output_types[output_type](client, transaction, output_path=output_path) 121 | sys.argv[1:] = old_argv 122 | -------------------------------------------------------------------------------- /aperturedb/cli/utilities.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Annotated 3 | 4 | import typer 5 | 6 | from aperturedb.cli.console import console 7 | 8 | app = typer.Typer() 9 | 10 | import aperturedb.cli.keys as keys 11 | import aperturedb.cli.tokens as tokens 12 | app.add_typer(keys.app, name="keys", 13 | help="Manage Aperturedb keys") 14 | app.add_typer(tokens.app, name="tokens", 15 | help="Manage database authentication tokens") 16 | 17 | 18 | class CommandTypes(str, Enum): 19 | STATUS = "status" 20 | SUMMARY = "summary" 21 | REMOVE_ALL = "remove_all" 22 | REMOVE_INDEXES = "remove_indexes" 23 | 24 | 25 | def confirm(command: CommandTypes, force: bool): 26 | if force: 27 | return True 28 | console.print("Danger", style="bold red") 29 | console.log(f"This will execute {command}.") 30 | response = typer.prompt("Are you sure you want to continue? 
[y/N]") 31 | if response.lower() != "y": 32 | typer.echo("Aborting...") 33 | raise typer.Abort() 34 | return True 35 | 36 | 37 | @app.command(help="Execute a command on the database") 38 | def execute(command: CommandTypes, 39 | force: Annotated[bool, typer.Option(help="Do not confirm")] = False): 40 | 41 | from aperturedb.Utils import Utils 42 | from aperturedb.CommonLibrary import create_connector 43 | 44 | utils = Utils(create_connector()) 45 | available_commands = { 46 | CommandTypes.STATUS: lambda: print(utils.status()), 47 | CommandTypes.SUMMARY: utils.summary, 48 | CommandTypes.REMOVE_ALL: lambda: confirm( 49 | CommandTypes.REMOVE_ALL, force) and utils.remove_all_objects(), 50 | CommandTypes.REMOVE_INDEXES: lambda: confirm( 51 | CommandTypes.REMOVE_INDEXES, force) and utils.remove_all_indexes(), 52 | } 53 | 54 | available_commands[command]() 55 | 56 | 57 | class LogLevel(str, Enum): 58 | INFO = "INFO" 59 | WARNING = "WARNING" 60 | ERROR = "ERROR" 61 | 62 | 63 | @app.command() 64 | def log( 65 | message: Annotated[str, typer.Argument(help="The message to log")], 66 | level: LogLevel = LogLevel.INFO 67 | ): 68 | """ 69 | Log a message to the user log. 70 | 71 | This is useful because it can later be seen in Grafana, not only as log entries in the 72 | ApertureDB Logging dashboard, but also as event markers in the ApertureDB Status dashboard. 73 | """ 74 | from aperturedb.Utils import Utils 75 | from aperturedb.CommonLibrary import create_connector 76 | 77 | utils = Utils(create_connector()) 78 | utils.user_log_message(message, level=level.value) 79 | 80 | 81 | @app.command() 82 | def visualize_schema( 83 | filename: str = "schema", 84 | format: str = "png" 85 | ): 86 | """ 87 | Visualize the schema of the database. 88 | 89 | This will create a file with the schema of the database in the specified format. 90 | 91 | Relies on graphviz to be installed. 92 | """ 93 | from aperturedb.Utils import Utils 94 | from aperturedb.CommonLibrary import create_connector 95 | 96 | utils = Utils(create_connector()) 97 | s = utils.visualize_schema() 98 | result = s.render(filename, format=format) 99 | print(result) 100 | -------------------------------------------------------------------------------- /aperturedb/queryMessage.py: -------------------------------------------------------------------------------- 1 | # queryMessage.py - wraps protobuf versions 2 | import google.protobuf 3 | 4 | if google.protobuf.__version__.split(".")[0] == "3": 5 | from . import queryMessage3_pb2 6 | 7 | def queryMessage(): 8 | return queryMessage3_pb2.queryMessage() 9 | 10 | def ParseFromString(msg, data): 11 | return msg.ParseFromString(data) 12 | elif google.protobuf.__version__.split(".")[0] == "4": 13 | from . import queryMessage4_pb2 14 | 15 | def queryMessage(): 16 | return queryMessage4_pb2.queryMessage() 17 | 18 | def ParseFromString(msg, data): 19 | # because of https://github.com/protocolbuffers/protobuf/issues/10774 20 | return msg.ParseFromString(memoryview(data).tobytes()) 21 | elif google.protobuf.__version__.split(".")[0] == "5": 22 | from . 
import queryMessage5_pb2 23 | 24 | def queryMessage(): 25 | return queryMessage5_pb2.queryMessage() 26 | 27 | def ParseFromString(msg, data): 28 | return msg.ParseFromString(data) 29 | else: 30 | raise Exception( 31 | f"aperturedb not compatible with {google.protobuf.__version__}") 32 | -------------------------------------------------------------------------------- /aperturedb/queryMessage3_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 3 | # source: queryMessage3.proto 4 | 5 | from google.protobuf import descriptor as _descriptor 6 | from google.protobuf import message as _message 7 | from google.protobuf import reflection as _reflection 8 | from google.protobuf import symbol_database as _symbol_database 9 | # @@protoc_insertion_point(imports) 10 | 11 | _sym_db = _symbol_database.Default() 12 | 13 | 14 | 15 | 16 | DESCRIPTOR = _descriptor.FileDescriptor( 17 | name='queryMessage3.proto', 18 | package='VDMS.protobufs', 19 | syntax='proto3', 20 | serialized_options=None, 21 | create_key=_descriptor._internal_create_key, 22 | serialized_pb=b'\n\x13queryMessage3.proto\x12\x0eVDMS.protobufs\":\n\x0cqueryMessage\x12\x0c\n\x04json\x18\x01 \x01(\t\x12\r\n\x05\x62lobs\x18\x02 \x03(\x0c\x12\r\n\x05token\x18\x03 \x01(\tb\x06proto3' 23 | ) 24 | 25 | 26 | 27 | 28 | _QUERYMESSAGE = _descriptor.Descriptor( 29 | name='queryMessage', 30 | full_name='VDMS.protobufs.queryMessage', 31 | filename=None, 32 | file=DESCRIPTOR, 33 | containing_type=None, 34 | create_key=_descriptor._internal_create_key, 35 | fields=[ 36 | _descriptor.FieldDescriptor( 37 | name='json', full_name='VDMS.protobufs.queryMessage.json', index=0, 38 | number=1, type=9, cpp_type=9, label=1, 39 | has_default_value=False, default_value=b"".decode('utf-8'), 40 | message_type=None, enum_type=None, containing_type=None, 41 | is_extension=False, extension_scope=None, 42 | serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), 43 | _descriptor.FieldDescriptor( 44 | name='blobs', full_name='VDMS.protobufs.queryMessage.blobs', index=1, 45 | number=2, type=12, cpp_type=9, label=3, 46 | has_default_value=False, default_value=[], 47 | message_type=None, enum_type=None, containing_type=None, 48 | is_extension=False, extension_scope=None, 49 | serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), 50 | _descriptor.FieldDescriptor( 51 | name='token', full_name='VDMS.protobufs.queryMessage.token', index=2, 52 | number=3, type=9, cpp_type=9, label=1, 53 | has_default_value=False, default_value=b"".decode('utf-8'), 54 | message_type=None, enum_type=None, containing_type=None, 55 | is_extension=False, extension_scope=None, 56 | serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), 57 | ], 58 | extensions=[ 59 | ], 60 | nested_types=[], 61 | enum_types=[ 62 | ], 63 | serialized_options=None, 64 | is_extendable=False, 65 | syntax='proto3', 66 | extension_ranges=[], 67 | oneofs=[ 68 | ], 69 | serialized_start=39, 70 | serialized_end=97, 71 | ) 72 | 73 | DESCRIPTOR.message_types_by_name['queryMessage'] = _QUERYMESSAGE 74 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 75 | 76 | queryMessage = _reflection.GeneratedProtocolMessageType('queryMessage', (_message.Message,), { 77 | 'DESCRIPTOR' : _QUERYMESSAGE, 78 | '__module__' : 'queryMessage3_pb2' 79 | # @@protoc_insertion_point(class_scope:VDMS.protobufs.queryMessage) 80 | }) 81 | 
_sym_db.RegisterMessage(queryMessage) 82 | 83 | 84 | # @@protoc_insertion_point(module_scope) 85 | -------------------------------------------------------------------------------- /aperturedb/queryMessage4_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 3 | # source: queryMessage4.proto 4 | # Protobuf Python Version: 4.25.3 5 | """Generated protocol buffer code.""" 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import descriptor_pool as _descriptor_pool 8 | from google.protobuf import symbol_database as _symbol_database 9 | from google.protobuf.internal import builder as _builder 10 | # @@protoc_insertion_point(imports) 11 | 12 | _sym_db = _symbol_database.Default() 13 | 14 | 15 | 16 | 17 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13queryMessage4.proto\x12\x0eVDMS.protobufs\":\n\x0cqueryMessage\x12\x0c\n\x04json\x18\x01 \x01(\t\x12\r\n\x05\x62lobs\x18\x02 \x03(\x0c\x12\r\n\x05token\x18\x03 \x01(\tb\x06proto3') 18 | 19 | _globals = globals() 20 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) 21 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'queryMessage4_pb2', _globals) 22 | if _descriptor._USE_C_DESCRIPTORS == False: 23 | DESCRIPTOR._options = None 24 | _globals['_QUERYMESSAGE']._serialized_start=39 25 | _globals['_QUERYMESSAGE']._serialized_end=97 26 | # @@protoc_insertion_point(module_scope) 27 | -------------------------------------------------------------------------------- /aperturedb/queryMessage5_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
3 | # NO CHECKED-IN PROTOBUF GENCODE 4 | # source: queryMessage5.proto 5 | # Protobuf Python Version: 5.29.0 6 | """Generated protocol buffer code.""" 7 | from google.protobuf import descriptor as _descriptor 8 | from google.protobuf import descriptor_pool as _descriptor_pool 9 | from google.protobuf import runtime_version as _runtime_version 10 | from google.protobuf import symbol_database as _symbol_database 11 | from google.protobuf.internal import builder as _builder 12 | _runtime_version.ValidateProtobufRuntimeVersion( 13 | _runtime_version.Domain.PUBLIC, 14 | 5, 15 | 29, 16 | 0, 17 | '', 18 | 'queryMessage5.proto' 19 | ) 20 | # @@protoc_insertion_point(imports) 21 | 22 | _sym_db = _symbol_database.Default() 23 | 24 | 25 | 26 | 27 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13queryMessage5.proto\x12\x0eVDMS.protobufs\":\n\x0cqueryMessage\x12\x0c\n\x04json\x18\x01 \x01(\t\x12\r\n\x05\x62lobs\x18\x02 \x03(\x0c\x12\r\n\x05token\x18\x03 \x01(\tb\x06proto3') 28 | 29 | _globals = globals() 30 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) 31 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'queryMessage5_pb2', _globals) 32 | if not _descriptor._USE_C_DESCRIPTORS: 33 | DESCRIPTOR._loaded_options = None 34 | _globals['_QUERYMESSAGE']._serialized_start=39 35 | _globals['_QUERYMESSAGE']._serialized_end=97 36 | # @@protoc_insertion_point(module_scope) 37 | -------------------------------------------------------------------------------- /aperturedb/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aperture-data/aperturedb-python/a58fe1eab4c78ae92a6b03ab6d5c9e5ed1f0d62d/aperturedb/transformers/__init__.py -------------------------------------------------------------------------------- /aperturedb/transformers/clip.py: -------------------------------------------------------------------------------- 1 | 2 | import logging 3 | import numpy as np 4 | from PIL import Image 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | error_message = """ 9 | CLIP transformer requires git+https://github.com/openai/CLIP.git and torch 10 | Install with: pip install aperturedb[complete], followed by explicit install of CLIP. 11 | Can be done with : "pip install git+https://github.com/openai/CLIP.git" in the same 12 | venv as aperturedb. 
13 | """ 14 | 15 | try: 16 | import clip 17 | import torch 18 | import cv2 19 | except ImportError: 20 | logger.critical(error_message) 21 | exit(1) 22 | 23 | descriptor_set = "ViT-B/16" 24 | device = "cuda" if torch.cuda.is_available() else "cpu" 25 | model, preprocess = clip.load(descriptor_set, device=device) 26 | 27 | 28 | def generate_embedding(blob): 29 | global errors 30 | 31 | nparr = np.fromstring(blob, np.uint8) 32 | image = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 33 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 34 | image = preprocess(Image.fromarray(image)).unsqueeze(0).to(device) 35 | 36 | image_features = model.encode_image(image) 37 | embedding = None 38 | if device == "cuda": 39 | image_features = image_features.float() 40 | embedding = image_features.detach().cpu().numpy().tobytes() 41 | else: 42 | embedding = image_features.detach().numpy().tobytes() 43 | 44 | return embedding 45 | -------------------------------------------------------------------------------- /aperturedb/transformers/clip_pytorch_embeddings.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from aperturedb.Subscriptable import Subscriptable 3 | from aperturedb.transformers.transformer import Transformer 4 | from .clip import generate_embedding, descriptor_set 5 | 6 | 7 | class CLIPPyTorchEmbeddings(Transformer): 8 | """ 9 | Generates the embeddings for the images using the CLIP Pytorch model. 10 | https://github.com/openai/CLIP 11 | """ 12 | 13 | def __init__(self, data: Subscriptable, **kwargs) -> None: 14 | """ 15 | Args: 16 | data: Subscriptable object 17 | search_set_name: Name of the [descriptorset](/query_language/Reference/descriptor_commands/desc_commands/AddDescriptor) to use for the search. 18 | """ 19 | self.search_set_name = kwargs.pop( 20 | "search_set_name", descriptor_set) 21 | super().__init__(data, **kwargs) 22 | 23 | # Let's sample some data to figure out the descriptorset we need. 24 | if len(self._add_image_index) > 0: 25 | sample = generate_embedding(self.data[0][1][0]) 26 | utils = self.get_utils() 27 | utils.add_descriptorset( 28 | self.search_set_name, dim=len(sample) // 4, metric=["CS"]) 29 | 30 | def getitem(self, subscript): 31 | x = self.data[subscript] 32 | 33 | for ic in self._add_image_index: 34 | serialized = generate_embedding(x[1][ic]) 35 | # If the image already has an image_sha256, we use it. 36 | image_sha256 = x[0][ic]["AddImage"].get("properties", {}).get( 37 | "adb_image_sha256", None) 38 | if not image_sha256: 39 | image_sha256 = hashlib.sha256(x[1][ic]).hexdigest() 40 | x[1].append(serialized) 41 | x[0].append( 42 | { 43 | "AddDescriptor": { 44 | "set": self.search_set_name, 45 | "properties": { 46 | "image_sha256": image_sha256, 47 | }, 48 | "if_not_found": { 49 | "image_sha256": ["==", image_sha256], 50 | }, 51 | "connect": { 52 | "ref": x[0][ic]["AddImage"]["_ref"] 53 | } 54 | } 55 | }) 56 | return x 57 | -------------------------------------------------------------------------------- /aperturedb/transformers/common_properties.py: -------------------------------------------------------------------------------- 1 | from aperturedb.Subscriptable import Subscriptable 2 | from aperturedb.transformers.transformer import Transformer 3 | import logging 4 | 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class CommonProperties(Transformer): 10 | """ 11 | This applies some common properties to the data. 
12 | """ 13 | 14 | def __init__(self, data: Subscriptable, **kwargs) -> None: 15 | """ 16 | Args: 17 | data: Subscriptable object 18 | adb_data_source: Data source for the data 19 | adb_timestamp: Timestamp for the data 20 | adb_main_object: Main object for the data 21 | """ 22 | super().__init__(data, **kwargs) 23 | 24 | # Statically set some properties, these are not in the data 25 | self.adb_data_source = kwargs.get("adb_data_source", None) 26 | self.adb_timestamp = kwargs.get("adb_timestamp", None) 27 | self.adb_main_object = kwargs.get("adb_main_object", None) 28 | 29 | def getitem(self, subscript): 30 | x = self.data[subscript] 31 | try: 32 | # x is a transaction that has an add_image command and a blob 33 | for ic in self._add_image_index: 34 | src_properties = x[0][ic]["AddImage"]["properties"] 35 | # Set the static properties, if explicitly set 36 | if self.adb_data_source: 37 | src_properties["adb_data_source"] = self.adb_data_source 38 | if self.adb_timestamp: 39 | src_properties["adb_timestamp"] = self.adb_timestamp 40 | if self.adb_main_object: 41 | src_properties["adb_main_object"] = self.adb_main_object 42 | except Exception as e: 43 | logger.exception(e.with_traceback(), stack_info=True) 44 | 45 | return x 46 | -------------------------------------------------------------------------------- /aperturedb/transformers/facenet.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logger = logging.getLogger(__name__) 4 | 5 | error_message = """ 6 | Facenet transformer requires facenet-pytorch and torch 7 | Install with: pip install aperturedb[complete] 8 | Alternatively, install with: "pip install facenet-pytorch torch" in the same 9 | venv as aperturedb. 10 | """ 11 | 12 | try: 13 | from facenet_pytorch import MTCNN, InceptionResnetV1 14 | import torch 15 | except ImportError: 16 | logger.critical(error_message) 17 | exit(1) 18 | 19 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 20 | 21 | # If required, create a face detection pipeline using MTCNN: 22 | mtcnn = MTCNN(image_size=96, margin=0, device=device) 23 | 24 | # Create an inception resnet (in eval mode): 25 | resnet = InceptionResnetV1(pretrained='vggface2', device=device).eval() 26 | 27 | errors = 0 28 | 29 | 30 | def generate_embedding(img): 31 | global errors 32 | # Get cropped and prewhitened image tensor 33 | img_cropped = mtcnn(img) 34 | if img_cropped is not None: 35 | # Calculate embedding (unsqueeze to add batch dimension) 36 | img_embedding = resnet(img_cropped.unsqueeze(0).to(device)) 37 | else: 38 | img_embedding = torch.zeros(1, 512).to(device) 39 | errors += 1 40 | 41 | return img_embedding 42 | -------------------------------------------------------------------------------- /aperturedb/transformers/facenet_pytorch_embeddings.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from aperturedb.Subscriptable import Subscriptable 3 | from aperturedb.transformers.transformer import Transformer 4 | from PIL import Image 5 | import io 6 | import time 7 | from .facenet import generate_embedding 8 | 9 | 10 | class FacenetPyTorchEmbeddings(Transformer): 11 | """ 12 | Generates the embeddings for the images using the Facenet Pytorch model. 
13 | """ 14 | 15 | def __init__(self, data: Subscriptable, **kwargs) -> None: 16 | """ 17 | Args: 18 | data: Subscriptable object 19 | search_set_name: Name of the [descriptorset](/query_language/Reference/descriptor_commands/desc_commands/AddDescriptor) to use for the search. 20 | """ 21 | self.search_set_name = kwargs.pop( 22 | "search_set_name", "facenet_pytorch_embeddings") 23 | super().__init__(data, **kwargs) 24 | 25 | # Let's sample some data to figure out the descriptorset we need. 26 | if len(self._add_image_index) > 0: 27 | sample = self._get_embedding_from_blob(self.data[0][1][0]) 28 | utils = self.get_utils() 29 | utils.add_descriptorset(self.search_set_name, dim=len(sample) // 4) 30 | 31 | def _get_embedding_from_blob(self, image_blob: bytes): 32 | pil_image = Image.open(io.BytesIO(image_blob)) 33 | embedding = generate_embedding(pil_image) 34 | serialized = embedding.cpu().detach().numpy().tobytes() 35 | return serialized 36 | 37 | def getitem(self, subscript): 38 | start = time.time() 39 | self.ncalls += 1 40 | x = self.data[subscript] 41 | 42 | for ic in self._add_image_index: 43 | serialized = self._get_embedding_from_blob( 44 | x[1][self._add_image_index.index(ic)]) 45 | # If the image already has an image_sha256, we use it. 46 | image_sha256 = x[0][ic]["AddImage"].get("properties", {}).get( 47 | "adb_image_sha256", None) 48 | if not image_sha256: 49 | image_sha256 = hashlib.sha256(x[1][ic]).hexdigest() 50 | x[1].append(serialized) 51 | x[0].append( 52 | { 53 | "AddDescriptor": { 54 | "set": self.search_set_name, 55 | "properties": { 56 | "image_sha256": image_sha256, 57 | }, 58 | "if_not_found": { 59 | "image_sha256": ["==", image_sha256], 60 | }, 61 | "connect": { 62 | "ref": x[0][ic]["AddImage"]["_ref"] 63 | } 64 | } 65 | }) 66 | self.cumulative_time += time.time() - start 67 | return x 68 | -------------------------------------------------------------------------------- /aperturedb/transformers/image_properties.py: -------------------------------------------------------------------------------- 1 | from aperturedb.transformers.transformer import Transformer 2 | from aperturedb.Subscriptable import Subscriptable 3 | 4 | from PIL import Image 5 | import io 6 | import logging 7 | import uuid 8 | import hashlib 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class ImageProperties(Transformer): 14 | """ 15 | This computes some image properties and adds them to the metadata. 16 | """ 17 | 18 | def __init__(self, data: Subscriptable, **kwargs) -> None: 19 | super().__init__(data, **kwargs) 20 | utils = self.get_utils() 21 | 22 | if "adb_data_source" not in utils.get_indexed_props("_Image"): 23 | utils.create_entity_index("_Image", "adb_data_source") 24 | 25 | def getitem(self, subscript): 26 | x = self.data[subscript] 27 | try: 28 | # x is a transaction that has an add_image command and a blob 29 | for ic in self._add_image_index: 30 | blob_index = self._add_image_index.index(ic) 31 | src_properties = x[0][ic]["AddImage"]["properties"] 32 | # Compute the dynamic properties and apply them to metadata 33 | src_properties["adb_image_size"] = len(x[1][blob_index]) 34 | src_properties["adb_image_sha256"] = hashlib.sha256( 35 | x[1][blob_index]).hexdigest() 36 | 37 | # Compute the image dimensions. 
38 | pil_image = Image.open(io.BytesIO(x[1][blob_index])) 39 | src_properties["adb_image_width"] = pil_image.width 40 | src_properties["adb_image_height"] = pil_image.height 41 | src_properties["adb_image_id"] = str( 42 | src_properties["id"] if "id" in src_properties else uuid.uuid4().hex) 43 | 44 | except Exception as e: 45 | # Importantly, do not raise an exception here, since it will kill ingestion. 46 | # Create a log message instead, for post-mortem analysis. 47 | logger.exception(e.with_traceback(None), stack_info=True) 48 | 49 | return x 50 | -------------------------------------------------------------------------------- /aperturedb/transformers/transformer.py: -------------------------------------------------------------------------------- 1 | from aperturedb.Subscriptable import Subscriptable 2 | from aperturedb.CommonLibrary import create_connector 3 | from aperturedb.Utils import Utils 4 | import logging 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class Transformer(Subscriptable): 10 | """ 11 | Transformer is an abstract class that can be used to transform 12 | data before ingestion into aperturedb. 13 | 14 | :::info 15 | **Some build in transformers:** 16 | - CommonProperties: Add common properties to the data 17 | - ImageProperties: Add image properties to the data 18 | - Facenet: Add facenet embeddings to the data 19 | ::: 20 | 21 | 22 | [Example](https://github.com/aperture-data/aperturedb-python/blob/develop/examples/similarity_search/add_faces.py) of how to use transformers: 23 | ```python 24 | from CelebADataKaggle import CelebADataKaggle 25 | from aperturedb.transformers.facenet_pytorch_embeddings import FacenetPyTorchEmbeddings 26 | from aperturedb.transformers.common_properties import CommonProperties 27 | from aperturedb.transformers.image_properties import ImageProperties 28 | 29 | . 30 | . 31 | . 32 | 33 | dataset = CelebADataKaggle() 34 | 35 | # Here's a pipeline that adds extra properties to the celebA dataset 36 | dataset = CommonProperties( 37 | dataset, 38 | adb_data_source="kaggle-celebA", 39 | adb_main_object="Face", 40 | adb_annoted=True) 41 | 42 | # some useful properties for the images 43 | dataset = ImageProperties(dataset) 44 | 45 | # Add the embeddings generated through facenet. 
46 | dataset = FacenetPyTorchEmbeddings(dataset) 47 | 48 | ``` 49 | 50 | """ 51 | 52 | def __init__(self, data: Subscriptable, client=None, **kwargs) -> None: 53 | self.data = data 54 | 55 | # Inspect the first element to get the number of queries and blobs 56 | x = self.data[0] 57 | self._queries = len(x[0]) 58 | self._blobs = len(x[1]) 59 | self._blob_index = [] 60 | self._add_image_index = [] 61 | self._client = client 62 | 63 | bc = 0 64 | for i, c in enumerate(x[0]): 65 | command = list(c.keys())[0] 66 | if command in ["AddImage", "AddDescriptor", "AddVideo", "AddBlob"]: 67 | self._blob_index.append(i) 68 | if command == "AddImage": 69 | self._add_image_index.append(i) 70 | bc += 1 71 | logger.info(f"Found {bc} blobs in the data") 72 | logger.info( 73 | f"Found {len(self._add_image_index)} AddImage commands in the data") 74 | 75 | self.ncalls = 0 76 | self.cumulative_time = 0 77 | 78 | def getitem(self, subscript): 79 | raise NotImplementedError("Needs to be subclassed") 80 | 81 | def __len__(self): 82 | return len(self.data) 83 | 84 | def get_client(self): 85 | if self._client is None: 86 | self._client = create_connector() 87 | return self._client 88 | 89 | def get_utils(self): 90 | return Utils(self.get_client()) 91 | -------------------------------------------------------------------------------- /aperturedb/types.py: -------------------------------------------------------------------------------- 1 | # This file only exists to support readable type hints 2 | 3 | from typing import List, Dict, Any 4 | 5 | Command = Dict[str, Any] 6 | Blob = bytes 7 | Commands = List[Command] # aka Query, but that's also a class name 8 | Blobs = List[Blob] 9 | CommandResponses = List[Dict] 10 | 11 | Image = bytes 12 | Video = bytes 13 | Descriptor = bytes 14 | -------------------------------------------------------------------------------- /configure_deployment.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | source $(dirname "$0")/version.sh 4 | 5 | read_version 6 | 7 | echo "Configuring deployment with: $BUILD_VERSION" 8 | 9 | 10 | find deploy/ -type f -name "*.yaml" -exec sed -i "s/\$VERSION/v$BUILD_VERSION/g" {} \; 11 | 12 | -------------------------------------------------------------------------------- /docker/complete/Dockerfile: -------------------------------------------------------------------------------- 1 | # Pull base image. 2 | FROM aperturedata/aperturedb-notebook:dependencies 3 | 4 | RUN mkdir /aperturedata 5 | ADD docker/complete/aperturedata /aperturedata 6 | 7 | RUN cd /aperturedata && pip install -e ".[complete]" 8 | -------------------------------------------------------------------------------- /docker/dependencies/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | # Updated as per the newest release. 
4 | ENV OPENCV_VERSION=4.7.0 5 | 6 | RUN apt-get update \ 7 | && apt-get upgrade -y \ 8 | && apt-get install -y --no-install-recommends \ 9 | python3-venv 10 | 11 | ENV DEBIAN_FRONTEND=noninteractive 12 | ENV VIRTUAL_ENV=/opt/venv 13 | ENV NODEJS_LTS=v20.12.2 14 | RUN python3 -m venv $VIRTUAL_ENV 15 | ENV PATH="$VIRTUAL_ENV/bin:$PATH" 16 | 17 | RUN apt-get -y install build-essential git cmake python3.10-venv\ 18 | libx264-* libx265-* libavcodec-dev libavformat-dev\ 19 | pkg-config\ 20 | libavutil-dev libswscale-dev python3-venv\ 21 | libavcodec-extra libavcodec-dev python3-dev\ 22 | ffmpeg h264enc wget fuse libfuse-dev 23 | 24 | #The version of nodejs in ubuntu is very old 25 | #Installing the LTS as on Feb 23 26 | RUN cd /opt && wget https://nodejs.org/dist/${NODEJS_LTS}/node-${NODEJS_LTS}-linux-x64.tar.xz && tar xf node-${NODEJS_LTS}-linux-x64.tar.xz 27 | ENV PATH="/opt/node-${NODEJS_LTS}-linux-x64/bin:$PATH" 28 | 29 | # Cmake determines the correct path for site packages by looking at 30 | # numpy, and results in following output in configure: 31 | # -- Python 3: 32 | # -- Interpreter: /opt/venv/bin/python3 (ver 3.10.6) 33 | # -- Libraries: /usr/lib/x86_64-linux-gnu/libpython3.10.so (ver 3.10.6) 34 | # -- numpy: /opt/venv/lib/python3.10/site-packages/numpy/core/include (ver 1.24.1) 35 | # -- install path: lib/python3.10/site-packages/cv2/python-3.10 36 | RUN pip install "numpy<2" 37 | 38 | RUN wget -q https://github.com/opencv/opencv/archive/$OPENCV_VERSION.tar.gz && \ 39 | tar xf $OPENCV_VERSION.tar.gz && rm $OPENCV_VERSION.tar.gz && \ 40 | cd opencv-$OPENCV_VERSION && mkdir build && cd build && \ 41 | cmake \ 42 | -D CMAKE_BUILD_TYPE=Release \ 43 | -D WITH_TBB=OFF -D WITH_OPENMP=ON -D WITH_IPP=ON \ 44 | -D CPU_DISPATCH=SSE4_2,AVX,AVX2 \ 45 | -D BUILD_EXAMPLES=OFF \ 46 | -D BUILD_DOCS=OFF \ 47 | -D BUILD_PERF_TESTS=OFF \ 48 | -D BUILD_TESTS=OFF \ 49 | -D BUILD_opencv_apps=OFF \ 50 | -D WITH_FFMPEG=ON \ 51 | -D CMAKE_INSTALL_PREFIX=/usr/local \ 52 | -D OPENCV_PYTHON3_INSTALL_PATH=/opt/venv/lib/python3.10/site-packages .. && \ 53 | make -j6 && make install 54 | 55 | RUN pip install jupyterlab jupyterlab-dash dash-cytoscape plotly jupyter-dash numpy 56 | RUN jupyter lab build 57 | RUN jupyter labextension disable "@jupyterlab/apputils-extension:announcements" 58 | -------------------------------------------------------------------------------- /docker/dependencies/build.sh: -------------------------------------------------------------------------------- 1 | docker build -f Dockerfile -t aperturedata/aperturedb-notebook:dependencies . 2 | -------------------------------------------------------------------------------- /docker/notebook/Dockerfile: -------------------------------------------------------------------------------- 1 | # Pull base image. 2 | FROM aperturedata/aperturedb-notebook:dependencies 3 | 4 | RUN mkdir /aperturedata 5 | ADD docker/notebook/aperturedata /aperturedata 6 | 7 | COPY docker/notebook/scripts/start.sh /start.sh 8 | RUN chmod 755 /start.sh 9 | 10 | # Add Tini. Tini operates as a process subreaper for jupyter. 11 | # This prevents kernel crashes. 
12 | # ENV TINI_VERSION v0.6.0 13 | # ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /usr/bin/tini 14 | # RUN chmod +x /usr/bin/tini 15 | # ENTRYPOINT ["/usr/bin/tini", "--"] 16 | RUN cd /aperturedata && pip install -e ".[dev]" 17 | RUN echo "adb --install-completion" | bash 18 | 19 | # Install useful JupyterLab extensions 20 | RUN pip install jupyter-resource-usage 21 | 22 | # Suppress the annoying announcements popup 23 | RUN jupyter labextension disable "@jupyterlab/apputils-extension:announcements" 24 | 25 | # Install CLIP (for running transformers) 26 | RUN pip install git+https://github.com/openai/CLIP.git 27 | 28 | RUN apt update && apt install -y curl && apt clean 29 | 30 | EXPOSE 8888 31 | CMD ["/start.sh"] 32 | -------------------------------------------------------------------------------- /docker/notebook/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | # Pull base image. 2 | FROM aperturedata/aperturedb-notebook:dependencies 3 | 4 | RUN mkdir /aperturedata 5 | ADD docker/notebook/aperturedata /aperturedata 6 | 7 | COPY docker/notebook/scripts/start.sh /start.sh 8 | RUN chmod 755 /start.sh 9 | 10 | # Add Tini. Tini operates as a process subreaper for jupyter. 11 | # This prevents kernel crashes. 12 | # ENV TINI_VERSION v0.6.0 13 | # ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /usr/bin/tini 14 | # RUN chmod +x /usr/bin/tini 15 | # ENTRYPOINT ["/usr/bin/tini", "--"] 16 | RUN cd /aperturedata && pip install -e "." 17 | RUN echo "adb --install-completion" | bash 18 | 19 | # Install useful JupyterLab extensions 20 | RUN pip install jupyter-resource-usage 21 | 22 | # Suppress the annoying announcements popup 23 | RUN jupyter labextension disable "@jupyterlab/apputils-extension:announcements" 24 | 25 | # Install torch and torchvision cpu version 26 | RUN pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu 27 | RUN pip install facenet-pytorch --no-deps 28 | 29 | # Install CLIP (for running transformers) 30 | RUN pip install git+https://github.com/openai/CLIP.git 31 | 32 | RUN apt update && apt install -y curl && apt clean 33 | 34 | EXPOSE 8888 35 | CMD ["/start.sh"] 36 | -------------------------------------------------------------------------------- /docker/notebook/scripts/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Configure the Juypter Notebook password 4 | jupyter lab --generate-config 5 | 6 | PASSWORD=${PASSWORD:-test} 7 | PASS_HASH=$(python3 -c "from jupyter_server.auth import passwd; print(passwd('${PASSWORD}'))") 8 | echo "c.NotebookApp.password='${PASS_HASH}'">> /root/.jupyter/jupyter_lab_config.py 9 | 10 | BASE_URL=${BASE_URL:-/} 11 | echo "c.ServerApp.base_url='${BASE_URL}'">> /root/.jupyter/jupyter_lab_config.py 12 | 13 | NOTEBOOK_DIR=${NOTEBOOK_DIR:-/notebooks} 14 | mkdir -p ${NOTEBOOK_DIR} 15 | echo "c.NotebookApp.notebook_dir='${NOTEBOOK_DIR}'">> /root/.jupyter/jupyter_lab_config.py 16 | 17 | cd ${HOME} 18 | jupyter-lab --port=8888 --no-browser --allow-root --ip=0.0.0.0 19 | -------------------------------------------------------------------------------- /docker/pytorch-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/deeplearning-platform-release/pytorch-gpu.1-13.py37 2 | 3 | RUN mkdir /aperturedata 4 | ADD docker/pytorch-gpu/aperturedata /aperturedata 5 | 6 | RUN pip install awscli 7 | RUN apt-get 
update && apt-get install -y libopencv-dev python3-opencv fuse libfuse-dev 8 | RUN cd /aperturedata && pip install -e ".[dev]" 9 | 10 | COPY docker/pytorch-gpu/scripts/start.sh /start.sh 11 | RUN chmod 755 /start.sh 12 | CMD ["/start.sh"] -------------------------------------------------------------------------------- /docker/pytorch-gpu/build.sh: -------------------------------------------------------------------------------- 1 | docker build -f docker/pytorch-gpu/Dockerfile -t aperturedata/aperturedb-pytorch-gpu:latest . -------------------------------------------------------------------------------- /docker/pytorch-gpu/scripts/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /aperturedata/test && bash run_test.sh -------------------------------------------------------------------------------- /docker/release/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | ENV DEBIAN_FRONTEND=noninteractive 3 | RUN apt update && apt upgrade -y && apt install -y --no-install-recommends \ 4 | python3-dev \ 5 | python3-pip 6 | ARG VERSION 7 | COPY dist/aperturedb-${VERSION}-py3-none-any.whl /tmp 8 | RUN pip3 install /tmp/aperturedb-${VERSION}-py3-none-any.whl 9 | -------------------------------------------------------------------------------- /docker/tests/Dockerfile: -------------------------------------------------------------------------------- 1 | # Pull base image. 2 | FROM aperturedata/aperturedb-notebook:dependencies 3 | 4 | RUN mkdir /aperturedata 5 | ADD docker/tests/aperturedata /aperturedata 6 | 7 | RUN pip install awscli 8 | RUN cd /aperturedata && pip install -e ".[dev]" 9 | RUN pip install git+https://github.com/openai/CLIP.git 10 | COPY docker/tests/scripts/start.sh /start.sh 11 | RUN chmod 755 /start.sh 12 | CMD ["/start.sh"] -------------------------------------------------------------------------------- /docker/tests/scripts/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /aperturedata/test && bash run_test.sh -------------------------------------------------------------------------------- /docker/twine/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | RUN pip install twine build 4 | -------------------------------------------------------------------------------- /docs/README.protobuf: -------------------------------------------------------------------------------- 1 | The ApertureDB Python library supports versions 3 and 4 of protobuf. 2 | 3 | These are incompatible implementations: the Python code that protoc 4 | generates from the proto file will not work across versions. 5 | 6 | To solve this, we have added a thin wrapper which selects the backend 7 | implementation based on the installed protobuf version. 8 | 9 | This is done because customers use packages alongside aperturedb which require 10 | Python protobuf packages from both the 3.x line and the 4.x line. 11 | 12 | To regenerate the files, take the queryMessage.proto file, make copies whose 13 | names append the target version, and compile each copy with the matching 14 | protoc. Finally, place the generated modules in this repo.
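A minimal sketch of that regeneration flow, for illustration only: the
protoc-3.x / protoc-4.x binary names below are placeholders for whichever
matching protoc releases you have installed, and the location of
queryMessage.proto is assumed, since the proto file itself is not part of
this listing.

  # Make per-version copies of the proto definition.
  cp queryMessage.proto queryMessage3.proto
  cp queryMessage.proto queryMessage4.proto

  # Compile each copy with the protoc release that matches that protobuf line
  # (check with `protoc --version` which binary is which).
  protoc-3.x --python_out=. queryMessage3.proto
  protoc-4.x --python_out=. queryMessage4.proto

  # Place the generated modules in the package, next to queryMessage.py.
  mv queryMessage3_pb2.py queryMessage4_pb2.py aperturedb/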
15 | -------------------------------------------------------------------------------- /examples/CelebADataKaggle.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from aperturedb.KaggleData import KaggleData 3 | import pandas as pd 4 | import os 5 | import logging 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class CelebADataKaggle(KaggleData): 11 | """ 12 | **ApertureDB ingestable Dataset based off 13 | [CelebA on kaggle](https://www.kaggle.com/datasets/jessicali9530/celeba-dataset)** 14 | """ 15 | 16 | def __init__(self, **kwargs) -> None: 17 | self.records_count = -1 18 | super().__init__(dataset_ref = "jessicali9530/celeba-dataset", 19 | records_count=self.records_count) 20 | 21 | def generate_index(self, root: str, records_count=-1) -> pd.DataFrame: 22 | attr_index = pd.read_csv( 23 | os.path.join(root, "list_attr_celeba.csv")) 24 | bbox_index = pd.read_csv( 25 | os.path.join(root, "list_bbox_celeba.csv")) 26 | landmarks_index = pd.read_csv(os.path.join( 27 | root, "list_landmarks_align_celeba.csv")) 28 | partition_index = pd.read_csv( 29 | os.path.join(root, "list_eval_partition.csv")) 30 | rows = attr_index.combine_first(bbox_index).combine_first( 31 | landmarks_index).combine_first(partition_index) 32 | original_size = len(rows) 33 | records_count = records_count if records_count > 0 else original_size 34 | 35 | rows = rows[:records_count] 36 | 37 | logger.info( 38 | f"Created {len(rows)} items from {original_size} in the original dataset.") 39 | return rows 40 | 41 | def generate_query(self, idx: int) -> Tuple[List[dict], List[bytes]]: 42 | record = self.collection[idx] 43 | p = record 44 | q = [ 45 | { 46 | "AddImage": { 47 | "_ref": 1, 48 | "properties": { 49 | c: p[c] for c in p.keys() 50 | }, 51 | } 52 | }, { 53 | "AddBoundingBox": { 54 | "_ref": 2, 55 | "image_ref": 1, 56 | "rectangle": { 57 | "x": p["x_1"], 58 | "y": p["y_1"], 59 | "width": p["width"] if p["width"] > 0 else 1, 60 | "height": p["height"] if p["height"] > 0 else 1, 61 | } 62 | } 63 | } 64 | ] 65 | q[0]["AddImage"]["properties"]["keypoints"] = f"10 {p['lefteye_x']} {p['lefteye_y']} {p['righteye_x']} {p['righteye_y']} {p['nose_x']} {p['nose_y']} {p['leftmouth_x']} {p['leftmouth_y']} {p['rightmouth_x']} {p['rightmouth_y']}" 66 | 67 | image_file_name = os.path.join( 68 | self.workdir, 69 | 'img_align_celeba/img_align_celeba', 70 | p["image_id"]) 71 | blob = open(image_file_name, "rb").read() 72 | return q, [blob] 73 | -------------------------------------------------------------------------------- /examples/Cifar10DataTensorFlow.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from aperturedb.TensorFlowData import TensorFlowData 3 | from typing import List, Tuple 4 | from aperturedb.Images import np_arr_img_to_bytes 5 | 6 | 7 | class Cifar10DataTensorFlow(TensorFlowData): 8 | """ 9 | **ApertureDB ingestable Dataset, which is sourced from 10 | [Cifar10 (tensorflow.datasets)](https://www.tensorflow.org/datasets/catalog/cifar10)** 11 | """ 12 | 13 | def __init__(self): 14 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data() 15 | self.x = tf.concat([x_train, x_test], axis=0) 16 | self.y = tf.concat([tf.squeeze(y_train), tf.squeeze(y_test)], axis=0) 17 | self.train_len = x_train.shape[0] 18 | 19 | def __len__(self): 20 | return self.x.shape[0] 21 | 22 | def generate_query(self, idx: int) -> Tuple[List[dict], List[bytes]]: 23 | x, y = 
self.x[idx], self.y[idx] 24 | q = [{ 25 | "AddImage": { 26 | "_ref": 1 27 | } 28 | }] 29 | q[0]["AddImage"]["properties"] = { 30 | "label": str(y.numpy()), 31 | "train": True if idx < self.train_len else False 32 | } 33 | 34 | return q, [np_arr_img_to_bytes(x.numpy())] 35 | -------------------------------------------------------------------------------- /examples/Foo.py: -------------------------------------------------------------------------------- 1 | from aperturedb.transformers.transformer import Transformer 2 | 3 | 4 | class Foo(Transformer): 5 | """ 6 | An example of a non-packaged transformer. 7 | Example usage in adb (the CLI argument is --user-transformer): 8 | adb from-generator examples/CelebADataKaggle.py --sample-count 1 --user-transformer examples/Foo.py 9 | """ 10 | 11 | def getitem(self, subscript): 12 | x = self.data[subscript] 13 | for ic in self._add_image_index: 14 | x[0][ic]["AddImage"]["properties"]["foo"] = "bar" 15 | 16 | return x 17 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Code examples with aperturedb. 2 | 3 | The following is a table of contents for this folder and its subfolders, 4 | along with instructions for running each script. 5 | 6 | Part of the COCO validation set needs to be downloaded. 7 | This is a prerequisite for running some of the scripts below. 8 | 9 | ``` 10 | mkdir coco && cd coco && wget http://images.cocodataset.org/zips/val2017.zip && unzip val2017.zip && wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip && unzip annotations_trainval2017.zip 11 | ``` 12 | 13 | ## Example 1: ApertureDB Loaders 101 14 | 15 | The following files are under *loaders_101* 16 | | File | Description | instructions | 17 | | -----| ------------| -----| 18 | | loaders.ipynb | A notebook with some sample code for aperturedb | Also available to read at [Aperturedb documentation](https://docs.aperturedata.io/HowToGuides/Advanced/loaders)| 19 | 20 | ## Example 2: Image classification using a pretrained model 21 | The following files are under *image_classification* 22 | 23 | | File | Description | instructions | 24 | | -----| ------------| -----| 25 | | AlexNetClassifier.py | Helper code to transform images before classifying them with a pretrained AlexNet model | Is not invoked directly | 26 | | imagenet_classes.txt | The class labels for the outputs from AlexNet | Used by pytorch_classification.py | 27 | | prepare_aperturedb.py | Helper to download images from the COCO dataset and load them into aperturedb | ``python prepare_aperturedb.py -images_count 100`` | 28 | | pytorch_classification.py | Pulls all images from aperturedb with a property set by the prepare_aperturedb.py script, and classifies them using AlexNet | ``python pytorch_classification.py`` | 29 | | pytorch_classification.ipynb | Does the same operation as ``pytorch_classification.py``, and also displays the classified images | Also available to read at [Aperturedb python documentation](https://docs.aperturedata.io/HowToGuides/Basic/pytorch_classification) | 30 | 31 | ## Example 3: Similarity search using ApertureDB 32 | 33 | This needs a bit of extra setup. 34 | - Install the dependent packages using the command shown below, from the top-level path of this repo. 35 | ``` 36 | pip install ".[complete]" 37 | 38 | ``` 39 | - Set up a Kaggle account and the API token as per the official [kaggle api guide](https://github.com/Kaggle/kaggle-api); see the sketch below.
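A minimal sketch of that token setup, assuming the `kaggle.json` API token has already been downloaded from your Kaggle account page into `~/Downloads`:

```
# Put the token where the kaggle client looks for it, and restrict its permissions.
mkdir -p ~/.kaggle
mv ~/Downloads/kaggle.json ~/.kaggle/kaggle.json
chmod 600 ~/.kaggle/kaggle.json

# Sanity check: list datasets matching "celeba" (downloads nothing).
kaggle datasets list -s celeba
```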
40 | 41 | The following files are under *similarity_search* 42 | 43 | | File | Description | instructions | 44 | | -----| ------------| -----| 45 | | similarity_search.ipynb | A notebook with some sample code for describing similarity search using aperturedb | Also available to read at [Aperturedb documentation](https://docs.aperturedata.io/HowToGuides/Advanced/similarity_search)| 46 | | facenet.py | Face Recognition using facenet and pytorch | Is invoked indirectly | 47 | | add_faces.py | A Script to load celebA dataset into aperturedb | ``python add_faces.py``| 48 | 49 | ## Example 4: REST interface to apertureDB. 50 | 51 | The following files are under *rest_api* 52 | 53 | | File | Description | instructions | 54 | | -----| ------------| -----| 55 | | rest_api.py | Interactions with aperturedb using python's requests | ``python rest_api.py``| 56 | | rest_api.js | Interactions with aperturedb using javascript with axios | Is included in index.html | 57 | | index.html | A web page that renders from responses from aperturedb | Tested on chrome | 58 | 59 | ## Example 5: Adding Data to aperturedb with User defined models 60 | 61 | The following files are under *loading_with_models* 62 | 63 | | File | Description | instructions | 64 | | -----| ------------| -----| 65 | | models.ipynb | A notebook with some sample code to add data using models | Also available to read at [Aperturedb model example](https://docs.aperturedata.io/HowToGuides/Advanced/models)| 66 | -------------------------------------------------------------------------------- /examples/dask/ingest_dask.py: -------------------------------------------------------------------------------- 1 | import dask 2 | from dask import dataframe 3 | import os 4 | from aperturedb import EntityDataCSV 5 | from dask.distributed import Client, LocalCluster 6 | 7 | 8 | if __name__ == '__main__': 9 | batchsize = 2000 10 | numthreads = 8 11 | 12 | cluster = LocalCluster(n_workers=numthreads) 13 | client = Client(cluster) 14 | dask.config.set(scheduler="distributed") 15 | 16 | FILENAME = os.path.join(os.path.dirname(__file__), 'see2.out') 17 | 18 | ratings = dataframe.read_csv( 19 | FILENAME, blocksize=os.path.getsize(FILENAME) // numthreads) 20 | 21 | def process(df): 22 | from aperturedb.CommonLibrary import create_connector 23 | from aperturedb.ParallelLoader import ParallelLoader 24 | client = create_connector() 25 | loader = ParallelLoader(client) 26 | count = 0 27 | 28 | for i in range(0, len(df), batchsize): 29 | end = min(i + batchsize, len(df)) 30 | batch = df[i:end] 31 | data = EntityDataCSV.EntityDataCSV(filename="", df=batch) 32 | loader.ingest(data, batchsize=len(batch), numthreads=1) 33 | count += 1 34 | 35 | print(f"len(df) = {len(df)}, count = {count}") 36 | 37 | ratings.map_partitions(process).compute() 38 | -------------------------------------------------------------------------------- /examples/dask/ingest_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from aperturedb.EntityDataCSV import EntityDataCSV 3 | from aperturedb.ParallelLoader import ParallelLoader 4 | from aperturedb.CommonLibrary import create_connector 5 | import typer 6 | 7 | 8 | app = typer.Typer() 9 | 10 | 11 | @app.command() 12 | def main(use_dask: bool = False, csv_path: str = "data.csv"): 13 | client = create_connector() 14 | 15 | data = EntityDataCSV(filename=os.path.join( 16 | os.path.dirname(__file__), csv_path), use_dask=use_dask) 17 | loader = ParallelLoader(client=client) 18 | 
loader.ingest(generator=data, batchsize=2000, numthreads=8, stats=True) 19 | 20 | 21 | if __name__ == "__main__": 22 | app() 23 | -------------------------------------------------------------------------------- /examples/image_classification/AlexNetClassifier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import transforms 3 | from torchvision import models 4 | from PIL import Image 5 | 6 | 7 | class AlexNetClassifier(object): 8 | 9 | def __init__(self): 10 | 11 | self.alexnet = models.alexnet(pretrained=True) 12 | 13 | self.transform = transforms.Compose([ 14 | # transforms.Resize(256), # Resize done by ApertureDB 15 | transforms.CenterCrop(224), 16 | transforms.ToTensor(), 17 | transforms.Normalize( 18 | mean=[0.485, 0.456, 0.406], 19 | std =[0.229, 0.224, 0.225] 20 | )]) 21 | 22 | with open('imagenet_classes.txt') as f: 23 | self.classes = [line.strip() for line in f.readlines()] 24 | 25 | def classify(self, image): 26 | img = Image.fromarray(image.astype('uint8'), 'RGB') 27 | 28 | img_t = self.transform(img) 29 | batch_t = torch.unsqueeze(img_t, 0) 30 | self.alexnet.eval() 31 | out = self.alexnet(batch_t) 32 | _, index = torch.max(out, 1) 33 | percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100 34 | 35 | label = self.classes[index[0]] 36 | confidence = percentage[index[0]].item() 37 | 38 | return label, confidence 39 | 40 | def print_model(self): 41 | # dir(models) 42 | print(self.alexnet) 43 | -------------------------------------------------------------------------------- /examples/image_classification/CocoDataPytorch.py: -------------------------------------------------------------------------------- 1 | ../CocoDataPytorch.py -------------------------------------------------------------------------------- /examples/image_classification/prepare_aperturedb.py: -------------------------------------------------------------------------------- 1 | import io 2 | 3 | from aperturedb.ParallelLoader import ParallelLoader 4 | from aperturedb.CommonLibrary import create_connector 5 | from PIL import Image 6 | from CocoDataPyTorch import CocoDataPyTorch 7 | import argparse 8 | 9 | 10 | def main(params): 11 | # Define a helper function to convert PIL.image to a bytes array. 
12 | def image_to_byte_array(image: Image) -> bytes: 13 | imgByteArr = io.BytesIO() 14 | image.save(imgByteArr, format="JPEG") 15 | imgByteArr = imgByteArr.getvalue() 16 | return imgByteArr 17 | 18 | coco_detection = CocoDataPyTorch("prepare_aperturedb") 19 | 20 | # Lets use some images from the coco which are annotated for the purpose of the demo 21 | images = [] 22 | for t in coco_detection: 23 | X, y = t 24 | if len(y) > 0: 25 | images.append(t) 26 | if len(images) == params.images_count: 27 | break 28 | 29 | loader = ParallelLoader(create_connector()) 30 | loader.ingest(generator = images, stats=True) 31 | print(f"Inserted {params.images_count} images to aperturedb") 32 | 33 | 34 | def get_args(): 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument('-images_count', type=int, required=True, 37 | help="The number of images to ingest into aperturedb") 38 | return parser.parse_args() 39 | 40 | 41 | if __name__ == "__main__": 42 | main(get_args()) 43 | -------------------------------------------------------------------------------- /examples/image_classification/pytorch_classification.py: -------------------------------------------------------------------------------- 1 | import time 2 | import AlexNetClassifier as alexnet 3 | from aperturedb import PyTorchDataset 4 | from aperturedb.CommonLibrary import create_connector 5 | 6 | client = create_connector() 7 | 8 | out_file_name = "classification.txt" 9 | query = [{ 10 | "FindImage": { 11 | "constraints": { 12 | "dataset_name": ["==", "prepare_aperturedb"] 13 | }, 14 | "operations": [ 15 | { 16 | "type": "resize", 17 | "width": 256, 18 | "height": 256 19 | } 20 | ], 21 | "results": { 22 | "list": ["image_id"], 23 | } 24 | } 25 | }] 26 | 27 | 28 | classifier = alexnet.AlexNetClassifier() 29 | with open(out_file_name, 'w') as classification: 30 | dataset = PyTorchDataset.ApertureDBDataset( 31 | client=client, query=query, label_prop='image_id') 32 | start = time.time() 33 | for item in dataset: 34 | image, id = item 35 | label, conf = classifier.classify(image) 36 | classification.write(f"{id}: {label}, confidence = {conf}\n") 37 | print("\rRetrieval performance (imgs/s):", 38 | len(dataset) / (time.time() - start), end="") 39 | 40 | print(f"\nWritten classification results into {out_file_name}") 41 | -------------------------------------------------------------------------------- /examples/loaders_101/CocoDataPytorch.py: -------------------------------------------------------------------------------- 1 | ../CocoDataPytorch.py -------------------------------------------------------------------------------- /examples/loading_with_models/add_video_model.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from aperturedb.DataModels import VideoDataModel, ClipDataModel, DescriptorDataModel, DescriptorSetDataModel 3 | from aperturedb.CommonLibrary import create_connector, execute_query 4 | from aperturedb.Query import generate_add_query 5 | from aperturedb.Query import RangeType 6 | import json 7 | 8 | # In aperturedb we have Videos, Video Clips and Embeddings(aka Descriptors) 9 | #recognized as first class objects. 10 | # Note : Video has multiple Clips, and each Clip has an embedding. 11 | 12 | # In aperturedb.datamodel, we already define datamodels for Videos, Clips and Embeddings 13 | # Now Define the data models for the "association" of Video, Video Clips, and Embeddings 14 | 15 | # Video clip -> Embedding. 
16 | 17 | 18 | class ClipEmbeddingModel(ClipDataModel): 19 | embedding: DescriptorDataModel 20 | 21 | # Video -> Video Clips 22 | 23 | 24 | class VideoClipsModel(VideoDataModel): 25 | title: str 26 | description: str 27 | clips: List[ClipEmbeddingModel] = [] 28 | 29 | 30 | # Function to create a connected Video object model. 31 | def save_video_details_to_aperturedb(URL: str, clips, collection): 32 | video = VideoClipsModel(url=URL, title="Ecommerce v2.5", 33 | description="Ecommerce v2.5 video with clips by Marengo26") 34 | # Use the embeddings to create the video clips, and add them to the video object 35 | for clip in clips: 36 | video.clips.append(ClipEmbeddingModel( 37 | range_type=RangeType.TIME, 38 | start=clip['start_offset_sec'], 39 | stop=clip['end_offset_sec'], 40 | embedding=DescriptorDataModel( 41 | # The corresponding descriptor to the Video Clip. 42 | vector=clip['embedding'], set=collection) 43 | )) 44 | return video 45 | 46 | 47 | video_url = "https://storage.googleapis.com/ad-demos-datasets/videos/Ecommerce%20v2.5.mp4" 48 | 49 | clips = None 50 | with open("video_clips.json", "r") as f: 51 | clips = json.load(f) 52 | 53 | client = create_connector() 54 | 55 | # Create a descriptor set 56 | # DS is a search space for descriptors added to it (some times called collections) 57 | # https://docs.aperturedata.io/HowToGuides/Advanced/similarity_search#descriptorsets-and-descriptors 58 | collection = DescriptorSetDataModel( 59 | name="marengo26", dimensions=len(clips[0]['embedding'])) 60 | q, blobs, c = generate_add_query(collection) 61 | result, response, blobs = execute_query(query=q, blobs=blobs, client=client) 62 | print(f"{result=}, {response=}") 63 | 64 | # Create a video object, with clips, and embeddings 65 | video = save_video_details_to_aperturedb(video_url, clips, collection) 66 | q, blobs, c = generate_add_query(video) 67 | result, response, blobs = execute_query(query=q, blobs=blobs, client=client) 68 | print(f"{result=}, {response=}") 69 | -------------------------------------------------------------------------------- /examples/loading_with_models/find_roi.py: -------------------------------------------------------------------------------- 1 | import json 2 | from aperturedb.Descriptors import Descriptors 3 | from aperturedb.CommonLibrary import create_connector 4 | from aperturedb.Query import ObjectType 5 | 6 | client = create_connector() 7 | 8 | with open("text_embedding.json", "r") as f: 9 | # Load the embeddings from the json file. Look at get_tl_embedding.py for more details 10 | # on how it was generated. 11 | embeddings = json.load(f) 12 | 13 | # We will search from a set of descriptors in the DB called "marengo26". 14 | descriptorset = "marengo26" 15 | 16 | # Find similar descriptors to the text_embedding in the descriptorset. 17 | descriptors = Descriptors(client) 18 | descriptors.find_similar( 19 | descriptorset, 20 | embeddings["text_embedding"], 21 | k_neighbors=3, 22 | distances=True) 23 | 24 | # Find connected clips to the descriptors. 25 | clip_descriptors = descriptors.get_connected_entities(ObjectType.CLIP) 26 | 27 | # Show the metadata of the clips. 
28 | for clips in clip_descriptors: 29 | for clip in clips: 30 | print(clip) 31 | print("-----") 32 | -------------------------------------------------------------------------------- /examples/loading_with_models/get_tl_embeddings.py: -------------------------------------------------------------------------------- 1 | import json 2 | from twelvelabs import TwelveLabs 3 | from twelvelabs.models.embed import EmbeddingsTask 4 | 5 | # Initialize the Twelve Labs client 6 | twelvelabs_client = TwelveLabs(api_key=TL_API_KEY) 7 | 8 | 9 | def generate_embedding(video_url): 10 | # Create an embedding task 11 | task = twelvelabs_client.embed.task.create( 12 | engine_name="Marengo-retrieval-2.6", 13 | video_url=video_url 14 | ) 15 | print( 16 | f"Created task: id={task.id} engine_name={task.engine_name} status={task.status}") 17 | 18 | # Define a callback function to monitor task progress 19 | def on_task_update(task: EmbeddingsTask): 20 | print(f" Status={task.status}") 21 | 22 | # Wait for the task to complete 23 | status = task.wait_for_done( 24 | sleep_interval=2, 25 | callback=on_task_update 26 | ) 27 | print(f"Embedding done: {status}") 28 | 29 | # Retrieve the task result 30 | task_result = twelvelabs_client.embed.task.retrieve(task.id) 31 | 32 | # Extract and return the embeddings 33 | embeddings = [] 34 | for v in task_result.video_embeddings: 35 | embeddings.append({ 36 | 'embedding': v.embedding.float, 37 | 'start_offset_sec': v.start_offset_sec, 38 | 'end_offset_sec': v.end_offset_sec, 39 | 'embedding_scope': v.embedding_scope 40 | }) 41 | 42 | return embeddings, task_result 43 | 44 | 45 | def generate_text_embeddings(text: str): 46 | text_embedding = twelvelabs_client.embed.create( 47 | engine_name="Marengo-retrieval-2.6", 48 | text=text, 49 | text_truncate="none") 50 | 51 | return text_embedding 52 | 53 | 54 | # Example usage 55 | video_url = "https://storage.googleapis.com/ad-demos-datasets/videos/Ecommerce%20v2.5.mp4" 56 | 57 | # Generate embeddings for the video 58 | embeddings, task_result = generate_embedding(video_url) 59 | 60 | print(f"Generated {len(embeddings)} embeddings for the video") 61 | for i, emb in enumerate(embeddings): 62 | print(f"Embedding {i+1}:") 63 | print(f" Scope: {emb['embedding_scope']}") 64 | print( 65 | f" Time range: {emb['start_offset_sec']} - {emb['end_offset_sec']} seconds") 66 | print(f" Embedding vector (first 5 values): {emb['embedding'][:5]}") 67 | print() 68 | 69 | 70 | with open("embeddings.txt", "w") as f: 71 | f.write(json.dumps(embeddings, indent=2)) 72 | 73 | text_embedding, result = generate_text_embeddings( 74 | "Show me the part which has lot of outfits being displayed" 75 | ) 76 | 77 | with open("text_embedding.json", "w") as f: 78 | f.write(json.dumps(str(text_embedding), indent=2)) 79 | -------------------------------------------------------------------------------- /examples/rest_api/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |

 8 |         
AddImage. Select a file to continue
9 |
10 | 11 | 12 | 13 |
14 | 15 | 16 | -------------------------------------------------------------------------------- /examples/rest_api/rest_api.js: -------------------------------------------------------------------------------- 1 | const request = (query, blobs, handler, sessionToken) => { 2 | apiURL = "https://coco.datasets.aperturedata.io/api" 3 | const formData = new FormData(); 4 | formData.append('query', JSON.stringify(query)); 5 | displayContent(query, response=false); 6 | 7 | blobs.forEach(element => { 8 | formData.append('blobs', element); 9 | }); 10 | 11 | let headers = null; 12 | if (sessionToken != null){ 13 | console.log(`setting session token ${sessionToken}`); 14 | headers = { 15 | "Authorization": `Bearer ${sessionToken}` 16 | } 17 | } 18 | 19 | axios.post( 20 | url=apiURL, 21 | data=formData, { 22 | headers: headers 23 | }).then((response)=>{ 24 | handler(response.data) 25 | }) 26 | } 27 | 28 | const displayContent = (payload, response=true) => { 29 | var tag = document.createElement("p"); 30 | var text = JSON.stringify(payload, undefined, 4); 31 | var element = document.getElementById("output"); 32 | var br = document.createElement("hr"); 33 | prefix = response ? "<<<<<<< Response" : "Request >>>>>>>"; 34 | tag.innerHTML = `${prefix}\r\n${text}`; 35 | element.appendChild(tag); 36 | element.appendChild(br); 37 | } 38 | 39 | run_requests = () => { 40 | //Get a refresh token. 41 | auth = [{ 42 | "Authenticate": { 43 | "username": "admin", 44 | "password": "admin" 45 | } 46 | }] 47 | request(query = auth, blobs = [], handler = (data)=>{ 48 | authData = data["json"]; 49 | // console.log(authData[0]); 50 | displayContent(authData); 51 | sessionToken = authData[0].Authenticate.session_token; 52 | 53 | 54 | //List images 55 | listQuery = [{ 56 | "FindImage": { 57 | "blobs": false, 58 | "uniqueids": true, 59 | "results" : { 60 | "limit": 10 61 | } 62 | } 63 | }] 64 | request(query = listQuery, blobs = [], handler = (data) => { 65 | response = data["json"]; 66 | displayContent(response); 67 | 68 | //Find an image 69 | findQuery = [{ 70 | "FindImage": { 71 | "constraints": { 72 | "_uniqueid": ["==", response[0].FindImage.entities[0]._uniqueid] 73 | }, 74 | "results": { 75 | "all_properties": true 76 | } 77 | } 78 | }] 79 | request(query = findQuery, blobs = [], handler = (data) => { 80 | response = data["json"]; 81 | console.log(data); 82 | displayContent(response); 83 | const url = `data:image/jpeg;base64,${data["blobs"][0]}`; 84 | fetch(url) 85 | .then(res=>res.blob()) 86 | .then(blob=>{ 87 | var image = document.createElement('img'); 88 | console.log(blob); 89 | image.src = window.webkitURL.createObjectURL(blob); 90 | var element = document.getElementById("output"); 91 | element.appendChild(image); 92 | }); 93 | }, sessionToken = sessionToken) 94 | 95 | 96 | }, sessionToken=sessionToken) 97 | 98 | sessionStorage.setItem("session_token", sessionToken); 99 | }) 100 | 101 | 102 | 103 | } 104 | 105 | const addImage = (event) => { 106 | event.preventDefault(); 107 | query = [{ 108 | "AddImage": { 109 | "properties": { 110 | "rest_api_example_id": 123456789 111 | } 112 | } 113 | }]; 114 | const file = document.getElementById("fileupload").files[0] 115 | request(query = query, blobs = [file], (data)=>{ 116 | response = data["json"]; 117 | displayContent(response); 118 | }, sessionToken = sessionStorage.getItem("session_token")); 119 | 120 | } 121 | 122 | window.addEventListener("load", (event)=>{ 123 | console.log("hello world"); 124 | const form = document.getElementById("addimage"); 125 | 
form.addEventListener('submit', addImage); 126 | }) 127 | -------------------------------------------------------------------------------- /examples/rest_api/rest_api.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import argparse 3 | import json 4 | import os 5 | from aperturedb.CommonLibrary import create_connector 6 | from aperturedb.Connector import Connector 7 | 8 | client: Connector = create_connector() 9 | 10 | URL = "https://" + client.config.host + '/api' 11 | 12 | VERIFY_SSL = True 13 | 14 | 15 | def parse_auth(res): 16 | 17 | res = json.loads(res)["json"] 18 | print(json.dumps(res, indent=4, sort_keys=False)) 19 | 20 | session_token = res[0]["Authenticate"]["session_token"] 21 | refresh_token = res[0]["Authenticate"]["refresh_token"] 22 | return session_token, refresh_token 23 | 24 | 25 | def auth(): 26 | 27 | query = [{ 28 | "Authenticate": { 29 | "username": client.config.username, 30 | "password": client.config.password, 31 | } 32 | }] 33 | 34 | # Authenticate 35 | response = requests.post(URL, 36 | files = [('query', (None, json.dumps(query)))], 37 | verify = VERIFY_SSL) 38 | 39 | # print(response.status_code) 40 | # print(response.text) 41 | 42 | return parse_auth(response.text) 43 | 44 | 45 | def query_api(query, st, files_upload=[]): 46 | 47 | files = [ 48 | ('query', (None, json.dumps(query))), 49 | ] 50 | 51 | for file in files_upload: 52 | instream = open(file, 'rb') 53 | files.append( 54 | ('blobs', (os.path.basename(file), instream, 'image/jpeg'))) 55 | 56 | response = requests.post(URL, 57 | headers = {'Authorization': "Bearer " + st}, 58 | files = files, 59 | verify = VERIFY_SSL) 60 | 61 | # Parse response: 62 | try: 63 | json_response = json.loads(response.text) 64 | response = json_response["json"] 65 | blobs = json_response["blobs"] 66 | except: 67 | print("Error with response:") 68 | print(response.status_code) 69 | print(response.text) 70 | response = "error!" 
71 | blobs = [] 72 | 73 | return response, blobs 74 | 75 | 76 | def get_status(st): 77 | 78 | query = [{ 79 | "GetStatus": {} 80 | }] 81 | 82 | return query_api(query, st) 83 | 84 | 85 | def add_image_by_id(st, id): 86 | 87 | query = [{ 88 | "AddImage": { 89 | "properties": { 90 | "rest_api_example_id": id 91 | } 92 | } 93 | }] 94 | 95 | return query_api(query, st, files_upload=["songbird.jpg"]) 96 | 97 | 98 | def get_image_by_id(st, id): 99 | 100 | query = [{ 101 | "FindImage": { 102 | "constraints": { 103 | "_uniqueid": ["==", id] 104 | }, 105 | "results": { 106 | "all_properties": True 107 | } 108 | } 109 | }] 110 | 111 | return query_api(query, st) 112 | 113 | 114 | def list_images(st): 115 | 116 | query = [{ 117 | "FindImage": { 118 | "blobs": False, 119 | "uniqueids": True 120 | } 121 | }] 122 | 123 | return query_api(query, st) 124 | 125 | 126 | def main(params): 127 | 128 | VERIFY_SSL = params.verify_ssl 129 | 130 | print("-" * 80) 131 | print("Authentication:") 132 | session_token, refresh_token = auth() 133 | 134 | # Print DB Status 135 | # get_status(session_token) 136 | 137 | # ---------------------- 138 | print("-" * 80) 139 | print("List Images:") 140 | r, blobs = list_images(session_token) 141 | print("Response:") 142 | print(json.dumps(r, indent=4, sort_keys=False)) 143 | img_id = r[0]["FindImage"]["entities"][0]["_uniqueid"] 144 | 145 | # ---------------------- 146 | print("-" * 80) 147 | print("Find image by id:") 148 | r, blobs = get_image_by_id(session_token, img_id) 149 | 150 | print("Response:") 151 | print(json.dumps(r, indent=4, sort_keys=False)) 152 | 153 | print("Returned images: {}".format(len(blobs))) 154 | 155 | # Base 64 encoded images 156 | for img in blobs: 157 | 158 | print("Image size (base64 enconded): {}".format(len(img))) 159 | 160 | # ---------------------- 161 | print("-" * 80) 162 | print("Add image by id:") 163 | r, blobs = add_image_by_id(session_token, 123456789) 164 | 165 | print("Response:") 166 | print(json.dumps(r, indent=4, sort_keys=False)) 167 | 168 | # ---------------------- 169 | 170 | 171 | def get_args(): 172 | obj = argparse.ArgumentParser() 173 | 174 | obj.add_argument('-verify_ssl', type=bool, default=True) 175 | 176 | params = obj.parse_args() 177 | 178 | return params 179 | 180 | 181 | if __name__ == "__main__": 182 | args = get_args() 183 | main(args) 184 | -------------------------------------------------------------------------------- /examples/rest_api/songbird.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aperture-data/aperturedb-python/a58fe1eab4c78ae92a6b03ab6d5c9e5ed1f0d62d/examples/rest_api/songbird.jpg -------------------------------------------------------------------------------- /examples/similarity_search/CelebADataKaggle.py: -------------------------------------------------------------------------------- 1 | ../CelebADataKaggle.py -------------------------------------------------------------------------------- /examples/similarity_search/add_faces.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from aperturedb.ParallelLoader import ParallelLoader 3 | from CelebADataKaggle import CelebADataKaggle 4 | from aperturedb.transformers.facenet_pytorch_embeddings import FacenetPyTorchEmbeddings 5 | from aperturedb.transformers.common_properties import CommonProperties 6 | from aperturedb.transformers.image_properties import ImageProperties 7 | from aperturedb.CommonLibrary import 
create_connector 8 | from aperturedb.Utils import Utils 9 | 10 | search_set_name = "similar_celebreties" 11 | 12 | 13 | def main(params): 14 | utils = Utils(create_connector()) 15 | utils.remove_descriptorset(search_set_name) 16 | 17 | dataset = CelebADataKaggle() 18 | 19 | # Here's a pipeline that adds extra properties to the celebA dataset 20 | dataset = CommonProperties( 21 | dataset, 22 | adb_data_source="kaggle-celebA", 23 | adb_main_object="Face") 24 | 25 | # some useful properties for the images 26 | dataset = ImageProperties(dataset) 27 | 28 | # Add the embeddings generated through facenet. 29 | dataset = FacenetPyTorchEmbeddings(dataset) 30 | 31 | # Limit the number of images to ingest 32 | dataset = dataset[:params.images_count] 33 | print(len(dataset)) 34 | 35 | loader = ParallelLoader(create_connector()) 36 | loader.ingest(dataset, stats=True) 37 | 38 | 39 | def get_args(): 40 | parser = argparse.ArgumentParser() 41 | parser.add_argument('-images_count', type=int, required=True, 42 | help="The number of images to ingest into aperturedb") 43 | return parser.parse_args() 44 | 45 | 46 | if __name__ == "__main__": 47 | main(get_args()) 48 | -------------------------------------------------------------------------------- /examples/similarity_search/bruce-lee.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aperture-data/aperturedb-python/a58fe1eab4c78ae92a6b03ab6d5c9e5ed1f0d62d/examples/similarity_search/bruce-lee.jpg -------------------------------------------------------------------------------- /examples/similarity_search/taylor-swift.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aperture-data/aperturedb-python/a58fe1eab4c78ae92a6b03ab6d5c9e5ed1f0d62d/examples/similarity_search/taylor-swift.jpg -------------------------------------------------------------------------------- /github-release.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | source $(dirname "$0")/version.sh 4 | 5 | # Set default version to develop 6 | BUILD_VERSION=develop 7 | 8 | # Trigger read version 9 | read_version 10 | echo "Build version: $BUILD_VERSION" 11 | 12 | create_release() { 13 | user="aperture-data" 14 | repo="aperturedb-python" 15 | token=$TOKEN 16 | tag="v$BUILD_VERSION" 17 | 18 | command="curl -s -o release.json -w '%{http_code}' \ 19 | --request POST \ 20 | --header 'Accept: application/vnd.github+json' \ 21 | --header 'Authorization: Bearer ${token}' \ 22 | --header 'X-GitHub-Api-Version: 2022-11-28' \ 23 | --data '{\"tag_name\": \"${tag}\", \"name\": \"${tag}\", \"body\":\"Release ${tag}\"}' \ 24 | https://api.github.com/repos/$user/$repo/releases" 25 | http_code=`eval $command` 26 | if [ $http_code == "201" ]; then 27 | echo "created release:" 28 | cat release.json 29 | else 30 | echo "create release failed with code '$http_code':" 31 | cat release.json 32 | echo "command:" 33 | echo $command 34 | return 1 35 | fi 36 | } 37 | 38 | create_release -------------------------------------------------------------------------------- /publish.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | echo "Building aperturedb" 4 | rm -rf build/ dist/ vdms.egg-info/ 5 | 6 | docker build --no-cache -t CI/twine -f docker/twine/Dockerfile . 
7 | echo "Uploading aperturedb" 8 | 9 | docker rm -f publisher || true 10 | docker run --rm --name publisher \ 11 | -e "TWINE_USERNAME=${TWINE_USERNAME}" \ 12 | -e "TWINE_PASSWORD=${TWINE_PASSWORD}" \ 13 | -v ./:/publish \ 14 | CI/twine bash -c "cd /publish && python -m build && twine upload --skip-existing --verbose dist/*" 15 | 16 | RELEASE_IMAGE="aperturedata/aperturedb-python:latest" 17 | source version.sh && read_version 18 | echo "Building image ${RELEASE_IMAGE}" 19 | docker build --no-cache -t ${RELEASE_IMAGE} \ 20 | --build-arg="VERSION=${BUILD_VERSION}" -f docker/release/Dockerfile . 21 | docker push ${RELEASE_IMAGE} 22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "aperturedb" 3 | dynamic = ["version"] 4 | description = "ApertureDB Python SDK" 5 | 6 | readme = "README.md" 7 | requires-python = ">=3.8" 8 | license = {file = "LICENSE"} 9 | keywords = ["aperturedb", "graph", "database", 10 | "image", "video", "metadata", "search", "indexing"] 11 | 12 | authors = [ 13 | {name = "ApertureData Engineering", email = "team@aperturedata.io"} 14 | ] 15 | 16 | dependencies = [ 17 | # Pin to the bridge version. 18 | # https://github.com/tensorflow/tensorflow/issues/60320 19 | 'protobuf >=3.20.3,<6.0.0', 20 | #Folllowing is needed parallel loaders, and basic things for 21 | # making the notebooks. 22 | 'requests', 'boto3', 23 | # https://github.com/Kaggle/kaggle-api/issues/611 24 | 'numpy<2; python_version<"3.9.0"', 'numpy', 'distributed', 25 | 'matplotlib', 'pandas', 'kaggle!=1.6.15', 'google-cloud-storage', 26 | 'ipython', 'dask[complete]', 'ipywidgets', 'pydantic>=2.6.0', 'devtools', 'typer', 27 | "opencv-python-headless", 28 | # Pinning this to resolve test errors temporarily 29 | 'ipywidgets==8.0.4', 30 | 'keepalive-socket==0.0.1', 31 | 'graphviz==0.20.2', 32 | "python-dotenv", 33 | ] 34 | 35 | [tool.setuptools.package-dir] 36 | aperturedb = "aperturedb" 37 | 38 | [project.urls] 39 | "Homepage" = "https://github.com/aperture-data/aperturedb-python" 40 | "Bug Reports" = "https://github.com/aperture-data/aperturedb-python/issues" 41 | 42 | [project.optional-dependencies] 43 | # This is used when we build the docker image for notebook 44 | notebook = [ 45 | "torch", 46 | "torchvision", 47 | "tensorflow", 48 | "facenet-pytorch", 49 | ] 50 | # User install requirements, guaranteed to be pip installable 51 | complete = [ 52 | "torch", 53 | "torchvision", 54 | "tensorflow", 55 | "facenet-pytorch", 56 | ] 57 | # Dev install requirements, bleeding edge, will break CI. 58 | dev = [ 59 | "torch", 60 | "torchvision", 61 | "tensorflow", 62 | "facenet-pytorch", 63 | "coverage", 64 | "autopep8", 65 | "pre-commit", 66 | "pytest", 67 | "build", 68 | "fuse-python ; platform_system == 'Linux'", 69 | "rdflib", 70 | ] 71 | 72 | # The following would provide a command line executable called `sample` 73 | # which executes the function `main` from this package when invoked. 
74 | [project.scripts] # Optional 75 | adb = "aperturedb.cli.adb:app" 76 | 77 | [build-system] 78 | # These are the assumed default build requirements from pip: 79 | # https://pip.pypa.io/en/stable/reference/pip/#pep-517-and-518-support 80 | requires = ["setuptools>=61.0", "wheel"] 81 | build-backend = "setuptools.build_meta" 82 | 83 | [tool.setuptools.dynamic] 84 | version = {attr = "aperturedb.__version__"} 85 | -------------------------------------------------------------------------------- /tag.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD) 6 | if [ -z "$BRANCH_NAME" ] 7 | then 8 | echo "This is on a merge branch. Will not continue" 9 | exit 0 10 | fi 11 | 12 | source $(dirname "$0")/version.sh 13 | 14 | # Trigger read version 15 | read_version 16 | echo "Build version: $BUILD_VERSION" 17 | 18 | git config --local user.name "github-actions[bot]" 19 | git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" 20 | git tag "v$BUILD_VERSION" $TAG_BASE 21 | git push origin "v$BUILD_VERSION" 22 | -------------------------------------------------------------------------------- /test/.coveragerc: -------------------------------------------------------------------------------- 1 | # .coveragerc to control coverage.py 2 | [run] 3 | branch = True 4 | source = ../aperturedb 5 | 6 | -------------------------------------------------------------------------------- /test/.dockerignore: -------------------------------------------------------------------------------- 1 | aperturedb/ 2 | input/ 3 | notebooks/ 4 | kaggleds/ 5 | __pycache__/ 6 | .pytest_cache/ 7 | -------------------------------------------------------------------------------- /test/.env: -------------------------------------------------------------------------------- 1 | ADB_REPO=${ADB_REPO:-aperturedata/aperturedb-community} 2 | ADB_TAG=${ADB_TAG:-latest} 3 | LENZ_REPO=${LENZ_REPO:-aperturedata/lenz} 4 | LENZ_TAG=${LENZ_TAG:-latest} 5 | RUNNER_NAME=${RUNNER_NAME:-default} 6 | GATEWAY=${GATEWAY:-0.0.0.0} 7 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aperture-data/aperturedb-python/a58fe1eab4c78ae92a6b03ab6d5c9e5ed1f0d62d/test/__init__.py -------------------------------------------------------------------------------- /test/adb_timing_tests.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | if __name__ == '__main__': 4 | for command in [ 5 | "adb config create aperturedb1 --host aperturedb --port 5555 --no-interactive --overwrite", 6 | "adb config create aperturedb2 --host aperturedb --port 5555 --no-interactive --overwrite", 7 | "adb config ls", 8 | "adb config activate aperturedb2", 9 | 10 | ]: 11 | print(command) 12 | start = datetime.now() 13 | os.system(command) 14 | diff = datetime.now() - start 15 | print(diff) 16 | assert diff.total_seconds() <= 0.9, f"Command {command} took too long" 17 | -------------------------------------------------------------------------------- /test/coverage/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx:alpine 2 | COPY output/ /usr/share/nginx/html 3 | -------------------------------------------------------------------------------- 
/test/dbinfo.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # This file contains information on how to access the server 4 | 5 | GATEWAY = os.getenv("GATEWAY", "localhost") 6 | 7 | DB_TCP_HOST = GATEWAY 8 | DB_REST_HOST = GATEWAY 9 | DB_TCP_PORT = 55556 10 | DB_REST_PORT = 8087 11 | DB_USER = "admin" 12 | DB_PASSWORD = "admin" 13 | -------------------------------------------------------------------------------- /test/docker-compose.yml: -------------------------------------------------------------------------------- 1 | name: $RUNNER_NAME 2 | 3 | services: 4 | ca: 5 | image: alpine/openssl 6 | restart: on-failure 7 | command: req -x509 -newkey rsa:4096 -days 3650 -nodes -keyout /cert/tls.key -out /cert/tls.crt -subj "/C=US/O=ApertureData Inc./CN=localhost" 8 | volumes: 9 | - ./aperturedb/certificate:/cert 10 | 11 | lenz: 12 | depends_on: 13 | ca: 14 | condition: service_completed_successfully 15 | aperturedb: 16 | condition: service_started 17 | image: $LENZ_REPO:$LENZ_TAG 18 | ports: 19 | - $GATEWAY:55556:55551 20 | restart: always 21 | environment: 22 | LNZ_HEALTH_PORT: 58085 23 | LNZ_TCP_PORT: 55551 24 | LNZ_HTTP_PORT: 8080 25 | LNZ_ADB_BACKENDS: '["aperturedb:55553"]' 26 | LNZ_REPLICAS: 1 27 | LNZ_ADB_MAX_CONCURRENCY: 48 28 | LNZ_FORCE_SSL: false 29 | LNZ_CERTIFICATE_PATH: /etc/lenz/certificate/tls.crt 30 | LNZ_PRIVATE_KEY_PATH: /etc/lenz/certificate/tls.key 31 | volumes: 32 | - ./aperturedb/certificate:/etc/lenz/certificate 33 | 34 | aperturedb: 35 | image: $ADB_REPO:$ADB_TAG 36 | volumes: 37 | - ./aperturedb/db_$RUNNER_NAME:/aperturedb/db 38 | - ./aperturedb/logs:/aperturedb/logs 39 | restart: always 40 | environment: 41 | ADB_KVGD_DB_SIZE: "204800" 42 | ADB_LOG_PATH: "logs" 43 | ADB_ENABLE_DEBUG: 1 44 | ADB_MASTER_KEY: "admin" 45 | ADB_PORT: 55553 46 | ADB_FORCE_SSL: false 47 | 48 | webui: 49 | image: aperturedata/aperturedata-platform-web-private:latest 50 | restart: always 51 | 52 | nginx: 53 | depends_on: 54 | ca: 55 | condition: service_completed_successfully 56 | image: nginx 57 | restart: always 58 | ports: 59 | - $GATEWAY:8087:80 60 | - $GATEWAY:8443:443 61 | configs: 62 | - source: nginx.conf 63 | target: /etc/nginx/conf.d/default.conf 64 | volumes: 65 | - ./aperturedb/certificate:/etc/nginx/certificate 66 | 67 | configs: 68 | nginx.conf: 69 | content: | 70 | server { 71 | listen 80; 72 | listen 443 ssl; 73 | client_max_body_size 256m; 74 | ssl_certificate /etc/nginx/certificate/tls.crt; 75 | ssl_certificate_key /etc/nginx/certificate/tls.key; 76 | location / { 77 | proxy_pass http://webui; 78 | } 79 | location /api/ { 80 | proxy_pass http://lenz:8080; 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /test/download_images.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | from aperturedb import ImageDownloader 4 | 5 | 6 | def main(params): 7 | 8 | downloader = ImageDownloader.ImageDownloader( 9 | check_if_present=True, n_download_retries=2) 10 | downloader.batched_run(ImageDownloader.ImageDownloaderCSV(params.in_file), 11 | numthreads=32, 12 | batchsize=1, 13 | stats=True) 14 | return downloader.error_counter == 0 15 | 16 | 17 | def get_args(): 18 | obj = argparse.ArgumentParser() 19 | 20 | # Run Config 21 | obj.add_argument('-in_file', type=str, default="input/url_images.adb.csv") 22 | 23 | params = obj.parse_args() 24 | 25 | return params 26 | 27 | 28 | if __name__ == "__main__": 29 | args =
get_args() 30 | sys.exit(0 if main(args) else 1) 31 | -------------------------------------------------------------------------------- /test/get_10_faces_with_annotations.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "FindImage" : { 3 | "blobs": false, 4 | "constraints": { 5 | "Bald": ["==", null] 6 | }, 7 | "results" : { 8 | "list": ["_uniqueid"], 9 | "limit": 10 10 | } 11 | }} 12 | ] 13 | -------------------------------------------------------------------------------- /test/get_10_faces_with_optional_annotations.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "FindImage" : { 3 | "blobs": false, 4 | "constraints": { 5 | "any": { 6 | "Bald": ["!=", null] 7 | } 8 | }, 9 | "results" : { 10 | "list": ["_uniqueid"], 11 | "limit": 10 12 | } 13 | }}, 14 | { "FindImage" : { 15 | "blobs": false, 16 | "constraints": { 17 | "any": { 18 | "Bald": ["==", null] 19 | } 20 | }, 21 | "results" : { 22 | "list": ["_uniqueid"], 23 | "limit": 10 24 | } 25 | }} 26 | ] 27 | -------------------------------------------------------------------------------- /test/get_10_image_uniqueids.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "FindImage" : { 3 | "blobs": false, 4 | "results" : { 5 | "list": ["_uniqueid"], 6 | "limit": 10 7 | } 8 | }} 9 | ] 10 | -------------------------------------------------------------------------------- /test/input/README.md: -------------------------------------------------------------------------------- 1 | Here goes input data for testing 2 | -------------------------------------------------------------------------------- /test/input/sample_gs_urls: -------------------------------------------------------------------------------- 1 | gs://aperturedb-testing/sample_images/1002318269_97db6e0975.jpg 2 | gs://aperturedb-testing/sample_images/10201275523_3e6ea67c7f.jpg 3 | gs://aperturedb-testing/sample_images/2297552664_1ee0e8855d.jpg 4 | gs://aperturedb-testing/sample_images/4140939180_07aeded917.jpg 5 | gs://aperturedb-testing/sample_images/4436463882_b96a3d9df9.jpg 6 | gs://aperturedb-testing/sample_images/4572998878_658b45226f.jpg 7 | gs://aperturedb-testing/sample_images/6985418911_df7747990d.jpg 8 | gs://aperturedb-testing/sample_images/7289030198_1f1ba44113.jpg 9 | gs://aperturedb-testing/sample_images/9329902958_0bc80ce58a.jpg 10 | gs://aperturedb-testing/sample_images/9506922316_c19019e38f.jpg 11 | -------------------------------------------------------------------------------- /test/input/sample_gs_video_urls: -------------------------------------------------------------------------------- 1 | gs://aperturedb-testing/sample_videos/109b799c2ec09f526dea6caabaefc53.mp4 2 | gs://aperturedb-testing/sample_videos/1dd248793c90a3e07f5ea825df27a0d7.mp4 3 | gs://aperturedb-testing/sample_videos/2e7a3ea46f4b0c12b81348bde1d45.mp4 4 | gs://aperturedb-testing/sample_videos/4ca3fb2f50eb773480acd3b8d7decee.mp4 5 | gs://aperturedb-testing/sample_videos/4ec3609ad2d97661146fd536957df14.mp4 6 | gs://aperturedb-testing/sample_videos/4fb04ae53497501c91c1f91c45dad1c7.mp4 7 | gs://aperturedb-testing/sample_videos/5116a64cc2159daebfb161d0cc3f6945.mp4 8 | gs://aperturedb-testing/sample_videos/51b71187b6cd1d6dd2aa32aa107a2f.mp4 9 | gs://aperturedb-testing/sample_videos/5573f4dd80f6c427e9f1c3d16751ad8.mp4 10 | gs://aperturedb-testing/sample_videos/5952518f7358d3d5d1c1b31d745aae3.mp4 11 | 
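The `get_10_*.json` files above are plain query arrays in the ApertureDB wire format, so they can be replayed as-is through the SDK connector. Below is a minimal sketch, intended to be run from the `test/` directory; the host, port, credentials and `use_ssl` flag are assumptions mirroring the defaults in `test/dbinfo.py` and the connection pattern in `test/test_Server.py`, and should be adapted to your deployment.

import json
from aperturedb.Connector import Connector

# Connection parameters mirror test/dbinfo.py defaults (lenz TCP port 55556);
# adjust host, port, credentials and use_ssl for your own deployment.
db = Connector(host="localhost", port=55556,
               user="admin", password="admin", use_ssl=True)

# Load one of the canned queries and send it unchanged.
with open("get_10_image_uniqueids.json") as f:
    query = json.load(f)

# Print whatever the connector returns for the FindImage command.
print(db.query(query))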
-------------------------------------------------------------------------------- /test/input/sample_http_urls: -------------------------------------------------------------------------------- 1 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/1002318269_97db6e0975.jpg 2 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/10201275523_3e6ea67c7f.jpg 3 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/2297552664_1ee0e8855d.jpg 4 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/4140939180_07aeded917.jpg 5 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/4436463882_b96a3d9df9.jpg 6 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/4572998878_658b45226f.jpg 7 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/6985418911_df7747990d.jpg 8 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/7289030198_1f1ba44113.jpg 9 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/9329902958_0bc80ce58a.jpg 10 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_images/9506922316_c19019e38f.jpg 11 | -------------------------------------------------------------------------------- /test/input/sample_http_video_urls: -------------------------------------------------------------------------------- 1 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/109b799c2ec09f526dea6caabaefc53.mp4 2 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/1dd248793c90a3e07f5ea825df27a0d7.mp4 3 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/2e7a3ea46f4b0c12b81348bde1d45.mp4 4 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/4ca3fb2f50eb773480acd3b8d7decee.mp4 5 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/4ec3609ad2d97661146fd536957df14.mp4 6 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/4fb04ae53497501c91c1f91c45dad1c7.mp4 7 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/5116a64cc2159daebfb161d0cc3f6945.mp4 8 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/51b71187b6cd1d6dd2aa32aa107a2f.mp4 9 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/5573f4dd80f6c427e9f1c3d16751ad8.mp4 10 | https://aperturedata-public.s3.us-west-2.amazonaws.com/sample_videos/5952518f7358d3d5d1c1b31d745aae3.mp4 11 | -------------------------------------------------------------------------------- /test/input/sample_s3_urls: -------------------------------------------------------------------------------- 1 | s3://aperturedata-public/sample_images/4436463882_b96a3d9df9.jpg 2 | s3://aperturedata-public/sample_images/9329902958_0bc80ce58a.jpg 3 | s3://aperturedata-public/sample_images/2297552664_1ee0e8855d.jpg 4 | s3://aperturedata-public/sample_images/4140939180_07aeded917.jpg 5 | s3://aperturedata-public/sample_images/6985418911_df7747990d.jpg 6 | s3://aperturedata-public/sample_images/10201275523_3e6ea67c7f.jpg 7 | s3://aperturedata-public/sample_images/7289030198_1f1ba44113.jpg 8 | s3://aperturedata-public/sample_images/1002318269_97db6e0975.jpg 9 | s3://aperturedata-public/sample_images/9506922316_c19019e38f.jpg 10 | s3://aperturedata-public/sample_images/4572998878_658b45226f.jpg 11 | -------------------------------------------------------------------------------- /test/input/sample_s3_video_urls: 
-------------------------------------------------------------------------------- 1 | s3://aperturedata-public/sample_videos/109b799c2ec09f526dea6caabaefc53.mp4 2 | s3://aperturedata-public/sample_videos/1dd248793c90a3e07f5ea825df27a0d7.mp4 3 | s3://aperturedata-public/sample_videos/2e7a3ea46f4b0c12b81348bde1d45.mp4 4 | s3://aperturedata-public/sample_videos/4ca3fb2f50eb773480acd3b8d7decee.mp4 5 | s3://aperturedata-public/sample_videos/4ec3609ad2d97661146fd536957df14.mp4 6 | s3://aperturedata-public/sample_videos/4fb04ae53497501c91c1f91c45dad1c7.mp4 7 | s3://aperturedata-public/sample_videos/5116a64cc2159daebfb161d0cc3f6945.mp4 8 | s3://aperturedata-public/sample_videos/51b71187b6cd1d6dd2aa32aa107a2f.mp4 9 | s3://aperturedata-public/sample_videos/5573f4dd80f6c427e9f1c3d16751ad8.mp4 10 | s3://aperturedata-public/sample_videos/5952518f7358d3d5d1c1b31d745aae3.mp4 11 | -------------------------------------------------------------------------------- /test/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | log_format = %(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s 3 | log_date_format = %Y-%m-%d %H:%M:%S 4 | pythonpath = . 5 | markers = 6 | slow: slow running test 7 | external_network: mark a test using external network 8 | remote_credentials: mark a test as requiring remote authentication ( that isn't included in checkout ) 9 | kaggle: uses kaggle 10 | http: uses HTTP/HTTPS interface 11 | tcp: uses TCP/REST interface 12 | dask: uses DASK multi-processing library 13 | -------------------------------------------------------------------------------- /test/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -u 4 | set -e 5 | 6 | mkdir -p output 7 | rm -rf output/* 8 | mkdir -p input/blobs 9 | 10 | echo "Downloading images..." 11 | python3 download_images.py 12 | RESULT=$? 13 | if [[ $RESULT != 0 ]]; then 14 | echo "Download failed." 15 | exit 1 16 | fi 17 | echo "Done downloading images." 18 | 19 | echo "Generating input files..." 20 | python3 generateInput.py 21 | echo "Done generating input files." 22 | 23 | echo "Running tests..." 24 | CREDENTIALS_FILE='/tmp/key.json' 25 | echo $GCP_SERVICE_ACCOUNT_KEY > $CREDENTIALS_FILE 26 | export GOOGLE_APPLICATION_CREDENTIALS=$CREDENTIALS_FILE 27 | # capture errors 28 | set +e 29 | CLIENT_PATH="${APERTUREDB_LOG_PATH}/../client/${FILTER}" 30 | CLIENT_PATH=${CLIENT_PATH// /_} 31 | mkdir -p ${CLIENT_PATH} 32 | PROJECT=aperturedata KAGGLE_username=ci KAGGLE_key=dummy coverage run -m pytest -m "$FILTER" test_*.py -v | tee ${CLIENT_PATH}/test.log 33 | RESULT=$? 34 | cp error*.log -v ${CLIENT_PATH} 35 | 36 | if [[ $RESULT != 0 ]]; then 37 | echo "Test failed; outputting db log:" 38 | if [[ "${APERTUREDB_LOG_PATH}" != "" ]]; then 39 | 40 | BUCKET=python-ci-runs 41 | NOW=$(date -Iseconds) 42 | ARCHIVE_NAME=logs.tar.gz 43 | DESTINATION="s3://${BUCKET}/aperturedb-${NOW}-${FILTER}.tgz" 44 | tar czf ${ARCHIVE_NAME} ${APERTUREDB_LOG_PATH}/.. 45 | aws s3 cp ${ARCHIVE_NAME} $DESTINATION 46 | echo "Log output to $DESTINATION" 47 | else 48 | echo "Unable to output log, APERTUREDB_LOG_PATH not set." 49 | fi 50 | exit 1 51 | else 52 | echo "Generating coverage..." 
53 | coverage html -i --directory=output 54 | python adb_timing_tests.py 55 | fi 56 | 57 | -------------------------------------------------------------------------------- /test/run_test_container.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -u 4 | set -e 5 | 6 | function check_containers_networks(){ 7 | echo "Running containers and networks cleanup" 8 | docker ps 9 | echo "Existing networks" 10 | docker network ls 11 | } 12 | 13 | function run_aperturedb_instance(){ 14 | set -e 15 | TAG=$1 16 | #Ensure clean environment (as much as possible) 17 | RUNNER_NAME=$TAG docker compose -f docker-compose.yml down --remove-orphans 18 | docker network rm ${TAG}_host_default || true 19 | 20 | # ensure latest db 21 | docker compose pull 22 | 23 | rm -rf output 24 | mkdir -m 777 output 25 | 26 | docker network create ${TAG}_host_default 27 | GATEWAY=$(docker network inspect ${TAG}_host_default | jq -r .[0].IPAM.Config[0].Gateway) 28 | GATEWAY=$GATEWAY RUNNER_NAME=$TAG docker compose -f docker-compose.yml up -d 29 | echo "$GATEWAY" 30 | } 31 | 32 | IP_REGEX='[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}' 33 | 34 | GATEWAY_HTTP=$(run_aperturedb_instance "${RUNNER_NAME}_http" | grep $IP_REGEX ) 35 | GATEWAY_NON_HTTP=$(run_aperturedb_instance "${RUNNER_NAME}_non_http" | grep $IP_REGEX ) 36 | 37 | # The LOG_PATH and RUNNER_INFO_PATH are set to the current working directory 38 | LOG_PATH="$(pwd)/aperturedb/logs" 39 | TESTING_LOG_PATH="/aperturedb/test/server_logs" 40 | RUNNER_INFO_PATH="$(pwd)/aperturedb/logs/runner_state" 41 | 42 | check_containers_networks | tee "$RUNNER_INFO_PATH"/runner_state.log 43 | 44 | REPOSITORY="aperturedata/aperturedb-python-tests" 45 | if ! [ -z ${1+x} ] 46 | then 47 | REPOSITORY="$1" 48 | fi 49 | echo "running tests on docker image $REPOSITORY with $GATEWAY_HTTP and $GATEWAY_NON_HTTP" 50 | docker run \ 51 | -v $(pwd)/output:/aperturedata/test/output \ 52 | -v "$LOG_PATH":"${TESTING_LOG_PATH}" \ 53 | -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ 54 | -e AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION \ 55 | -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ 56 | -e GCP_SERVICE_ACCOUNT_KEY="$GCP_SERVICE_ACCOUNT_KEY" \ 57 | -e APERTUREDB_LOG_PATH="${TESTING_LOG_PATH}" \ 58 | -e GATEWAY="${GATEWAY_HTTP}" \ 59 | -e FILTER="http" \ 60 | $REPOSITORY & 61 | 62 | pid1=$! 63 | docker run \ 64 | -v $(pwd)/output:/aperturedata/test/output \ 65 | -v "$LOG_PATH":"${TESTING_LOG_PATH}" \ 66 | -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ 67 | -e AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION \ 68 | -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ 69 | -e GCP_SERVICE_ACCOUNT_KEY="$GCP_SERVICE_ACCOUNT_KEY" \ 70 | -e APERTUREDB_LOG_PATH="${TESTING_LOG_PATH}" \ 71 | -e GATEWAY="${GATEWAY_NON_HTTP}" \ 72 | -e FILTER="not http" \ 73 | $REPOSITORY & 74 | 75 | pid2=$! 76 | wait $pid1 77 | exit_code1=$? 78 | wait $pid2 79 | exit_code2=$? 
80 | 81 | if [ $exit_code1 -ne 0 ]; then 82 | echo "Tests failed for HTTP" 83 | exit $exit_code1 84 | fi 85 | if [ $exit_code2 -ne 0 ]; then 86 | echo "Tests failed for NON_HTTP" 87 | exit $exit_code2 88 | fi 89 | 90 | echo "Tests completed successfully" 91 | echo " --- Runner name: ${RUNNER_NAME} ---" 92 | check_containers_networks 93 | -------------------------------------------------------------------------------- /test/test_Key.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typer 3 | 4 | from aperturedb.Configuration import Configuration 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | key_pairs = { 9 | "WzEsMSwibG9jYWxob3N0IiwiYWRtaW4iLCJhZG1pbiJd": 10 | [1, 1, "localhost", "admin", "admin"], 11 | "WzEsMCwiMTI3LjAuMC4xOjU1NTU0IiwiWVFadVZVV2Zab0FkWjJrUUVMeFB5RnptZHJ3WXd0cjBBRGEiXQ==": 12 | [1, 0, "127.0.0.1:55554", "YQZuVUWfZoAdZ2kQELxPyFzmdrwYwtr0ADa"], 13 | "WzEsNywid29ya2Zsb3ctbG9hZGVkLWZvMWphdTN0LjAiLCJhZG1pbiIsIjEyMzRCVFFMUF8lMnR0Il0=": 14 | [1, 7, "workflow-loaded-fo1jau3t.farm0000.cloud.aperturedata.io", 15 | "admin", "1234BTQLP_%2tt"], 16 | "WzEsNSwidGVzdC0zcWpxdDZrcy40IiwiWVFadVZVV2Zab0FkWjJrUUVMeFB5RnptZHJ3WXd0cjBBRGEiXQ==": 17 | [1, 5, "test-3qjqt6ks.farm0004.cloud.aperturedata.io", 18 | "YQZuVUWfZoAdZ2kQELxPyFzmdrwYwtr0ADa"], 19 | "WzEsMiwiMTkyLjE2OC40LjEyOjU1NTU1IiwiYWRtaW4iLCJhZG1pbiJd": 20 | [1, 2, "192.168.4.12:55555", "admin", "admin"], 21 | "WzEsMywiYXBlcnR1cmVkYi5iaWdjb3JwLmlvOjE5MTgiLCJZUVp1VlVXZlpvQWRaMmtRRUx4UHlGem1kcndZd3RyMEFEYSJd": 22 | [1, 3, "aperturedb.bigcorp.io:1918", "YQZuVUWfZoAdZ2kQELxPyFzmdrwYwtr0ADa"], 23 | "WzEsNCwidGNwLTU1N2Vwbm4zLjkwOToxOTE4IiwiYWRtaW4iLCI4OTBFcE1uKyElMiRfIl0=": 24 | [1, 4, "tcp-557epnn3.farm0909.cloud.aperturedata.io:1918", 25 | "admin", "890EpMn+!%2$_"], 26 | "WzEsNiwiaHR0cC05MGpnM3pwcy4xMjo0NDMiLCJZUVp1VlVXZlpvQWRaMmtRRUx4UHlGem1kcndZd3RyMEFEYSJd": 27 | [1, 6, "http-90jg3zps.farm0012.cloud.aperturedata.io:443", 28 | "YQZuVUWfZoAdZ2kQELxPyFzmdrwYwtr0ADa"] 29 | } 30 | 31 | 32 | class TestApertureDBKey(): 33 | 34 | def test_encode_keys(self): 35 | for key, data in key_pairs.items(): 36 | logger.info(f"Testing encoding of {key}") 37 | config_type = data[1] 38 | host = data[2] 39 | username = password = token = None 40 | comp, rest, ssl = Configuration.key_type_to_config(config_type) 41 | if host.rfind(':') != -1: 42 | port = int(host.split(':')[1]) 43 | host = host.split(':')[0] 44 | else: 45 | port = Configuration.config_default_port(rest, ssl) 46 | if len(data) == 4: 47 | token = data[3] 48 | else: 49 | username = data[3] 50 | password = data[4] 51 | c = Configuration(host, port, username, password, 52 | "encoding test", use_rest=rest, use_ssl=ssl, token=token) 53 | deflated = c.deflate() 54 | assert deflated == key 55 | 56 | def test_decode_keys(self): 57 | for key, data in key_pairs.items(): 58 | logger.info(f"Testing decoding of {key}") 59 | config = Configuration.reinflate(key) 60 | config_type = data[1] 61 | host = data[2] 62 | if config_type == 0 or config_type == 4: 63 | assert not config.use_rest and not config.use_ssl 64 | if config_type == 1 or config_type == 5: 65 | assert not config.use_rest and config.use_ssl 66 | if config_type == 2 or config_type == 6: 67 | assert config.use_rest and not config.use_ssl 68 | if config_type == 3 or config_type == 7: 69 | assert config.use_rest and config.use_ssl 70 | 71 | if host.rfind(':') != -1: 72 | port = int(host.split(':')[1]) 73 | host = host.split(':')[0] 74 | assert config.port == port 75 | 76 | if 
len(data) == 4: 77 | assert config.token == "adbp_" + data[3] 78 | else: 79 | assert config.username == data[3] and config.password == data[4] 80 | 81 | assert(config.host == host) 82 | -------------------------------------------------------------------------------- /test/test_Parallel.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import random 3 | 4 | from aperturedb.Connector import Connector 5 | from aperturedb.ParallelQuery import ParallelQuery 6 | from aperturedb.Subscriptable import Subscriptable 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | # Tests for parallel which don't involve data. 11 | 12 | 13 | class GeneratorWithErrors(Subscriptable): 14 | def __init__(self, commands_per_query=1, elements=100, error_pct=.5) -> None: 15 | super().__init__() 16 | self.commands_per_query = commands_per_query 17 | self.elements = elements 18 | self.error_pct = error_pct 19 | 20 | def __len__(self): 21 | return self.elements 22 | 23 | def getitem(self, subscript): 24 | query = [] 25 | blobs = [] 26 | for i in range(self.commands_per_query): 27 | if random.randint(0, 100) <= (self.error_pct * 100): 28 | query.append({ 29 | "BadCommand": { 30 | } 31 | }) 32 | else: 33 | query.append({ 34 | "FindEntity": { 35 | "results": { 36 | "count": True 37 | } 38 | } 39 | }) 40 | 41 | return query, blobs 42 | 43 | 44 | class TestParallel(): 45 | """ 46 | These check operation of ParallelQuery 47 | """ 48 | 49 | def test_someBadQueries(self, db: Connector): 50 | """ 51 | Verifies that it handles some queries returning errors 52 | """ 53 | try: 54 | elements = 100 55 | generator = GeneratorWithErrors(elements=elements) 56 | querier = ParallelQuery(db, dry_run=False) 57 | querier.query(generator, batchsize=2, 58 | numthreads=8, 59 | stats=True) 60 | logger.info(querier.get_succeeded_commands()) 61 | assert querier.get_succeeded_commands() < elements 62 | except Exception as e: 63 | print(e) 64 | print("Failed to renew Session") 65 | assert False 66 | 67 | def test_allBadQueries(self, db: Connector): 68 | """ 69 | Verifies that it handles all queries returning errors 70 | """ 71 | try: 72 | elements = 100 73 | generator = GeneratorWithErrors(elements=elements, error_pct=1) 74 | querier = ParallelQuery(db, dry_run=False) 75 | querier.query(generator, batchsize=2, 76 | numthreads=8, 77 | stats=True) 78 | logger.info(querier.get_succeeded_commands()) 79 | assert querier.get_succeeded_commands() == 0 80 | except Exception as e: 81 | print(e) 82 | print("Failed to renew Session") 83 | assert False 84 | -------------------------------------------------------------------------------- /test/test_SPARQL.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | import subprocess 4 | import runpy 5 | import requests 6 | import shutil 7 | import pytest 8 | import numpy as np 9 | import pandas as pd 10 | import os.path as osp 11 | import tempfile 12 | from aperturedb.Query import QueryBuilder, Query 13 | from aperturedb.Entities import Entities 14 | from aperturedb.Constraints import Constraints 15 | from aperturedb.Images import Images 16 | from aperturedb.Utils import Utils 17 | from aperturedb.SPARQL import SPARQL 18 | from aperturedb.cli.ingest import from_csv, TransformerType, IngestType 19 | from aperturedb.ImageDataCSV import ImageDataCSV 20 | from aperturedb.EntityDataCSV import EntityDataCSV 21 | from aperturedb.ConnectionDataCSV import ConnectionDataCSV 22 | from 
aperturedb.DescriptorDataCSV import DescriptorDataCSV 23 | from aperturedb.ParallelLoader import ParallelLoader 24 | from aperturedb.transformers.common_properties import CommonProperties 25 | from aperturedb.transformers.image_properties import ImageProperties 26 | from aperturedb.transformers.clip_pytorch_embeddings import CLIPPyTorchEmbeddings 27 | from aperturedb.transformers.facenet_pytorch_embeddings import FacenetPyTorchEmbeddings 28 | 29 | import logging 30 | logger = logging.getLogger(__name__) 31 | 32 | 33 | @pytest.fixture 34 | def load_cookbook(utils: Utils, db): 35 | utils.remove_all_indexes() 36 | utils.remove_all_objects() 37 | 38 | temp_dir = tempfile.mkdtemp() 39 | # temp_path = Path(temp_dir) 40 | original_dir = os.getcwd() 41 | os.chdir(temp_dir) 42 | 43 | # Define the URL and file path for the script 44 | file_url = "https://raw.githubusercontent.com/aperture-data/Cookbook/refs/heads/main/scripts/convert_ingredients_adb_csv.py" 45 | file_path = Path("convert_ingredients_adb_csv.py") 46 | 47 | try: 48 | # Download the script file 49 | response = requests.get(file_url) 50 | file_path.write_text(response.text) 51 | 52 | runpy.run_path(str(file_path), run_name="__main__") 53 | 54 | data = ImageDataCSV("dishes.adb.csv") 55 | data = CLIPPyTorchEmbeddings(data, client=db) 56 | data = ImageProperties(data, client=db) 57 | data = CommonProperties(data, client=db) 58 | loader = ParallelLoader(db) 59 | loader.ingest(data, batchsize=100, stats=True) 60 | 61 | data = EntityDataCSV("ingredients.adb.csv") 62 | loader = ParallelLoader(db) 63 | loader.ingest(data, batchsize=100, stats=True) 64 | 65 | data = ConnectionDataCSV("dish_ingredients.adb.csv") 66 | loader = ParallelLoader(db) 67 | loader.ingest(data, batchsize=100, stats=True) 68 | finally: 69 | os.chdir(original_dir) 70 | 71 | 72 | # Tag the test functions that depend on the setup as external_network 73 | def pytest_collection_modifyitems(items): 74 | for item in items: 75 | if "load_cookbook" in getattr(item, "fixturenames", ()): 76 | item.add_marker("external_network") 77 | 78 | # Test functions that depends on the setup 79 | 80 | 81 | @pytest.fixture 82 | def sparql(db): 83 | sparql = SPARQL(db) 84 | print(sparql.schema) 85 | assert sparql.connections, f"No connections {sparql.schema}" 86 | assert sparql.properties, f"No properties {sparql.schema}" 87 | return sparql 88 | 89 | 90 | @pytest.mark.parametrize("description,query", [ 91 | ('Find all images with chicken and butter as ingredients', 92 | """ 93 | SELECT ?s ?caption { 94 | ?s c:HasIngredient [p:name "chicken"] , [p:name "butter"] ; 95 | p:caption ?caption . 96 | } LIMIT 10 97 | """), 98 | ('Find all images with chicken or butter as ingredients', 99 | """ 100 | SELECT ?s ?caption WHERE { 101 | VALUES ?ingredient { "chicken" "butter" } 102 | ?s c:HasIngredient [p:name ?ingredient] ; 103 | p:caption ?caption . 104 | } LIMIT 10 105 | """), 106 | ('Find the top 10 ingredients', 107 | """ 108 | SELECT (COUNT(*) AS ?count) ?ingredient WHERE { 109 | ?s c:HasIngredient [p:name ?ingredient] . 110 | } GROUP BY ?ingredient ORDER BY DESC(?count) LIMIT 10 111 | """), 112 | ('Do a descriptor search for a random image', 113 | f""" 114 | SELECT ?i ?distance ?d ?caption WHERE {{ 115 | ?d knn:similarTo [ 116 | knn:set 'ViT-B/16' ; 117 | knn:k_neighbors 20 ; 118 | knn:vector "{SPARQL.encode_descriptor(np.random.rand(512))}" ; 119 | knn:distance ?distance 120 | ] ; 121 | c:ANY ?i . # Use fake connection because we can't say c:_DescriptorConnection 122 | ?i p:caption ?caption . 
123 | }} 124 | """)]) 125 | def test_sparql(load_cookbook, sparql, query, description): 126 | results = sparql.query(query) 127 | assert results, f"No results for {description}" 128 | -------------------------------------------------------------------------------- /test/test_Server.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from aperturedb.Connector import Connector 3 | from aperturedb.ConnectorRest import ConnectorRest 4 | from aperturedb.ParallelLoader import ParallelLoader 5 | import dbinfo 6 | import pandas as pd 7 | 8 | import logging 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class TestBadResponses(): 13 | 14 | def test_Error_code_2(self, db: Connector, insert_data_from_csv, monkeypatch): 15 | count = 0 16 | original_q = db._query 17 | 18 | def test_response_half_exist(a: Connector, query, blobs): 19 | nonlocal count 20 | if "AddImage" not in query[0]: 21 | count += 1 22 | resp = original_q(query, blobs) 23 | return resp 24 | response = [] 25 | for i in range(len(query)): 26 | result = {"info": "Object Exists!", 27 | "status": 2} if i % 2 == 0 else {"status": 0} 28 | response.append({"AddImage": result}) 29 | 30 | return (response, []) 31 | monkeypatch.setattr(Connector, "_query", test_response_half_exist) 32 | monkeypatch.setattr(ParallelLoader, "get_existing_indices", lambda x: { 33 | "entity": {"_Image": {"id"}}}) 34 | data, loader = insert_data_from_csv( 35 | in_csv_file = "./input/images.adb.csv") 36 | assert loader.error_counter == 0 37 | assert loader.get_succeeded_queries() == len(data) 38 | assert loader.get_succeeded_commands() == len(data) 39 | 40 | def test_Error_code_3(self, db: Connector, insert_data_from_csv, monkeypatch): 41 | count = 0 42 | original_q = db._query 43 | 44 | def test_response_half_non_unique(a: Connector, query, blobs): 45 | nonlocal count 46 | if "AddImage" not in query[0]: 47 | count += 1 48 | resp = original_q(query, blobs) 49 | return resp 50 | response = None 51 | for i in range(len(query)): 52 | result = { 53 | 'info': 'JSON Command 1: expecting 1 but got 2', 'status': 3} 54 | response = result 55 | break 56 | 57 | return (response, []) 58 | monkeypatch.setattr(Connector, "_query", test_response_half_non_unique) 59 | monkeypatch.setattr(ConnectorRest, "_query", 60 | test_response_half_non_unique) 61 | monkeypatch.setattr(ParallelLoader, "get_existing_indices", lambda x: { 62 | "entity": {"_Image": {"id"}}}) 63 | input_data = pd.read_csv("./input/images.adb.csv") 64 | data, loader = insert_data_from_csv( 65 | in_csv_file = "./input/images.adb.csv", expected_error_count = len(input_data)) 66 | assert loader.error_counter == 0, f"Error counter: {loader.error_counter=}" 67 | assert loader.get_succeeded_queries( 68 | ) == 0, f"Queries: {loader.get_succeeded_queries()=}" 69 | assert loader.get_succeeded_commands( 70 | ) == 0, f"Commands: {loader.get_succeeded_commands()=}" 71 | 72 | def test_AuthFailure(self, monkeypatch): 73 | 74 | def failed_auth_query(conn_obj, ignored_query): 75 | # generate a response from the server which is not the expected Auth result. 76 | # _query returns the server response json and an array of blobs. 
77 | return ({"info": "Internal Server Error 42", "status": -1, "ignored": ignored_query}, []) 78 | 79 | monkeypatch.setattr(Connector, "_query", failed_auth_query) 80 | 81 | with pytest.raises(Exception) as conn_exception: 82 | db = Connector( 83 | host = dbinfo.DB_TCP_HOST, 84 | port = dbinfo.DB_TCP_PORT, 85 | user = dbinfo.DB_USER, 86 | password = dbinfo.DB_PASSWORD, 87 | use_ssl = True) 88 | db.query([{ 89 | "FindImage": { 90 | "results": { 91 | "limit": 5 92 | } 93 | } 94 | }]) 95 | 96 | assert "Unexpected response" in str(conn_exception.value) 97 | -------------------------------------------------------------------------------- /test/test_Stats.py: -------------------------------------------------------------------------------- 1 | from aperturedb.DescriptorDataCSV import DescriptorDataCSV 2 | from aperturedb.ParallelLoader import ParallelLoader 3 | from io import BytesIO, TextIOWrapper 4 | import sys 5 | 6 | # stats had some issues with displaying of computed data. 7 | # These tests reproduce the issue. 8 | # Run it with: (to avoid pytest's output) 9 | # pytest test_Stats.py -s --no-summary 10 | 11 | 12 | class TestStats(): 13 | def ingest_with_capture(self, data, db): 14 | loader = ParallelLoader(db) 15 | # setup the environment 16 | old_stdout = sys.stdout 17 | sys.stdout = TextIOWrapper(BytesIO(), sys.stdout.encoding) 18 | 19 | loader.ingest(data, batchsize=99, numthreads=31, stats=True) 20 | sys.stdout.seek(0) # jump to the start 21 | out = sys.stdout.read() # read output 22 | 23 | sys.stdout = old_stdout 24 | return out 25 | 26 | def validate_stats(self, out, assertions): 27 | for line in out.splitlines(): 28 | if ":" in line: 29 | stats = line.split(":") 30 | if len(stats) == 2: 31 | first, second = line.split(":") 32 | print(first, second) 33 | if first in assertions: 34 | assert assertions[first.strip()](second.strip()) == True, \ 35 | f"Assertion failed for '{first}' with value {second}" 36 | 37 | def test_stats_all_errors_non_equal_last_batch(self, db, utils): 38 | utils.remove_all_objects() 39 | # Try to ingest descriptors, with no descriptor set, so all queries fail 40 | data = DescriptorDataCSV( 41 | "./input/setA.adb.csv", blobs_relative_to_csv=True) 42 | out = self.ingest_with_capture(data, db) 43 | assertions = { 44 | "Total inserted elements": lambda x: float(x) == 0, 45 | "Overall insertion throughput (element/s)": lambda x: x == "NaN", 46 | } 47 | self.validate_stats(out, assertions) 48 | -------------------------------------------------------------------------------- /test/test_Success.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from aperturedb.ParallelQuery import ParallelQuery 3 | from aperturedb.Query import QueryBuilder 4 | 5 | import logging 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class TestLoaderSuccess(): 10 | def assertEqual(self, expected, actual): 11 | if expected != actual: 12 | raise AssertionError( 13 | "Expected {}, got {}".format(expected, actual)) 14 | 15 | def test_Loader(self, utils, insert_data_from_csv): 16 | # Assert that we have a clean slate to begin with. 
17 | assert utils.remove_all_indexes() 18 | assert utils.remove_all_objects() == True 19 | # initial load 20 | data, _ = insert_data_from_csv( 21 | in_csv_file = "./input/persons-exist-base.adb.csv") 22 | self.assertEqual(len(data), utils.count_entities("Person")) 23 | 24 | # default configuration does not consider object exists to be a query failure 25 | def assert_partial(loader, test_data): 26 | self.assertEqual(len(data) + len(test_data) - loader.get_objects_existed(), 27 | utils.count_entities("Person")) 28 | data, _ = insert_data_from_csv(in_csv_file = "./input/persons-some-exist.adb.csv", 29 | loader_result_lambda = assert_partial) 30 | 31 | # change to disallow object exist to qualify as success. 32 | old_status = ParallelQuery.getSuccessStatus() 33 | ParallelQuery.setSuccessStatus([0]) 34 | 35 | # Assert that we have a clean slate to begin with. 36 | assert utils.remove_all_indexes() 37 | assert utils.remove_all_objects() == True 38 | # initial load 39 | data, _ = insert_data_from_csv( 40 | in_csv_file = "./input/persons-exist-base.adb.csv") 41 | # default configuration does not consider object exists to be a query 42 | # failure 43 | data, _ = insert_data_from_csv( 44 | in_csv_file = "./input/persons-some-exist.adb.csv", 45 | expected_error_count = 3, 46 | loader_result_lambda=assert_partial) 47 | 48 | # reset success status to default 49 | ParallelQuery.setSuccessStatus(old_status) 50 | -------------------------------------------------------------------------------- /test/test_UserConvenience.py: -------------------------------------------------------------------------------- 1 | import json 2 | from types import SimpleNamespace 3 | from aperturedb.ConnectorRest import ConnectorRest 4 | from requests.sessions import Session 5 | 6 | 7 | class TestUserConvenience(): 8 | """ 9 | This class tests some undocumented features of the Python SDK. 10 | This cannot rely on dbinfo, or connect as dbinfo and common lib rely on explicit 11 | arguments. 12 | """ 13 | 14 | def test_ConnectorRest_handlesNonePort(self): 15 | """ 16 | Test that ConnectorRest can handle a None port, 17 | and will default to the correct port. 
18 | """ 19 | client = ConnectorRest(host="dummy", user="admin", password="password") 20 | assert "443" in client.url 21 | posts = 0 22 | 23 | def mock_post(self, url, headers, files, verify): 24 | nonlocal posts 25 | assert "443" in url 26 | response1 = { 27 | "json": [{"Authenticate": { 28 | "status": 0, 29 | "session_token": "x", 30 | "refresh_token": "2", 31 | "session_token_expires_in": 3600, 32 | "refresh_token_expires_in": 3600 33 | }}], 34 | "blobs": [] 35 | } 36 | 37 | r = SimpleNamespace(status_code=200, text=json.dumps(response1)) 38 | posts += 1 39 | return r 40 | old_post = Session.post 41 | Session.post = mock_post 42 | client.query("[{\"FindEntity\": {\"_ref\": 1}}]") 43 | # Ensure that the mock post was called, 1 time to authenticate, 1 time to query 44 | assert posts == 2 45 | Session.post = old_post 46 | -------------------------------------------------------------------------------- /test/test_Utils.py: -------------------------------------------------------------------------------- 1 | class TestUtils(): 2 | 3 | def test_remove_all_objects(self, utils): 4 | assert utils.remove_all_objects() == True, \ 5 | "Failed to remove all objects" 6 | 7 | def test_remove_all_indexes(self, utils): 8 | assert utils.remove_all_indexes() == True, \ 9 | "Failed to remove all indexes" 10 | 11 | def test_get_descriptorset_list(self, utils): 12 | assert utils.get_descriptorset_list() == [] 13 | -------------------------------------------------------------------------------- /test/test_torch_connector.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | import logging 4 | from typing import Union 5 | 6 | import torch 7 | import torch.distributed as dist 8 | from aperturedb import Images 9 | from aperturedb import PyTorchDataset 10 | from torch.utils.data.dataloader import DataLoader 11 | from torch.utils.data.dataset import Dataset 12 | 13 | from aperturedb.ConnectorRest import ConnectorRest 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class TestTorchDatasets(): 19 | def validate_dataset(self, dataset: Union[DataLoader, Dataset], expected_length): 20 | start = time.time() 21 | 22 | count = 0 23 | # Iterate over dataset. 24 | for img in dataset: 25 | if len(img[0]) < 0: 26 | logger.error("Empty image?") 27 | assert True == False 28 | count += len(img[1]) if isinstance(dataset, DataLoader) else 1 29 | assert count == expected_length 30 | 31 | time_taken = time.time() - start 32 | if time_taken != 0: 33 | logger.info(f"Throughput (imgs/s): {len(dataset) / time_taken}") 34 | 35 | def test_nativeContraints(self, db, utils, images): 36 | assert len(images) > 0 37 | # This is a hack against a bug in batch API. 38 | dim = 224 if isinstance(db, ConnectorRest) else 225 39 | query = [{ 40 | "FindImage": { 41 | "constraints": { 42 | "age": [">=", 0] 43 | }, 44 | "operations": [ 45 | { 46 | "type": "resize", 47 | "width": dim, 48 | "height": dim 49 | } 50 | ], 51 | "results": { 52 | "list": ["license"] 53 | } 54 | } 55 | }] 56 | 57 | dataset = PyTorchDataset.ApertureDBDataset( 58 | db, query, label_prop="license") 59 | 60 | self.validate_dataset(dataset, utils.count_images()) 61 | 62 | def test_datasetWithMultiprocessing(self, db, utils, images): 63 | len_limit = utils.count_images() 64 | # This is a hack against a bug in batch API. 
65 | # TODO Fixme 66 | dim = 224 if isinstance(db, ConnectorRest) else 225 67 | query = [{ 68 | "FindImage": { 69 | "constraints": { 70 | "age": [">=", 0] 71 | }, 72 | "operations": [ 73 | { 74 | "type": "resize", 75 | "width": dim, 76 | "height": dim 77 | } 78 | ], 79 | "results": { 80 | "list": ["license"], 81 | "limit": len_limit 82 | } 83 | } 84 | }] 85 | 86 | dataset = PyTorchDataset.ApertureDBDataset( 87 | db, query, label_prop="license") 88 | 89 | self.validate_dataset(dataset, len_limit) 90 | 91 | # Distributed Data Loader Setup 92 | 93 | # Needed for init_process_group 94 | os.environ['MASTER_ADDR'] = 'localhost' 95 | os.environ['MASTER_PORT'] = '12355' 96 | 97 | dist.init_process_group("gloo", rank=0, world_size=1) 98 | 99 | # === Distributed Data Loader Sequential 100 | batch_size = 10 101 | data_loader = DataLoader( 102 | dataset, 103 | batch_size=batch_size, # pick random values here to test 104 | num_workers=4, # num_workers > 1 to test multiprocessing works 105 | pin_memory=True, 106 | drop_last=True, 107 | ) 108 | 109 | self.validate_dataset(data_loader, len_limit) 110 | # === Distributed Data Loader Shuffler 111 | 112 | # This will generate a random sampler, which will make the use 113 | # of batching wasteful 114 | sampler = torch.utils.data.DistributedSampler( 115 | dataset, shuffle=True) 116 | 117 | data_loader = DataLoader( 118 | dataset, 119 | sampler=sampler, 120 | batch_size=batch_size, # pick random values here to test 121 | num_workers=4, # num_workers > 1 to test multiprocessing works 122 | pin_memory=True, 123 | drop_last=True, 124 | ) 125 | 126 | self.validate_dataset(data_loader, len_limit) 127 | dist.destroy_process_group() 128 | -------------------------------------------------------------------------------- /version.sh: -------------------------------------------------------------------------------- 1 | # Read version from python code 2 | read_version() { 3 | BUILD_VERSION=$(awk '$1=="__version__" && $2=="=" {print $3}' aperturedb/__init__.py | tr -d '"') 4 | } --------------------------------------------------------------------------------
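The `read_version` helper above pulls `__version__` out of `aperturedb/__init__.py` with awk; this is the same attribute that `pyproject.toml` exposes through `[tool.setuptools.dynamic]`, so the version that publish.sh and tag.sh use can also be checked from Python. A minimal sketch, assuming the package is installed and importable:

# Prints the same string that read_version stores in BUILD_VERSION and that
# setuptools publishes as the dynamic project version.
import aperturedb
print(aperturedb.__version__)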