├── .clang-format ├── .dockerignore ├── .github └── workflows │ ├── build-wheels.yml │ ├── ci-tiledb-from-source.yml │ ├── ci.yml │ ├── daily-test-build-numpy.yml │ ├── daily-test-build.yml │ ├── daily-tests.yml │ └── format.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CMakeLists.txt ├── CONTRIBUTING.md ├── HISTORY.md ├── LICENSE ├── README.md ├── cmake └── DownloadTileDB.cmake ├── doc ├── local-build.sh └── source │ ├── _static │ ├── TileDB_Logo_BlueArtboard_1@1.5x.png │ ├── custom.css │ ├── favicon.ico │ ├── tileDB_uppercase_600_112.png │ ├── tileDB_uppercase_white_600_141.png │ └── tiledb-logo_color_no_margin_@4x.png │ ├── conf.py │ ├── gensidebar.py │ ├── index.rst │ └── python-api.rst ├── docs └── building-from-source.md ├── examples ├── config.py ├── errors.py ├── fragment_info.py ├── fragments_consolidation.py ├── in_memory_vfs.py ├── incomplete_iteration.py ├── multi_attribute.py ├── multirange_indexing.py ├── object.py ├── parallel_csv_ingestion.py ├── query_condition_datetime.py ├── query_condition_dense.py ├── query_condition_sparse.py ├── query_condition_string.py ├── quickstart_dense.py ├── quickstart_dimension_labels.py ├── quickstart_dimension_labels_sparse.py ├── quickstart_sparse.py ├── reading_dense_layouts.py ├── reading_sparse_layouts.py ├── string_dimension_labels.py ├── string_float_int_dimensions.py ├── time_traveling.py ├── using_tiledb_stats.py ├── variable_length.py ├── vfs.py ├── writing_dense_multiple.py ├── writing_dense_padding.py ├── writing_dense_rgb.py └── writing_sparse_multiple.py ├── external ├── LICENSE-string_view.txt ├── LICENSE-tsl_robin_map.txt └── tsl │ ├── robin_growth_policy.h │ ├── robin_hash.h │ ├── robin_map.h │ └── robin_set.h ├── misc └── pypi_linux │ ├── Dockerfile.aarch64.manylinux2014 │ ├── Dockerfile2014 │ ├── build.aarch64.sh │ └── build.sh ├── pyproject.toml ├── setup.py └── tiledb ├── CMakeLists.txt ├── __init__.py ├── aggregation.py ├── array.py ├── array_schema.py ├── attribute.py 
├── consolidation_plan.py ├── core.cc ├── ctx.py ├── current_domain.py ├── data_order.py ├── dataframe_.py ├── datatypes.py ├── debug.cc ├── dense_array.py ├── dimension.py ├── dimension_label.py ├── dimension_label_schema.py ├── domain.py ├── domain_indexer.py ├── enumeration.py ├── filestore.py ├── filter.py ├── fragment.cc ├── fragment.py ├── group.py ├── highlevel.py ├── libtiledb ├── CMakeLists.txt ├── array.cc ├── attribute.cc ├── common.cc ├── common.h ├── consolidation_plan.cc ├── context.cc ├── current_domain.cc ├── dimension_label.cc ├── domain.cc ├── enum.cc ├── enumeration.cc ├── filestore.cc ├── filter.cc ├── group.cc ├── metadata.h ├── object.cc ├── query.cc ├── schema.cc ├── subarray.cc ├── tiledbcpp.cc └── vfs.cc ├── main.cc ├── metadata.py ├── multirange_indexing.py ├── ndrectangle.py ├── npbuffer.cc ├── npbuffer.h ├── object.py ├── parquet_.py ├── py_arrow_io_impl.h ├── py_arrowio ├── query.py ├── query_condition.cc ├── query_condition.py ├── schema_evolution.cc ├── schema_evolution.py ├── serialization.cc ├── sparse_array.py ├── stats.py ├── subarray.py ├── tests ├── __init__.py ├── cc │ ├── test_cc.py │ ├── test_filestore.py │ ├── test_group.py │ └── test_vfs.py ├── check_csv_dir.py ├── common.py ├── conftest.py ├── datatypes.py ├── perf │ ├── asv.conf.json │ └── benchmarks │ │ ├── __init__.py │ │ ├── array.py │ │ ├── benchmarks.py │ │ ├── indexing.py │ │ └── metadata.py ├── strategies.py ├── test_aggregates.py ├── test_array_schema.py ├── test_attribute.py ├── test_basic_import.py ├── test_cloud.py ├── test_compat.py ├── test_consolidation_plan.py ├── test_context_and_config.py ├── test_core.py ├── test_current_domain.py ├── test_dask.py ├── test_dimension.py ├── test_dimension_label.py ├── test_domain.py ├── test_domain_index.py ├── test_enumeration.py ├── test_examples.py ├── test_filestore.py ├── test_filters.py ├── test_fixes.py ├── test_fork_ctx.py ├── test_fragments.py ├── test_group.py ├── test_hypothesis.py ├── test_libtiledb.py ├── 
test_metadata.cc ├── test_metadata.py ├── test_multi_index-hp.py ├── test_multi_index.py ├── test_pandas_dataframe.py ├── test_query.py ├── test_query_condition.py ├── test_read_subarray.py ├── test_repr.py ├── test_schema_evolution.py ├── test_serialization.cc ├── test_serialization.py ├── test_stats.py ├── test_subarray.py ├── test_timestamp_overrides.py ├── test_util.py ├── test_vfs.py ├── test_webp.cc ├── test_webp.py └── test_write_subarray.py ├── util.cc ├── util.h ├── version_helper.py └── vfs.py /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Google 4 | AccessModifierOffset: -1 5 | AlignAfterOpenBracket: AlwaysBreak 6 | AlignConsecutiveAssignments: false 7 | AlignConsecutiveDeclarations: false 8 | AlignEscapedNewlinesLeft: true 9 | AlignOperands: true 10 | AlignTrailingComments: true 11 | AllowAllParametersOfDeclarationOnNextLine: true 12 | AllowShortBlocksOnASingleLine: false 13 | AllowShortCaseLabelsOnASingleLine: false 14 | AllowShortFunctionsOnASingleLine: None 15 | AllowShortIfStatementsOnASingleLine: false 16 | AllowShortLoopsOnASingleLine: false 17 | AlwaysBreakAfterDefinitionReturnType: None 18 | AlwaysBreakAfterReturnType: None 19 | AlwaysBreakBeforeMultilineStrings: true 20 | AlwaysBreakTemplateDeclarations: true 21 | BinPackArguments: false 22 | BinPackParameters: false 23 | BraceWrapping: 24 | AfterClass: false 25 | AfterControlStatement: false 26 | AfterEnum: false 27 | AfterFunction: false 28 | AfterNamespace: false 29 | AfterObjCDeclaration: false 30 | AfterStruct: false 31 | AfterUnion: false 32 | BeforeCatch: false 33 | BeforeElse: false 34 | IndentBraces: false 35 | BreakBeforeBinaryOperators: None 36 | BreakBeforeBraces: Attach 37 | BreakBeforeTernaryOperators: false 38 | BreakConstructorInitializersBeforeComma: true 39 | BreakAfterJavaFieldAnnotations: false 40 | BreakStringLiterals: true 41 | ColumnLimit: 80 42 | CommentPragmas: '^ IWYU 
pragma:' 43 | BreakBeforeInheritanceComma: false 44 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 45 | ConstructorInitializerIndentWidth: 4 46 | ContinuationIndentWidth: 4 47 | Cpp11BracedListStyle: true 48 | DerivePointerAlignment: false 49 | DisableFormat: false 50 | ExperimentalAutoDetectBinPacking: false 51 | FixNamespaceComments: true 52 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 53 | IncludeCategories: 54 | - Regex: '^<.*\.h>' 55 | Priority: 1 56 | - Regex: '^<.*' 57 | Priority: 2 58 | - Regex: '.*' 59 | Priority: 3 60 | IncludeIsMainRegex: '([-_](test|unittest))?$' 61 | IndentCaseLabels: true 62 | IndentWidth: 4 63 | IndentWrappedFunctionNames: false 64 | JavaScriptQuotes: Leave 65 | JavaScriptWrapImports: true 66 | KeepEmptyLinesAtTheStartOfBlocks: false 67 | MacroBlockBegin: '' 68 | MacroBlockEnd: '' 69 | MaxEmptyLinesToKeep: 1 70 | NamespaceIndentation: None 71 | PenaltyBreakAssignment: 300 72 | PenaltyBreakBeforeFirstCallParameter: 1 73 | PenaltyBreakComment: 300 74 | PenaltyBreakFirstLessLess: 120 75 | PenaltyBreakString: 1000 76 | PenaltyExcessCharacter: 1000000 77 | PenaltyReturnTypeOnItsOwnLine: 200 78 | PointerAlignment: Left 79 | ReflowComments: true 80 | SortIncludes: true 81 | SpaceAfterCStyleCast: false 82 | SpaceAfterTemplateKeyword: true 83 | SpaceBeforeAssignmentOperators: true 84 | SpaceBeforeParens: ControlStatements 85 | SpaceInEmptyParentheses: false 86 | SpacesBeforeTrailingComments: 2 87 | SpacesInAngles: false 88 | SpacesInContainerLiterals: true 89 | SpacesInCStyleCastParentheses: false 90 | SpacesInParentheses: false 91 | SpacesInSquareBrackets: false 92 | Standard: Cpp11 93 | TabWidth: 8 94 | UseTab: Never -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | build 2 | vv 3 | venv 4 | cibuildwheel_env 5 | -------------------------------------------------------------------------------- 
/.github/workflows/ci-tiledb-from-source.yml: -------------------------------------------------------------------------------- 1 | name: TileDB Python CI Using TileDB Core Source Build 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | libtiledb_ref: 7 | default: main 8 | type: string 9 | libtiledb_version: 10 | type: string 11 | 12 | jobs: 13 | 14 | build_libtiledb: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Checkout TileDB Core ${{ inputs.libtiledb_ref || 'main' }} 18 | uses: actions/checkout@v4 19 | with: 20 | repository: TileDB-Inc/TileDB 21 | ref: ${{ inputs.libtiledb_ref || 'main' }} 22 | 23 | - name: Configure TileDB 24 | run: | 25 | cmake -S . -B build \ 26 | -DCMAKE_BUILD_TYPE=Release \ 27 | -DBUILD_SHARED_LIBS=ON \ 28 | -DCMAKE_INSTALL_PREFIX=./dist \ 29 | -DTILEDB_INSTALL_LIBDIR=lib \ 30 | -DTILEDB_S3=ON \ 31 | -DTILEDB_AZURE=ON \ 32 | -DTILEDB_GCS=ON \ 33 | -DTILEDB_SERIALIZATION=ON \ 34 | -DTILEDB_WEBP=ON \ 35 | -DTILEDB_TESTS=OFF \ 36 | -DVCPKG_TARGET_TRIPLET=x64-linux-release 37 | 38 | - name: Build TileDB 39 | env: 40 | TILEDB_PACKAGE_VERSION: ${{ inputs.libtiledb_version || '0.1' }} 41 | run: cmake --build build --config Release --target package 42 | 43 | - name: Upload TileDB Core Artifact 44 | uses: actions/upload-artifact@v4 45 | with: 46 | name: libtiledb 47 | path: | 48 | build/tiledb-*.tar.gz* 49 | build/tiledb-*.zip* 50 | 51 | build_tiledb_py: 52 | needs: 53 | - build_libtiledb 54 | runs-on: ubuntu-latest 55 | steps: 56 | - name: Set up Python 57 | uses: actions/setup-python@v5 58 | with: 59 | python-version: "3.11" 60 | 61 | - name: Checkout TileDB-Py 62 | uses: actions/checkout@v4 63 | 64 | - name: Download TileDB Core Artifact 65 | uses: actions/download-artifact@v4 66 | with: 67 | name: libtiledb 68 | path: ${{ github.workspace }}/libtiledb 69 | 70 | - name: Unpack Release Archive 71 | run: tar xvf ${{ github.workspace }}/libtiledb/*.tar.gz --directory ${{ github.workspace }}/libtiledb 72 | 73 | - name: Build TileDB-Py Wheel 74 
| env: 75 | TILEDB_PATH: ${{ github.workspace }}/libtiledb 76 | run: | 77 | python -m pip wheel -w dist --verbose . 78 | WHEEL=$(ls dist/tiledb-*.whl) 79 | python -m pip install ${WHEEL}[test] 80 | 81 | - name: Upload TileDB Core Artifact 82 | uses: actions/upload-artifact@v4 83 | with: 84 | name: tiledb-py 85 | path: | 86 | dist/tiledb-*.whl 87 | 88 | - name: Run tests 89 | run: | 90 | PROJECT_CWD=$PWD 91 | rm tiledb/__init__.py 92 | cd /tmp 93 | pytest -vv --showlocals $PROJECT_CWD 94 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: TileDB Python CI 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | concurrency: 6 | group: ${{ github.head_ref || github.run_id }} 7 | cancel-in-progress: true 8 | 9 | env: 10 | S3_BUCKET: ${{ vars.S3_BUCKET }} 11 | TILEDB_NAMESPACE: ${{ vars.TILEDB_NAMESPACE }} 12 | TILEDB_TOKEN: ${{ secrets.TILEDB_TOKEN }} 13 | 14 | jobs: 15 | build: 16 | runs-on: ${{ matrix.os }} 17 | defaults: 18 | run: 19 | shell: bash 20 | strategy: 21 | matrix: 22 | os: 23 | - ubuntu-latest 24 | - macos-13 25 | - macos-14 26 | - windows-latest 27 | python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ] 28 | fail-fast: false 29 | env: 30 | MACOSX_DEPLOYMENT_TARGET: "11" 31 | steps: 32 | - name: Checkout TileDB-Py `main` 33 | uses: actions/checkout@v4 34 | 35 | - name: Setup MSVC toolset (VS 2022) 36 | uses: TheMrMilchmann/setup-msvc-dev@v3 37 | if: startsWith(matrix.os, 'windows') 38 | with: 39 | arch: x64 40 | 41 | - name: Install Ninja (VS 2022) 42 | uses: seanmiddleditch/gha-setup-ninja@v4 43 | if: startsWith(matrix.os, 'windows') 44 | 45 | - name: Set up Python ${{ matrix.python-version }} 46 | uses: actions/setup-python@v5 47 | with: 48 | python-version: ${{ matrix.python-version }} 49 | 50 | - name: Print Python version 51 | run: | 52 | which python 53 | which pip 54 | python --version 55 | 56 
| - name: Print env 57 | run: printenv 58 | 59 | - name: Print pip debug info 60 | run: pip debug --verbose 61 | 62 | # Remove after upstream PR fully-deployed: 63 | # - https://github.com/actions/runner-images/pull/7125 64 | - name: "Install homebrew dependencies" 65 | run: brew install pkg-config 66 | if: startsWith(matrix.os, 'macos') 67 | 68 | - name: "Install libfaketime (Linux only)" 69 | if: ${{ startsWith(matrix.os, 'ubuntu') }} 70 | run: | 71 | git clone https://github.com/wolfcw/libfaketime/ 72 | cd libfaketime 73 | sudo make install 74 | cd .. 75 | 76 | - name: "Build and Install TileDB-Py" 77 | # We use pipx here to produce wheel/sdist to upload as artifact in case of error 78 | run: | 79 | pipx run --python ${{ matrix.python-version }} build 80 | WHEEL_NAME=$(ls dist/*.whl) 81 | pip install --verbose ${WHEEL_NAME}[test] 82 | 83 | - name: "Run tests" 84 | run: | 85 | PROJECT_CWD=$PWD 86 | rm tiledb/__init__.py 87 | cd /tmp 88 | pytest -vv --showlocals $PROJECT_CWD 89 | 90 | - name: "Re-run tests without pandas" 91 | run: | 92 | pip uninstall -y pandas 93 | pytest -vv --showlocals $PROJECT_CWD 94 | 95 | - name: "Print log files (failed build only)" 96 | run: | 97 | set -xeo pipefail 98 | # Display log files if the build failed 99 | echo 'Dumping log files for failed build' 100 | echo '----------------------------------' 101 | for f in $(find build -name *.log); 102 | do echo '------' 103 | echo $f 104 | echo '======' 105 | cat $f 106 | done; 107 | if: failure() 108 | 109 | - name: "Upload files for debug" 110 | if: always() 111 | uses: actions/upload-artifact@v4 112 | with: 113 | name: cibw-wheels-${{ matrix.os }}-${{ matrix.python-version }} 114 | path: "." 
115 | -------------------------------------------------------------------------------- /.github/workflows/daily-test-build.yml: -------------------------------------------------------------------------------- 1 | name: Daily Test Build TileDB-Py Against Core 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | libtiledb_version: 7 | required: true 8 | type: string 9 | 10 | jobs: 11 | test: 12 | runs-on: ${{ matrix.os }} 13 | defaults: 14 | run: 15 | shell: bash 16 | strategy: 17 | matrix: 18 | os: [ubuntu-latest, macos-13, macos-14, windows-latest] 19 | fail-fast: false 20 | 21 | env: 22 | TILEDB_VERSION: ${{ inputs.libtiledb_version }} 23 | MACOSX_DEPLOYMENT_TARGET: "11" 24 | VCPKG_BINARY_SOURCES: 'clear;x-gha,readwrite' 25 | 26 | steps: 27 | - name: Setup MSVC toolset (VS 2022) 28 | uses: TheMrMilchmann/setup-msvc-dev@v3 29 | if: matrix.os == 'windows-latest' 30 | with: 31 | arch: x64 32 | 33 | - name: Enable vcpkg binary caching 34 | uses: actions/github-script@v6 35 | with: 36 | script: | 37 | core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); 38 | core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); 39 | 40 | - name: Install Ninja (VS 2022) 41 | uses: seanmiddleditch/gha-setup-ninja@v4 42 | if: matrix.os == 'windows-latest' 43 | 44 | - name: "Set CMAKE_GENERATOR" 45 | run: echo "CMAKE_GENERATOR=Ninja" >> $GITHUB_ENV 46 | if: matrix.os == 'windows-latest' 47 | 48 | - name: Set up Python 49 | uses: actions/setup-python@v5 50 | with: 51 | python-version: "3.11" 52 | 53 | - name: Print Python version 54 | run: | 55 | which python 56 | which pip 57 | python --version 58 | 59 | - name: Print env 60 | run: printenv 61 | 62 | # Remove after upstream PR fully-deployed: 63 | # - https://github.com/actions/runner-images/pull/7125 64 | - name: "Install homebrew dependencies" 65 | run: brew install pkg-config 66 | if: matrix.os == 'macos-13' || matrix.os == 'macos-14' 67 | 68 | - name: Checkout TileDB-Py `main` 69 | 
uses: actions/checkout@v4 70 | 71 | - name: Build and install TileDB-Py and dependencies 72 | run: python -m pip install --verbose .[test] 73 | 74 | - name: Run tests 75 | run: | 76 | PROJECT_CWD=$PWD 77 | rm tiledb/__init__.py 78 | cd /tmp 79 | pytest -vv --showlocals $PROJECT_CWD 80 | 81 | create_issue_on_fail: 82 | permissions: 83 | issues: write 84 | runs-on: ubuntu-latest 85 | needs: test 86 | if: failure() || cancelled() 87 | steps: 88 | - uses: actions/checkout@v4 89 | - name: Create Issue if Build Fails 90 | uses: TileDB-Inc/github-actions/open-issue@main 91 | with: 92 | name: nightly build 93 | label: bug,nightly-failure 94 | assignee: kounelisagis,nguyenv,ihnorton 95 | -------------------------------------------------------------------------------- /.github/workflows/daily-tests.yml: -------------------------------------------------------------------------------- 1 | name: Daily Tests TileDB-Py 2 | 3 | on: 4 | schedule: 5 | # runs every day at 5:00 UTC (1:00AM EST / Midnight CST) 6 | - cron: "0 5 * * *" 7 | workflow_dispatch: 8 | 9 | jobs: 10 | ci1: 11 | uses: ./.github/workflows/daily-test-build.yml 12 | with: 13 | libtiledb_version: '2.28.0' 14 | 15 | ci2: 16 | uses: ./.github/workflows/daily-test-build.yml 17 | with: 18 | libtiledb_version: '2.27.2' 19 | 20 | ci3: 21 | uses: ./.github/workflows/daily-test-build-numpy.yml 22 | with: 23 | libtiledb_version: '2.28.0' 24 | 25 | ci4: 26 | uses: ./.github/workflows/daily-test-build-numpy.yml 27 | with: 28 | libtiledb_version: '2.27.2' 29 | -------------------------------------------------------------------------------- /.github/workflows/format.yml: -------------------------------------------------------------------------------- 1 | name: TileDB Python Linting 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v3 10 | - uses: actions/setup-python@v4 11 | 12 | - name: Run pre-commit hooks on all files 13 | run: python 
-m pip -v install pre-commit && pre-commit run -a -v -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.dylib 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | tiledb/_generated_version.py 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | doc/source/_build 68 | doc/source/_sidebar.rst.inc 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # dotenv 86 | .env 87 | 88 | # virtualenv 89 | .venv 90 | venv/ 91 | ENV/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # IntelliJ 107 | .idea 108 | 109 | # 
Visual Studio Code 110 | .vscode/ 111 | 112 | *.DS_Store 113 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/ambv/black 3 | rev: 24.10.0 4 | hooks: 5 | - id: black 6 | - repo: https://github.com/charliermarsh/ruff-pre-commit 7 | rev: v0.9.3 8 | hooks: 9 | - id: ruff 10 | - repo: https://github.com/pre-commit/mirrors-clang-format 11 | rev: v19.1.7 12 | hooks: 13 | - id: clang-format 14 | types_or: [c++, c] 15 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Don't build any extra formats 2 | formats: [] 3 | 4 | # Use RTD config version 2 5 | # https://docs.readthedocs.io/en/stable/config-file/v2.html 6 | version: 2 7 | 8 | # Set build OS to avoid build failures with old compiler 9 | build: 10 | os: "ubuntu-22.04" 11 | tools: 12 | python: "3.12" 13 | 14 | sphinx: 15 | configuration: doc/source/conf.py 16 | 17 | python: 18 | install: 19 | - method: pip 20 | path: . 
21 | extra_requirements: 22 | - doc 23 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.15...3.26) 2 | 3 | project(${SKBUILD_PROJECT_NAME}) 4 | 5 | set(PYBIND11_NEWPYTHON ON) 6 | 7 | find_package( 8 | Python 9 | COMPONENTS Interpreter Development.Module NumPy 10 | REQUIRED 11 | ) 12 | 13 | find_package(pybind11 CONFIG REQUIRED) 14 | 15 | if (TILEDB_PATH) 16 | file(REAL_PATH "${TILEDB_PATH}" USER_TILEDB_PATH EXPAND_TILDE) 17 | file(GLOB_RECURSE USER_TILEDB_CONFIG_PATH "${TILEDB_PATH}/**/TileDBConfig.cmake") 18 | cmake_path(GET USER_TILEDB_CONFIG_PATH PARENT_PATH USER_TILEDB_DIR) 19 | set(TileDB_DIR ${USER_TILEDB_DIR}) 20 | endif() 21 | 22 | # In the future we should use vcpkg 23 | if (TILEDB_PATH) 24 | find_package(TileDB 25 | HINTS ${USER_TILEDB_DIR} ${TILEDB_PATH} 26 | QUIET NO_DEFAULT_PATH REQUIRED) 27 | else() 28 | find_package(TileDB QUIET) 29 | endif() 30 | 31 | if (NOT TileDB_FOUND) 32 | include(cmake/DownloadTileDB.cmake) 33 | if (TILEDB_VERSION) 34 | message(STATUS "Downloading TileDB version \"${TILEDB_VERSION}\" ...") 35 | if (TILEDB_HASH) 36 | fetch_prebuilt_tiledb( 37 | VERSION ${TILEDB_VERSION} 38 | RELLIST_HASH SHA256=${TILEDB_HASH} 39 | ) 40 | else() 41 | fetch_prebuilt_tiledb( 42 | VERSION ${TILEDB_VERSION} 43 | ) 44 | endif() 45 | else() 46 | message(STATUS "Downloading TileDB default version ...") 47 | # Download latest release 48 | fetch_prebuilt_tiledb( 49 | VERSION 2.28.0 50 | RELLIST_HASH SHA256=40c8a0b5b7ddfe6150e3ce390fd95761d2b7d5910ea3fd5c7dfb67d431e64660 51 | ) 52 | endif() 53 | find_package(TileDB REQUIRED) 54 | set(TILEDB_DOWNLOADED TRUE) 55 | else() 56 | message(STATUS "Found external TileDB core library") 57 | endif() 58 | 59 | message(STATUS "Using libtiledb path: " "${TileDB_DIR}") 60 | add_subdirectory(tiledb) 61 | 
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to TileDB-Py 2 | 3 | Thanks for your interest in TileDB-Py. The notes below give some pointers for filing issues and bug reports, or contributing to the code. 4 | 5 | ## Contribution Checklist 6 | - Reporting a bug? Please include the following information: 7 | - operating system and version (windows, linux, macos, etc.) 8 | - the output of `tiledb.version()` and `tiledb.libtiledb.version()` 9 | - if possible, a minimal working example demonstrating the bug or issue (along with any data to re-create, when feasible) 10 | - Please paste code blocks with triple backquotes (```) so that GitHub will format them nicely. See [GitHub's guide on Markdown](https://guides.github.com/features/mastering-markdown) for more formatting tricks. 11 | 12 | ## Contributing Code 13 | *By contributing code to TileDB-Py, you are agreeing to release it under the [MIT License](https://github.com/TileDB-Inc/TileDB/tree/main/LICENSE).* 14 | 15 | ### Contribution Workflow 16 | 17 | - Quick steps to build locally: 18 | - install prerequisites via pip or conda: `pybind11` `cython` `numpy` `pandas` `pyarrow` 19 | - recommended: install TileDB embedded (libtiledb) 20 | 21 | NOTE: if libtiledb path is not specified with `--tiledb`, it will be built automatically by `setup.py`. However, this build 22 | is internal to the source tree and somewhat difficult to modify. When working on both projects simultaneously, it is 23 | strongly suggested to build libtiledb separately. Changes to libtiledb must be installed to `dist` with `make install-tiledb` in 24 | order to be used with `--tiledb`. 
25 | 26 | - from latest release build: https://github.com/TileDB-Inc/TileDB/releases 27 | - `tar xf tiledb--.tar.gz -C /path/to/extract` 28 | - use `--tiledb=/path/to/extract` (note: this path should _contain_ the `lib` directory) 29 | - from [conda-forge](https://anaconda.org/conda-forge/tiledb): `mamba install tiledb` 30 | - `--tiledb=$CONDA_PREFIX` 31 | - from source: https://docs.tiledb.com/main/how-to/installation/building-from-source/c-cpp 32 | - use `--tiledb=/path/to/tiledb/dist` option when running ``setup.py`` in the step below 33 | - if building libtiledb from source, to enable serialization pass ``--enable-serialization`` 34 | to the ``bootstrap`` script before compiling 35 | - serialization is optional. If libtiledb is not built with serialization, then it will not be 36 | enabled in TileDB-Py 37 | 38 | - build TileDB-Py 39 | ``` 40 | git clone https://github.com/TileDB-Inc/TileDB-Py 41 | cd TileDB-Py 42 | python setup.py develop --tiledb= 43 | ``` 44 | 45 | - Make changes locally, then rebuild with `python setup.py develop [--tiledb=<>]` 46 | - Make sure to run `pytest` to verify changes against tests (add new tests where applicable). 47 | - Execute the tests as `pytest tiledb` from the top-level directory or `pytest` in the `tiledb/` directory. 48 | - Please submit [pull requests](https://help.github.com/en/desktop/contributing-to-projects/creating-a-pull-request) against the default [`main` branch of TileDB-Py](https://github.com/TileDB-Inc/TileDB-Py/tree/main) 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017-2018 TileDB Inc. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | TileDB logo 2 | 3 | 4 | [![Build Status](https://dev.azure.com/TileDB-Inc/CI/_apis/build/status/TileDB-Inc.TileDB-Py?branchName=main)](https://dev.azure.com/TileDB-Inc/CI/_build/latest?definitionId=1&branchName=main) 5 | ![](https://raw.githubusercontent.com/TileDB-Inc/TileDB/main/doc/anaconda.svg?sanitize=true)[![Anaconda download count badge](https://anaconda.org/conda-forge/TileDB-Py/badges/downloads.svg)](https://anaconda.org/conda-forge/TileDB-Py) 6 | 7 | 8 | # TileDB-Py 9 | 10 | *TileDB-Py* is a [Python](https://python.org) interface to the [TileDB Storage Engine](https://github.com/TileDB-Inc/TileDB). 
11 | 12 | # Quick Links 13 | 14 | * [Installation](https://cloud.tiledb.com/academy/structure/arrays/quickstart/) 15 | * [Build Instructions](docs/building-from-source.md) 16 | * [TileDB Academy](https://cloud.tiledb.com/academy/) 17 | * [Python API reference](https://tiledb-inc-tiledb-py.readthedocs-hosted.com/en/stable) 18 | 19 | # Quick Installation 20 | 21 | TileDB-Py is available from either [PyPI](https://pypi.org/project/tiledb/) with ``pip``: 22 | 23 | ``` 24 | pip install tiledb 25 | ``` 26 | 27 | or from [conda-forge](https://anaconda.org/conda-forge/tiledb-py) with 28 | [conda](https://conda.io/docs/) or [mamba](https://github.com/mamba-org/mamba#installation): 29 | 30 | ``` 31 | conda install -c conda-forge tiledb-py 32 | ``` 33 | 34 | DataFrame functionality (`tiledb.from_pandas`, `Array.df[]`) requires [Pandas](https://pandas.pydata.org/) 1.0 or higher, and [PyArrow](https://arrow.apache.org/docs/python/) 1.0 or higher. 35 | 36 | # Contributing 37 | 38 | We welcome contributions; please see [`CONTRIBUTING.md`](https://github.com/TileDB-Inc/TileDB-Py/blob/main/CONTRIBUTING.md) for suggestions and 39 | development-build instructions. For larger features, please open an issue to discuss goals and 40 | approach in order to ensure a smooth PR integration and review process. 41 | -------------------------------------------------------------------------------- /doc/local-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Builds the ReadTheDocs documentation locally. 5 | # Usage: execute in this directory: 6 | # $ ./local-build.sh --tiledb=PATH 7 | # This installs TileDB-Py (with its doc extras) into the current Python environment. 
8 | # 9 | 10 | # Choose the default directories 11 | source_dir="source" 12 | build_dir="source/_build" 13 | venv_dir="venv" 14 | ext_dir="../" 15 | 16 | die() { 17 | echo "$@" 1>&2 ; popd 2>/dev/null; exit 1 18 | } 19 | 20 | arg() { 21 | echo "$1" | sed "s/^${2-[^=]*=}//" | sed "s/:/;/g" 22 | } 23 | 24 | # Display bootstrap usage 25 | usage() { 26 | echo ' 27 | Usage: '"$0"' [] 28 | Options: [defaults in brackets after descriptions] 29 | Configuration: 30 | --help print this message 31 | --tiledb=PATH (required) path to TileDB repo root 32 | ' 33 | exit 10 34 | } 35 | 36 | # Parse arguments 37 | tiledb="" 38 | while test $# != 0; do 39 | case "$1" in 40 | --tiledb=*) dir=`arg "$1"` 41 | tiledb="$dir";; 42 | --help) usage ;; 43 | *) die "Unknown option: $1" ;; 44 | esac 45 | shift 46 | done 47 | 48 | if [ ! -d "${tiledb}" ]; then 49 | die "invalid tiledb installation directory (use --tiledb)" 50 | fi 51 | 52 | build_ext() { 53 | pushd "${ext_dir}" 54 | TILEDB_PATH=${tiledb} pip install .[doc] || die "could not install tiledb-py" 55 | popd 56 | } 57 | 58 | build_site() { 59 | if [[ $OSTYPE == darwin* ]]; then 60 | export DYLD_LIBRARY_PATH="${tiledb}/lib" 61 | else 62 | export LD_LIBRARY_PATH="${tiledb}/lib" 63 | fi 64 | export TILEDB_PY_NO_VERSION_CHECK="yes" 65 | sphinx-build -E -T -b html -d ${build_dir}/doctrees -D language=en ${source_dir} ${build_dir}/html || \ 66 | die "could not build sphinx site" 67 | } 68 | 69 | run() { 70 | build_ext 71 | build_site 72 | echo "Build complete. Open '${build_dir}/html/index.html' in your browser." 
73 | } 74 | 75 | run 76 | -------------------------------------------------------------------------------- /doc/source/_static/TileDB_Logo_BlueArtboard_1@1.5x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TileDB-Inc/TileDB-Py/28714d9b25d44d6c6c1f318525184d3784b7de00/doc/source/_static/TileDB_Logo_BlueArtboard_1@1.5x.png -------------------------------------------------------------------------------- /doc/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | .wy-side-nav-search { 2 | background-color: #fafafa; 3 | color: #404040; 4 | } 5 | 6 | .wy-side-nav-search > a { 7 | color: #2980B9; 8 | } 9 | 10 | .wy-side-nav-search > div.version { 11 | color: rgba(64, 64, 64, 0.3); 12 | } 13 | 14 | .red { 15 | color: red; 16 | } 17 | 18 | .green { 19 | color: green; 20 | } -------------------------------------------------------------------------------- /doc/source/_static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TileDB-Inc/TileDB-Py/28714d9b25d44d6c6c1f318525184d3784b7de00/doc/source/_static/favicon.ico -------------------------------------------------------------------------------- /doc/source/_static/tileDB_uppercase_600_112.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TileDB-Inc/TileDB-Py/28714d9b25d44d6c6c1f318525184d3784b7de00/doc/source/_static/tileDB_uppercase_600_112.png -------------------------------------------------------------------------------- /doc/source/_static/tileDB_uppercase_white_600_141.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TileDB-Inc/TileDB-Py/28714d9b25d44d6c6c1f318525184d3784b7de00/doc/source/_static/tileDB_uppercase_white_600_141.png 
-------------------------------------------------------------------------------- /doc/source/_static/tiledb-logo_color_no_margin_@4x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TileDB-Inc/TileDB-Py/28714d9b25d44d6c6c1f318525184d3784b7de00/doc/source/_static/tiledb-logo_color_no_margin_@4x.png -------------------------------------------------------------------------------- /doc/source/gensidebar.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file generates the sidebar/toctree for all TileDB projects and should 3 | # be copied to each project when it is updated. 4 | # 5 | # This file is originally from the RobotPy documentation project 6 | # https://github.com/robotpy/robotpy-docs, licensed under Apache v2. 7 | # 8 | 9 | 10 | def write_if_changed(fname, contents): 11 | 12 | try: 13 | with open(fname, "r") as fp: 14 | old_contents = fp.read() 15 | except Exception: 16 | old_contents = "" 17 | 18 | if old_contents != contents: 19 | with open(fname, "w") as fp: 20 | fp.write(contents) 21 | 22 | 23 | def generate_sidebar(conf, conf_api): 24 | 25 | version = conf["rtd_version"] 26 | 27 | lines = [ 28 | "", 29 | ".. DO NOT MODIFY! THIS PAGE IS AUTOGENERATED!", 30 | " To edit the sidebar, modify gensidebar.py and re-build the docs.", 31 | "", 32 | ] 33 | 34 | url_base = "https://tiledb-inc-tiledb.readthedocs-hosted.com" 35 | lang = "en" 36 | 37 | def toctree(name): 38 | lines.extend( 39 | [".. 
toctree::", " :caption: %s" % name, " :maxdepth: 1", ""] 40 | ) 41 | 42 | def endl(): 43 | lines.append("") 44 | 45 | def write(desc, link): 46 | if conf_api == "tiledb": 47 | args = desc, link 48 | else: 49 | args = desc, "%s/%s/%s/%s.html" % (url_base, lang, version, link) 50 | 51 | lines.append(" %s <%s>" % args) 52 | 53 | def write_api(project, desc, rst_page): 54 | # From non-root project to root project link 55 | if project == "tiledb" and conf_api != "tiledb": 56 | args = desc, url_base, lang, version, rst_page 57 | lines.append(" %s API <%s/%s/%s/%s.html>" % args) 58 | # From anything to non-root project link 59 | elif project != conf_api: 60 | args = desc, url_base, project, lang, version, rst_page 61 | lines.append(" %s API <%s/projects/%s/%s/%s/%s.html>" % args) 62 | # Local project link 63 | else: 64 | args = desc, rst_page 65 | lines.append(" %s API <%s>" % args) 66 | 67 | def write_api_url(desc, url): 68 | lines.append(" %s API <%s>" % (desc, url)) 69 | 70 | # 71 | # Specify the sidebar contents here 72 | # 73 | 74 | toctree("API Reference") 75 | write_api("tiledb", "C", "c-api") 76 | write_api("tiledb", "C++", "c++-api") 77 | write_api("tiledb-py", "Python", "python-api") 78 | write_api_url("R", "https://tiledb-inc.github.io/TileDB-R/reference/index.html") 79 | write_api_url("Java", "https://www.javadoc.io/doc/io.tiledb/tiledb-java") 80 | write_api_url("Go", "https://godoc.org/github.com/TileDB-Inc/TileDB-Go") 81 | endl() 82 | 83 | write_if_changed("_sidebar.rst.inc", "\n".join(lines)) 84 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | TileDB Python Project 2 | ===================== 3 | 4 | This project encompasses the Python language bindings for the TileDB library. 5 | 6 | .. 
include:: _sidebar.rst.inc 7 | -------------------------------------------------------------------------------- /docs/building-from-source.md: -------------------------------------------------------------------------------- 1 | # Building TileDB-Py from Source 2 | 3 | ## Build dependencies 4 | 5 | * NumPy 6 | * Cython 7 | * pybind11 8 | * scikit-build-core 9 | * C++20 compiler 10 | * CMake 11 | 12 | ## Runtime Dependencies 13 | 14 | * NumPy 15 | 16 | ### macOS and Linux 17 | 18 | Execute the following commands: 19 | 20 | ```bash 21 | $ git clone https://github.com/TileDB-Inc/TileDB-Py.git 22 | $ cd TileDB-Py 23 | $ pip install . 24 | $ cd .. # exit the source directory to avoid import errors 25 | ``` 26 | 27 | If you wish to modify the install process, you can use these environment variables: 28 | 29 | * `TILEDB_PATH`: Path to TileDB core library. If this variable is set and the library is found in the specified folder it is not copied inside of the wheel. 30 | * `TILEDB_VERSION`: Version of the TileDB core library that you wish to download. This version must be present in the Github releases. 31 | * `TILEDB_HASH`: SHA256 sum of the desired TileDB core library release. Only used when `TILEDB_VERSION` is set. 32 | 33 | ```bash 34 | $ TILEDB_PATH=/home/tiledb/dist pip install . 35 | # Or pass it as an argument 36 | $ pip install . -C skbuild.cmake.define.TILEDB_PATH=/home/tiledb/dist 37 | ``` 38 | 39 | To build against `libtiledb` installed with conda, run: 40 | 41 | ```bash 42 | # After activating the desired conda environment 43 | $ conda install tiledb 44 | $ TILEDB_PATH=${PREFIX} python -m pip install --no-build-isolation --no-deps --ignore-installed -v . 
45 | ``` 46 | 47 | To test your local installation, install optional dependencies, and then use `pytest`: 48 | 49 | ``` 50 | $ pip install .[test] 51 | $ python -m pytest -v # in the TileDB-Py source directory 52 | ``` 53 | 54 | If TileDB is installed in a non-standard location, you also need to make the dynamic linker aware of `libtiledb`'s location. Otherwise, when importing the `tiledb` module you will get an error that the built extension module cannot find `libtiledb`'s symbols: 55 | 56 | ``` 57 | $ env LD_LIBRARY_PATH="/home/tiledb/dist/lib:$LD_LIBRARY_PATH" python -m pytest -v 58 | ``` 59 | 60 | For macOS the linker environment variable is `DYLD_LIBRARY_PATH`. 61 | 62 | ### Windows 63 | 64 | If you are building the extension on Windows, first install a Python distribution such as [Miniconda](https://conda.io/miniconda.html). You can then either build TileDB from source, or download the pre-built binaries. 65 | 66 | Once you've installed Miniconda and TileDB, execute: 67 | 68 | ```bash 69 | REM with a conda install of libtiledb: 70 | > pip install . 71 | 72 | REM with a TileDB source build: 73 | > set TILEDB_PATH=C:/path/to/TileDB/dist/bin 74 | > pip install . 75 | 76 | REM to run tests: 77 | > pip install .[test] 78 | > python -m pytest -v 79 | ``` 80 | 81 | Note that if you built TileDB locally from source, then replace `set TILEDB_PATH=C:/path/to/TileDB` with `set TILEDB_PATH=C:/path/to/TileDB/dist`. 82 | -------------------------------------------------------------------------------- /examples/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc.
8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | # DESCRIPTION 28 | # 29 | # Please see the TileDB documentation for more information: 30 | # https://docs.tiledb.com/main/how-to/configuration 31 | # 32 | # This program shows how to set/get the TileDB configuration parameters. 
33 | # 34 | 35 | import tiledb 36 | 37 | 38 | def set_get_config_ctx_vfs(): 39 | # Create config object 40 | config = tiledb.Config() 41 | 42 | # Set/get config to/from ctx 43 | ctx = tiledb.Ctx(config) 44 | print(ctx.config()) 45 | 46 | # Set/get config to/from VFS 47 | vfs = tiledb.VFS(config) 48 | print(vfs.config()) 49 | 50 | 51 | def set_get_config(): 52 | config = tiledb.Config() 53 | 54 | # Set value 55 | config["vfs.s3.connect_timeout_ms"] = 5000 56 | 57 | # Get value 58 | tile_cache_size = config["sm.memory_budget"] 59 | print("Memory budget: %s" % str(tile_cache_size)) 60 | 61 | 62 | def print_default(): 63 | config = tiledb.Config() 64 | print("\nDefault settings:") 65 | for p in config.items(): 66 | print('"%s" : "%s"' % (p[0], p[1])) 67 | 68 | 69 | def iter_config_with_prefix(): 70 | config = tiledb.Config() 71 | # Print only the S3 settings. 72 | print("\nVFS S3 settings:") 73 | for p in config.items("vfs.s3."): 74 | print('"%s" : "%s"' % (p[0], p[1])) 75 | 76 | 77 | def save_load_config(): 78 | # Save to file 79 | config = tiledb.Config() 80 | config["sm.memory_budget"] = 1234 81 | config.save("tiledb_config.txt") 82 | 83 | # Load from file 84 | config_load = tiledb.Config.load("tiledb_config.txt") 85 | print( 86 | "\nTile cache size after loading from file: %s" 87 | % str(config_load["sm.memory_budget"]) 88 | ) 89 | 90 | 91 | set_get_config_ctx_vfs() 92 | set_get_config() 93 | print_default() 94 | iter_config_with_prefix() 95 | save_load_config() 96 | -------------------------------------------------------------------------------- /examples/errors.py: -------------------------------------------------------------------------------- 1 | # errors.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# DESCRIPTION
#
# Please see the TileDB documentation for more information:
#   https://docs.tiledb.com/main/how-to/catching-errors
#
# This example shows how to catch errors in TileDB.
#

import tiledb

# Catch an error: the same group is created twice in a row, so the second
# call is expected to fail (presumably because "my_group" already exists).
# Any tiledb.TileDBError raised inside the block is caught and reported
# instead of terminating the script.
try:
    tiledb.group_create("my_group")
    tiledb.group_create("my_group")
except tiledb.TileDBError as e:
    print(f"TileDB exception: {e}")

# Clean up: remove the group directory if it exists, so the example leaves
# nothing behind and can be re-run.
if tiledb.VFS().is_dir("my_group"):
    tiledb.remove("my_group")

# Setting a different error handler for the context is not yet supported.
49 | -------------------------------------------------------------------------------- /examples/fragment_info.py: -------------------------------------------------------------------------------- 1 | # fragment_info.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | 28 | 29 | import numpy as np 30 | 31 | import tiledb 32 | 33 | array_name = "fragment_info" 34 | 35 | 36 | def create_array(): 37 | # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4] and space tiles 2x2. 38 | dom = tiledb.Domain( 39 | tiledb.Dim(name="rows", domain=(1, 4), tile=2, dtype=np.int32), 40 | tiledb.Dim(name="cols", domain=(1, 4), tile=2, dtype=np.int32), 41 | ) 42 | 43 | # The array will be dense with a single attribute "a" so each (i,j) cell can store an integer. 
44 | schema = tiledb.ArraySchema( 45 | domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.int32)] 46 | ) 47 | 48 | # Create the (empty) array on disk. 49 | tiledb.Array.create(array_name, schema) 50 | 51 | 52 | def write_array_1(): 53 | with tiledb.open(array_name, mode="w") as A: 54 | A[1:3, 1:5] = np.array(([[1, 2, 3, 4], [5, 6, 7, 8]])) 55 | 56 | 57 | def write_array_2(): 58 | with tiledb.open(array_name, mode="w") as A: 59 | A[2:4, 2:4] = np.array(([[101, 102], [103, 104]])) 60 | 61 | 62 | def write_array_3(): 63 | with tiledb.open(array_name, mode="w") as A: 64 | A[3:4, 4:5] = np.array(([202])) 65 | 66 | 67 | # Create and write array only if it does not exist 68 | if tiledb.object_type(array_name) != "array": 69 | create_array() 70 | write_array_1() 71 | write_array_2() 72 | write_array_3() 73 | 74 | # tiledb.array_fragments() requires TileDB-Py version > 0.8.5 75 | fragments_info = tiledb.array_fragments(array_name) 76 | 77 | print("====== FRAGMENTS INFO ======") 78 | print("array uri: {}".format(fragments_info.array_uri)) 79 | print("number of fragments: {}".format(len(fragments_info))) 80 | 81 | to_vac = fragments_info.to_vacuum 82 | print("number of consolidated fragments to vacuum: {}".format(len(to_vac))) 83 | print("uris of consolidated fragments to vacuum: {}".format(to_vac)) 84 | 85 | print(fragments_info.nonempty_domain) 86 | print(fragments_info.sparse) 87 | 88 | for fragment in fragments_info: 89 | print() 90 | print("===== FRAGMENT NUMBER {} =====".format(fragment.num)) 91 | print("fragment uri: {}".format(fragment.uri)) 92 | print("is sparse: {}".format(fragment.sparse)) 93 | print("cell num: {}".format(fragment.cell_num)) 94 | print("has consolidated metadata: {}".format(fragment.has_consolidated_metadata)) 95 | print("nonempty domain: {}".format(fragment.nonempty_domain)) 96 | print("timestamp range: {}".format(fragment.timestamp_range)) 97 | print( 98 | "number of unconsolidated metadata: {}".format( 99 | 
fragment.unconsolidated_metadata_num 100 | ) 101 | ) 102 | print("version: {}".format(fragment.version)) 103 | -------------------------------------------------------------------------------- /examples/in_memory_vfs.py: -------------------------------------------------------------------------------- 1 | # in_memory_vfs.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2024 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | # DESCRIPTION 28 | # 29 | # When run, this program will create a random numpy array, create a TileDB 30 | # DenseArray from it in memory, and read the entire array back from memory. 31 | # It then compares the original numpy array with the TileDB array to check 32 | # that data was written and read correctly (prints True) or not (prints False). 
33 | # 34 | 35 | import numpy as np 36 | 37 | import tiledb 38 | 39 | # Create TileDB VFS 40 | vfs = tiledb.VFS() 41 | 42 | file = "mem://myarray" 43 | data = np.random.rand(10, 10) 44 | 45 | if not vfs.is_file(file): 46 | with tiledb.from_numpy(file, data): 47 | pass 48 | 49 | with tiledb.open(file) as A: 50 | print(np.all(A[:] == data)) 51 | -------------------------------------------------------------------------------- /examples/multirange_indexing.py: -------------------------------------------------------------------------------- 1 | # multirange_indexing.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 
26 | # 27 | # DESCRIPTION 28 | # 29 | # Please see the TileDB documentation for more information: 30 | # https://docs.tiledb.com/main/how-to/arrays/reading-arrays/multi-range-subarrays 31 | # 32 | # When run, this program will create a simple 2D dense array with a single 33 | # attribute, write some data to it, and read selected rows and columns of 34 | # that attribute back using multi-range indexing (`multi_index`). 35 | # 36 | 37 | 38 | import numpy as np 39 | 40 | import tiledb 41 | 42 | # Name of the array to create. 43 | array_name = "multi_range" 44 | 45 | 46 | def create_array(): 47 | # Check if the array already exists. 48 | if tiledb.object_type(array_name) == "array": 49 | return 50 | 51 | dom = tiledb.Domain( 52 | tiledb.Dim(name="x", domain=(1, 20), tile=4, dtype=np.int64), 53 | tiledb.Dim(name="y", domain=(1, 20), tile=4, dtype=np.int64), 54 | ) 55 | 56 | # Add a single "a" float attribute 57 | schema = tiledb.ArraySchema( 58 | domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.float64)] 59 | ) 60 | 61 | # Create the (empty) array on disk. 62 | tiledb.DenseArray.create(array_name, schema) 63 | 64 | 65 | def write_array(): 66 | # Open the array and write to it. 67 | with tiledb.DenseArray(array_name, mode="w") as A: 68 | data_a = np.arange(400).reshape(20, 20) 69 | A[:, :] = {"a": data_a} 70 | 71 | 72 | def read_array(): 73 | # Open the array and read from it.
74 | with tiledb.DenseArray(array_name, mode="r") as A: 75 | # Slice only rows: (1,3) inclusive, and 5 76 | # cols: 2, 5, 7 77 | data = A.multi_index[[(1, 3), 5], [2, 5, 7]] 78 | print("Reading attribute 'a', [ [1:3, 5], [2,5,7] ]") 79 | a = data["a"] 80 | print(a) 81 | 82 | 83 | create_array() 84 | write_array() 85 | read_array() 86 | -------------------------------------------------------------------------------- /examples/object.py: -------------------------------------------------------------------------------- 1 | # object.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | # DESCRIPTION 28 | # 29 | # Please see the TileDB documentation for more information: 30 | # https://docs.tiledb.com/main/how-to/object-management 31 | # 32 | # This program creates a hierarchy as shown below. 
Specifically, it creates 33 | # groups `dense_arrays` and `sparse_arrays` in a group `my_group`, and 34 | # then some dense/sparse arrays and key-value store in those groups. 35 | # 36 | # my_group/ 37 | # - dense_arrays/ 38 | # - array_A 39 | # - array_B 40 | # - sparse_arrays/ 41 | # - array_C 42 | # - array_D 43 | # 44 | # The program then shows how to list this hierarchy, as well as 45 | # move/remove TileDB objects. 46 | 47 | import os 48 | 49 | import numpy as np 50 | 51 | import tiledb 52 | 53 | 54 | def create_array(array_name, sparse): 55 | if tiledb.object_type(array_name) == "array": 56 | return 57 | 58 | dom = tiledb.Domain( 59 | tiledb.Dim(name="rows", domain=(1, 4), tile=4, dtype=np.int32), 60 | tiledb.Dim(name="cols", domain=(1, 4), tile=4, dtype=np.int32), 61 | ) 62 | schema = tiledb.ArraySchema( 63 | domain=dom, sparse=sparse, attrs=[tiledb.Attr(name="a", dtype=np.int32)] 64 | ) 65 | if sparse: 66 | tiledb.SparseArray.create(array_name, schema) 67 | else: 68 | tiledb.DenseArray.create(array_name, schema) 69 | 70 | 71 | def path(p): 72 | return os.path.join(os.getcwd(), p) 73 | 74 | 75 | def create_hierarchy(): 76 | # Create groups 77 | tiledb.group_create(path("my_group")) 78 | tiledb.group_create(path("my_group/dense_arrays")) 79 | tiledb.group_create(path("my_group/sparse_arrays")) 80 | 81 | # Create arrays 82 | create_array(path("my_group/dense_arrays/array_A"), False) 83 | create_array(path("my_group/dense_arrays/array_B"), False) 84 | create_array(path("my_group/sparse_arrays/array_C"), True) 85 | create_array(path("my_group/sparse_arrays/array_D"), True) 86 | 87 | 88 | def list_obj(path): 89 | 90 | # List children 91 | print("\nListing hierarchy:") 92 | tiledb.ls(path, lambda obj_path, obj_type: print(obj_path, obj_type)) 93 | 94 | # Walk in a path with a pre- and post-order traversal 95 | print("\nPreorder traversal:") 96 | tiledb.walk( 97 | path, lambda obj_path, obj_type: print(obj_path, obj_type) 98 | ) # Default order is preorder 99 | 
100 | print("\nPostorder traversal:") 101 | tiledb.walk( 102 | path, lambda obj_path, obj_type: print(obj_path, obj_type), order="postorder" 103 | ) 104 | 105 | 106 | def move_remove_obj(): 107 | tiledb.move(path("my_group"), path("my_group_2")) 108 | tiledb.remove(path("my_group_2/dense_arrays")) 109 | tiledb.remove(path("my_group_2/sparse_arrays/array_C")) 110 | 111 | 112 | create_hierarchy() 113 | list_obj("my_group") 114 | move_remove_obj() # Renames 'my_group' to 'my_group_2' 115 | list_obj("my_group_2") 116 | 117 | # clean up 118 | tiledb.remove("my_group_2") 119 | -------------------------------------------------------------------------------- /examples/query_condition_datetime.py: -------------------------------------------------------------------------------- 1 | # query_condition_datetime.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2021 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | 28 | # This example creates an array with one datetime-typed attribute, 29 | # writes sample data to the array, and then prints out a filtered 30 | # dataframe using the TileDB QueryCondition feature to select on 31 | # either equality or ranges of the generated attribute values. 32 | 33 | import numpy as np 34 | import pandas as pd 35 | 36 | import tiledb 37 | 38 | uri = "query_condition_datetime" 39 | 40 | data = pd.DataFrame( 41 | np.sort(np.random.randint(438923600, 243892360000, 20, dtype=np.int64)).astype( 42 | "M8[ns]" 43 | ), 44 | columns=["dates"], 45 | ) 46 | data.sort_values(by="dates") 47 | 48 | tiledb.from_pandas( 49 | uri, 50 | data, 51 | column_types={"dates": "datetime64[ns]"}, 52 | ) 53 | 54 | with tiledb.open(uri) as A: 55 | # filter by exact match with the fifth cell 56 | search_date = data["dates"][5].to_numpy().astype(np.int64) 57 | result = A.query(cond=f"dates == {search_date}").df[:] 58 | 59 | print() 60 | print("Attribute dates matching index 5:") 61 | print(result) 62 | 63 | # filter values between cell index 3 and 8 64 | d1 = data["dates"].iloc[3].to_numpy().astype(np.int64) 65 | d2 = data["dates"].iloc[8].to_numpy().astype(np.int64) 66 | result2 = A.query(cond=f"dates > {d1} and dates < {d2}").df[:] 67 | 68 | print() 69 | print("Attribute dates where 'dates[3] < val < dates[8]'") 70 | print(result2) 71 | -------------------------------------------------------------------------------- /examples/query_condition_dense.py: -------------------------------------------------------------------------------- 1 | # query_condition_dense.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2021 TileDB, Inc. 
8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | 28 | # This example creates a dense array with two numeric attributes, 29 | # writes random sample data to the array, and then prints the array 30 | # contents filtered with the TileDB QueryCondition feature.
31 | 32 | from pprint import pprint 33 | 34 | import numpy as np 35 | 36 | import tiledb 37 | 38 | uri = "query_condition_dense" 39 | 40 | 41 | def create_array(path): 42 | # create a dense array 43 | dom = tiledb.Domain( 44 | tiledb.Dim(name="coords", domain=(1, 10), tile=1, dtype=np.uint32) 45 | ) 46 | attrs = [ 47 | tiledb.Attr(name="attr1", dtype=np.uint64), 48 | tiledb.Attr(name="attr2", dtype=np.float64), 49 | ] 50 | schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=False) 51 | tiledb.Array.create(path, schema, overwrite=True) 52 | 53 | # fill array with randomized values 54 | with tiledb.open(path, "w") as arr: 55 | rand = np.random.default_rng() 56 | arr[:] = { 57 | "attr1": rand.integers(low=0, high=10, size=10), 58 | "attr2": rand.random(size=10), 59 | } 60 | 61 | 62 | def read_array(path): 63 | with tiledb.open(uri) as arr: 64 | print("--- without query condition:") 65 | print() 66 | pprint(arr[:]) 67 | print() 68 | 69 | with tiledb.open(uri) as arr: 70 | qc = "(2 < attr1 < 6) and (attr2 < 0.5 or attr2 > 0.85)" 71 | print(f"--- with query condition {qc}:") 72 | 73 | print(f"--- the fill value for attr1 is {arr.attr('attr1').fill}") 74 | print(f"--- the fill value for attr2 is {arr.attr('attr2').fill}") 75 | 76 | print() 77 | res = arr.query(cond=qc)[:] 78 | pprint(res) 79 | 80 | 81 | if __name__ == "__main__": 82 | """Example output for `python query_condition_dense.py`: 83 | 84 | --- without query condition: 85 | 86 | OrderedDict([('attr1', array([4, 0, 9, 7, 6, 0, 0, 5, 7, 5], dtype=uint64)), 87 | ('attr2', 88 | array([0.74476144, 0.47211544, 0.99054245, 0.36640416, 0.91699594, 89 | 0.06216043, 0.58581863, 0.00505695, 0.7486192 , 0.87649422]))]) 90 | 91 | --- with query condition (2 < attr1 < 6) and (attr2 < 0.5 or attr2 > 0.85): 92 | --- the fill value for attr1 is [18446744073709551615] 93 | --- the fill value for attr2 is [nan] 94 | 95 | OrderedDict([('attr1', 96 | array([18446744073709551615, 18446744073709551615, 18446744073709551615, 97 
| 18446744073709551615, 18446744073709551615, 18446744073709551615, 98 | 18446744073709551615, 5, 18446744073709551615, 99 | 5], dtype=uint64)), 100 | ('attr2', 101 | array([ nan, nan, nan, nan, nan, 102 | nan, nan, 0.00505695, nan, 0.87649422]))]) 103 | """ 104 | create_array(uri) 105 | read_array(uri) 106 | -------------------------------------------------------------------------------- /examples/query_condition_sparse.py: -------------------------------------------------------------------------------- 1 | # query_condition_sparse.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2021 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | 28 | # This example creates a sparse array with two numeric attributes, 29 | # writes random sample data to the array, and then prints the array 30 | # contents filtered with the TileDB QueryCondition feature.
31 | 32 | from pprint import pprint 33 | 34 | import numpy as np 35 | 36 | import tiledb 37 | 38 | uri = "query_condition_sparse" 39 | 40 | 41 | def create_array(path): 42 | # create a sparse array 43 | dom = tiledb.Domain( 44 | tiledb.Dim(name="coords", domain=(1, 10), tile=1, dtype=np.uint32) 45 | ) 46 | attrs = [ 47 | tiledb.Attr(name="attr1", dtype=np.uint64), 48 | tiledb.Attr(name="attr2", dtype=np.float64), 49 | ] 50 | schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) 51 | tiledb.Array.create(path, schema, overwrite=True) 52 | 53 | # fill array with randomized values 54 | with tiledb.open(path, "w") as arr: 55 | rand = np.random.default_rng() 56 | arr[np.arange(1, 11)] = { 57 | "attr1": rand.integers(low=0, high=10, size=10), 58 | "attr2": rand.random(size=10), 59 | } 60 | 61 | 62 | def read_array(path): 63 | with tiledb.open(uri) as arr: 64 | print("--- without query condition:") 65 | print() 66 | pprint(arr[:]) 67 | print() 68 | 69 | with tiledb.open(uri) as arr: 70 | qc = "(2 < attr1 < 6) and (attr2 < 0.5 or attr2 > 0.85)" 71 | print(f"--- with query condition {qc}:") 72 | print() 73 | res = arr.query(cond=qc)[:] 74 | pprint(res) 75 | 76 | 77 | if __name__ == "__main__": 78 | """Example output for `python query_condition_sparse.py`: 79 | 80 | --- without query condition: 81 | 82 | OrderedDict([('attr1', array([2, 4, 4, 3, 4, 7, 5, 2, 2, 8], dtype=uint64)), 83 | ('attr2', 84 | array([0.62445071, 0.32415481, 0.39117764, 0.66609931, 0.48122102, 85 | 0.93561984, 0.70998524, 0.10322076, 0.28343041, 0.33623958])), 86 | ('coords', 87 | array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=uint32))]) 88 | 89 | --- with query condition (2 < attr1 < 6) and (attr2 < 0.5 or attr2 > 0.85): 90 | 91 | OrderedDict([('attr1', array([4, 4, 4], dtype=uint64)), 92 | ('attr2', array([0.32415481, 0.39117764, 0.48122102])), 93 | ('coords', array([2, 3, 5], dtype=uint32))]) 94 | """ 95 | create_array(uri) 96 | read_array(uri) 97 | 
-------------------------------------------------------------------------------- /examples/query_condition_string.py: -------------------------------------------------------------------------------- 1 | # query_condition_string.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2021 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | 28 | # This example creates an array with one string-typed attribute, 29 | # writes sample data to the array, and then prints out a filtered 30 | # dataframe using the TileDB QueryCondition feature. 
31 | 32 | import string 33 | 34 | import numpy as np 35 | 36 | import tiledb 37 | 38 | 39 | def create_array(path): 40 | dom = tiledb.Domain(tiledb.Dim(name="d", domain=(1, 10), tile=1, dtype=np.uint32)) 41 | attrs = [tiledb.Attr(name="ascii_attr", dtype="ascii", var=True)] 42 | 43 | schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) 44 | 45 | tiledb.SparseArray.create(path, schema, overwrite=True) 46 | 47 | # create array of strings from a to a..j 48 | attr_data = np.array([string.ascii_lowercase[0:n] for n in range(1, 11)], dtype="O") 49 | 50 | with tiledb.open(path, "w") as arr: 51 | arr[np.arange(1, 11)] = {"ascii_attr": attr_data} 52 | 53 | 54 | def read_array(path, cond): 55 | with tiledb.open(path) as arr: 56 | print("QueryCondition is: ", cond) 57 | res = arr.query(cond=cond)[:] 58 | return res 59 | 60 | 61 | uri = "query_condition_string" 62 | 63 | create_array(uri) 64 | filtered_df1 = read_array(uri, "ascii_attr == 'abcd'") 65 | print(" result: ", filtered_df1) 66 | filtered_df2 = read_array(uri, "ascii_attr > 'abc'") 67 | print(" result: ", filtered_df2) 68 | -------------------------------------------------------------------------------- /examples/quickstart_dense.py: -------------------------------------------------------------------------------- 1 | # quickstart_dense.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 
8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | # DESCRIPTION 28 | # 29 | # Please refer to the TileDB and TileDB-Py documentation for more information: 30 | # https://docs.tiledb.com/main/how-to 31 | # https://tiledb-inc-tiledb.readthedocs-hosted.com/projects/tiledb-py/en/stable/python-api.html 32 | # 33 | # When run, this program will create a simple 2D dense array, write some data 34 | # to it, and read a slice of the data back. 35 | # 36 | 37 | 38 | import numpy as np 39 | 40 | import tiledb 41 | 42 | # Name of the array to create. 43 | array_name = "quickstart_dense" 44 | 45 | 46 | def create_array(): 47 | 48 | # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4]. 
49 | dom = tiledb.Domain( 50 | tiledb.Dim(name="rows", domain=(1, 4), tile=4, dtype=np.int32), 51 | tiledb.Dim(name="cols", domain=(1, 4), tile=4, dtype=np.int32), 52 | ) 53 | 54 | # The array will be dense with a single attribute "a" so each (i,j) cell can store an integer. 55 | schema = tiledb.ArraySchema( 56 | domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.int32)] 57 | ) 58 | 59 | # Create the (empty) array on disk. 60 | tiledb.DenseArray.create(array_name, schema) 61 | 62 | 63 | def write_array(): 64 | # Open the array and write to it. 65 | with tiledb.DenseArray(array_name, mode="w") as A: 66 | data = np.array(([1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16])) 67 | A[:] = data 68 | 69 | 70 | def read_array(): 71 | # Open the array and read from it. 72 | with tiledb.DenseArray(array_name, mode="r") as A: 73 | # Slice only rows 1, 2 and cols 2, 3, 4. 74 | data = A[1:3, 2:5] 75 | print(data["a"]) 76 | 77 | 78 | if tiledb.object_type(array_name) != "array": 79 | create_array() 80 | write_array() 81 | 82 | read_array() 83 | -------------------------------------------------------------------------------- /examples/quickstart_sparse.py: -------------------------------------------------------------------------------- 1 | # quickstart_sparse.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 
8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | # DESCRIPTION 28 | # 29 | # Please refer to the TileDB and TileDB-Py documentation for more information: 30 | # https://docs.tiledb.com/main/how-to 31 | # https://tiledb-inc-tiledb.readthedocs-hosted.com/projects/tiledb-py/en/stable/python-api.html 32 | # 33 | # When run, this program will create a simple 2D sparse array, write some data 34 | # to it, and read a slice of the data back. 35 | # 36 | 37 | 38 | import numpy as np 39 | 40 | import tiledb 41 | 42 | # Name of the array to create. 43 | array_name = "quickstart_sparse" 44 | 45 | 46 | def create_array(): 47 | # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4]. 
48 | dom = tiledb.Domain( 49 | tiledb.Dim(name="rows", domain=(1, 4), tile=4, dtype=np.int32), 50 | tiledb.Dim(name="cols", domain=(1, 4), tile=4, dtype=np.int32), 51 | ) 52 | 53 | # The array will be sparse with a single attribute "a" so each (i,j) cell can store an integer. 54 | schema = tiledb.ArraySchema( 55 | domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.int32)] 56 | ) 57 | 58 | # Create the (empty) array on disk. 59 | tiledb.SparseArray.create(array_name, schema) 60 | 61 | 62 | def write_array(): 63 | # Open the array and write to it. 64 | with tiledb.SparseArray(array_name, mode="w") as A: 65 | # Write some simple data to cells (1, 1), (2, 4) and (2, 3). 66 | IJ = [1, 2, 2], [1, 4, 3] 67 | data = np.array(([1, 2, 3])) 68 | A[IJ] = data 69 | 70 | 71 | def read_array(): 72 | # Open the array and read from it. 73 | with tiledb.SparseArray(array_name, mode="r") as A: 74 | # Slice only rows 1, 2 and cols 2, 3, 4. 75 | data = A[1:3, 2:5] 76 | a_vals = data["a"] 77 | for i, coord in enumerate(zip(data["rows"], data["cols"])): 78 | print("Cell (%d, %d) has data %d" % (coord[0], coord[1], a_vals[i])) 79 | 80 | 81 | if tiledb.object_type(array_name) != "array": 82 | create_array() 83 | write_array() 84 | 85 | read_array() 86 | -------------------------------------------------------------------------------- /examples/reading_sparse_layouts.py: -------------------------------------------------------------------------------- 1 | # reading_sparse_layouts.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 
8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | # DESCRIPTION 28 | # 29 | # Please see the TileDB documentation for more information: 30 | # https://docs.tiledb.com/main/how-to/arrays/reading-arrays/basic-reading 31 | # 32 | # When run, this program will create a simple 2D sparse array, write some data 33 | # to it, and read a slice of the data back in the layout of the user's choice 34 | # (passed as an argument to the program: "row", "col", or "global"). 35 | # 36 | 37 | 38 | import sys 39 | 40 | import numpy as np 41 | 42 | import tiledb 43 | 44 | # Name of the array to create. 45 | array_name = "reading_sparse_layouts" 46 | 47 | 48 | def create_array(): 49 | # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4]. 
50 | dom = tiledb.Domain( 51 | tiledb.Dim(name="rows", domain=(1, 4), tile=2, dtype=np.int32), 52 | tiledb.Dim(name="cols", domain=(1, 4), tile=2, dtype=np.int32), 53 | ) 54 | 55 | # The array will be sparse with a single attribute "a" so each (i,j) cell can store an integer. 56 | schema = tiledb.ArraySchema( 57 | domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.int32)] 58 | ) 59 | 60 | # Create the (empty) array on disk. 61 | tiledb.SparseArray.create(array_name, schema) 62 | 63 | 64 | def write_array(): 65 | # Open the array and write to it. 66 | with tiledb.SparseArray(array_name, mode="w") as A: 67 | # To write, the coordinates must be split into two vectors, one per dimension 68 | IJ = [1, 1, 2, 1, 2, 2], [1, 2, 2, 4, 3, 4] 69 | data = np.array(([1, 2, 3, 4, 5, 6])) 70 | A[IJ] = data 71 | 72 | 73 | def read_array(order): 74 | # Open the array and read from it. 75 | with tiledb.SparseArray(array_name, mode="r") as A: 76 | # Get non-empty domain 77 | print("Non-empty domain: {}".format(A.nonempty_domain())) 78 | 79 | # Slice only rows 1, 2 and cols 2, 3, 4. 80 | # NOTE: The `query` syntax is required to specify an order 81 | # other than the default row-major 82 | data = A.query(attrs=["a"], order=order, coords=True)[1:3, 2:5] 83 | a_vals = data["a"] 84 | 85 | for i, coord in enumerate(zip(data["rows"], data["cols"])): 86 | print("Cell {} has data {}".format(str(coord), str(a_vals[i]))) 87 | 88 | 89 | # Check if the array already exists. 
90 | if tiledb.object_type(array_name) != "array": 91 | create_array() 92 | write_array() 93 | 94 | layout = "" 95 | if len(sys.argv) > 1: 96 | layout = sys.argv[1] 97 | 98 | order = "C" 99 | if layout == "col": 100 | order = "F" 101 | elif layout == "global": 102 | order = "G" 103 | else: 104 | order = "C" 105 | 106 | read_array(order) 107 | -------------------------------------------------------------------------------- /examples/string_dimension_labels.py: -------------------------------------------------------------------------------- 1 | # string_dimension_labels.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2023 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 
26 | # 27 | # DESCRIPTION 28 | # 29 | # Please refer to the TileDB and TileDB-Py documentation for more information: 30 | # https://docs.tiledb.com/main/how-to 31 | # https://tiledb-inc-tiledb.readthedocs-hosted.com/projects/tiledb-py/en/stable/python-api.html 32 | # 33 | # When run, this program will create a simple 2D dense array with a string dimension 34 | # dimension on the column dimension, and read a slice back with the dimension label. 35 | # 36 | 37 | import numpy as np 38 | 39 | import tiledb 40 | 41 | 42 | def create_array(uri: str): 43 | """Create array schema with a dimension label on the columns""" 44 | dim1 = tiledb.Dim("row", domain=(1, 5)) 45 | dim2 = tiledb.Dim("column", domain=(1, 5)) 46 | dom = tiledb.Domain(dim1, dim2) 47 | att = tiledb.Attr("a1", dtype=np.int64) 48 | dim_labels = {1: {"name": dim2.create_label_schema("increasing", "ascii")}} 49 | schema = tiledb.ArraySchema(domain=dom, attrs=(att,), dim_labels=dim_labels) 50 | tiledb.Array.create(uri, schema) 51 | 52 | 53 | def write_array(uri: str): 54 | """Write attribute and label data to the array""" 55 | a1_data = np.reshape(np.arange(1, 26), (5, 5)) 56 | label_data = np.array(["alpha", "beta", "gamma", "kappa", "omega"]) 57 | with tiledb.open(uri, "w") as array: 58 | array[:, :] = {"a1": a1_data, "name": label_data} 59 | 60 | 61 | def read_array(uri: str): 62 | """Read the array from the dimension label""" 63 | 64 | with tiledb.open(uri, "r") as array: 65 | data = array.label_index(["name"])[1, "beta":"kappa"] 66 | print( 67 | "Reading array on [[1, 'beta':'kappa']] with label 'name' on dimension 'col'" 68 | ) 69 | for name, value in data.items(): 70 | print(f" '{name}'={value}") 71 | 72 | 73 | if __name__ == "__main__": 74 | # Name of the array to create. 75 | ARRAY_NAME = "string_dimension_labels" 76 | 77 | LIBVERSION = tiledb.libtiledb.version() 78 | 79 | if LIBVERSION[0] == 2 and LIBVERSION[1] < 15: 80 | print( 81 | f"Dimension labels requires libtiledb version >= 2.15.0. 
Current version is" 82 | f" {LIBVERSION[0]}.{LIBVERSION[1]}.{LIBVERSION[2]}" 83 | ) 84 | 85 | else: 86 | # Only create and write to the array if it doesn't already exist. 87 | if tiledb.object_type(ARRAY_NAME) != "array": 88 | create_array(ARRAY_NAME) 89 | write_array(ARRAY_NAME) 90 | 91 | # Read from the array and print output. 92 | read_array(ARRAY_NAME) 93 | -------------------------------------------------------------------------------- /examples/string_float_int_dimensions.py: -------------------------------------------------------------------------------- 1 | # string_float_int_dimensions.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 
26 | # 27 | # DESCRIPTION 28 | # 29 | # Please see the TileDB documentation for more information: 30 | # https://docs.tiledb.com/main/how-to/arrays/creating-arrays/creating-dimensions 31 | # 32 | # When run, this program will create a simple 2D dense array, write some data 33 | # to it, and read a slice of the data back. 34 | # 35 | 36 | 37 | import numpy as np 38 | 39 | import tiledb 40 | 41 | path = "sparse_mixed_demo" 42 | 43 | dom = tiledb.Domain( 44 | *[ 45 | tiledb.Dim(name="str_dim", domain=(None, None), dtype=np.bytes_), 46 | tiledb.Dim(name="int64_dim", domain=(0, 100), tile=10, dtype=np.int64), 47 | tiledb.Dim( 48 | name="float64_dim", 49 | domain=(-100.0, 100.0), 50 | tile=10, 51 | dtype=np.float64, 52 | ), 53 | ], 54 | ) 55 | 56 | att = tiledb.Attr(name="a", dtype=np.int64) 57 | schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True, capacity=10000) 58 | tiledb.SparseArray.create(path, schema) 59 | 60 | data = [1, 2, 3, 4] 61 | c_str = [b"aa", b"bbb", b"c", b"dddd"] 62 | c_int64 = [0, 10, 20, 30] 63 | c_float64 = [-95.0, -61.5, 1.3, 42.7] 64 | 65 | with tiledb.open(path, "w") as A: 66 | A[c_str, c_int64, c_float64] = data 67 | 68 | with tiledb.open(path) as A: 69 | print("\n\nRead full array:\n") 70 | print(A[:]) 71 | 72 | print("\n\nRead string slice A['c':'dddd']:\n") 73 | print(A["c":"dddd"]) 74 | 75 | print("\n\nRead A[:, 10]: \n") 76 | print(A["aa":"bbb"]) 77 | 78 | print("\n\nRead A.multi_index['aa':'c', 0:10]\n") 79 | print(A.multi_index["aa":"c", 0:10]) 80 | 81 | print("\n\nRead A.multi_index['aa':'bbb', :, -95.0:-61.5]\n") 82 | print(A.multi_index["aa":"bbb", :, -95.0:-61.5]) 83 | -------------------------------------------------------------------------------- /examples/time_traveling.py: -------------------------------------------------------------------------------- 1 | # time_traveling.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 
8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | # DESCRIPTION 28 | # 29 | # When run, this program will create a simple sparse array, write some data 30 | # to it at specified timestamps, and read the entire array data back. 31 | # 32 | 33 | import numpy as np 34 | 35 | import tiledb 36 | 37 | # Name of the array to create. 38 | array_name = "time_traveling" 39 | 40 | 41 | def create_array(): 42 | dom = tiledb.Domain(tiledb.Dim(domain=(0, 0), tile=1, dtype=np.int64)) 43 | att = tiledb.Attr(name="num", dtype=np.int64) 44 | schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) 45 | tiledb.SparseArray.create(array_name, schema) 46 | 47 | 48 | def write_array(): 49 | # Open the array and write to it. 
50 | for timestamp in range(1, 4): 51 | with tiledb.open(array_name, timestamp=timestamp, mode="w") as T: 52 | T[0] = timestamp 53 | 54 | 55 | def read_array(): 56 | # Open the array and read from it. 57 | with tiledb.open(array_name, mode="r") as T: 58 | print(T[:]["num"]) 59 | 60 | with tiledb.open(array_name, mode="r", timestamp=(1, 2)) as T: 61 | print(T[:]["num"]) 62 | 63 | with tiledb.open(array_name, mode="r", timestamp=(2, 3)) as T: 64 | print(T[:]["num"]) 65 | 66 | with tiledb.open(array_name, mode="r", timestamp=1) as T: 67 | print(T[:]["num"]) 68 | 69 | with tiledb.open(array_name, mode="r", timestamp=(1, None)) as T: 70 | print(T[:]["num"]) 71 | 72 | 73 | if tiledb.object_type(array_name) != "array": 74 | create_array() 75 | write_array() 76 | 77 | read_array() 78 | -------------------------------------------------------------------------------- /examples/using_tiledb_stats.py: -------------------------------------------------------------------------------- 1 | # using_tiledb_stats.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | # DESCRIPTION 28 | # 29 | # Please see the TileDB documentation for more information: 30 | # https://docs.tiledb.com/main/how-to/performance/using-performance-statistics 31 | # 32 | # When run, this program will create a 0.5GB dense array, and enable the 33 | # TileDB statistics surrounding reads from the array. 34 | # 35 | 36 | import numpy as np 37 | 38 | import tiledb 39 | 40 | # Name of array. 41 | array_name = "stats_array" 42 | 43 | 44 | def create_array(row_tile_extent, col_tile_extent): 45 | dom = tiledb.Domain( 46 | tiledb.Dim( 47 | name="rows", domain=(1, 12000), tile=row_tile_extent, dtype=np.int32 48 | ), 49 | tiledb.Dim( 50 | name="cols", domain=(1, 12000), tile=col_tile_extent, dtype=np.int32 51 | ), 52 | ) 53 | 54 | schema = tiledb.ArraySchema( 55 | domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.int32)] 56 | ) 57 | 58 | # Create the (empty) array on disk. 59 | tiledb.Array.create(array_name, schema) 60 | 61 | 62 | def write_array(): 63 | # Open the array and write to it. 64 | with tiledb.open(array_name, mode="w") as A: 65 | data = np.arange(12000 * 12000).reshape(12000, 12000) 66 | A[:] = data 67 | 68 | 69 | def read_array(): 70 | # Open the array and read from it. 71 | with tiledb.open(array_name, mode="r") as A: 72 | # Read a slice of 3,000 rows. 73 | # Enable the stats for the read query, and print the report. 74 | tiledb.stats_enable() 75 | print(A[1:3001, 1:12001]) 76 | tiledb.stats_dump() 77 | tiledb.stats_disable() 78 | 79 | 80 | # Create array with each row as a tile. 
81 | create_array(1, 12000) 82 | write_array() 83 | read_array() 84 | -------------------------------------------------------------------------------- /examples/vfs.py: -------------------------------------------------------------------------------- 1 | # vfs.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | # DESCRIPTION 28 | # 29 | # Please see the TileDB documentation for more information: 30 | # https://docs.tiledb.com/main/how-to/virtual-filesystem 31 | # 32 | # This program explores the various TileDB VFS tools. 
33 | # 34 | 35 | import os 36 | import struct 37 | 38 | import tiledb 39 | 40 | 41 | def path(p): 42 | return os.path.join(os.getcwd(), p) 43 | 44 | 45 | def dirs_files(): 46 | # Create TileDB VFS 47 | vfs = tiledb.VFS() 48 | 49 | # Create directory 50 | if not vfs.is_dir("dir_A"): 51 | vfs.create_dir(path("dir_A")) 52 | print("Created 'dir_A'") 53 | else: 54 | print("'dir_A' already exists") 55 | 56 | # Creating an (empty) file 57 | if not vfs.is_file("dir_A/file_A"): 58 | vfs.touch(path("dir_A/file_A")) 59 | print("Created empty file 'dir_A/file_A'") 60 | else: 61 | print("'dir_A/file_A' already exists") 62 | 63 | # Getting the file size 64 | print("Size of file 'dir_A/file_A': {}".format(vfs.file_size(path("dir_A/file_A")))) 65 | 66 | # Moving files (moving directories is similar) 67 | print("Moving file 'dir_A/file_A' to 'dir_A/file_B'") 68 | vfs.move_file(path("dir_A/file_A"), path("dir_A/file_B")) 69 | 70 | # Deleting files and directories 71 | print("Deleting 'dir_A/file_B' and 'dir_A'") 72 | vfs.remove_file(path("dir_A/file_B")) 73 | vfs.remove_dir(path("dir_A")) 74 | 75 | 76 | def write(): 77 | # Create TileDB VFS 78 | vfs = tiledb.VFS() 79 | 80 | # Write binary data 81 | with vfs.open("tiledb_vfs.bin", "wb") as f: 82 | f.write(struct.pack("f", 153.0)) 83 | f.write("abcd".encode("utf-8")) 84 | 85 | # Write binary data again - this will overwrite the previous file 86 | with vfs.open("tiledb_vfs.bin", "wb") as f: 87 | f.write(struct.pack("f", 153.1)) 88 | f.write("abcdef".encode("utf-8")) 89 | 90 | # Append binary data to existing file (this will NOT work on S3) 91 | with vfs.open("tiledb_vfs.bin", "ab") as f: 92 | f.write("ghijkl".encode("utf-8")) 93 | 94 | 95 | def read(): 96 | # Create TileDB VFS 97 | vfs = tiledb.VFS() 98 | 99 | # Read binary data 100 | with vfs.open("tiledb_vfs.bin", "rb") as f: 101 | # Read the first 4 bytes (bytes [0:4]) 102 | f1 = struct.unpack("f", f.read(4))[0] 103 | 104 | # Read the next 8 bytes (bytes [4:12]) 105 | s1 = 
bytes.decode(f.read(8), "utf-8") 106 | 107 | print(f"Binary read:\n{f1}\n{s1}") 108 | 109 | 110 | dirs_files() 111 | write() 112 | read() 113 | -------------------------------------------------------------------------------- /examples/writing_dense_multiple.py: -------------------------------------------------------------------------------- 1 | # writing_dense_multiple.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | # DESCRIPTION 28 | # 29 | # Please see the TileDB documentation for more information: 30 | # https://docs.tiledb.com/main/how-to/arrays/writing-arrays/writing-in-dense-subarrays 31 | # 32 | # When run, this program will create a simple 2D dense array, write some data 33 | # to it with two write queries, and read the entire array data back. 
34 | # 35 | 36 | 37 | import numpy as np 38 | 39 | import tiledb 40 | 41 | # Name of the array to create. 42 | array_name = "writing_dense_multiple" 43 | 44 | 45 | def create_array(): 46 | # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4]. 47 | dom = tiledb.Domain( 48 | tiledb.Dim(name="rows", domain=(1, 4), tile=2, dtype=np.int32), 49 | tiledb.Dim(name="cols", domain=(1, 4), tile=2, dtype=np.int32), 50 | ) 51 | 52 | # The array will be dense with a single attribute "a" so each (i,j) cell can store an integer. 53 | schema = tiledb.ArraySchema( 54 | domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.int32)] 55 | ) 56 | 57 | # Create the (empty) array on disk. 58 | tiledb.DenseArray.create(array_name, schema) 59 | 60 | 61 | def write_array(): 62 | # Open the array and write to it. 63 | with tiledb.DenseArray(array_name, mode="w") as A: 64 | # First write 65 | data = np.array(([1, 2], [3, 4])) 66 | A[1:3, 1:3] = data 67 | 68 | # Second write 69 | data = np.array(([5, 6, 7, 8], [9, 10, 11, 12])) 70 | A[2:4, 1:5] = data 71 | 72 | 73 | def read_array(): 74 | # Open the array and read from it. 75 | with tiledb.DenseArray(array_name, mode="r") as A: 76 | # Slice the entire array 77 | data = A[:] 78 | print(data["a"]) 79 | 80 | 81 | if tiledb.object_type(array_name) != "array": 82 | create_array() 83 | write_array() 84 | 85 | read_array() 86 | -------------------------------------------------------------------------------- /examples/writing_dense_padding.py: -------------------------------------------------------------------------------- 1 | # writing_dense_padding.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 
8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | # DESCRIPTION 28 | # 29 | # Please see the TileDB documentation for more information: 30 | # https://docs.tiledb.com/main/how-to/arrays/writing-arrays/writing-in-dense-subarrays 31 | # 32 | # When run, this program will create a simple 2D dense array, write some data 33 | # to it in a way that some space is empty, and read the entire array data back. 34 | # 35 | 36 | 37 | import numpy as np 38 | 39 | import tiledb 40 | 41 | # Name of the array to create. 42 | array_name = "writing_dense_padding" 43 | 44 | 45 | def create_array(): 46 | # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4]. 
47 | dom = tiledb.Domain( 48 | tiledb.Dim(name="rows", domain=(1, 4), tile=2, dtype=np.int32), 49 | tiledb.Dim(name="cols", domain=(1, 4), tile=2, dtype=np.int32), 50 | ) 51 | 52 | # The array will be dense with a single attribute "a" so each (i,j) cell can store an integer. 53 | schema = tiledb.ArraySchema( 54 | domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.int32)] 55 | ) 56 | 57 | # Create the (empty) array on disk. 58 | tiledb.DenseArray.create(array_name, schema) 59 | 60 | 61 | def write_array(): 62 | # Open the array and write to it. 63 | with tiledb.DenseArray(array_name, mode="w") as A: 64 | # Write to [2,3], [1,2] 65 | data = np.array(([1, 2], [3, 4])) 66 | A[2:4, 1:3] = data 67 | 68 | 69 | def read_array(): 70 | # Open the array and read from it. 71 | with tiledb.DenseArray(array_name, mode="r") as A: 72 | # Slice the entire array 73 | data = A[:] 74 | print(data["a"]) 75 | 76 | 77 | if tiledb.object_type(array_name) != "array": 78 | create_array() 79 | write_array() 80 | 81 | read_array() 82 | -------------------------------------------------------------------------------- /examples/writing_dense_rgb.py: -------------------------------------------------------------------------------- 1 | # writing_dense_rgb.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2021 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 
18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | # DESCRIPTION 28 | # 29 | # Please see the TileDB documentation for more information: 30 | # https://docs.tiledb.com/main/how-to/arrays/writing-arrays/writing-in-dense-subarrays 31 | # 32 | # When run, this program will create a 2D+1 multi-component (eg RGB) dense array, write some 33 | # data to it, and read the entire array data. 34 | 35 | import numpy as np 36 | 37 | import tiledb 38 | 39 | img_shape = (100, 224, 224) 40 | img_uri = "writing_dense_rgb" 41 | 42 | image_data = np.random.randint(low=0, high=100, size=(*img_shape, 3), dtype=np.int32) 43 | 44 | 45 | def create_array(): 46 | domain = tiledb.Domain( 47 | tiledb.Dim( 48 | name="image_id", domain=(0, img_shape[0] - 1), tile=4, dtype=np.int32 49 | ), 50 | tiledb.Dim( 51 | name="x", domain=(0, img_shape[1] - 1), tile=img_shape[1], dtype=np.int32 52 | ), 53 | tiledb.Dim( 54 | name="y", domain=(0, img_shape[2] - 1), tile=img_shape[2], dtype=np.int32 55 | ), 56 | ) 57 | 58 | # create multi-component attribute with three int32 components 59 | attr = tiledb.Attr(dtype=np.dtype("i4, i4, i4")) 60 | 61 | schema = tiledb.ArraySchema(domain=domain, sparse=False, attrs=[attr]) 62 | 63 | tiledb.Array.create(img_uri, schema, overwrite=True) 64 | 65 | image_data_rgb = image_data.view(np.dtype("i4, i4, i4")).reshape(img_shape) 66 | 67 | with tiledb.open(img_uri, "w") as A: 68 | # write data to 1st image_id slot 69 | A[:] = image_data_rgb 70 | 71 | 72 | def read_array(): 73 | with tiledb.open(img_uri) 
as A: 74 | print(A[:].shape) 75 | 76 | 77 | if __name__ == "__main__": 78 | create_array() 79 | read_array() 80 | -------------------------------------------------------------------------------- /examples/writing_sparse_multiple.py: -------------------------------------------------------------------------------- 1 | # writing_sparse_multiple.py 2 | # 3 | # LICENSE 4 | # 5 | # The MIT License 6 | # 7 | # Copyright (c) 2020 TileDB, Inc. 8 | # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | # 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | # 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | # 27 | # DESCRIPTION 28 | # 29 | # Please see the TileDB documentation for more information: 30 | # https://docs.tiledb.com/main/how-to/arrays/writing-arrays/writing-sparse-cells 31 | # 32 | # When run, this program will create a simple 2D sparse array, write some data 33 | # to it twice, and read all the data back. 
34 | # 35 | 36 | 37 | import numpy as np 38 | 39 | import tiledb 40 | 41 | # Name of the array to create. 42 | array_name = "writing_sparse_multiple" 43 | 44 | 45 | def create_array(): 46 | # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4]. 47 | dom = tiledb.Domain( 48 | tiledb.Dim(name="rows", domain=(1, 4), tile=4, dtype=np.int32), 49 | tiledb.Dim(name="cols", domain=(1, 4), tile=4, dtype=np.int32), 50 | ) 51 | 52 | # The array will be sparse with a single attribute "a" so each (i,j) cell can store an integer. 53 | schema = tiledb.ArraySchema( 54 | domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.int32)] 55 | ) 56 | 57 | # Create the (empty) array on disk. 58 | tiledb.SparseArray.create(array_name, schema) 59 | 60 | 61 | def write_array(): 62 | # Open the array and write to it. 63 | with tiledb.SparseArray(array_name, mode="w") as A: 64 | # First write 65 | IJ = [1, 2, 2], [1, 4, 3] 66 | data = np.array(([1, 2, 3])) 67 | A[IJ] = data 68 | 69 | # Second write 70 | IJ = [4, 2], [1, 4] 71 | data = np.array(([4, 20])) 72 | A[IJ] = data 73 | 74 | 75 | def read_array(): 76 | # Open the array and read from it. 
77 | with tiledb.SparseArray(array_name, mode="r") as A: 78 | # Slice entire array 79 | data = A[1:5, 1:5] 80 | a_vals = data["a"] 81 | for i, coord in enumerate(zip(data["rows"], data["cols"])): 82 | print("Cell (%d, %d) has data %d" % (coord[0], coord[1], a_vals[i])) 83 | 84 | 85 | if tiledb.object_type(array_name) != "array": 86 | create_array() 87 | write_array() 88 | 89 | read_array() 90 | -------------------------------------------------------------------------------- /external/LICENSE-string_view.txt: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the form of machine-executable object code generated by 15 | a source language processor. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /external/LICENSE-tsl_robin_map.txt: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2017 Thibaut Goetghebuer-Planchon 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | -------------------------------------------------------------------------------- /misc/pypi_linux/Dockerfile.aarch64.manylinux2014: -------------------------------------------------------------------------------- 1 | FROM quay.io/pypa/manylinux2014_aarch64 2 | 3 | ############################################### 4 | # version args 5 | ARG LIBTILEDB_VERSION=2.5.2 6 | ENV LIBTILEDB_VERSION=$LIBTILEDB_VERSION 7 | 8 | ARG LIBTILEDB_REPO=https://github.com/TileDB-Inc/TileDB 9 | ENV LIBTILEDB_REPO=$LIBTILEDB_REPO 10 | 11 | ARG TILEDBPY_VERSION=0.11.2 12 | ENV TILEDBPY_VERSION=$TILEDBPY_VERSION 13 | 14 | ARG CMAKE_VERSION=3.21 15 | ENV CMAKE_VERSION=$CMAKE_VERSION 16 | 17 | ############################################### 18 | # python settings 19 | # NOTE: MUST USE the 'mu' variant here to be compatible 20 | # with "most" linux distros (see manylinux README) 21 | ENV PYTHON_BASE /opt/python/cp38-cp38/bin/ 22 | 23 | RUN useradd tiledb 24 | ENV HOME /home/tiledb 25 | 26 | # dependencies: 27 | # - cmake (need recent) and auditwheel from pip 28 | RUN $PYTHON_BASE/pip install cmake==${CMAKE_VERSION} auditwheel cibuildwheel 29 | 30 | ENV CMAKE $PYTHON_BASE/cmake 31 | 32 | ############################################### 33 | # build libtiledb (core) 34 | # notes: 35 | # 1) we are using auditwheel from https://github.com/pypa/auditwheel 36 | # this verifies and tags wheel products with the manylinux1 label, 37 | # and allows us to build libtiledb once, install it to a normal 38 | # system path, and then use it to build wheels for all of the python 39 | # versions. 

# NOTE: NO GCS SUPPORT
# NOTE(review): the cmake invocation below passes -DTILEDB_GCS=ON, which
# contradicts the note above - confirm which one is intended.

RUN cd /home/tiledb/ && \
  git clone ${LIBTILEDB_REPO} -b ${LIBTILEDB_VERSION} --depth=1 && \
  mkdir build && \
  cd build && \
  $CMAKE -DTILEDB_S3=ON -DTILEDB_AZURE=ON \
         -DTILEDB_SERIALIZATION=ON \
         -DTILEDB_CPP_API=ON -DTILEDB_TESTS=OFF \
         -DTILEDB_GCS=ON \
         -DTILEDB_FORCE_ALL_DEPS:BOOL=ON \
         -DTILEDB_LOG_OUTPUT_ON_FAILURE:BOOL=ON \
         -DSANITIZER=OFF -DTILEDB_WERROR=OFF \
         -DCMAKE_CXX_STANDARD=17 \
         ../TileDB && \
  make -j$(nproc) && \
  make install-tiledb

###############################################
# add source directory. note: run from base of tree
ADD . /home/tiledb/TileDB-Py

--------------------------------------------------------------------------------
/misc/pypi_linux/Dockerfile2014:
--------------------------------------------------------------------------------
FROM quay.io/pypa/manylinux2014_x86_64:2022-11-28-5d13db4

###############################################
# version args
ARG LIBTILEDB_VERSION=2.2.9
ENV LIBTILEDB_VERSION=$LIBTILEDB_VERSION

ARG LIBTILEDB_SHA=dc3bb54adc9bb0d99cb3f56ede2ab5b14e62ab76
ENV LIBTILEDB_SHA=$LIBTILEDB_SHA

ARG TILEDBPY_VERSION=0.8.10
ENV TILEDBPY_VERSION=$TILEDBPY_VERSION

ARG LIBTILEDB_REPO=https://github.com/TileDB-Inc/TileDB
ENV LIBTILEDB_REPO=$LIBTILEDB_REPO

###############################################
# python settings
# NOTE: MUST USE the 'mu' variant here to be compatible
# with "most" linux distros (see manylinux README)
ENV PYTHON_BASE /opt/python/cp38-cp38/bin/

RUN useradd tiledb
ENV HOME /home/tiledb

# dependencies:
# - cmake (need recent) and auditwheel from pip
# The version specifier must be quoted: this RUN uses the shell form, and an
# unquoted `<3.22` would be parsed as an input redirection from a file named
# "3.22" instead of being passed to pip.
RUN $PYTHON_BASE/pip install "cmake<3.22" auditwheel

ENV CMAKE $PYTHON_BASE/cmake

###############################################
# 1) Nothing builds under GCC 4.8 due to default constructor unused-parameter warnings
# 2) adding -lrt as a work-around for now because python2.7 doesn't link it, but it
#    ends up as an unlinked dependency.
# 3) Capnproto (TileDB Serialization) requires -DKJ_USE_EPOLL=0 -D__BIONIC__=1 per
#    https://github.com/capnproto/capnproto/issues/350#issuecomment-270930594

ENV CXXFLAGS -Wno-unused-parameter -lrt -DKJ_USE_EPOLL=0 -D__BIONIC__=1
ENV CFLAGS -Wno-unused-parameter -lrt -DKJ_USE_EPOLL=0 -D__BIONIC__=1

# build libtiledb (core)
# notes:
#    1) we are using auditwheel from https://github.com/pypa/auditwheel
#       this verifies and tags wheel products with the manylinux1 label,
#       and allows us to build libtiledb once, install it to a normal
#       system path, and then use it to build wheels for all of the python
#       versions.
RUN cd /home/tiledb/ && \
  git clone ${LIBTILEDB_REPO} && \
  git -C TileDB checkout $LIBTILEDB_SHA && \
  mkdir build && \
  cd build && \
  $CMAKE -DTILEDB_S3=ON -DTILEDB_AZURE=ON -DTILEDB_GCS=ON \
         -DTILEDB_CPP_API=ON -DTILEDB_TESTS=OFF \
         -DTILEDB_FORCE_ALL_DEPS:BOOL=ON \
         -DTILEDB_LOG_OUTPUT_ON_FAILURE:BOOL=ON \
         -DSANITIZER="OFF;-DCOMPILER_SUPPORTS_AVX2:BOOL=FALSE" \
         ../TileDB && \
  make -j8 && \
  make install-tiledb

ADD misc/pypi_linux/build.sh /usr/bin/build.sh
RUN chmod +x /usr/bin/build.sh

# add source directory as optional TILEDB_PY_REPO
ADD .
/opt/TileDB-Py 68 | -------------------------------------------------------------------------------- /misc/pypi_linux/build.aarch64.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Usage: 4 | # 1) update version information 5 | # 2) run from root directory of TileDB-Py checkout 6 | # 3) test and upload wheels to PyPI 7 | 8 | set -xeu 9 | 10 | export LIBTILEDB_VERSION=2.5.3 11 | export TILEDBPY_VERSION=0.11.5 12 | 13 | export CIBW_MANYLINUX_AARCH64_IMAGE=wheel-host-aarch64.manylinux2014-$LIBTILEDB_VERSION 14 | export CIBW_SKIP='cp36-* cp310-* pp-* *_i686 pp* *-musllinux*' 15 | export CIBW_PLATFORM='linux' 16 | export CIBW_ENVIRONMENT='TILEDB_PATH=/usr/local/' 17 | export CIBW_BUILD_VERBOSITY=1 18 | export CIBW_BEFORE_TEST="pip install -r misc/requirements_wheel.txt" 19 | export CIBW_TEST_COMMAND="python -c 'import tiledb'" 20 | export TILEDB_WHEEL_BUILD=1 21 | 22 | docker build --build-arg=LIBTILEDB_VERSION=$LIBTILEDB_VERSION --build-arg TILEDBPY_VERSION=$TILEDBPY_VERSION -t $CIBW_MANYLINUX_AARCH64_IMAGE -f misc/pypi_linux/Dockerfile.aarch64.manylinux2014 . 23 | 24 | rm -rf /tmp/cibuildwheel_venv 25 | python3 -m venv /tmp/cibuildwheel_venv 26 | . /tmp/cibuildwheel_venv/bin/activate 27 | 28 | pip install cibuildwheel 29 | 30 | cibuildwheel --platform=linux --output-dir=wheelhouse . 31 | -------------------------------------------------------------------------------- /misc/pypi_linux/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # USAGE 4 | #------ 5 | # 0) cd TileDB-Py (NOTE: root directory!) 6 | # 1) docker build -f misc/pypi_linux/Dockerfile . -t wheel_builder 7 | # 2) docker run -v `pwd`/misc/pypi_linux/wheels:/wheels -ti wheel_builder build.sh 8 | # 9 | # testing (e.g. 
using the official python docker images)
#   - $ docker run -v `pwd`/misc/pypi_linux/wheels:/wheels --rm -ti python bash
#   -- pip3 install /wheels/*cp37*.whl
#   -- python3.7 -c "import tiledb; print(tiledb.libtiledb.version()); assert tiledb.VFS().supports('s3')"
set -ex

export TILEDB_PY_REPO="/opt/TileDB-Py"

# build_wheel SUFFIX PYTHON
#
# Clone $TILEDB_PY_REPO into /home/tiledb/TileDB-Py<SUFFIX>, check out
# $TILEDBPY_VERSION, then build, repair and install the wheel with PYTHON.
#
#   SUFFIX  checkout directory suffix, e.g. 38
#   PYTHON  absolute path of the interpreter to build with
#
# The same interpreter path is used for every step. Previously the
# "pip install -r" step for 3.8-3.11 pointed at cpXY-cpXYm directories while
# the remaining steps used cpXY-cpXY.
build_wheel() {
  suffix="$1"
  python="$2"

  cd /home/tiledb
  git clone $TILEDB_PY_REPO "TileDB-Py${suffix}"
  git -C "TileDB-Py${suffix}" checkout $TILEDBPY_VERSION

  cd "/home/tiledb/TileDB-Py${suffix}"
  "$python" -m pip install -r misc/requirements_wheel.txt
  "$python" setup.py build_ext bdist_wheel --tiledb=/usr/local
  auditwheel repair dist/*.whl
  "$python" -m pip install wheelhouse/*.whl
  cd tiledb/tests
  # The unit tests are not run here ("$python" -m unittest is disabled).
}

# build python37 wheel (the only version whose directory carries the "m" suffix)
build_wheel 37 /opt/python/cp37-cp37m/bin/python3.7

# build python38 wheel
build_wheel 38 /opt/python/cp38-cp38/bin/python3.8

# build python39 wheel
build_wheel 39 /opt/python/cp39-cp39/bin/python3.9

# build python310 wheel
build_wheel 310 /opt/python/cp310-cp310/bin/python3.10

# build python311 wheel
build_wheel 311 /opt/python/cp311-cp311/bin/python3.11

# copy build products out
for suffix in 37 38 39 310 311; do
  cp /home/tiledb/TileDB-Py${suffix}/wheelhouse/* /wheels
done
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[build-system]
requires = [
    "scikit-build-core",
    "pybind11",
    "numpy>=2.0.1"
]
build-backend = "scikit_build_core.build"

[project]
requires-python = ">=3.9"
name = "tiledb"
description = "Pythonic interface to the TileDB array storage manager"
readme = "README.md"
license = {text = "MIT"}
authors = [
    {name = "TileDB, Inc.", email = "help@tiledb.io"}
]
maintainers = [
    {name = "TileDB, Inc.", email = "help@tiledb.io"}
]
classifiers=[
    "Development Status :: 4 - Beta",
    "Intended Audience ::
Developers", 24 | "Intended Audience :: Information Technology", 25 | "Intended Audience :: Science/Research", 26 | "License :: OSI Approved :: MIT License", 27 | "Programming Language :: Python", 28 | "Topic :: Software Development :: Libraries :: Python Modules", 29 | "Operating System :: Unix", 30 | "Operating System :: POSIX :: Linux", 31 | "Operating System :: MacOS :: MacOS X", 32 | "Programming Language :: Python :: 3", 33 | "Programming Language :: Python :: 3.9", 34 | "Programming Language :: Python :: 3.10", 35 | "Programming Language :: Python :: 3.11", 36 | "Programming Language :: Python :: 3.12", 37 | "Programming Language :: Python :: 3.13", 38 | ] 39 | dependencies = [ 40 | "numpy>=1.25", 41 | "packaging", 42 | ] 43 | dynamic = ["version"] 44 | 45 | [project.optional-dependencies] 46 | doc = [ 47 | "jinja2==3.1.5", 48 | "sphinx-rtd-theme==3.0.2", 49 | "Sphinx==8.1.3", 50 | "nbsphinx==0.9.6", 51 | "breathe", 52 | ] 53 | test = [ 54 | "pytest", 55 | "pytest-rerunfailures", 56 | "hypothesis", 57 | "psutil", 58 | "pyarrow", 59 | "pandas", 60 | "dask[distributed]", 61 | ] 62 | 63 | [project.urls] 64 | homepage = "https://github.com/TileDB-Inc/TileDB-Py" 65 | 66 | [tool.setuptools_scm] 67 | version_file = "tiledb/_generated_version.py" 68 | 69 | [tool.scikit-build] 70 | wheel.expand-macos-universal-tags = true 71 | metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" 72 | wheel.packages = ["tiledb", "examples", "external"] 73 | wheel.license-files = ["LICENSE", "external/LICENSE-*.txt"] 74 | sdist.include = ["tiledb/_generated_version.py"] 75 | 76 | [tool.scikit-build.cmake.define] 77 | TILEDB_PATH = {env="TILEDB_PATH"} 78 | TILEDB_VERSION = {env="TILEDB_VERSION"} 79 | TILEDB_HASH = {env="TILEDB_HASH"} 80 | TILEDB_REMOVE_DEPRECATIONS = "OFF" 81 | TILEDB_SERIALIZATION = "OFF" 82 | 83 | [tool.pytest.ini_options] 84 | python_classes = "*Test*" 85 | python_files = "test_*.py" 86 | testpaths = ["tiledb/tests"] 87 | addopts = [ 88 | 
"--import-mode=importlib", 89 | "--ignore=tiledb/tests/perf", 90 | "--ignore=tiledb/tests/__pycache__", 91 | ] 92 | filterwarnings = [ 93 | "error", 94 | "default::pytest.PytestWarning", 95 | "default::DeprecationWarning:distributed", 96 | # Pyarrow emits a warning regarding use of deprecated Pandas function 97 | # Remove this once we bump Pyarrow version 98 | "ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning" 99 | ] 100 | 101 | [tool.ruff] 102 | ignore = ["E501", "E722"] 103 | extend-select = ["I001"] 104 | extend-exclude = ["doc"] 105 | fix = true 106 | 107 | [tool.ruff.lint] 108 | select = ["NPY201"] 109 | 110 | [tool.ruff.per-file-ignores] 111 | "tiledb/__init__.py" = ["F401"] 112 | 113 | [tool.cibuildwheel] 114 | test-requires = [ 115 | "pytest", 116 | "pytest-rerunfailures", 117 | "hypothesis", 118 | "psutil", 119 | "pyarrow", 120 | "pandas", 121 | ] 122 | test-command = "pytest {project}" 123 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import subprocess 4 | 5 | 6 | def main(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("command", choices=["install", "develop"]) 9 | parser.add_argument("--tiledb", type=str, required=False) 10 | parser.add_argument("--debug", action="store_true") 11 | parser.add_argument("--enable-deprecations", action="store_true", required=False) 12 | parser.add_argument("--enable-serialization", action="store_true", required=False) 13 | parser.add_argument("-v", action="store_true") 14 | args = parser.parse_args() 15 | 16 | os.getcwd() 17 | 18 | cmd = [ 19 | "pip", 20 | "install", 21 | ] 22 | 23 | if args.command == "develop": 24 | cmd.append("-e") 25 | 26 | cmd.append(os.getcwd()) 27 | 28 | if args.tiledb: 29 | cmd.append(f"-Cskbuild.cmake.define.TILEDB_PATH={args.tiledb}") 30 | 31 | if args.debug: 32 | 
cmd.append(f"-Cskbuild.cmake.build-type=Debug") 33 | 34 | if args.enable_deprecations: 35 | cmd.append(f"-Cskbuild.cmake.define.TILEDB_REMOVE_DEPRECATIONS=OFF") 36 | 37 | if args.enable_serialization: 38 | cmd.append(f"-Cskbuild.cmake.define.TILEDB_SERIALIZATION=ON") 39 | 40 | if args.v: 41 | cmd.append("-v") 42 | 43 | print( 44 | "Note: 'setup.py' is deprecated in the Python ecosystem. Limited backward compatibility is currently provided for 'install' and 'develop' commands as passthrough to 'pip'." 45 | ) 46 | print(" running: ", f"`{' '.join(cmd)}`") 47 | 48 | subprocess.run(cmd) 49 | 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /tiledb/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Pybind11 2 | 3 | pybind11_add_module( 4 | main 5 | main.cc 6 | core.cc 7 | npbuffer.cc 8 | fragment.cc 9 | schema_evolution.cc 10 | util.cc 11 | tests/test_metadata.cc 12 | tests/test_webp.cc 13 | ) 14 | 15 | if (TILEDB_SERIALIZATION) 16 | target_sources(main PRIVATE serialization.cc tests/test_serialization.cc) 17 | endif() 18 | 19 | target_link_libraries( 20 | main 21 | PUBLIC 22 | TileDB::tiledb_shared 23 | ) 24 | 25 | target_compile_features( 26 | main 27 | PUBLIC 28 | cxx_std_20 29 | ) 30 | 31 | if (TILEDB_SERIALIZATION) 32 | target_compile_definitions( 33 | main 34 | PRIVATE 35 | TILEDB_SERIALIZATION 36 | ) 37 | endif() 38 | 39 | install(TARGETS main DESTINATION tiledb) 40 | 41 | if(TILEDB_DOWNLOADED) 42 | message(STATUS "Adding \"libtiledb\" into install group") 43 | 44 | install(IMPORTED_RUNTIME_ARTIFACTS TileDB::tiledb_shared DESTINATION tiledb) 45 | 46 | if (APPLE) 47 | set_target_properties(main PROPERTIES INSTALL_RPATH "@loader_path") 48 | elseif(UNIX) 49 | set_target_properties(main PROPERTIES INSTALL_RPATH "\$ORIGIN") 50 | endif() 51 | else() 52 | # If using external TileDB core library force it to be linked at runtime using 
class Aggregation:
    """
    Proxy object returned by Query.agg to calculate aggregations.

    Indexing the proxy (``agg[selection]``) runs the aggregation over the
    selected region of ``query.array`` and returns the computed values.

    :param query: The query object the aggregation is attached to (required).
    :param attr_to_aggs: Aggregation specification forwarded unchanged to
        ``PyAgg``. Defaults to a new empty dict for each instance.
    :raises ValueError: if ``query`` is None
    """

    def __init__(self, query=None, attr_to_aggs=None):
        if query is None:
            raise ValueError("must pass in a query object")

        self.query = query
        # Build the default per instance: a `{}` default in the signature is
        # created once and would be shared (and mutated) across all instances.
        self.attr_to_aggs = {} if attr_to_aggs is None else attr_to_aggs

    def __getitem__(self, selection):
        """Run the aggregation over ``selection`` and return the result.

        :param selection: Index expression (scalars, slices, Ellipsis or a
            tuple of those) converted to per-dimension ranges.
        :return: The value returned by ``PyAgg.get_aggregate()``; a result
            holding a single entry is unwrapped, first at the attribute level
            and then at the aggregate level.
        :raises ValueError: if the query order is not None, 'C', 'F', 'G' or 'U'
        :raises TypeError: if the query condition is set but is not a str
        """
        from .main import PyAgg
        from .subarray import Subarray

        array = self.query.array
        order = self.query.order

        # NOTE(review): this sparse-dependent default is always overwritten
        # below (every branch assigns `layout` or raises), so sparse arrays
        # with order=None end up ROW_MAJOR -- confirm that this is intended.
        layout = (
            lt.LayoutType.UNORDERED if array.schema.sparse else lt.LayoutType.ROW_MAJOR
        )
        if order is None or order == "C":
            layout = lt.LayoutType.ROW_MAJOR
        elif order == "F":
            layout = lt.LayoutType.COL_MAJOR
        elif order == "G":
            layout = lt.LayoutType.GLOBAL_ORDER
        elif order == "U":
            layout = lt.LayoutType.UNORDERED
        else:
            raise ValueError(
                "order must be 'C' (TILEDB_ROW_MAJOR), "
                "'F' (TILEDB_COL_MAJOR), "
                "'G' (TILEDB_GLOBAL_ORDER), "
                "or 'U' (TILEDB_UNORDERED)"
            )

        q = PyAgg(array.ctx, array, layout, self.attr_to_aggs)

        from .array import (
            index_as_tuple,
            index_domain_subarray,
            replace_ellipsis,
            replace_scalars_slice,
        )

        # Normalize the index expression into one range per dimension.
        selection = index_as_tuple(selection)
        dom = array.schema.domain
        idx = replace_ellipsis(dom.ndim, selection)
        # The second return value (axes to drop) is not needed here.
        idx, _ = replace_scalars_slice(dom, idx)
        dim_ranges = index_domain_subarray(array, dom, idx)

        subarray = Subarray(array, array.ctx)
        subarray.add_ranges(dim_ranges)
        q.set_subarray(subarray)

        cond = self.query.cond
        if cond is not None and cond != "":
            from .query_condition import QueryCondition

            if isinstance(cond, str):
                q.set_cond(QueryCondition(cond))
            else:
                raise TypeError("`cond` expects type str.")

        result = q.get_aggregate()

        # If there was only one attribute, just show the aggregate results
        if len(result) == 1:
            result = next(iter(result.values()))

        # If there was only one aggregate, just show the value
        if len(result) == 1:
            result = next(iter(result.values()))

        return result

    @property
    def multi_index(self):
        """Apply Array.multi_index with query parameters."""
        from .multirange_indexing import MultiRangeAggregation

        return MultiRangeAggregation(self.query.array, query=self)

    @property
    def df(self):
        """Not supported for aggregations; always raises NotImplementedError."""
        raise NotImplementedError(".df indexer not supported for Aggregations")
class DataOrder(Enum):
    """Python-level names for the ``lt.DataOrder`` constants.

    Each member's value is the corresponding ``lt.DataOrder`` constant, so
    ``DataOrder(<lt.DataOrder value>)`` looks a member up by that constant and
    ``.name`` yields the lowercase name defined here.
    """

    # Wraps lt.DataOrder.INCREASING_DATA.
    increasing = lt.DataOrder.INCREASING_DATA
    # Wraps lt.DataOrder.DECREASING_DATA.
    decreasing = lt.DataOrder.DECREASING_DATA
    # Wraps lt.DataOrder.UNORDERED_DATA.
    unordered = lt.DataOrder.UNORDERED_DATA
py::cast(py::eval("repr(_v)", py::globals(), locals)); 24 | } 25 | 26 | __attribute__((used)) static std::string pyrepr(py::object o) { 27 | auto locals = py::dict("_v"_a = o); 28 | return py::cast(py::eval("repr(_v)", py::globals(), locals)); 29 | } 30 | 31 | __attribute__((used)) static void pycall1( 32 | const char* expr, pybind11::object o = py::none()) { 33 | // this doesn't work in lldb 34 | // py::scoped_interpreter guard{}; 35 | 36 | /* 37 | * NOTE: the catch statements below do not work in lldb, because exceptions 38 | * are trapped internally. So, an error in eval currently breaks 39 | * use of this function until the process is restarted. 40 | */ 41 | 42 | // usage: given some py::object 'o', exec a string w/ 'local _v'==o, e.g.: 43 | // (lldb) p pycall1("_v.shape", o) 44 | 45 | py::object res = py::none(); 46 | try { 47 | if (!o.is(py::none())) { 48 | auto locals = py::dict("_v"_a = o); 49 | res = py::eval(expr, py::globals(), locals); 50 | } else { 51 | res = py::eval(expr, py::globals()); 52 | } 53 | if (!res.is(py::none())) { 54 | py::print(res); 55 | } 56 | } catch (py::error_already_set& e) { 57 | std::cout << "pycall error_already_set: " << std::endl; 58 | } catch (std::runtime_error& e) { 59 | std::cout << "pycall runtime_error: " << e.what() << std::endl; 60 | } catch (...) 
class DimLabel(CtxMixin, lt.DimensionLabel):
    """
    Represents a TileDB dimension label.

    The public properties below are read-only views over the underscore
    prefixed attributes provided by ``lt.DimensionLabel``.
    """

    def __repr__(self) -> str:
        dtype = "ascii" if self.isascii else self.dtype
        return (
            f"DimLabel(name={self.name}, dtype='{dtype!s}', "
            f"var={self.isvar!s}, order={self.order!s})"
        )

    def _repr_html_(self):
        """Return an HTML table with a header row and this label's data row."""
        output = io.StringIO()

        output.write("<table>")
        output.write("<tr>")
        output.write("<th>Name</th>")
        output.write("<th>Data Type</th>")
        output.write("<th>Is Var-Len</th>")
        output.write("<th>Data Order</th>")
        output.write("</tr>")
        output.write(f"{self._repr_html_row_only_()}")
        output.write("</table>")

        return output.getvalue()

    def _repr_html_row_only_(self):
        """Return only the ``<tr>`` row for this label, without the table."""
        output = io.StringIO()

        output.write("<tr>")
        output.write(f"<td>{self.name}</td>")
        output.write(f"<td>{'ascii' if self.isascii else self.dtype}</td>")
        output.write(f"<td>{self.isvar}</td>")
        output.write(f"<td>{self.order}</td>")
        output.write("</tr>")

        return output.getvalue()

    @property
    def dim_index(self) -> int:
        """Index of the dimension the labels are for.

        :rtype: int

        """
        return self._dim_index

    @property
    def dtype(self) -> np.dtype:
        """Numpy dtype representation of the label type.

        :rtype: numpy.dtype

        """
        return DataType.from_tiledb(self._tiledb_label_dtype).np_dtype

    @property
    def isvar(self) -> bool:
        """True if the labels are variable length.

        :rtype: bool

        """
        return self._label_ncell == lt.TILEDB_VAR_NUM()

    @property
    def isascii(self) -> bool:
        """True if the label datatype is ``STRING_ASCII``.

        :rtype: bool

        """
        return self._tiledb_label_dtype == lt.DataType.STRING_ASCII

    @property
    def label_attr_name(self) -> str:
        """Name of the attribute storing the label data.

        :rtype: str
        """
        return self._label_attr_name

    @property
    def name(self) -> str:
        """The name of the dimension label.

        :rtype: str

        """
        return self._name

    @property
    def order(self) -> str:
        """The order of the label data in the dimension label.

        This is the name of the ``DataOrder`` member matching the label order.

        :rtype: str
        """
        return DataOrder(self._tiledb_label_order).name

    @property
    def uri(self) -> str:
        """The URI of the array containing the dimension label data.

        :rtype: str

        """
        return self._uri
/**
 * Exception thrown by the TileDB-Py binding code (see TPY_ERROR_LOC).
 *
 * Inherits *publicly* from std::runtime_error. With the default (private)
 * inheritance of `class`, the base-class conversion is inaccessible, so a
 * handler for std::runtime_error or std::exception would not catch this
 * type. Handlers that name TileDBPyError directly are unaffected.
 */
class TileDBPyError : public std::runtime_error {
 public:
  /** Constructs the error from a NUL-terminated message. */
  explicit TileDBPyError(const char* m)
      : std::runtime_error(m) {
  }

  /** Constructs the error from a std::string message. */
  explicit TileDBPyError(const std::string& m)
      : std::runtime_error(m) {
  }

 public:
  /** Returns the message passed at construction. */
  virtual const char* what() const noexcept override {
    return std::runtime_error::what();
  }
};
.def_property_readonly("_num_nodes", &ConsolidationPlan::num_nodes) 26 | .def("_num_fragments", &ConsolidationPlan::num_fragments) 27 | .def("_fragment_uri", &ConsolidationPlan::fragment_uri) 28 | .def("_dump", &ConsolidationPlan::dump); 29 | } 30 | } // namespace libtiledbcpp 31 | -------------------------------------------------------------------------------- /tiledb/libtiledb/context.cc: -------------------------------------------------------------------------------- 1 | #include // C++ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace libtiledbcpp { 9 | 10 | using namespace tiledb; 11 | namespace py = pybind11; 12 | 13 | void init_context(py::module& m) { 14 | py::class_(m, "Context") 15 | .def(py::init()) 16 | .def(py::init()) 17 | .def(py::init()) 18 | .def(py::init()) 19 | 20 | .def( 21 | "__capsule__", 22 | [](Context& ctx) { return py::capsule(ctx.ptr().get(), "ctx"); }) 23 | 24 | .def("config", &Context::config) 25 | .def("set_tag", &Context::set_tag) 26 | .def("get_stats", &Context::stats) 27 | .def("is_supported_fs", &Context::is_supported_fs); 28 | } 29 | 30 | void init_config(py::module& m) { 31 | py::class_(m, "Config") 32 | .def(py::init()) 33 | .def(py::init()) 34 | .def(py::init>()) 35 | .def(py::init()) 36 | 37 | .def( 38 | "__capsule__", 39 | [](Config& config) { 40 | return py::capsule(config.ptr().get(), "config"); 41 | }) 42 | 43 | .def("set", &Config::set) 44 | .def("get", &Config::get) 45 | .def( 46 | "update", 47 | [](Config& cfg, py::dict& odict) { 48 | for (auto item : odict) { 49 | cfg.set( 50 | item.first.cast(), 51 | item.second.cast()); 52 | } 53 | }) 54 | 55 | .def("save_to_file", &Config::save_to_file) 56 | .def("__eq__", &Config::operator==) 57 | .def("__ne__", &Config::operator!=) 58 | //.def("_ptr", &Config::ptr) // TBD should this be capsule? 
59 | .def( 60 | "__setitem__", 61 | [](Config& cfg, std::string& param, std::string& val) { 62 | cfg[param] = val; 63 | }) 64 | .def( 65 | "__getitem__", 66 | [](const Config& cfg, std::string& param) { 67 | try { 68 | return cfg.get(param); 69 | } catch (TileDBError& e) { 70 | throw py::key_error(); 71 | } 72 | }) 73 | .def( 74 | "__delitem__", 75 | [](Config& cfg, const std::string& param) { 76 | try { 77 | cfg.unset(param); 78 | } catch (TileDBError& e) { 79 | throw py::key_error(); 80 | } 81 | }) 82 | .def( 83 | "_iter", 84 | [](Config& cfg, std::string prefix) { 85 | return py::make_iterator(cfg.begin(prefix), cfg.end()); 86 | }, 87 | py::keep_alive<0, 1>(), 88 | py::arg("prefix") = "") 89 | .def("unset", &Config::unset); 90 | } 91 | }; // namespace libtiledbcpp 92 | -------------------------------------------------------------------------------- /tiledb/libtiledb/dimension_label.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "common.h" 10 | 11 | namespace libtiledbcpp { 12 | 13 | using namespace tiledb; 14 | using namespace tiledbpy::common; 15 | namespace py = pybind11; 16 | 17 | void init_dimension_label(py::module& m) { 18 | py::class_(m, "DimensionLabel") 19 | .def(py::init()) 20 | 21 | .def(py::init(), py::keep_alive<1, 2>()) 22 | 23 | .def( 24 | "__capsule__", 25 | [](DimensionLabel& dim_label) { 26 | return py::capsule(dim_label.ptr().get(), "dim_label"); 27 | }) 28 | 29 | .def_property_readonly( 30 | "_label_attr_name", &DimensionLabel::label_attr_name) 31 | 32 | .def_property_readonly("_dim_index", &DimensionLabel::dimension_index) 33 | 34 | .def_property_readonly( 35 | "_tiledb_label_order", &DimensionLabel::label_order) 36 | 37 | .def_property_readonly( 38 | "_tiledb_label_dtype", &DimensionLabel::label_type) 39 | 40 | .def_property_readonly( 41 | "_label_ncell", &DimensionLabel::label_cell_val_num) 42 | 43 | 
.def_property_readonly("_name", &DimensionLabel::name) 44 | 45 | .def_property_readonly("_uri", &DimensionLabel::uri); 46 | } 47 | 48 | }; // namespace libtiledbcpp 49 | -------------------------------------------------------------------------------- /tiledb/libtiledb/object.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "common.h" 9 | 10 | namespace libtiledbcpp { 11 | 12 | using namespace tiledb; 13 | using namespace tiledbpy::common; 14 | namespace py = pybind11; 15 | 16 | void init_object(py::module& m) { 17 | py::class_(m, "Object") 18 | .def(py::init< 19 | const Object::Type&, 20 | const std::string&, 21 | const std::optional&>()) 22 | .def(py::init< 23 | tiledb_object_t, 24 | const std::string&, 25 | const std::optional&>()) 26 | 27 | .def_property_readonly("_type", &Object::type) 28 | .def_property_readonly("_uri", &Object::uri) 29 | .def_property_readonly("_name", &Object::name) 30 | .def("__repr__", &Object::to_str) 31 | 32 | .def_static("_object", &Object::object) 33 | .def_static("_remove", &Object::remove) 34 | .def_static("_move", &Object::move); 35 | } 36 | 37 | } // namespace libtiledbcpp 38 | -------------------------------------------------------------------------------- /tiledb/libtiledb/query.cc: -------------------------------------------------------------------------------- 1 | #include // C++ 2 | #include 3 | 4 | #include "common.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | // #pragma clang diagnostic ignored "-Wdeprecated-declarations" 12 | // #pragma gcc diagnostic ignored "-Wdeprecated-declarations" 13 | 14 | namespace libtiledbcpp { 15 | 16 | using namespace tiledb; 17 | using namespace std; 18 | namespace py = pybind11; 19 | 20 | void init_query(py::module& m) { 21 | py::class_(m, "Query") 22 | 23 | //.def(py::init()) 25 | 26 | .def( 27 | py::init(), 28 | py::keep_alive<1, 2>() /* Keep 
context alive. */, 29 | py::keep_alive<1, 3>() /* Keep array alive. */) 30 | 31 | .def( 32 | py::init(), 33 | py::keep_alive<1, 2>() /* Keep context alive. */, 34 | py::keep_alive<1, 3>() /* Keep array alive. */) 35 | 36 | // TODO .def("ptr", [&]() -> py::capsule) 37 | 38 | .def_property("layout", &Query::query_layout, &Query::set_layout) 39 | 40 | .def_property_readonly("query_type", &Query::query_type) 41 | 42 | .def_property_readonly( 43 | "_subarray", 44 | [](Query& query) { 45 | // TODO: Before merge make sure the lifetime of 46 | // the resulting subarray is not tied to this 47 | // query. 48 | Subarray subarray(query.ctx(), query.array()); 49 | query.update_subarray_from_query(&subarray); 50 | return subarray; 51 | }) 52 | 53 | // TODO .def("array") -> Array& 54 | 55 | .def("has_results", &Query::has_results) 56 | 57 | .def( 58 | "is_complete", 59 | [](const Query& query) { 60 | return query.query_status() == Query::Status::COMPLETE; 61 | }) 62 | 63 | .def("finalize", &Query::finalize) 64 | 65 | .def("fragment_num", &Query::fragment_num) 66 | 67 | .def("fragment_uri", &Query::fragment_uri) 68 | 69 | .def("fragment_timestamp_range", &Query::fragment_timestamp_range) 70 | 71 | .def("query_status", &Query::query_status) 72 | 73 | .def("set_condition", &Query::set_condition) 74 | 75 | //.def("set_data_buffer", 76 | // (Query& (Query::*)(const std::string&, void*, 77 | // uint64_t))&Query::set_data_buffer); 78 | 79 | .def( 80 | "set_data_buffer", 81 | [](Query& q, std::string name, py::array a, uint64_t nelements) { 82 | QueryExperimental::set_data_buffer( 83 | q, name, const_cast(a.data()), nelements); 84 | }) 85 | 86 | .def( 87 | "set_offsets_buffer", 88 | [](Query& q, std::string name, py::array a, uint64_t nelements) { 89 | q.set_offsets_buffer(name, (uint64_t*)(a.data()), nelements); 90 | }) 91 | 92 | .def( 93 | "set_subarray", 94 | [](Query& query, const Subarray& subarray) { 95 | return query.set_subarray(subarray); 96 | }) 97 | 98 | .def( 99 | 
"set_validity_buffer", 100 | [](Query& q, std::string name, py::array a, uint64_t nelements) { 101 | q.set_validity_buffer(name, (uint8_t*)(a.data()), nelements); 102 | }) 103 | 104 | .def( 105 | "_submit", &Query::submit, py::call_guard()) 106 | 107 | /** hackery from another branch... */ 108 | //.def("set_fragment_uri", &Query::set_fragment_uri) 109 | //.def("unset_buffer", &Query::unset_buffer) 110 | //.def("set_continuation", [](Query& q) { 111 | // q.ctx().handle_error( 112 | // tiledb_query_set_continuation(q.ctx().ptr().get(), q.ptr().get()) 113 | // ); 114 | //}) 115 | ; 116 | } 117 | 118 | } // namespace libtiledbcpp 119 | -------------------------------------------------------------------------------- /tiledb/libtiledb/tiledbcpp.cc: -------------------------------------------------------------------------------- 1 | #include // C++ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "common.h" 8 | 9 | namespace libtiledbcpp { 10 | 11 | using namespace tiledb; 12 | namespace py = pybind11; 13 | 14 | template 15 | using overload_cast_ = pybind11::detail::overload_cast_impl; 16 | 17 | void init_array(py::module&); 18 | void init_attribute(py::module&); 19 | void init_context(py::module&); 20 | void init_config(py::module&); 21 | void init_consolidation_plan(py::module& m); 22 | void init_current_domain(py::module& m); 23 | void init_enums(py::module&); 24 | void init_enumeration(py::module&); 25 | void init_dimension_label(py::module& m); 26 | void init_domain(py::module& m); 27 | void init_file_handle(py::module&); 28 | void init_filestore(py::module& m); 29 | void init_filter(py::module&); 30 | void init_group(py::module&); 31 | void init_object(py::module& m); 32 | void init_query(py::module& m); 33 | void init_schema(py::module&); 34 | void init_subarray(py::module&); 35 | void init_vfs(py::module& m); 36 | 37 | PYBIND11_MODULE(libtiledb, m) { 38 | init_array(m); 39 | init_attribute(m); 40 | init_context(m); 41 | init_config(m); 42 | 
init_consolidation_plan(m); 43 | init_current_domain(m); 44 | init_dimension_label(m); 45 | init_domain(m); 46 | init_enums(m); 47 | init_enumeration(m); 48 | init_file_handle(m); 49 | init_filestore(m); 50 | init_filter(m); 51 | init_group(m); 52 | init_object(m); 53 | init_query(m); 54 | init_schema(m); 55 | init_subarray(m); 56 | init_vfs(m); 57 | 58 | m.def("version", []() { 59 | int major = 0; 60 | int minor = 0; 61 | int rev = 0; 62 | tiledb_version(&major, &minor, &rev); 63 | return std::make_tuple(major, minor, rev); 64 | }); 65 | 66 | py::register_exception(m, "TileDBError"); 67 | 68 | /* 69 | We need to make sure C++ TileDBError is translated to a correctly-typed py 70 | error. Note that using py::exception(..., "TileDBError") creates a new 71 | exception in the *readquery* module, so we must import to reference. 72 | */ 73 | py::register_exception_translator([](std::exception_ptr p) { 74 | auto tiledb_py_error = (py::object)py::module::import("tiledb").attr( 75 | "TileDBError"); 76 | 77 | try { 78 | if (p) 79 | std::rethrow_exception(p); 80 | } catch (const TileDBPyError& e) { 81 | PyErr_SetString(tiledb_py_error.ptr(), e.what()); 82 | } catch (const tiledb::TileDBError& e) { 83 | PyErr_SetString(tiledb_py_error.ptr(), e.what()); 84 | } catch (py::builtin_exception& e) { 85 | throw; 86 | }; 87 | }); 88 | } 89 | 90 | }; // namespace libtiledbcpp 91 | -------------------------------------------------------------------------------- /tiledb/main.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace tiledbpy { 4 | 5 | namespace py = pybind11; 6 | 7 | void init_core(py::module&); 8 | // void _debug(py::module &); 9 | void init_fragment(py::module&); 10 | // void init_query_condition(py::module &); 11 | void init_schema_evolution(py::module&); 12 | #if defined(TILEDB_SERIALIZATION) 13 | void init_serialization(py::module&); 14 | void init_test_serialization(py::module&); 15 | #endif 16 | void 
class NDRectangle(CtxMixin, lt.NDRectangle):
    """
    Represents a TileDB N-Dimensional Rectangle.
    """

    def __init__(self, ctx: Ctx, domain: Domain):
        """Create an N-dimensional rectangle for a TileDB Domain.

        :param ctx: A TileDB Context
        :param domain: A TileDB Domain
        :raises tiledb.TileDBError:
        """
        super().__init__(ctx, domain)

    def __str__(self) -> str:
        # One "<dimension name>: <range>" entry per dimension, in index order.
        entries = []
        for dim_idx in range(self._domain.ndim):
            dim_name = self._domain.dim(dim_idx).name
            entries.append(f"{dim_name}: {self.range(dim_idx)}")
        joined = ", ".join(entries)
        return f"NDRectangle({joined})"

    def set_range(
        self,
        dim: Union[str, int],
        start: Union[int, float, str],
        end: Union[int, float, str],
    ):
        """Assign the range ``[start, end]`` to one dimension.

        :param dim: Dimension name or index
        :param start: Range start value
        :param end: Range end value
        :raises tiledb.TileDBError:
        """
        self._set_range(dim, start, end)

    def range(
        self, dim: Union[str, int]
    ) -> Union[Tuple[int, int], Tuple[float, float], Tuple[str, str]]:
        """Look up the range currently set on one dimension.

        :param dim: Dimension name or index
        :return: Range as a tuple (start, end)
        :raises tiledb.TileDBError:
        """
        dim_range = self._range(dim)
        return dim_range
def from_parquet(uri, parquet_uri):
    """Load a Parquet source with pandas and hand it to ``tiledb.from_pandas``.

    :param uri: Passed to ``tiledb.from_pandas`` as its first argument.
    :param parquet_uri: Location given to ``pandas.read_parquet``.
    """
    # pandas is imported lazily, only when this function is called.
    import pandas

    frame = pandas.read_parquet(parquet_uri)
    tiledb.from_pandas(uri, frame)
28 | * 29 | * @section DESCRIPTION 30 | * 31 | * This file defines the experimental TileDB interoperation with Apache Arrow. 32 | */ 33 | 34 | #include 35 | 36 | #include 37 | 38 | namespace tiledb { 39 | namespace arrow { 40 | 41 | class ArrowImporter; 42 | class ArrowExporter; 43 | 44 | /** 45 | * Adapter to export TileDB (read) Query results to Apache Arrow buffers 46 | * and import Arrow buffers into a TileDB (write) Query. 47 | * 48 | * This adapter exports buffers conforming to the Arrow C Data Interface 49 | * as documented at: 50 | * 51 | * https://arrow.apache.org/docs/format/CDataInterface.html 52 | * 53 | */ 54 | 55 | class ArrowAdapter { 56 | public: 57 | /* Constructs an ArrowAdapter wrapping the given TileDB C++ Query */ 58 | ArrowAdapter(Context* ctx, Query* query); 59 | ~ArrowAdapter(); 60 | 61 | /** 62 | * Exports named Query buffer to ArrowArray/ArrowSchema struct pair, 63 | * as defined in the Arrow C Data Interface. 64 | * 65 | * @param name The name of the buffer to export. 66 | * @param arrow_array Pointer to pre-allocated ArrowArray struct 67 | * @param arrow_schema Pointer to pre-allocated ArrowSchema struct 68 | * @throws tiledb::TileDBError with error-specific message. 69 | */ 70 | typedef void (*release_cb)(void*); 71 | void export_buffer(const char* name, void* arrow_array, void* arrow_schema, 72 | release_cb cb, void* private_data); 73 | 74 | /** 75 | * Set named Query buffer from ArrowArray/ArrowSchema struct pair 76 | * representing external data buffers conforming to the 77 | * Arrow C Data Interface. 78 | * 79 | * @param name The name of the buffer to export. 80 | * @param arrow_array Pointer to pre-allocated ArrowArray struct 81 | * @param arrow_schema Pointer to pre-allocated ArrowSchema struct 82 | * @throws tiledb::TileDBError with error-specific message. 
class ArraySchemaEvolution:
    """Collects changes to the ArraySchema of a TileDB array and applies
    them in place.

    Every call is forwarded to the ``ArraySchemaEvolution`` object imported
    from ``.main`` and kept on ``self.ase``. The add/drop/extend/expand
    methods only record a change; the changes are applied when
    :meth:`array_evolve` is called.
    """

    def __init__(self, ctx: Optional[tiledb.Ctx] = None):
        # Use the default context when no (truthy) context is supplied.
        if not ctx:
            ctx = tiledb.default_ctx()

        self.ase = ASE(ctx)

    def add_attribute(self, attr: tiledb.Attr):
        """Record the addition of ``attr`` in the schema evolution plan.
        Nothing is changed until `ArraySchemaEvolution.array_evolve` is
        called."""
        evolution = self.ase
        evolution.add_attribute(attr)

    def drop_attribute(self, attr_name: str):
        """Record the removal of the attribute called ``attr_name``.
        Nothing is changed until `ArraySchemaEvolution.array_evolve` is
        called."""
        evolution = self.ase
        evolution.drop_attribute(attr_name)

    def add_enumeration(self, enmr: Enumeration):
        """Record the addition of the enumeration ``enmr``.
        Nothing is changed until `ArraySchemaEvolution.array_evolve` is
        called."""
        evolution = self.ase
        evolution.add_enumeration(enmr)

    def drop_enumeration(self, enmr_name: str):
        """Record the removal of the enumeration called ``enmr_name``.
        Nothing is changed until `ArraySchemaEvolution.array_evolve` is
        called."""
        evolution = self.ase
        evolution.drop_enumeration(enmr_name)

    def extend_enumeration(self, enmr: Enumeration):
        """Record the extension of an existing enumeration (matched by name).
        Nothing is changed until `ArraySchemaEvolution.array_evolve` is
        called."""
        evolution = self.ase
        evolution.extend_enumeration(enmr)

    # Only defined when the linked TileDB library reports version >= 2.26.
    if tiledb.libtiledb.version() >= (2, 26):
        from .current_domain import CurrentDomain

        def expand_current_domain(self, current_domain: CurrentDomain):
            """Record an expansion of the current domain.
            Nothing is changed until `ArraySchemaEvolution.array_evolve` is
            called."""
            evolution = self.ase
            evolution.expand_current_domain(current_domain)

    def array_evolve(self, uri: str):
        """Apply the recorded schema evolution actions to the array at ``uri``."""
        evolution = self.ase
        evolution.array_evolve(uri)

    def timestamp(self, timestamp: int):
        """Set the timestamp of the schema file.

        :raises ValueError: if ``timestamp`` is not an int
        """
        if isinstance(timestamp, int):
            self.ase.set_timestamp_range(timestamp)
            return

        raise ValueError("'timestamp' argument expects int")
ctx, 28 | py::object array, 29 | py::buffer buffer, 30 | tiledb_serialization_type_t serialize_type, 31 | int32_t client_side) { 32 | int rc; 33 | 34 | tiledb_ctx_t* ctx_c; 35 | tiledb_array_t* arr_c; 36 | tiledb_query_t* qry_c; 37 | tiledb_buffer_t* buf_c; 38 | 39 | ctx_c = (py::capsule)ctx.attr("__capsule__")(); 40 | if (ctx_c == nullptr) 41 | TPY_ERROR_LOC("Invalid context pointer."); 42 | 43 | arr_c = (py::capsule)array.attr("__capsule__")(); 44 | if (arr_c == nullptr) 45 | TPY_ERROR_LOC("Invalid array pointer."); 46 | 47 | rc = tiledb_query_alloc(ctx_c, arr_c, TILEDB_READ, &qry_c); 48 | if (rc == TILEDB_ERR) 49 | TPY_ERROR_LOC("Could not allocate query."); 50 | 51 | rc = tiledb_buffer_alloc(ctx_c, &buf_c); 52 | if (rc == TILEDB_ERR) 53 | TPY_ERROR_LOC("Could not allocate buffer."); 54 | 55 | py::buffer_info buf_info = buffer.request(); 56 | rc = tiledb_buffer_set_data( 57 | ctx_c, buf_c, buf_info.ptr, buf_info.shape[0]); 58 | if (rc == TILEDB_ERR) 59 | TPY_ERROR_LOC("Could not set buffer."); 60 | 61 | rc = tiledb_deserialize_query( 62 | ctx_c, buf_c, serialize_type, client_side, qry_c); 63 | if (rc == TILEDB_ERR) 64 | TPY_ERROR_LOC("Could not deserialize query."); 65 | 66 | return qry_c; 67 | } 68 | }; 69 | 70 | void init_serialization(py::module& m) { 71 | py::class_(m, "serialization") 72 | .def_static("deserialize_query", &PySerialization::deserialize_query); 73 | 74 | py::enum_( 75 | m, "tiledb_serialization_type_t", py::arithmetic()) 76 | .value("TILEDB_CAPNP", TILEDB_CAPNP) 77 | .value("TILEDB_JSON", TILEDB_JSON) 78 | .export_values(); 79 | } 80 | 81 | }; // namespace tiledbpy 82 | -------------------------------------------------------------------------------- /tiledb/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TileDB-Inc/TileDB-Py/28714d9b25d44d6c6c1f318525184d3784b7de00/tiledb/tests/__init__.py 
def test_lt_schema_create(text_fname):
    """Check that ``Filestore._schema_create`` returns an ``ArraySchema``.

    ``text_fname`` is the fixture-provided path of a small text file; it is
    passed to ``_schema_create`` together with a fresh context.
    """
    ctx = lt.Context()
    schema = lt.Filestore._schema_create(ctx, text_fname)
    # isinstance is the idiomatic type check (avoids the E721 type-comparison
    # lint) and still fails if anything other than an ArraySchema comes back.
    assert isinstance(schema, lt.ArraySchema)
def test_mime_type():
    """Check both directions of the MIME type <-> string conversion."""
    names_by_mime = {
        lt.MIMEType.AUTODETECT: "AUTODETECT",
        lt.MIMEType.TIFF: "image/tiff",
        lt.MIMEType.PDF: "application/pdf",
    }

    # enum value -> string
    for mime, expected_name in names_by_mime.items():
        assert lt.Filestore._mime_type_to_str(mime) == expected_name

    mimes_by_name = {
        "AUTODETECT": lt.MIMEType.AUTODETECT,
        "image/tiff": lt.MIMEType.TIFF,
        "application/pdf": lt.MIMEType.PDF,
    }

    # string -> enum value
    for name, expected_mime in mimes_by_name.items():
        assert lt.Filestore._mime_type_from_str(name) == expected_mime
"test_group_metadata") 45 | lt.Group._create(ctx, grp_path) 46 | grp = lt.Group(ctx, grp_path, lt.QueryType.WRITE) 47 | 48 | subgrp_path = os.path.join(tmp_path, "test_group_0") 49 | lt.Group._create(ctx, subgrp_path) 50 | grp._add(subgrp_path) 51 | grp._close() 52 | 53 | grp._open(lt.QueryType.READ) 54 | assert grp._member_count() == 1 55 | member = grp._member(0) 56 | assert os.path.basename(member._uri) == os.path.basename(subgrp_path) 57 | assert member._type == lt.ObjectType.GROUP 58 | grp._close() 59 | 60 | grp._open(lt.QueryType.WRITE) 61 | grp._remove(subgrp_path) 62 | grp._close() 63 | 64 | grp = lt.Group(ctx, grp_path, lt.QueryType.READ) 65 | assert grp._member_count() == 0 66 | grp._close() 67 | -------------------------------------------------------------------------------- /tiledb/tests/cc/test_vfs.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import tiledb.libtiledb as lt 4 | 5 | 6 | def test_dir(tmp_path): 7 | ctx = lt.Context() 8 | vfs = lt.VFS(ctx) 9 | 10 | path = os.path.join(tmp_path, "test_dir") 11 | 12 | vfs._create_dir(path) 13 | assert vfs._is_dir(path) is True 14 | assert vfs._dir_size(path) == 0 15 | vfs._remove_dir(path) 16 | assert vfs._is_dir(path) is False 17 | 18 | 19 | def test_file_handle(tmp_path): 20 | ctx = lt.Context() 21 | vfs = lt.VFS(ctx) 22 | 23 | path = os.path.join(tmp_path, "test_file_handle") 24 | 25 | fh = lt.FileHandle(ctx, vfs, path, lt.VFSMode.WRITE) 26 | fh._write(b"Hello") 27 | 28 | fh = lt.FileHandle(ctx, vfs, path, lt.VFSMode.READ) 29 | assert fh._read(0, 5) == b"Hello" 30 | 31 | fh = lt.FileHandle(ctx, vfs, path, lt.VFSMode.APPEND) 32 | fh._write(b", world!") 33 | 34 | fh = lt.FileHandle(ctx, vfs, path, lt.VFSMode.READ) 35 | assert fh._read(0, 13) == b"Hello, world!" 
def check_csv_roundtrip(input_csv):
    """Round-trip one CSV file through TileDB and compare it with pandas.

    The file is ingested with ``tiledb.from_csv`` into an array inside a
    fresh temporary directory, read back with ``tiledb.open_dataframe`` and
    compared against ``pandas.read_csv`` of the same file.

    :param input_csv: path of the CSV file to check
    :return: ``True`` when the frames are equal
    :raises AssertionError: (from ``assert_frame_equal``) when they differ
    """
    basename = os.path.basename(input_csv)

    # TemporaryDirectory creates the directory atomically (mktemp followed by
    # mkdir is racy and deprecated) and removes it again when the check is
    # done, so repeated runs no longer pile up "csvtest-*" directories.
    with tempfile.TemporaryDirectory(prefix="csvtest-" + basename) as tmp:
        array_uri = os.path.join(tmp, "tiledb_from_csv")
        tiledb.from_csv(array_uri, input_csv)

        df_csv = pd.read_csv(input_csv)
        df_back = tiledb.open_dataframe(array_uri)

        tm.assert_frame_equal(df_csv, df_back)
    return True


def check_csv_dir(path):
    """Run :func:`check_csv_roundtrip` on every ``*.csv`` file in ``path``.

    The first failing file aborts the run by raising; nothing is returned.
    """
    files = glob(os.path.join(path, "*.csv"))
    res = [check_csv_roundtrip(f) for f in files]

    assert len(res) == len(files), "Failed to check all files!"


if __name__ == "__main__":
    if len(sys.argv) != 2:
        # Stop here: previously the script printed the hint and then went on
        # to index sys.argv anyway (IndexError, or a run on the wrong args).
        sys.exit("expected one argument: path to CSV directory")

    check_csv_dir(sys.argv[1])
@pd.api.extensions.register_extension_dtype
class RaggedDtype(pd.api.extensions.ExtensionDtype):
    """Pandas extension dtype whose elements are NumPy arrays.

    ``subtype`` is the NumPy dtype of each element array. The string form
    is ``"Ragged[<subtype>]"``.
    """

    type = np.ndarray
    na_value = None

    def __init__(self, subtype=np.float64):
        # Normalise whatever was given (type object, string, dtype) once.
        self.subtype = np.dtype(subtype)

    @property
    def name(self):
        """String identifier of the dtype, e.g. ``"Ragged[float64]"``."""
        return f"Ragged[{self.subtype}]"

    @classmethod
    def construct_array_type(cls):
        """Return the array class associated with this dtype."""
        return RaggedArray

    @classmethod
    def construct_from_string(cls, string):
        """Build a dtype from ``"ragged"`` or ``"ragged[<subtype>]"``.

        Matching is case-insensitive; a bare ``"ragged"`` uses the default
        subtype.

        :raises TypeError: if ``string`` is not a ``str`` or does not
            describe a ragged dtype
        """
        # The pandas contract for this hook is "raise TypeError when the
        # dtype cannot be constructed"; without this guard a non-string
        # input surfaced as AttributeError from ``.lower()``.
        if not isinstance(string, str):
            raise TypeError(
                f"'construct_from_string' expects a string, got {type(string)}"
            )
        if string.lower() == "ragged":
            return cls()
        match = re.match(r"^ragged\[(\w+)\]$", string, re.IGNORECASE)
        if match:
            return cls(match.group(1))
        raise TypeError(f"Cannot construct a 'RaggedDtype' from '{string}'")
class DenseWrite:
    """Benchmark writing a dense NumPy array into a new TileDB array.

    Every ``time_write`` call writes into its own temporary directory; the
    directories are remembered and removed again in ``teardown`` so that
    benchmark runs do not fill up the temp location.
    """

    params = [
        (100, 500),
        (1000, 100000),
    ]
    # Kept for backward compatibility; ``setup`` installs a per-instance
    # list so directories are never tracked in this shared class attribute.
    paths = []

    def setup(self, *shape):
        self.array = np.random.rand(*shape)
        self.paths = []

    def time_write(self, *shape):
        path = tempfile.mkdtemp()
        # Record before writing so the directory is cleaned up even if the
        # write itself fails.
        self.paths.append(path)
        tiledb.from_numpy(path, self.array)

    def teardown(self, *shape):
        # Removal happens here rather than in time_write so that it is not
        # part of the timed section.
        for path in self.paths:
            shutil.rmtree(path, ignore_errors=True)
        self.paths = []
class MultiIndex:
    """Benchmark ``multi_index`` point reads on a 1-D sparse array.

    ``setup`` creates a sparse array with ``ncoords`` distinct random int64
    coordinates; ``time_multiindex_read`` then reads a random subset whose
    size is the benchmark parameter.
    """

    params = [10, 100, 1000, 10_000, 100_000]

    def setup(self, _):
        self.uri = tempfile.mkdtemp()

        self.dmin = -10_000_000
        self.dmax = 10_000_000
        self.ncoords = 3_000_000

        schema = tiledb.ArraySchema(
            tiledb.Domain([tiledb.Dim(dtype=np.int64, domain=(self.dmin, self.dmax))]),
            attrs=[
                tiledb.Attr(name="", dtype="float64", var=False, nullable=False),
            ],
            cell_order="row-major",
            tile_order="row-major",
            capacity=1000,
            sparse=True,
        )

        tiledb.Array.create(self.uri, schema)

        # use `choice` here because randint doesn't support non-replacement
        self.coords = np.random.choice(
            np.arange(self.dmin, self.dmax + 1), size=self.ncoords, replace=False
        )

        with tiledb.open(self.uri, "w") as A:
            A[self.coords] = np.random.rand(self.ncoords)

    def time_multiindex_read(self, coords_count):
        coords = np.random.choice(self.coords, size=coords_count, replace=False)

        with tiledb.open(self.uri) as A:
            A.multi_index[list(coords)]

    def teardown(self, _):
        """Remove the temporary array directory created by ``setup``."""
        # Imported locally so this block does not depend on a new
        # module-level import.
        import shutil

        uri = getattr(self, "uri", None)
        if uri is not None:
            shutil.rmtree(uri, ignore_errors=True)
@composite
def ranged_slices(draw, min_value=0, max_value=10):
    """Strategy producing ``slice`` objects with bounded components.

    Each of start, stop and step is drawn independently and is either
    ``None`` or an integer in ``[min_value, max_value]``.
    """
    component = st.one_of(
        st.none(),
        st.integers(min_value=min_value, max_value=max_value),
    )
    # Tuple elements are evaluated left to right, so the draw order stays
    # start, stop, step.
    start, stop, step = draw(component), draw(component), draw(component)
    return slice(start, stop, step)
def test_dont_import_pandas() -> None:
    """Verifies that when we import TileDB, we don't import Pandas eagerly."""

    # tiledb-py imports tiledb.cloud when it is available (to hook
    # Array.apply and other functionality). When the installed tiledb-cloud
    # would itself pull in Pandas eagerly, block its import in the child
    # interpreter so that only tiledb-py's own behavior is measured.
    if tiledb_cloud_eagerly_imports_pandas():
        suppress_cloud = "sys.modules['tiledb.cloud'] = None;"
    else:
        suppress_cloud = ""

    # Import tiledb in a completely fresh interpreter and print every module
    # that ended up loaded.
    program = f"import sys; {suppress_cloud} import tiledb; print(list(sys.modules))"
    raw_listing = subprocess.check_output((sys.executable, "-c", program))

    loaded_modules = ast.literal_eval(raw_listing.decode())
    assert "pandas" not in loaded_modules
class ConsolidationPlanTest(DiskTestCase):
    """Checks of ``tiledb.ConsolidationPlan`` on a small dense array."""

    def test_consolidation_plan(self):
        """Inspect the plan for one fragment, then again after a second write."""
        # Import the submodule explicitly: a bare ``import xml`` does not
        # guarantee that ``xml.etree.ElementTree`` is loaded.
        import xml.etree.ElementTree as ET

        path = self.path("test_consolidation_plan")

        array = np.random.rand(4)
        tiledb.from_numpy(path, array)

        with tiledb.open(path, "r") as A:
            cons_plan = tiledb.ConsolidationPlan(tiledb.default_ctx(), A, 2)
            assert cons_plan.num_nodes == 1
            assert cons_plan.num_nodes == len(cons_plan)
            assert cons_plan.num_fragments(0) == 1

            # Parse the JSON dump once and reuse it for all structural checks.
            plan = json.loads(cons_plan.dump())
            # check that it has a nodes key
            assert "nodes" in plan
            # check that it has a list of nodes
            assert isinstance(plan["nodes"], list)
            # check that each node has a uri key
            for node in plan["nodes"]:
                assert "uri" in node["uris"][0]

            # test _repr_html_: only a genuine parse error is reported as
            # "could not parse"; any other exception propagates with its own
            # traceback instead of being masked by a bare ``except``.
            html = cons_plan._repr_html_()
            try:
                parsed = ET.fromstring(html)
            except ET.ParseError:
                pytest.fail(f"Could not parse cons_plan._repr_html_(). Saw {html}")
            assert parsed is not None

            # test __getitem__
            assert cons_plan[0] == {
                "num_fragments": 1,
                "fragment_uris": [cons_plan.fragment_uri(0, 0)],
            }

        # write a second fragment to the array and check the new consolidation plan
        with tiledb.open(path, "w") as A:
            A[:] = np.random.rand(4)

        with tiledb.open(path, "r") as A:
            cons_plan = tiledb.ConsolidationPlan(tiledb.default_ctx(), A, 4)
            assert cons_plan.num_nodes == 1
            assert cons_plan.num_fragments(0) == 2
dtype="u8"), 15 | tiledb.Dim("d2", (1, 4), 2, dtype="u8"), 16 | ] 17 | dom = tiledb.Domain(*dims) 18 | 19 | # check that dumping works 20 | dom.dump() 21 | assert_captured(capfd, "Name: d1") 22 | 23 | self.assertEqual(dom.ndim, 2) 24 | self.assertEqual(dom.dtype, np.dtype("uint64")) 25 | self.assertEqual(dom.shape, (4, 4)) 26 | 27 | # check that we can iterate over the dimensions 28 | dim_names = [dim.name for dim in dom] 29 | self.assertEqual(["d1", "d2"], dim_names) 30 | 31 | # check that we can access dim by name 32 | dim_d1 = dom.dim("d1") 33 | self.assertEqual(dim_d1, dom.dim(0)) 34 | 35 | # check that we can construct directly from a List[Dim] 36 | dom2 = tiledb.Domain(dims) 37 | self.assertEqual(dom, dom2) 38 | 39 | try: 40 | assert xml.etree.ElementTree.fromstring(dom._repr_html_()) is not None 41 | except: 42 | pytest.fail(f"Could not parse dom._repr_html_(). Saw {dom._repr_html_()}") 43 | 44 | def test_datetime_domain(self): 45 | dim = tiledb.Dim( 46 | name="d1", 47 | domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), 48 | tile=np.timedelta64(20, "D"), 49 | dtype=np.datetime64("", "D"), 50 | ) 51 | dom = tiledb.Domain(dim) 52 | self.assertEqual(dom, dom) 53 | self.assertEqual(dom.dtype, np.datetime64("", "D")) 54 | 55 | def test_domain_mixed_names_error(self): 56 | with self.assertRaises(tiledb.TileDBError): 57 | tiledb.Domain( 58 | tiledb.Dim("d1", (1, 4), 2, dtype="u8"), 59 | tiledb.Dim("__dim_0", (1, 4), 2, dtype="u8"), 60 | ) 61 | 62 | def test_ascii_domain(self, capfd): 63 | path = self.path("test_ascii_domain") 64 | 65 | dim = tiledb.Dim(name="d", dtype="ascii") 66 | assert dim.dtype == np.bytes_ 67 | 68 | dom = tiledb.Domain(dim) 69 | self.assertEqual(dom, dom) 70 | dom.dump() 71 | assert_captured(capfd, "Type: STRING_ASCII") 72 | 73 | att = tiledb.Attr(name="a", dtype=np.int64) 74 | schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) 75 | tiledb.SparseArray.create(path, schema) 76 | 77 | ascii_coords = ["a", "b", 
class ExamplesTest:
    """Test runnability of scripts in examples/"""

    PROJECT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))

    @pytest.mark.parametrize(
        "path", glob.glob(os.path.join(PROJECT_DIR, "examples", "*.py"))
    )
    def test_examples(self, path):
        """Run one example script and fail if it exits with an error.

        The script runs
        - in a separate process
        - in a temporary directory so the source tree is not polluted
        - with exit status checking (a failing example fails the test)

        Examples that need pandas are skipped when pandas is unavailable.
        """
        requires_pd = [
            os.path.join(self.PROJECT_DIR, "examples", f"{fn}.py")
            for fn in [
                "incomplete_iteration",
                "parallel_csv_ingestion",
                "query_condition_datetime",
            ]
        ]
        if not has_pandas() and path in requires_pd:
            # pytest.skip() raises and really skips the test. The previous
            # pytest.mark.skip(...) call only built a mark object and threw
            # it away, so the test silently passed without running anything.
            pytest.skip("pandas>=1.0,<3.0 not installed")

        with tempfile.TemporaryDirectory() as tmpdir:
            try:
                subprocess.run(
                    [sys.executable, path],
                    cwd=tmpdir,
                    check=True,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    encoding="utf8",
                )
            except subprocess.CalledProcessError as ex:
                pytest.fail(ex.stderr, pytrace=False)

    @pytest.mark.skipif(
        sys.platform == "win32",
        reason="Some doctests are missing a clean-up step on windows",
    )
    def test_docs(self, capsys):
        """Run the doctests embedded in ``tiledb/fragment.py``.

        Failures that are only due to a missing or unsupported pandas turn
        into a skip; any other doctest failure fails the test with the
        captured doctest report.
        """
        path = os.path.join(self.PROJECT_DIR, "tiledb", "fragment.py")
        failures, _ = doctest.testfile(
            path,
            module_relative=False,
            verbose=False,
            optionflags=doctest.NORMALIZE_WHITESPACE,
        )
        if failures:
            # The doctest report is read from the captured standard output.
            report = capsys.readouterr().out
            if "No module named 'pandas'" in report or (
                "Pandas version >= 1.0 and < 3.0 required for dataframe functionality"
                in report
                and not has_pandas()
            ):
                pytest.skip("pandas>=1.0,<3.0 not installed")
            else:
                pytest.fail(report)
| 49 | with self.assertRaises(tiledb.TileDBError): 50 | fs.write(data) 51 | 52 | schema = tiledb.ArraySchema.from_file() 53 | tiledb.Array.create(path, schema) 54 | 55 | assert schema.attr(0).name == "contents" 56 | assert schema.attr(0).dtype == np.bytes_ 57 | 58 | schema.attr(0).dump() 59 | assert_captured(capfd, "Type: BLOB") 60 | 61 | fs = tiledb.Filestore(path) 62 | fs.write(data) 63 | assert data[3:4] == fs.read(offset=3, size=1) 64 | 65 | def test_uri(self, text_fname): 66 | path = self.path("test_uri") 67 | schema = tiledb.ArraySchema.from_file(text_fname) 68 | tiledb.Array.create(path, schema) 69 | 70 | fs = tiledb.Filestore(path) 71 | tiledb.Filestore.copy_from(path, text_fname) 72 | with open(text_fname, "rb") as text: 73 | data = text.read() 74 | assert data == fs.read(0, len(data)) 75 | assert len(fs) == len(data) 76 | 77 | def test_multiple_writes(self): 78 | path = self.path("test_buffer") 79 | schema = tiledb.ArraySchema.from_file() 80 | tiledb.Array.create(path, schema) 81 | 82 | fs = tiledb.Filestore(path) 83 | for i in range(1, 4): 84 | fs.write(("x" * i).encode()) 85 | 86 | assert fs.read() == ("x" * i).encode() 87 | 88 | timestamps = [t[0] for t in tiledb.array_fragments(path).timestamp_range] 89 | for i, ts in enumerate(timestamps, start=1): 90 | with tiledb.open(path, timestamp=ts) as A: 91 | assert A.meta["file_size"] == i 92 | -------------------------------------------------------------------------------- /tiledb/tests/test_fork_ctx.py: -------------------------------------------------------------------------------- 1 | """Tests combining fork with tiledb context threads. 2 | 3 | Background: the core tiledb library uses threads and it's easy to 4 | experience deadlocks when forking a process that is using tiledb. The 5 | project doesn't have a solution for this at the moment other than to 6 | avoid using fork(), which is the same recommendation that Python makes. 
def run_in_subprocess(code):
    """Run a code snippet in a fresh Python interpreter and require success.

    The snippet becomes the body of ``wrapper_func`` inside a generated
    script. That script imports ``os``, ``warnings``, ``tiledb`` and
    ``multiprocessing`` and turns warnings into errors, except for
    ``DeprecationWarning`` and ``UserWarning``, which it ignores.

    Args:
        code: Python source used as the function body. Common leading
            indentation is stripped and the snippet is re-indented, so the
            caller may indent the string literal however reads best.

    Raises:
        AssertionError: if the interpreter exits with a non-zero status. The
            captured stderr of the child is used as the assertion message.
    """
    import textwrap

    # Normalise the snippet so the generated function body is valid no matter
    # how the caller indented its triple-quoted string. An empty snippet
    # becomes ``pass`` so the script still parses.
    body = textwrap.indent(textwrap.dedent(code).strip("\n"), "    ")
    if not body.strip():
        body = "    pass"

    script = f"""
import os
import warnings
import tiledb
import multiprocessing

warnings.simplefilter('error')
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)

def wrapper_func():
{body}

wrapper_func()
"""
    result = subprocess.run(
        [sys.executable, "-c", script], capture_output=True, text=True
    )
    # Report the child's stderr in the failure itself rather than printing it
    # separately.
    assert result.returncode == 0, result.stderr


# Every test below forks, which is impossible on Windows.
requires_fork = pytest.mark.skipif(
    sys.platform == "win32", reason="fork() is not available on Windows"
)


@requires_fork
def test_no_warning_fork_without_ctx():
    """Get no warning if no tiledb context exists."""
    run_in_subprocess(
        """
        pid = os.fork()
        if pid == 0:
            os._exit(0)
        else:
            os.wait()
        """
    )


@requires_fork
def test_warning_fork_with_ctx():
    """Fork after creating a tiledb context.

    The harness ignores ``UserWarning``, so the child interpreter is still
    required to exit with status 0.
    """
    run_in_subprocess(
        """
        _ = tiledb.Ctx()
        pid = os.fork()
        if pid == 0:
            os._exit(0)
        else:
            os.wait()
        """
    )


@requires_fork
def test_warning_fork_with_default_ctx():
    """Fork after creating the default context.

    The harness ignores ``UserWarning``, so the child interpreter is still
    required to exit with status 0.
    """
    run_in_subprocess(
        """
        _ = tiledb.default_ctx()
        pid = os.fork()
        if pid == 0:
            os._exit(0)
        else:
            os.wait()
        """
    )


@requires_fork
def test_no_warning_multiprocessing_without_ctx():
    """Get no warning if no tiledb context exists."""
    run_in_subprocess(
        """
        mp = multiprocessing.get_context("fork")
        p = mp.Process()
        p.start()
        p.join()
        """
    )


@requires_fork
def test_warning_multiprocessing_with_ctx():
    """Start a forked ``multiprocessing`` process after creating a tiledb context.

    The harness ignores ``UserWarning``, so the child interpreter is still
    required to exit with status 0.
    """
    run_in_subprocess(
        """
        _ = tiledb.Ctx()
        mp = multiprocessing.get_context("fork")
        p = mp.Process()
        p.start()
        p.join()
        """
    )


@requires_fork
def test_warning_multiprocessing_with_default_ctx():
    """Start a forked ``multiprocessing`` process after creating the default context.

    The harness ignores ``UserWarning``, so the child interpreter is still
    required to exit with status 0.
    """
    run_in_subprocess(
        """
        _ = tiledb.default_ctx()
        mp = multiprocessing.get_context("fork")
        p = mp.Process()
        p.start()
        p.join()
        """
    )
@pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed")
@pytest.mark.parametrize("mode", ["np", "df"])
@hp.settings(deadline=None, verbosity=hp.Verbosity.verbose)
@hp.given(st.binary())
def test_bytes_npdf(checked_path, mode, data):
    """Round-trip one arbitrary bytes value through a ``mem://`` array.

    Args:
        checked_path: fixture whose ``path()`` supplies the array location.
        mode: ``"np"`` ingests with ``tiledb.from_numpy``; ``"df"`` ingests
            with ``tiledb.from_pandas``.
        data: the bytes value generated by hypothesis.

    The hypothesis deadline is disabled; the function times itself and fails
    when its own body takes longer than two seconds.
    """
    start = time.time()

    uri = "mem://" + checked_path.path()
    hp.note(f"!!! path '{uri}' time: {time.time() - start}")

    array = np.array([data], dtype="S0")

    start_ingest = time.time()
    if mode == "np":
        # Only the side effect (array creation) is needed; the handle is
        # closed again immediately.
        with tiledb.from_numpy(uri, array):
            pass
    else:
        series = pd.Series(array)
        df = pd.DataFrame({"": series})
        # NOTE: ctx required here for mem://
        tiledb.from_pandas(uri, df, sparse=False, ctx=tiledb.default_ctx())

    hp.note(f"{mode} ingest time: {time.time() - start_ingest}")

    # DEBUG: collect stats for the read phase only. Stats collection is a
    # process-wide switch, so it is turned off again even when an assertion
    # below fails; otherwise it would stay enabled for later examples/tests.
    tiledb.stats_enable()
    try:
        tiledb.stats_reset()

        with tiledb.open(uri) as A:
            if mode == "np":
                np.testing.assert_array_equal(A.multi_index[:][""], array)
            else:
                tm.assert_frame_equal(A.df[:], df)

        hp.note(tiledb.stats_dump(print_out=False))
    finally:
        tiledb.stats_disable()

    duration = time.time() - start
    hp.note(f"!!! test_bytes_{mode} duration: {duration}")
    if duration > 2:
        # Hypothesis setup is (maybe) causing deadline exceeded errors
        # https://github.com/TileDB-Inc/TileDB-Py/issues/1194
        # Set deadline=None and use internal timing instead.
        pytest.fail(f"!!! {mode} function body duration exceeded 2s: {duration}")
class ReprTest(DiskTestCase):
    """Check that ``repr()`` of schema objects evaluates back to an equal object."""

    def test_attr_repr(self):
        """An ``Attr`` repr matches the expected text and survives ``eval``."""
        attr = tiledb.Attr(name="itsanattr", dtype=np.float64)
        expected = (
            r"Attr\(name=[u]?'itsanattr', dtype='float64', var=False, "
            r"nullable=False, enum_label=None\)"
        )
        self.assertTrue(re.match(expected, repr(attr)))

        namespace = {}
        exec("from tiledb import Attr; from numpy import float64", namespace)
        rebuilt = eval(repr(attr), namespace)
        self.assertEqual(rebuilt, attr)

    def test_dim_repr(self):
        """``Dim`` reprs round-trip for every non-empty combination of options."""
        opts = {
            None: None,
            "var": True,
            "domain": (None, None),
            "filters": [tiledb.GzipFilter()],
        }

        dim_test_imports = textwrap.dedent(
            """
            from tiledb import Dim, FilterList, GzipFilter
            import numpy
            from numpy import float64
            """
        )

        def check_roundtrip(dim):
            # Evaluate the repr in a namespace holding only the names the
            # repr text is expected to reference.
            namespace = {}
            exec(dim_test_imports, namespace)
            self.assertEqual(eval(repr(dim), namespace), dim)

        option_names = list(opts)
        for dtype in (bytes, np.bytes_):
            for size in range(1, len(opts) + 1):
                for chosen in itertools.combinations(option_names, r=size):
                    # The ``None`` key is a placeholder meaning "no extra
                    # option"; it is falsy and therefore filtered out here.
                    kwargs = {key: opts[key] for key in chosen if key}
                    check_roundtrip(tiledb.Dim(name="d1", dtype=dtype, **kwargs))

        # test datetime
        check_roundtrip(
            tiledb.Dim(
                name="d1",
                domain=(np.datetime64("2010-01-01"), np.datetime64("2020")),
                tile=2,
                dtype=np.datetime64("", "D"),
            )
        )

    def test_arrayschema_repr(self, fx_sparse_cell_order):  # noqa: F811
        """Dense and sparse ``ArraySchema`` reprs evaluate to an equal schema."""
        filters = tiledb.FilterList([tiledb.ZstdFilter(-1)])
        for sparse in (False, True):
            domain = tiledb.Domain(
                tiledb.Dim(domain=(1, 8), tile=2), tiledb.Dim(domain=(1, 8), tile=2)
            )
            value_attr = tiledb.Attr("val", dtype="f8", filters=filters)
            orig_schema = tiledb.ArraySchema(
                domain=domain,
                attrs=(value_attr,),
                sparse=sparse,
                cell_order=fx_sparse_cell_order if sparse else None,
            )

            schema_repr = repr(orig_schema)
            namespace = {}
            exec("from tiledb import *\nimport numpy as np\n", namespace)

            try:
                new_schema = eval(schema_repr, namespace)
            except Exception:
                # Surface the offending repr text before re-raising.
                warnings.warn(
                    "Exception during ReprTest schema eval, schema string was:\n"
                    f"'''\n{schema_repr}\n'''"
                )
                raise

            self.assertEqual(new_schema, orig_schema)

    def test_arrayschema_repr_hilbert(self):
        """A sparse Hilbert-ordered schema reports no tile order."""
        schema = tiledb.ArraySchema(
            domain=tiledb.Domain(tiledb.Dim(domain=(1, 8), tile=2)),
            attrs=(tiledb.Attr("a", dtype="f8"),),
            cell_order="hilbert",
            sparse=True,
        )

        assert schema.cell_order == "hilbert"
        assert schema.tile_order is None
attrs=(a,), cell_order="hilbert", sparse=True 108 | ) 109 | 110 | assert schema.cell_order == "hilbert" 111 | assert schema.tile_order is None 112 | -------------------------------------------------------------------------------- /tiledb/tests/test_serialization.cc: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #define TILEDB_DEPRECATED 10 | #define TILEDB_DEPRECATED_EXPORT 11 | 12 | #include // C 13 | #include // C++ 14 | #include "../util.h" 15 | 16 | #if !defined(NDEBUG) 17 | // #include "debug.cc" 18 | #endif 19 | 20 | namespace tiledbpy { 21 | 22 | using namespace std; 23 | using namespace tiledb; 24 | namespace py = pybind11; 25 | using namespace pybind11::literals; 26 | 27 | class PySerializationTest { 28 | public: 29 | static py::bytes create_serialized_test_query( 30 | py::object pyctx, py::object pyarray) { 31 | int rc; 32 | 33 | tiledb_ctx_t* ctx; 34 | tiledb_array_t* array; 35 | 36 | ctx = (py::capsule)pyctx.attr("__capsule__")(); 37 | if (ctx == nullptr) 38 | TPY_ERROR_LOC("Invalid context pointer."); 39 | 40 | tiledb_ctx_alloc(NULL, &ctx); 41 | array = (py::capsule)pyarray.attr("__capsule__")(); 42 | if (array == nullptr) 43 | TPY_ERROR_LOC("Invalid array pointer."); 44 | 45 | uint32_t subarray_v[] = {3, 7}; 46 | int64_t data[5]; 47 | uint64_t data_size = sizeof(data); 48 | 49 | tiledb_subarray_t* subarray; 50 | tiledb_subarray_alloc(ctx, array, &subarray); 51 | tiledb_subarray_set_subarray(ctx, subarray, &subarray_v); 52 | 53 | tiledb_query_t* query; 54 | tiledb_query_alloc(ctx, array, TILEDB_READ, &query); 55 | tiledb_query_set_subarray_t(ctx, query, subarray); 56 | tiledb_query_set_layout(ctx, query, TILEDB_UNORDERED); 57 | tiledb_query_set_data_buffer(ctx, query, "", data, &data_size); 58 | 59 | tiledb_buffer_list_t* buff_list; 60 | tiledb_buffer_t* buff; 61 | 62 | rc = tiledb_serialize_query(ctx, query, TILEDB_CAPNP, 1, &buff_list); 
63 | if (rc == TILEDB_ERR) 64 | TPY_ERROR_LOC("Could not serialize the query."); 65 | 66 | rc = tiledb_buffer_list_flatten(ctx, buff_list, &buff); 67 | if (rc == TILEDB_ERR) 68 | TPY_ERROR_LOC("Could not flatten the buffer list."); 69 | 70 | void* buff_data; 71 | uint64_t buff_num_bytes; 72 | 73 | rc = tiledb_buffer_get_data(ctx, buff, &buff_data, &buff_num_bytes); 74 | if (rc == TILEDB_ERR) 75 | TPY_ERROR_LOC("Could not get the data from the buffer."); 76 | 77 | py::bytes output((char*)buff_data, buff_num_bytes); 78 | 79 | tiledb_buffer_free(&buff); 80 | tiledb_buffer_list_free(&buff_list); 81 | tiledb_subarray_free(&subarray); 82 | tiledb_query_free(&query); 83 | 84 | return output; 85 | } 86 | }; 87 | 88 | void init_test_serialization(py::module& m) { 89 | py::class_(m, "test_serialization") 90 | .def_static( 91 | "create_serialized_test_query", 92 | &PySerializationTest::create_serialized_test_query); 93 | } 94 | 95 | }; // namespace tiledbpy 96 | -------------------------------------------------------------------------------- /tiledb/tests/test_serialization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | import tiledb 5 | 6 | from .common import DiskTestCase 7 | 8 | try: 9 | from tiledb.main import test_serialization as ser_test 10 | except ImportError: 11 | pytest.skip("Serialization not enabled.", allow_module_level=True) 12 | 13 | 14 | class SerializationTest(DiskTestCase): 15 | def test_query_deserialization(self): 16 | path = self.path("test_query_deserialization") 17 | dom = tiledb.Domain(tiledb.Dim(domain=(1, 10), dtype=np.uint32)) 18 | attrs = [tiledb.Attr(dtype=np.int64)] 19 | schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) 20 | tiledb.Array.create(path, schema) 21 | 22 | data = np.random.randint(-5, 5, 10) 23 | 24 | with tiledb.open(path, "w") as A: 25 | A[np.arange(1, 11)] = data 26 | 27 | with tiledb.open(path, "r") as A: 28 | ctx = 
class StatsTest(DiskTestCase):
    """Exercise the global statistics switches and the ``stats_dump`` formats."""

    def test_stats(self, capfd):
        """Write and read an array, checking what ``stats_dump`` reports each time.

        The expected stat names depend on the libtiledb version: from 2.27 on
        the test looks for ``Context.Query.*``, before that for
        ``Context.StorageManager.Query.*``.
        """
        # Toggle once up front to make sure enable/reset/disable can be
        # called in sequence before any real work.
        tiledb.stats_enable()
        tiledb.stats_reset()
        tiledb.stats_disable()

        tiledb.stats_enable()

        path = self.path("test_stats")

        # Creating the array is the write whose stats are checked below.
        with tiledb.from_numpy(path, np.arange(10)) as T:
            pass

        # check that Writer stats are printed
        tiledb.stats_dump()

        if tiledb.libtiledb.version() >= (2, 27):
            assert_captured(capfd, "Context.Query.Writer")
        else:
            assert_captured(capfd, "Context.StorageManager.Query.Writer")

        # check that Writer stats are not printed because of reset
        tiledb.stats_reset()
        tiledb.stats_dump()

        if tiledb.libtiledb.version() >= (2, 27):
            assert_captured(capfd, "Context.Query.Writer", expected=False)
        else:
            assert_captured(
                capfd, "Context.StorageManager.Query.Writer", expected=False
            )

        with tiledb.open(path) as T:
            tiledb.stats_enable()
            assert_array_equal(T, np.arange(10))

            # test stdout version
            tiledb.stats_dump()
            assert_captured(capfd, "TileDB Embedded Version:")

            # check that Reader stats are printed
            tiledb.stats_dump()
            if tiledb.libtiledb.version() >= (2, 27):
                assert_captured(capfd, "Context.Query.Reader")
            else:
                assert_captured(capfd, "Context.StorageManager.Query.Reader")

            # test string version
            stats_v = tiledb.stats_dump(print_out=False)
            if tiledb.libtiledb.version() < (2, 3):
                self.assertTrue("==== READ ====" in stats_v)
            else:
                self.assertTrue('"timers": {' in stats_v)
            self.assertTrue("==== Python Stats ====" in stats_v)

            # Non-verbose dump: the expected content again differs by version.
            stats_quiet = tiledb.stats_dump(print_out=False, verbose=False)
            if tiledb.libtiledb.version() < (2, 3):
                self.assertTrue("Time to load array schema" not in stats_quiet)

                # TODO seems to be a regression, no JSON
                stats_json = tiledb.stats_dump(json=True)
                self.assertTrue(isinstance(stats_json, dict))
                self.assertTrue("CONSOLIDATE_COPY_ARRAY" in stats_json)
            else:
                self.assertTrue("==== READ ====" in stats_quiet)

            # check that Reader stats are not printed because of reset
            tiledb.stats_reset()
            tiledb.stats_dump()
            if tiledb.libtiledb.version() >= (2, 27):
                assert_captured(capfd, "Context.Query.Reader", expected=False)
            else:
                assert_captured(
                    capfd, "Context.StorageManager.Query.Reader", expected=False
                )

    def test_stats_include_python_json(self):
        """The JSON dump is returned as a string that mentions the Python stats."""
        tiledb.stats_enable()

        path = self.path("test_stats")

        with tiledb.from_numpy(path, np.arange(10)) as T:
            pass

        # Drop the write stats so the dump below reflects the read only.
        tiledb.stats_reset()
        with tiledb.open(path) as T:
            tiledb.stats_enable()
            assert_array_equal(T, np.arange(10))
            json_stats = tiledb.stats_dump(print_out=False, json=True)
            # With print_out=False and json=True the result is text, not a dict.
            assert isinstance(json_stats, str)
            assert "python" in json_stats
            assert "timers" in json_stats
            assert "counters" in json_stats
Filter(ctx, TILEDB_FILTER_WEBP); 16 | } catch (TileDBError&) { 17 | // Can't create WebP filter; built with TILEDB_WEBP=OFF 18 | return false; 19 | } 20 | return true; 21 | } 22 | }; 23 | 24 | void init_test_webp_filter(py::module& m) { 25 | py::class_(m, "test_webp_filter") 26 | .def_static("webp_filter_exists", &WebpFilter::webp_filter_exists); 27 | } 28 | 29 | }; // namespace tiledbpy 30 | -------------------------------------------------------------------------------- /tiledb/util.cc: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | #include 4 | 5 | std::string _get_tiledb_err_str(tiledb_error_t* err_ptr) { 6 | const char* err_msg_ptr = NULL; 7 | int ret = tiledb_error_message(err_ptr, &err_msg_ptr); 8 | 9 | if (ret != TILEDB_OK) { 10 | tiledb_error_free(&err_ptr); 11 | if (ret == TILEDB_OOM) { 12 | throw std::bad_alloc(); 13 | } 14 | return "error retrieving error message"; 15 | } 16 | return std::string(err_msg_ptr); 17 | } 18 | 19 | std::string get_last_ctx_err_str(tiledb_ctx_t* ctx_ptr, int rc) { 20 | if (rc == TILEDB_OOM) 21 | throw std::bad_alloc(); 22 | 23 | tiledb_error_t* err_ptr = NULL; 24 | int ret = tiledb_ctx_get_last_error(ctx_ptr, &err_ptr); 25 | 26 | if (ret != TILEDB_OK) { 27 | tiledb_error_free(&err_ptr); 28 | if (ret == TILEDB_OOM) { 29 | throw std::bad_alloc(); 30 | } 31 | return "error retrieving error object from ctx"; 32 | } 33 | return _get_tiledb_err_str(err_ptr); 34 | } 35 | -------------------------------------------------------------------------------- /tiledb/util.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #ifndef TILEDB_PY_UTIL_H 5 | #define TILEDB_PY_UTIL_H 6 | 7 | const uint64_t DEFAULT_INIT_BUFFER_BYTES = 1310720 * 8; 8 | const uint64_t DEFAULT_ALLOC_MAX_BYTES = uint64_t(5 * pow(2, 30)); 9 | 10 | std::string get_last_ctx_err_str(tiledb_ctx_t*, int); 11 | 12 | #define TPY_ERROR_STR(m) \ 13 | 
// Expands to a std::string: the message `m` followed by " (<file>:<line>)"
// for the location where the macro is used.
#define TPY_ERROR_STR(m)                                               \
  (std::string(m) + " (" + __FILE__ + ":" + std::to_string(__LINE__) + \
   ")")

// Throws a TileDBPyError whose text is `m` followed by " (<file>:<line>)".
// The expansion already ends in a semicolon.
#define TPY_ERROR_LOC(m)                                                  \
  throw TileDBPyError(                                                    \
      std::string(m) + " (" + __FILE__ + ":" + std::to_string(__LINE__) + \
      ")");

/**
 * Exception type thrown by the helpers in this project.
 *
 * Inheritance is public so that handlers written against
 * `std::runtime_error` or `std::exception` can catch it. With the `class`
 * default (private inheritance) that conversion is inaccessible and such
 * handlers are skipped.
 */
class TileDBPyError : public std::runtime_error {
 public:
  explicit TileDBPyError(const char* m)
      : std::runtime_error(m) {
  }
  explicit TileDBPyError(std::string m)
      : std::runtime_error(m) {
  }

 public:
  // Returns the message passed at construction.
  virtual const char* what() const noexcept override {
    return std::runtime_error::what();
  }
};