├── .clang-format ├── .clang-tidy ├── .clangd ├── .codecov.yml ├── .editorconfig ├── .githooks └── post-checkout ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── config.yml └── workflows │ ├── InternalIssuesCreateMirror.yml │ ├── InternalIssuesUpdateMirror.yml │ ├── cleanup_pypi.yml │ ├── code_quality.yml │ ├── coverage.yml │ ├── on_pr.yml │ ├── on_push.yml │ ├── packaging.yml │ ├── packaging_sdist.yml │ ├── packaging_wheels.yml │ ├── release.yml │ ├── submodule_auto_pr.yml │ ├── submodule_sanity.yml │ └── targeted_test.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── _duckdb-stubs ├── __init__.pyi ├── _func.pyi └── _sqltypes.pyi ├── adbc_driver_duckdb ├── __init__.py └── dbapi.py ├── cmake ├── compiler_launcher.cmake └── duckdb_loader.cmake ├── duckdb ├── __init__.py ├── _dbapi_type_object.py ├── _version.py ├── bytes_io_wrapper.py ├── experimental │ ├── __init__.py │ └── spark │ │ ├── LICENSE │ │ ├── __init__.py │ │ ├── _globals.py │ │ ├── _typing.py │ │ ├── conf.py │ │ ├── context.py │ │ ├── errors │ │ ├── __init__.py │ │ ├── error_classes.py │ │ ├── exceptions │ │ │ ├── __init__.py │ │ │ └── base.py │ │ └── utils.py │ │ ├── exception.py │ │ └── sql │ │ ├── __init__.py │ │ ├── _typing.py │ │ ├── catalog.py │ │ ├── column.py │ │ ├── conf.py │ │ ├── dataframe.py │ │ ├── functions.py │ │ ├── group.py │ │ ├── readwriter.py │ │ ├── session.py │ │ ├── streaming.py │ │ ├── type_utils.py │ │ ├── types.py │ │ └── udf.py ├── filesystem.py ├── func │ └── __init__.py ├── functional │ └── __init__.py ├── polars_io.py ├── py.typed ├── query_graph │ └── __main__.py ├── sqltypes │ └── __init__.py ├── typing │ └── __init__.py ├── udf.py └── value │ ├── __init__.py │ └── constant │ └── __init__.py ├── duckdb_packaging ├── __init__.py ├── _versioning.py ├── build_backend.py ├── pypi_cleanup.py └── setuptools_scm_version.py ├── pyodide.md ├── pyproject.toml ├── scripts ├── cache_data.json 
├── connection_methods.json ├── connection_wrapper_methods.json ├── generate_connection_code.py ├── generate_connection_methods.py ├── generate_connection_stubs.py ├── generate_connection_wrapper_methods.py ├── generate_connection_wrapper_stubs.py ├── generate_import_cache_cpp.py ├── generate_import_cache_json.py ├── get_cpp_methods.py ├── imports.py ├── install_spark_in_cibuildwheels_linux_container.sh └── regenerate_python_stubs.sh ├── src └── duckdb_py │ ├── CMakeLists.txt │ ├── arrow │ ├── CMakeLists.txt │ ├── arrow_array_stream.cpp │ ├── arrow_export_utils.cpp │ └── pyarrow_filter_pushdown.cpp │ ├── common │ ├── CMakeLists.txt │ └── exceptions.cpp │ ├── dataframe.cpp │ ├── duckdb_python.cpp │ ├── functional │ ├── CMakeLists.txt │ └── functional.cpp │ ├── importer.cpp │ ├── include │ └── duckdb_python │ │ ├── arrow │ │ ├── arrow_array_stream.hpp │ │ ├── arrow_export_utils.hpp │ │ └── pyarrow_filter_pushdown.hpp │ │ ├── conversions │ │ └── optional_wrapper.hpp │ │ ├── expression │ │ └── pyexpression.hpp │ │ ├── filesystem_object.hpp │ │ ├── functional.hpp │ │ ├── import_cache │ │ ├── importer.hpp │ │ ├── modules │ │ │ ├── collections_module.hpp │ │ │ ├── datetime_module.hpp │ │ │ ├── decimal_module.hpp │ │ │ ├── duckdb_module.hpp │ │ │ ├── ipython_module.hpp │ │ │ ├── ipywidgets_module.hpp │ │ │ ├── numpy_module.hpp │ │ │ ├── pandas_module.hpp │ │ │ ├── pathlib_module.hpp │ │ │ ├── polars_module.hpp │ │ │ ├── pyarrow_module.hpp │ │ │ ├── pytz_module.hpp │ │ │ ├── types_module.hpp │ │ │ ├── typing_module.hpp │ │ │ └── uuid_module.hpp │ │ ├── python_import_cache.hpp │ │ ├── python_import_cache_item.hpp │ │ └── python_import_cache_modules.hpp │ │ ├── jupyter_progress_bar_display.hpp │ │ ├── map.hpp │ │ ├── numpy │ │ ├── array_wrapper.hpp │ │ ├── numpy_bind.hpp │ │ ├── numpy_result_conversion.hpp │ │ ├── numpy_scan.hpp │ │ ├── numpy_type.hpp │ │ └── raw_array_wrapper.hpp │ │ ├── pandas │ │ ├── column │ │ │ └── pandas_numpy_column.hpp │ │ ├── pandas_analyzer.hpp │ │ 
├── pandas_bind.hpp │ │ ├── pandas_column.hpp │ │ └── pandas_scan.hpp │ │ ├── path_like.hpp │ │ ├── pybind11 │ │ ├── conversions │ │ │ ├── exception_handling_enum.hpp │ │ │ ├── explain_enum.hpp │ │ │ ├── null_handling_enum.hpp │ │ │ ├── pyconnection_default.hpp │ │ │ ├── python_csv_line_terminator_enum.hpp │ │ │ ├── python_udf_type_enum.hpp │ │ │ └── render_mode_enum.hpp │ │ ├── dataframe.hpp │ │ ├── exceptions.hpp │ │ ├── gil_wrapper.hpp │ │ ├── pybind_wrapper.hpp │ │ ├── python_object_container.hpp │ │ └── registered_py_object.hpp │ │ ├── pyconnection │ │ └── pyconnection.hpp │ │ ├── pyfilesystem.hpp │ │ ├── pyrelation.hpp │ │ ├── pyresult.hpp │ │ ├── pystatement.hpp │ │ ├── python_conversion.hpp │ │ ├── python_dependency.hpp │ │ ├── python_objects.hpp │ │ ├── python_replacement_scan.hpp │ │ ├── pytype.hpp │ │ ├── pyutil.hpp │ │ └── typing.hpp │ ├── jupyter │ ├── CMakeLists.txt │ └── jupyter_progress_bar_display.cpp │ ├── map.cpp │ ├── native │ ├── CMakeLists.txt │ ├── python_conversion.cpp │ └── python_objects.cpp │ ├── numpy │ ├── CMakeLists.txt │ ├── array_wrapper.cpp │ ├── numpy_bind.cpp │ ├── numpy_result_conversion.cpp │ ├── numpy_scan.cpp │ ├── raw_array_wrapper.cpp │ └── type.cpp │ ├── pandas │ ├── CMakeLists.txt │ ├── analyzer.cpp │ ├── bind.cpp │ └── scan.cpp │ ├── path_like.cpp │ ├── pybind11 │ ├── CMakeLists.txt │ └── pybind_wrapper.cpp │ ├── pyconnection.cpp │ ├── pyconnection │ ├── CMakeLists.txt │ └── type_creation.cpp │ ├── pyexpression.cpp │ ├── pyexpression │ ├── CMakeLists.txt │ └── initialize.cpp │ ├── pyfilesystem.cpp │ ├── pyrelation.cpp │ ├── pyrelation │ ├── CMakeLists.txt │ └── initialize.cpp │ ├── pyresult.cpp │ ├── pystatement.cpp │ ├── python_dependency.cpp │ ├── python_import_cache.cpp │ ├── python_replacement_scan.cpp │ ├── python_udf.cpp │ └── typing │ ├── CMakeLists.txt │ ├── pytype.cpp │ └── typing.cpp └── tests ├── conftest.py ├── coverage └── test_pandas_categorical_coverage.py ├── extensions ├── json │ ├── data │ │ └── 
example.json │ └── test_read_json.py ├── test_extensions_loading.py └── test_httpfs.py ├── fast ├── adbc │ ├── test_adbc.py │ ├── test_connection_get_info.py │ └── test_statement_bind.py ├── api │ ├── test_3324.py │ ├── test_3654.py │ ├── test_3728.py │ ├── test_6315.py │ ├── test_attribute_getter.py │ ├── test_config.py │ ├── test_connection_close.py │ ├── test_connection_interrupt.py │ ├── test_cursor.py │ ├── test_dbapi00.py │ ├── test_dbapi01.py │ ├── test_dbapi04.py │ ├── test_dbapi05.py │ ├── test_dbapi07.py │ ├── test_dbapi08.py │ ├── test_dbapi09.py │ ├── test_dbapi10.py │ ├── test_dbapi11.py │ ├── test_dbapi12.py │ ├── test_dbapi13.py │ ├── test_dbapi_fetch.py │ ├── test_duckdb_connection.py │ ├── test_duckdb_execute.py │ ├── test_duckdb_query.py │ ├── test_explain.py │ ├── test_fsspec.py │ ├── test_insert_into.py │ ├── test_join.py │ ├── test_native_tz.py │ ├── test_query_interrupt.py │ ├── test_query_progress.py │ ├── test_read_csv.py │ ├── test_relation_to_view.py │ ├── test_streaming_result.py │ ├── test_to_csv.py │ ├── test_to_parquet.py │ └── test_with_propagating_exceptions.py ├── arrow │ ├── data │ │ ├── arrow_table │ │ ├── unsigned.parquet │ │ └── userdata1.parquet │ ├── parquet_write_roundtrip.py │ ├── test_10795.py │ ├── test_12384.py │ ├── test_14344.py │ ├── test_2426.py │ ├── test_5547.py │ ├── test_6584.py │ ├── test_6796.py │ ├── test_7652.py │ ├── test_7699.py │ ├── test_8522.py │ ├── test_9443.py │ ├── test_arrow_batch_index.py │ ├── test_arrow_binary_view.py │ ├── test_arrow_case_sensitive.py │ ├── test_arrow_decimal256.py │ ├── test_arrow_decimal_32_64.py │ ├── test_arrow_extensions.py │ ├── test_arrow_fetch.py │ ├── test_arrow_fetch_recordbatch.py │ ├── test_arrow_fixed_binary.py │ ├── test_arrow_ipc.py │ ├── test_arrow_list.py │ ├── test_arrow_offsets.py │ ├── test_arrow_pycapsule.py │ ├── test_arrow_recordbatchreader.py │ ├── test_arrow_replacement_scan.py │ ├── test_arrow_run_end_encoding.py │ ├── test_arrow_scanner.py │ ├── 
test_arrow_string_view.py │ ├── test_arrow_types.py │ ├── test_arrow_union.py │ ├── test_arrow_version_format.py │ ├── test_binary_type.py │ ├── test_buffer_size_option.py │ ├── test_dataset.py │ ├── test_date.py │ ├── test_dictionary_arrow.py │ ├── test_filter_pushdown.py │ ├── test_integration.py │ ├── test_interval.py │ ├── test_large_offsets.py │ ├── test_large_string.py │ ├── test_multiple_reads.py │ ├── test_nested_arrow.py │ ├── test_parallel.py │ ├── test_polars.py │ ├── test_progress.py │ ├── test_projection_pushdown.py │ ├── test_time.py │ ├── test_timestamp_timezone.py │ ├── test_timestamps.py │ ├── test_tpch.py │ ├── test_unregister.py │ └── test_view.py ├── data │ ├── binary_string.parquet │ ├── category.csv │ ├── datetime.csv │ ├── example.json │ ├── integers.csv │ ├── nullpadding.csv │ ├── problematic.csv │ ├── quote_escape.csv │ ├── stress_test.csv │ ├── tz.parquet │ └── unquote_without_delimiter.csv ├── numpy │ └── test_numpy_new_path.py ├── pandas │ ├── test_2304.py │ ├── test_append_df.py │ ├── test_bug2281.py │ ├── test_bug5922.py │ ├── test_column_order.py │ ├── test_copy_on_write.py │ ├── test_create_table_from_pandas.py │ ├── test_date_as_datetime.py │ ├── test_datetime_time.py │ ├── test_datetime_timestamp.py │ ├── test_df_analyze.py │ ├── test_df_object_resolution.py │ ├── test_df_recursive_nested.py │ ├── test_fetch_df_chunk.py │ ├── test_fetch_nested.py │ ├── test_implicit_pandas_scan.py │ ├── test_import_cache.py │ ├── test_issue_1767.py │ ├── test_limit.py │ ├── test_pandas_arrow.py │ ├── test_pandas_category.py │ ├── test_pandas_df_none.py │ ├── test_pandas_enum.py │ ├── test_pandas_limit.py │ ├── test_pandas_na.py │ ├── test_pandas_object.py │ ├── test_pandas_string.py │ ├── test_pandas_timestamp.py │ ├── test_pandas_types.py │ ├── test_pandas_unregister.py │ ├── test_pandas_update.py │ ├── test_parallel_pandas_scan.py │ ├── test_partitioned_pandas_scan.py │ ├── test_progress_bar.py │ ├── test_pyarrow_projection_pushdown.py │ ├── 
test_same_name.py │ ├── test_stride.py │ ├── test_timedelta.py │ └── test_timestamp.py ├── relational_api │ ├── test_groupings.py │ ├── test_joins.py │ ├── test_pivot.py │ ├── test_rapi_aggregations.py │ ├── test_rapi_close.py │ ├── test_rapi_description.py │ ├── test_rapi_functions.py │ ├── test_rapi_query.py │ ├── test_rapi_windows.py │ └── test_table_function.py ├── spark │ ├── test_replace_column_value.py │ ├── test_replace_empty_value.py │ ├── test_spark_arrow_table.py │ ├── test_spark_catalog.py │ ├── test_spark_column.py │ ├── test_spark_dataframe.py │ ├── test_spark_dataframe_sort.py │ ├── test_spark_drop_duplicates.py │ ├── test_spark_except.py │ ├── test_spark_filter.py │ ├── test_spark_function_concat_ws.py │ ├── test_spark_functions_array.py │ ├── test_spark_functions_base64.py │ ├── test_spark_functions_dataframe.py │ ├── test_spark_functions_date.py │ ├── test_spark_functions_expr.py │ ├── test_spark_functions_hash.py │ ├── test_spark_functions_hex.py │ ├── test_spark_functions_miscellaneous.py │ ├── test_spark_functions_null.py │ ├── test_spark_functions_numeric.py │ ├── test_spark_functions_sort.py │ ├── test_spark_functions_string.py │ ├── test_spark_group_by.py │ ├── test_spark_intersect.py │ ├── test_spark_join.py │ ├── test_spark_limit.py │ ├── test_spark_order_by.py │ ├── test_spark_pandas_dataframe.py │ ├── test_spark_readcsv.py │ ├── test_spark_readjson.py │ ├── test_spark_readparquet.py │ ├── test_spark_runtime_config.py │ ├── test_spark_session.py │ ├── test_spark_to_csv.py │ ├── test_spark_to_parquet.py │ ├── test_spark_transform.py │ ├── test_spark_types.py │ ├── test_spark_udf.py │ ├── test_spark_union.py │ ├── test_spark_union_by_name.py │ ├── test_spark_with_column.py │ ├── test_spark_with_column_renamed.py │ ├── test_spark_with_columns.py │ └── test_spark_with_columns_renamed.py ├── sqlite │ └── test_types.py ├── test_alex_multithread.py ├── test_all_types.py ├── test_ambiguous_prepare.py ├── test_case_alias.py ├── 
test_context_manager.py ├── test_duckdb_api.py ├── test_expression.py ├── test_filesystem.py ├── test_get_table_names.py ├── test_import_export.py ├── test_insert.py ├── test_json_logging.py ├── test_many_con_same_file.py ├── test_map.py ├── test_metatransaction.py ├── test_module.py ├── test_multi_statement.py ├── test_multithread.py ├── test_non_default_conn.py ├── test_parameter_list.py ├── test_parquet.py ├── test_pypi_cleanup.py ├── test_pytorch.py ├── test_relation.py ├── test_relation_dependency_leak.py ├── test_replacement_scan.py ├── test_result.py ├── test_runtime_error.py ├── test_sql_expression.py ├── test_string_annotation.py ├── test_tf.py ├── test_transaction.py ├── test_type.py ├── test_type_explicit.py ├── test_unicode.py ├── test_union.py ├── test_value.py ├── test_version.py ├── test_versioning.py ├── test_windows_abs_path.py ├── types │ ├── test_blob.py │ ├── test_boolean.py │ ├── test_datetime_date.py │ ├── test_datetime_datetime.py │ ├── test_decimal.py │ ├── test_hugeint.py │ ├── test_nan.py │ ├── test_nested.py │ ├── test_null.py │ ├── test_numeric.py │ ├── test_numpy.py │ ├── test_object_int.py │ ├── test_time_tz.py │ └── test_unsigned.py └── udf │ ├── test_null_filtering.py │ ├── test_remove_function.py │ ├── test_scalar.py │ ├── test_scalar_arrow.py │ ├── test_scalar_native.py │ └── test_transactionality.py ├── slow └── test_h2oai_arrow.py └── spark_namespace ├── __init__.py ├── errors.py └── sql ├── __init__.py ├── catalog.py ├── column.py ├── dataframe.py ├── functions.py └── types.py /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: LLVM 3 | SortIncludes: false 4 | TabWidth: 4 5 | IndentWidth: 4 6 | ColumnLimit: 120 7 | AllowShortFunctionsOnASingleLine: false 8 | --- 9 | UseTab: ForIndentation 10 | DerivePointerAlignment: false 11 | PointerAlignment: Right 12 | AlignConsecutiveMacros: true 13 | AlignTrailingComments: true 14 | AllowAllArgumentsOnNextLine: true 15 
| AllowAllConstructorInitializersOnNextLine: true 16 | AllowAllParametersOfDeclarationOnNextLine: true 17 | AlignAfterOpenBracket: Align 18 | SpaceBeforeCpp11BracedList: true 19 | SpaceBeforeCtorInitializerColon: true 20 | SpaceBeforeInheritanceColon: true 21 | SpacesInAngles: false 22 | SpacesInCStyleCastParentheses: false 23 | SpacesInConditionalStatement: false 24 | AllowShortLambdasOnASingleLine: Inline 25 | AllowShortLoopsOnASingleLine: false 26 | AlwaysBreakTemplateDeclarations: Yes 27 | IncludeBlocks: Regroup 28 | Language: Cpp 29 | AccessModifierOffset: -4 30 | --- 31 | Language: Java 32 | SpaceAfterCStyleCast: true 33 | --- 34 | -------------------------------------------------------------------------------- /.clangd: -------------------------------------------------------------------------------- 1 | CompileFlags: 2 | CompilationDatabase: build/clangd 3 | Add: -Wno-unqualified-std-cast-call 4 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | coverage: 3 | precision: 2 4 | round: down 5 | range: "0...100" 6 | status: 7 | project: 8 | default: 9 | # advanced settings 10 | if_not_found: success 11 | if_ci_failed: failure 12 | informational: true 13 | only_pulls: false 14 | patch: 15 | default: 16 | branches: 17 | - main 18 | if_not_found: success 19 | if_ci_failed: error 20 | informational: true 21 | only_pulls: true 22 | paths: 23 | - "src" -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | insert_final_newline = true 7 | trim_trailing_whitespace = true 8 | 9 | [*.{py,pyi}] 10 | indent_style = space 11 | indent_size = 4 12 | 13 | [*.{c,cpp,h,hpp}] 14 | indent_style = tab 15 | tab_width = 4 16 | indent_size = tab 
17 | max_line_length = 120 18 | x-soft-wrap-text = true 19 | x-soft-wrap-mode = CharacterWidth 20 | x-soft-wrap-limit = 120 21 | x-show-invisibles = false 22 | x-show-spaces = false 23 | 24 | [Makefile] 25 | indent_style = tab 26 | tab_width = 4 27 | indent_size = tab 28 | x-soft-wrap-text = false 29 | -------------------------------------------------------------------------------- /.githooks/post-checkout: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | git submodule update --init --recursive 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Feature Request 4 | url: https://github.com/duckdb/duckdb-python/discussions/new?category=ideas&title=Feature%20Request:%20...&labels=feature&body=Why%20do%20you%20want%20this%20feature%3F 5 | about: Submit feature requests here 6 | - name: Discussions 7 | url: https://github.com/duckdb/duckdb-python/discussions 8 | about: Please ask and answer general questions here. 
9 | -------------------------------------------------------------------------------- /.github/workflows/code_quality.yml: -------------------------------------------------------------------------------- 1 | name: Code Quality Checks 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | git_ref: 6 | type: string 7 | description: Git ref of the DuckDB python package 8 | required: false 9 | workflow_call: 10 | inputs: 11 | git_ref: 12 | type: string 13 | description: Git ref of the DuckDB python package 14 | required: false 15 | 16 | defaults: 17 | run: 18 | shell: bash 19 | 20 | jobs: 21 | run_checks: 22 | name: Run linting, formatting and static type checker 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v4 26 | with: 27 | ref: ${{ inputs.git_ref }} 28 | fetch-depth: 0 29 | persist-credentials: false 30 | 31 | - name: Install Astral UV 32 | uses: astral-sh/setup-uv@v7 33 | with: 34 | version: "0.9.0" 35 | python-version: 3.9 36 | 37 | - name: pre-commit (cache) 38 | uses: actions/cache@v4 39 | with: 40 | path: ~/.cache/pre-commit 41 | key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} 42 | 43 | - name: pre-commit (--all-files) 44 | run: | 45 | uvx pre-commit run --show-diff-on-failure --color=always --all-files 46 | -------------------------------------------------------------------------------- /.github/workflows/on_pr.yml: -------------------------------------------------------------------------------- 1 | name: Tests and builds on PR 2 | on: 3 | pull_request: 4 | branches: 5 | - main 6 | - v*.*-* 7 | types: [opened, reopened, ready_for_review, converted_to_draft, synchronize] 8 | paths-ignore: 9 | - '**.md' 10 | - 'LICENSE' 11 | - '.editorconfig' 12 | - 'scripts/**' 13 | - '.github//**' 14 | - '!.github/workflows/on_push.yml' 15 | - '!.github/workflows/coverage.yml' 16 | 17 | concurrency: 18 | group: ${{ github.workflow }}-${{ github.ref }} 19 | cancel-in-progress: true 20 | 21 | jobs: 22 | submodule_sanity_guard: 23 | name: Make sure 
submodule is in a sane state 24 | uses: ./.github/workflows/submodule_sanity.yml 25 | 26 | code_quality: 27 | name: Code-quality checks 28 | needs: submodule_sanity_guard 29 | uses: ./.github/workflows/code_quality.yml 30 | 31 | packaging_test: 32 | name: Build a minimal set of packages and run all tests on them 33 | needs: code_quality 34 | # Skip packaging tests for draft PRs 35 | if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }} 36 | uses: ./.github/workflows/packaging.yml 37 | with: 38 | minimal: true 39 | testsuite: all 40 | duckdb-sha: ${{ github.base_ref }} 41 | 42 | coverage_test: 43 | name: Run coverage tests 44 | needs: code_quality 45 | # Only run coverage test for draft PRs 46 | if: ${{ github.event_name == 'pull_request' && github.event.pull_request.draft == true }} 47 | uses: ./.github/workflows/coverage.yml 48 | with: 49 | duckdb_git_ref: ${{ github.base_ref }} 50 | testsuite: all 51 | -------------------------------------------------------------------------------- /.github/workflows/on_push.yml: -------------------------------------------------------------------------------- 1 | name: Tests and coverage on push 2 | on: 3 | push: 4 | branches-ignore: 5 | - main 6 | - v*.*-* 7 | paths-ignore: 8 | - '**.md' 9 | - 'LICENSE' 10 | - '.editorconfig' 11 | - 'scripts/**' 12 | - '.github//**' 13 | - '!.github/workflows/on_push.yml' 14 | - '!.github/workflows/coverage.yml' 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.ref }} 18 | cancel-in-progress: true 19 | 20 | jobs: 21 | code_quality: 22 | name: Code-quality checks 23 | uses: ./.github/workflows/code_quality.yml 24 | 25 | test: 26 | name: Run coverage tests 27 | needs: code_quality 28 | uses: ./.github/workflows/coverage.yml 29 | with: 30 | git_ref: ${{ github.ref }} 31 | testsuite: fast 32 | -------------------------------------------------------------------------------- /.github/workflows/submodule_sanity.yml: 
-------------------------------------------------------------------------------- 1 | name: Check DuckDB submodule sanity 2 | on: 3 | workflow_call: 4 | workflow_dispatch: 5 | jobs: 6 | submodule_sanity: 7 | name: Make sure submodule is in a sane state 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout DuckDB Python 11 | uses: actions/checkout@v4 12 | with: 13 | fetch-depth: 0 14 | 15 | - name: Verify submodule origin 16 | shell: bash 17 | run: | 18 | set -eux 19 | git submodule update --init 20 | cd external/duckdb 21 | remote_count=$(git remote | wc -l) 22 | if [[ $remote_count -gt 1 ]]; then 23 | echo "::error::Multiple remotes found - only origin allowed" 24 | git remote -v 25 | fi 26 | origin_url=$(git remote get-url origin) 27 | if [[ "$origin_url" != "https://github.com/duckdb/duckdb"* ]]; then 28 | echo "::error::Submodule origin has been tampered with: $origin_url" 29 | exit 1 30 | fi 31 | 32 | - name: Disallow changes to .gitmodules in PRs and pushes 33 | if: ${{ github.event_name == 'pull_request' || github.event_name == 'push' }} 34 | shell: bash 35 | run: | 36 | set -eux 37 | before=${{ github.event_name == 'push' && github.event.before || format('origin/{0}', github.base_ref) }} 38 | after=${{ github.event_name == 'push' && github.event.after || github.head_ref }} 39 | if git diff --name-only $before...$after | grep -q "^\.gitmodules$"; then 40 | echo "::error::.gitmodules may not be modified. If you see a reason to update, please discuss with the maintainers." 41 | exit 1 42 | fi 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #==============================================================================# 2 | # This file specifies intentionally untracked files that git should ignore. 
3 | #==============================================================================# 4 | 5 | #==============================================================================# 6 | # General 7 | #==============================================================================# 8 | # Temp files created by most text editors. 9 | *~ 10 | # Merge files created by git. 11 | *.orig 12 | # vim swap files 13 | .*.sw? 14 | .sw? 15 | #OS X specific files. 16 | .DS_store 17 | 18 | #==============================================================================# 19 | # Build artifacts 20 | #==============================================================================# 21 | *.o 22 | *.lo 23 | *.la 24 | *.lai 25 | *.lib 26 | *.slo 27 | *.cuo 28 | *.pdf 29 | *.swp 30 | a.out 31 | *.so 32 | *.dylib 33 | *.dll 34 | 35 | build 36 | .build_debug/* 37 | .build_release/* 38 | distribute/* 39 | *.testbin 40 | *.bin 41 | cmake_build 42 | .cmake_build 43 | cmake-build-debug 44 | cmake-build-release 45 | cmake-build-relwithdebinfo 46 | duckdb_packaging/duckdb_version.txt 47 | test.db 48 | 49 | #==============================================================================# 50 | # Python 51 | #==============================================================================# 52 | 53 | *.pyc 54 | .venv 55 | uv.lock 56 | dist 57 | duckdb.egg-info 58 | .eggs 59 | .pytest_cache 60 | .coverage 61 | duckdb_build 62 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "external/duckdb"] 2 | path = external/duckdb 3 | url = https://github.com/duckdb/duckdb.git 4 | branch = main 5 | [submodule] 6 | recurse = true 7 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 
3 | rev: v4.4.0 4 | hooks: 5 | - id: check-yaml 6 | args: ["--allow-multiple-documents"] 7 | - id: check-toml 8 | - id: check-added-large-files 9 | - id: detect-private-key 10 | - id: check-merge-conflict 11 | - id: forbid-new-submodules 12 | 13 | - repo: https://github.com/astral-sh/ruff-pre-commit 14 | # Ruff version. 15 | rev: v0.13.3 16 | hooks: 17 | # Run the linter. 18 | - id: ruff-check 19 | # Run the formatter. 20 | - id: ruff-format 21 | 22 | - repo: https://github.com/pre-commit/mirrors-clang-format 23 | rev: v21.1.2 # pick the version of clang-format you want 24 | hooks: 25 | - id: clang-format 26 | files: \.(c|cpp|cc|h|hpp|cxx|hxx)$ 27 | 28 | - repo: https://github.com/cheshirekow/cmake-format-precommit 29 | rev: v0.6.13 30 | hooks: 31 | - id: cmake-format 32 | 33 | - repo: https://github.com/pre-commit/mirrors-mypy 34 | rev: v1.18.2 35 | hooks: 36 | - id: mypy 37 | entry: mypy 38 | files: ^(duckdb/|_duckdb-stubs/) 39 | exclude: ^duckdb/(experimental|query_graph)/ 40 | additional_dependencies: [ numpy, polars ] 41 | 42 | - repo: local 43 | hooks: 44 | - id: post-checkout-submodules 45 | name: Update submodule post-checkout 46 | entry: .githooks/post-checkout 47 | language: script 48 | stages: [ post-checkout ] 49 | pass_filenames: false 50 | always_run: true 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018-2025 Stichting DuckDB Foundation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and 
# cmake/compiler_launcher.cmake
include_guard(GLOBAL) # only include once
include(CMakeParseArguments)

# ────────────────────────────────────────────
# setup_compiler_launcher_if_available()
#
# Function to look for ccache and sccache to speed up builds, if available
# ────────────────────────────────────────────
function(setup_compiler_launcher_if_available)
  # Only pick a launcher when the user / toolchain file has not already set one;
  # an explicit CMAKE_C_COMPILER_LAUNCHER always wins.
  if(NOT DEFINED CMAKE_C_COMPILER_LAUNCHER)
    # find_program caches its result in COMPILER_LAUNCHER, so the second
    # lookup below (for C++) is effectively free.
    find_program(COMPILER_LAUNCHER NAMES ccache sccache)
    if(COMPILER_LAUNCHER)
      message(STATUS "Using ${COMPILER_LAUNCHER} as C compiler launcher")
      # FORCE so the choice is recorded in the cache for subsequent
      # configure runs as well.
      set(CMAKE_C_COMPILER_LAUNCHER
          "${COMPILER_LAUNCHER}"
          CACHE STRING "" FORCE)
    endif()
  endif()

  # Same logic for the C++ compiler, handled independently so that a
  # user-provided CXX launcher does not block the C one (and vice versa).
  if(NOT DEFINED CMAKE_CXX_COMPILER_LAUNCHER)
    find_program(COMPILER_LAUNCHER NAMES ccache sccache)
    if(COMPILER_LAUNCHER)
      message(STATUS "Using ${COMPILER_LAUNCHER} as C++ compiler launcher")
      set(CMAKE_CXX_COMPILER_LAUNCHER
          "${COMPILER_LAUNCHER}"
          CACHE STRING "" FORCE)
    endif()
  endif()
endfunction()
# ----------------------------------------------------------------------
# Version API
#
# We provide three symbols:
# - duckdb.__version__: The version of this package
# - duckdb.__duckdb_version__: The version of duckdb that is bundled
# - duckdb.version(): A human-readable version string containing both of the above
# ----------------------------------------------------------------------
from importlib.metadata import version as _dist_version

import _duckdb

__version__: str = _dist_version("duckdb")
"""Version of the DuckDB Python Package."""

__duckdb_version__: str = _duckdb.__version__
"""Version of DuckDB that is bundled."""


def version() -> str:
    """Human-friendly formatted version string of both the distribution package and the bundled DuckDB engine."""
    # Use the exported __duckdb_version__ symbol rather than reaching back
    # into _duckdb, so the bundled-engine version is resolved in exactly one
    # place and stays consistent with the module-level attribute above.
    return f"{__version__} (with duckdb {__duckdb_version__})"
import spark # noqa: D104 2 | 3 | __all__ = [ 4 | "spark", 5 | ] 6 | -------------------------------------------------------------------------------- /duckdb/experimental/spark/__init__.py: -------------------------------------------------------------------------------- 1 | from .conf import SparkConf # noqa: D104 2 | from .context import SparkContext 3 | from .exception import ContributionsAcceptedError 4 | from .sql import DataFrame, SparkSession 5 | 6 | __all__ = ["ContributionsAcceptedError", "DataFrame", "SparkConf", "SparkContext", "SparkSession"] 7 | -------------------------------------------------------------------------------- /duckdb/experimental/spark/_typing.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
class SparkConf:
    """Stub of PySpark's ``SparkConf``.

    Configuration objects are not supported by DuckDB's Spark API: the
    constructor raises :class:`NotImplementedError`, and every other method
    raises :class:`ContributionsAcceptedError` to signal that contributions
    implementing it are welcome.
    """

    def __init__(self) -> None:
        """Constructing a ``SparkConf`` is not supported."""
        raise NotImplementedError

    def contains(self, key: str) -> bool:
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def getAll(self) -> list[tuple[str, str]]:
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def set(self, key: str, value: str) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def setAll(self, pairs: list[tuple[str, str]]) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def setAppName(self, value: str) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def setExecutorEnv(
        self, key: Optional[str] = None, value: Optional[str] = None, pairs: Optional[list[tuple[str, str]]] = None
    ) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def setIfMissing(self, key: str, value: str) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def setMaster(self, value: str) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def setSparkHome(self, value: str) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def toDebugString(self) -> str:
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError
class ContributionsAcceptedError(NotImplementedError):
    """This method is not planned to be implemented, if you would like to implement this method
    or show your interest in this method to other members of the community,
    feel free to open up a PR or a Discussion over on https://github.com/duckdb/duckdb.
    """  # noqa: D205

    def __init__(self, message: Optional[str] = None) -> None:
        """Build the exception text from the class docstring.

        The docstring doubles as the user-facing explanation; an optional
        caller-supplied *message* is prepended to it.
        """
        text = self.__class__.__doc__
        if message:
            text = f"{message}\n{text}"
        super().__init__(text)
NotImplementedError 13 | 14 | def isModifiable(self, key: str) -> bool: # noqa: D102 15 | raise NotImplementedError 16 | 17 | def unset(self, key: str) -> None: # noqa: D102 18 | raise NotImplementedError 19 | 20 | def get(self, key: str, default: Union[Optional[str], _NoValueType] = _NoValue) -> str: # noqa: D102 21 | raise NotImplementedError 22 | 23 | 24 | __all__ = ["RuntimeConfig"] 25 | -------------------------------------------------------------------------------- /duckdb/experimental/spark/sql/streaming.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Optional, Union # noqa: D100 2 | 3 | from .types import StructType 4 | 5 | if TYPE_CHECKING: 6 | from .dataframe import DataFrame 7 | from .session import SparkSession 8 | 9 | PrimitiveType = Union[bool, float, int, str] 10 | OptionalPrimitiveType = Optional[PrimitiveType] 11 | 12 | 13 | class DataStreamWriter: # noqa: D101 14 | def __init__(self, dataframe: "DataFrame") -> None: # noqa: D107 15 | self.dataframe = dataframe 16 | 17 | def toTable(self, table_name: str) -> None: # noqa: D102 18 | # Should we register the dataframe or create a table from the contents? 
class UDFRegistration:
    """Registers Python callables as SQL functions on a ``SparkSession``.

    Mirrors ``pyspark.sql.UDFRegistration``; only plain Python UDF
    registration is supported (the Java variants raise).
    """

    def __init__(self, sparkSession: "SparkSession") -> None:
        """Bind this registration helper to *sparkSession*."""
        self.sparkSession = sparkSession

    def register(
        self,
        name: str,
        f: Union[Callable[..., Any], "UserDefinedFunctionLike"],
        returnType: Optional["DataTypeOrString"] = None,
    ) -> "UserDefinedFunctionLike":
        """Register *f* under *name* so it can be called from SQL.

        The callable is forwarded to DuckDB's ``create_function``;
        *returnType*, when given, becomes the SQL return type.

        Returns:
            The registered callable, so the result can be used directly
            (the signature previously promised this but returned ``None``).
        """
        self.sparkSession.conn.create_function(name, f, return_type=returnType)
        return f

    def registerJavaFunction(
        self,
        name: str,
        javaClassName: str,
        returnType: Optional["DataTypeOrString"] = None,
    ) -> None:
        """Not supported: DuckDB has no JVM; always raises."""
        raise NotImplementedError

    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:
        """Not supported: DuckDB has no JVM; always raises."""
        raise NotImplementedError
-------------------------------------------------------------------------------- /duckdb/filesystem.py: -------------------------------------------------------------------------------- 1 | """In-memory filesystem to store ephemeral dependencies. 2 | 3 | Warning: Not for external use. May change at any moment. Likely to be made internal. 4 | """ 5 | 6 | from __future__ import annotations 7 | 8 | import io 9 | import typing 10 | 11 | from fsspec import AbstractFileSystem 12 | from fsspec.implementations.memory import MemoryFile, MemoryFileSystem 13 | 14 | from .bytes_io_wrapper import BytesIOWrapper 15 | 16 | 17 | class ModifiedMemoryFileSystem(MemoryFileSystem): 18 | """In-memory filesystem implementation that uses its own protocol.""" 19 | 20 | protocol = ("DUCKDB_INTERNAL_OBJECTSTORE",) 21 | # defer to the original implementation that doesn't hardcode the protocol 22 | _strip_protocol: typing.Callable[[str], str] = classmethod(AbstractFileSystem._strip_protocol.__func__) # type: ignore[assignment] 23 | 24 | def add_file(self, obj: io.IOBase | BytesIOWrapper | object, path: str) -> None: 25 | """Add a file to the filesystem.""" 26 | if not (hasattr(obj, "read") and hasattr(obj, "seek")): 27 | msg = "Can not read from a non file-like object" 28 | raise TypeError(msg) 29 | if isinstance(obj, io.TextIOBase): 30 | # Wrap this so that we can return a bytes object from 'read' 31 | obj = BytesIOWrapper(obj) 32 | path = self._strip_protocol(path) 33 | self.store[path] = MemoryFile(self, path, obj.read()) 34 | -------------------------------------------------------------------------------- /duckdb/func/__init__.py: -------------------------------------------------------------------------------- 1 | from _duckdb._func import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType # noqa: D104 2 | 3 | __all__ = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"] 4 | 
-------------------------------------------------------------------------------- /duckdb/functional/__init__.py: -------------------------------------------------------------------------------- 1 | """DuckDB function constants and types. DEPRECATED: please use `duckdb.func` instead.""" 2 | 3 | import warnings 4 | 5 | from duckdb.func import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType 6 | 7 | __all__ = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"] 8 | 9 | warnings.warn( 10 | "`duckdb.functional` is deprecated and will be removed in a future version. Please use `duckdb.func` instead.", 11 | DeprecationWarning, 12 | stacklevel=2, 13 | ) 14 | -------------------------------------------------------------------------------- /duckdb/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /duckdb/sqltypes/__init__.py: -------------------------------------------------------------------------------- 1 | """DuckDB's SQL types.""" 2 | 3 | from _duckdb._sqltypes import ( 4 | BIGINT, 5 | BIT, 6 | BLOB, 7 | BOOLEAN, 8 | DATE, 9 | DOUBLE, 10 | FLOAT, 11 | HUGEINT, 12 | INTEGER, 13 | INTERVAL, 14 | SMALLINT, 15 | SQLNULL, 16 | TIME, 17 | TIME_TZ, 18 | TIMESTAMP, 19 | TIMESTAMP_MS, 20 | TIMESTAMP_NS, 21 | TIMESTAMP_S, 22 | TIMESTAMP_TZ, 23 | TINYINT, 24 | UBIGINT, 25 | UHUGEINT, 26 | UINTEGER, 27 | USMALLINT, 28 | UTINYINT, 29 | UUID, 30 | VARCHAR, 31 | DuckDBPyType, 32 | ) 33 | 34 | __all__ = [ 35 | "BIGINT", 36 | "BIT", 37 | "BLOB", 38 | "BOOLEAN", 39 | "DATE", 40 | "DOUBLE", 41 | "FLOAT", 42 | "HUGEINT", 43 | "INTEGER", 44 | "INTERVAL", 45 | "SMALLINT", 46 | "SQLNULL", 47 | "TIME", 48 | "TIMESTAMP", 49 | "TIMESTAMP_MS", 50 | "TIMESTAMP_NS", 51 | "TIMESTAMP_S", 52 | "TIMESTAMP_TZ", 53 | "TIME_TZ", 54 | "TINYINT", 55 | "UBIGINT", 56 | "UHUGEINT", 57 | "UINTEGER", 58 | "USMALLINT", 59 | "UTINYINT", 60 | 
def vectorized(func: typing.Callable[..., typing.Any]) -> typing.Callable[..., typing.Any]:
    """Decorate a function so DuckDB treats it as a vectorized (Arrow) UDF.

    Returns a copy of ``func`` whose parameter annotations are all
    ``pyarrow.lib.ChunkedArray``; DuckDB inspects these annotations to infer
    that the function should be provided with pyarrow arrays and is expected
    to produce pyarrow array(s) as output.  ``func`` itself is left untouched.
    """
    import types
    from inspect import signature

    # Shallow-copy the function so the original's annotations stay intact.
    new_func = types.FunctionType(func.__code__, func.__globals__, func.__name__, func.__defaults__, func.__closure__)
    # FunctionType() does not carry these attributes over.  In particular,
    # dropping __kwdefaults__ would break calls relying on keyword-only
    # defaults of the decorated function; the rest preserves introspection.
    new_func.__kwdefaults__ = func.__kwdefaults__
    new_func.__doc__ = func.__doc__
    new_func.__qualname__ = func.__qualname__
    new_func.__module__ = func.__module__
    new_func.__dict__.update(func.__dict__)

    import pyarrow as pa

    # Re-annotate every parameter as a ChunkedArray so DuckDB picks the
    # vectorized (Arrow) UDF path.
    new_func.__annotations__ = {param: pa.lib.ChunkedArray for param in signature(func).parameters}
    return new_func
#!/bin/sh
# Install Java and download Spark inside the cibuildwheel Linux container.
# The cibuildwheels manylinux image runs CentOS, hence yum.
# Fail fast and echo commands: without `set -e` a failed `cd` or `wget`
# was silently ignored and later steps ran in the wrong directory.
set -eux

yum install -y java-11 wget

# -p makes the script idempotent when re-run in the same container.
mkdir -p spark_installation
cd spark_installation
wget https://blobs.duckdb.org/ci/spark-3.5.3-bin-hadoop3.tgz
tar -xvzf spark-3.5.3-bin-hadoop3.tgz
mv spark-3.5.3-bin-hadoop3 spark
18 | # If you get particularly sick of this then there's a skeleton of 19 | # a solution in https://stackoverflow.com/a/36510671/5264127 20 | # but it might be overengineering things... 21 | 22 | 23 | rm -rf "${OUTPUT_DIR}" 24 | 25 | 26 | stubgen \ 27 | --verbose \ 28 | --package duckdb \ 29 | --output "${OUTPUT_DIR}" 30 | 31 | 32 | # We need this while `duckdb` is a single file module and not a package. 33 | # If `duckdb` becomes a proper package, this can be removed. 34 | mv "${OUTPUT_DIR}/duckdb.pyi" "${OUTPUT_DIR}/__init__.pyi" 35 | 36 | add_header() ( 37 | { set +x; } 2>/dev/null 38 | cat - "$1" > "$1.tmp" < &types, const vector &names, const py::list &batches, 21 | ClientProperties &options) { 22 | py::gil_scoped_acquire acquire; 23 | 24 | auto pyarrow_lib_module = py::module::import("pyarrow").attr("lib"); 25 | auto from_batches_func = pyarrow_lib_module.attr("Table").attr("from_batches"); 26 | auto schema_import_func = pyarrow_lib_module.attr("Schema").attr("_import_from_c"); 27 | ArrowSchema schema; 28 | ArrowConverter::ToArrowSchema(&schema, types, names, options); 29 | auto schema_obj = schema_import_func(reinterpret_cast(&schema)); 30 | 31 | return py::cast(from_batches_func(batches, schema_obj)); 32 | } 33 | 34 | } // namespace pyarrow 35 | 36 | } // namespace duckdb 37 | -------------------------------------------------------------------------------- /src/duckdb_py/common/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_common OBJECT exceptions.cpp) 3 | 4 | target_link_libraries(python_common PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/functional/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_functional OBJECT functional.cpp) 3 | 4 | 
target_link_libraries(python_functional PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/functional/functional.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb_python/functional.hpp" 2 | 3 | namespace duckdb { 4 | 5 | void DuckDBPyFunctional::Initialize(py::module_ &parent) { 6 | auto m = parent.def_submodule("_func", "This module contains classes and methods related to functions and udf"); 7 | 8 | py::enum_(m, "PythonUDFType") 9 | .value("NATIVE", duckdb::PythonUDFType::NATIVE) 10 | .value("ARROW", duckdb::PythonUDFType::ARROW) 11 | .export_values(); 12 | 13 | py::enum_(m, "FunctionNullHandling") 14 | .value("DEFAULT", duckdb::FunctionNullHandling::DEFAULT_NULL_HANDLING) 15 | .value("SPECIAL", duckdb::FunctionNullHandling::SPECIAL_HANDLING) 16 | .export_values(); 17 | } 18 | 19 | } // namespace duckdb 20 | -------------------------------------------------------------------------------- /src/duckdb_py/importer.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb_python/import_cache/importer.hpp" 2 | #include "duckdb_python/import_cache/python_import_cache.hpp" 3 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 4 | #include "duckdb_python/pyconnection/pyconnection.hpp" 5 | 6 | namespace duckdb { 7 | 8 | py::handle PythonImporter::Import(stack> &hierarchy, bool load) { 9 | auto &import_cache = *DuckDBPyConnection::ImportCache(); 10 | py::handle source(nullptr); 11 | while (!hierarchy.empty()) { 12 | // From top to bottom, import them 13 | auto &item = hierarchy.top(); 14 | hierarchy.pop(); 15 | source = item.get().Load(import_cache, source, load); 16 | if (!source) { 17 | // If load is false, or the module load fails and is not required, we return early 18 | break; 19 | } 20 | } 21 | return source; 22 | } 23 | 24 | } // namespace duckdb 25 | 
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/arrow/arrow_export_utils.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 4 | 5 | namespace duckdb { 6 | 7 | namespace pyarrow { 8 | 9 | py::object ToArrowTable(const vector &types, const vector &names, const py::list &batches, 10 | ClientProperties &options); 11 | 12 | } // namespace pyarrow 13 | 14 | } // namespace duckdb 15 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/arrow/pyarrow_filter_pushdown.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/arrow/pyarrow_filter_pushdown.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb/common/arrow/arrow_wrapper.hpp" 12 | #include "duckdb/function/table/arrow/arrow_duck_schema.hpp" 13 | #include "duckdb/common/unordered_map.hpp" 14 | #include "duckdb/planner/table_filter.hpp" 15 | #include "duckdb/main/client_properties.hpp" 16 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 17 | 18 | namespace duckdb { 19 | 20 | struct PyArrowFilterPushdown { 21 | static py::object TransformFilter(TableFilterSet &filter_collection, unordered_map &columns, 22 | unordered_map filter_to_col, const ClientProperties &config, 23 | const ArrowTableSchema &arrow_table); 24 | }; 25 | 26 | } // namespace duckdb 27 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/conversions/optional_wrapper.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 
"duckdb_python/pyconnection.hpp" 4 | #include "duckdb/common/helper.hpp" 5 | 6 | using duckdb::Optional; 7 | 8 | namespace py = pybind11; 9 | 10 | namespace PYBIND11_NAMESPACE { 11 | namespace detail { 12 | 13 | template 14 | struct type_caster> : public type_caster_base> { 15 | using base = type_caster_base>; 16 | using child = type_caster_base; 17 | Optional tmp; 18 | 19 | public: 20 | bool load(handle src, bool convert) { 21 | if (base::load(src, convert)) { 22 | return true; 23 | } else if (child::load(src, convert)) { 24 | return true; 25 | } 26 | return false; 27 | } 28 | 29 | static handle cast(Optional src, return_value_policy policy, handle parent) { 30 | return base::cast(src, policy, parent); 31 | } 32 | }; 33 | 34 | } // namespace detail 35 | } // namespace PYBIND11_NAMESPACE 36 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/filesystem_object.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/filesystem_object.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | #include "duckdb_python/pybind11/registered_py_object.hpp" 11 | #include "duckdb_python/pyfilesystem.hpp" 12 | 13 | namespace duckdb { 14 | 15 | class FileSystemObject : public RegisteredObject { 16 | public: 17 | explicit FileSystemObject(py::object fs, vector filenames_p) 18 | : RegisteredObject(std::move(fs)), filenames(std::move(filenames_p)) { 19 | } 20 | ~FileSystemObject() override { 21 | py::gil_scoped_acquire acquire; 22 | // Assert that the 'obj' is a filesystem 23 | D_ASSERT(py::isinstance(obj, DuckDBPyConnection::ImportCache()->duckdb.filesystem.ModifiedMemoryFileSystem())); 24 | for (auto &file : filenames) { 25 | obj.attr("delete")(file); 26 | } 27 | } 28 | 29 | vector 
// Static-only holder for the registration of the `_func` submodule
// (UDF-related enums) on the `_duckdb` extension module.
class DuckDBPyFunctional {
public:
	// Not instantiable: this class only groups the static registration helper.
	DuckDBPyFunctional() = delete;

public:
	// Registers the `_func` submodule and its enums on the given parent module.
	static void Initialize(py::module_ &m);
};
// Import-cache entry for the `collections.abc` submodule and the members
// DuckDB looks up on it.
struct CollectionsAbcCacheItem : public PythonImportCacheItem {

public:
	static constexpr const char *Name = "collections.abc";

public:
	CollectionsAbcCacheItem()
	    : PythonImportCacheItem("collections.abc"), Iterable("Iterable", this), Mapping("Mapping", this) {
	}
	~CollectionsAbcCacheItem() override {
	}

	// `collections.abc.Iterable`
	PythonImportCacheItem Iterable;
	// `collections.abc.Mapping`
	PythonImportCacheItem Mapping;
};

// Import-cache entry for the top-level `collections` module.
struct CollectionsCacheItem : public PythonImportCacheItem {

public:
	static constexpr const char *Name = "collections";

public:
	CollectionsCacheItem() : PythonImportCacheItem("collections"), abc() {
	}
	~CollectionsCacheItem() override {
	}

	// Nested cache for the `collections.abc` submodule.
	CollectionsAbcCacheItem abc;
};
// Import-cache entry for the `decimal` module; caches the `Decimal` class,
// which the bindings use when converting DECIMAL values.
struct DecimalCacheItem : public PythonImportCacheItem {

public:
	static constexpr const char *Name = "decimal";

public:
	DecimalCacheItem() : PythonImportCacheItem("decimal"), Decimal("Decimal", this) {
	}
	~DecimalCacheItem() override {
	}

	// `decimal.Decimal`
	PythonImportCacheItem Decimal;
};
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct IpythonDisplayCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | IpythonDisplayCacheItem(optional_ptr parent) 27 | : PythonImportCacheItem("display", parent), display("display", this), HTML("HTML", this) { 28 | } 29 | ~IpythonDisplayCacheItem() override { 30 | } 31 | 32 | PythonImportCacheItem display; 33 | PythonImportCacheItem HTML; 34 | }; 35 | 36 | struct IpythonCacheItem : public PythonImportCacheItem { 37 | 38 | public: 39 | static constexpr const char *Name = "IPython"; 40 | 41 | public: 42 | IpythonCacheItem() : PythonImportCacheItem("IPython"), get_ipython("get_ipython", this), display(this) { 43 | } 44 | ~IpythonCacheItem() override { 45 | } 46 | 47 | PythonImportCacheItem get_ipython; 48 | IpythonDisplayCacheItem display; 49 | 50 | protected: 51 | bool IsRequired() const override final { 52 | return false; 53 | } 54 | }; 55 | 56 | } // namespace duckdb 57 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/ipywidgets_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/ipywidgets_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct IpywidgetsCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "ipywidgets"; 27 | 28 | public: 29 | IpywidgetsCacheItem() : PythonImportCacheItem("ipywidgets"), FloatProgress("FloatProgress", this) { 30 | } 31 | ~IpywidgetsCacheItem() override { 32 | } 33 | 34 | PythonImportCacheItem FloatProgress; 35 | 36 | protected: 37 | bool IsRequired() const override final { 38 | return false; 39 | } 40 | }; 41 | 42 | } // namespace duckdb 43 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/pathlib_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/pathlib_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct PathlibCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "pathlib"; 27 | 28 | public: 29 | PathlibCacheItem() : PythonImportCacheItem("pathlib"), Path("Path", this) { 30 | } 31 | ~PathlibCacheItem() override { 32 | } 33 | 34 | PythonImportCacheItem Path; 35 | 36 | protected: 37 | bool IsRequired() const override final { 38 | return false; 39 | } 40 | }; 41 | 42 | } // namespace duckdb 43 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/polars_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/polars_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct PolarsCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "polars"; 27 | 28 | public: 29 | PolarsCacheItem() : PythonImportCacheItem("polars"), DataFrame("DataFrame", this), LazyFrame("LazyFrame", this) { 30 | } 31 | ~PolarsCacheItem() override { 32 | } 33 | 34 | PythonImportCacheItem DataFrame; 35 | PythonImportCacheItem LazyFrame; 36 | 37 | protected: 38 | bool IsRequired() const override final { 39 | return false; 40 | } 41 | }; 42 | 43 | } // namespace duckdb 44 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/pytz_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/pytz_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct PytzCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "pytz"; 27 | 28 | public: 29 | PytzCacheItem() : PythonImportCacheItem("pytz"), timezone("timezone", this) { 30 | } 31 | ~PytzCacheItem() override { 32 | } 33 | 34 | PythonImportCacheItem timezone; 35 | }; 36 | 37 | } // namespace duckdb 38 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/types_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/types_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct TypesCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "types"; 27 | 28 | public: 29 | TypesCacheItem() 30 | : PythonImportCacheItem("types"), UnionType("UnionType", this), GenericAlias("GenericAlias", this), 31 | BuiltinFunctionType("BuiltinFunctionType", this) { 32 | } 33 | ~TypesCacheItem() override { 34 | } 35 | 36 | PythonImportCacheItem UnionType; 37 | PythonImportCacheItem GenericAlias; 38 | PythonImportCacheItem BuiltinFunctionType; 39 | }; 40 | 41 | } // namespace duckdb 42 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/typing_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/typing_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct TypingCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "typing"; 27 | 28 | public: 29 | TypingCacheItem() : PythonImportCacheItem("typing"), Union("Union", this), get_origin("get_origin", this) { 30 | } 31 | ~TypingCacheItem() override { 32 | } 33 | 34 | PythonImportCacheItem Union; 35 | PythonImportCacheItem get_origin; 36 | }; 37 | 38 | } // namespace duckdb 39 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/uuid_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/uuid_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct UuidCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "uuid"; 27 | 28 | public: 29 | UuidCacheItem() : PythonImportCacheItem("uuid"), UUID("UUID", this) { 30 | } 31 | ~UuidCacheItem() override { 32 | } 33 | 34 | PythonImportCacheItem UUID; 35 | }; 36 | 37 | } // namespace duckdb 38 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/python_import_cache.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/python_import_cache.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 13 | #include "duckdb.hpp" 14 | #include "duckdb/common/vector.hpp" 15 | #include "duckdb_python/import_cache/python_import_cache_modules.hpp" 16 | 17 | namespace duckdb { 18 | 19 | struct PythonImportCache { 20 | public: 21 | explicit PythonImportCache() { 22 | } 23 | ~PythonImportCache(); 24 | 25 | public: 26 | PyarrowCacheItem pyarrow; 27 | PandasCacheItem pandas; 28 | DatetimeCacheItem datetime; 29 | DecimalCacheItem decimal; 30 | IpythonCacheItem IPython; 31 | IpywidgetsCacheItem ipywidgets; 32 | NumpyCacheItem numpy; 33 | PathlibCacheItem pathlib; 34 | PolarsCacheItem polars; 35 | DuckdbCacheItem duckdb; 36 | PytzCacheItem pytz; 37 | TypesCacheItem types; 38 | TypingCacheItem typing; 39 | UuidCacheItem uuid; 40 | CollectionsCacheItem collections; 41 | 42 | public: 43 | py::handle AddCache(py::object item); 44 | 45 | private: 46 | vector owned_objects; 47 | }; 48 | 49 | } // namespace duckdb 50 | 
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/python_import_cache_modules.hpp: -------------------------------------------------------------------------------- 1 | #include "duckdb_python/import_cache/modules/pyarrow_module.hpp" 2 | #include "duckdb_python/import_cache/modules/pandas_module.hpp" 3 | #include "duckdb_python/import_cache/modules/datetime_module.hpp" 4 | #include "duckdb_python/import_cache/modules/decimal_module.hpp" 5 | #include "duckdb_python/import_cache/modules/ipython_module.hpp" 6 | #include "duckdb_python/import_cache/modules/ipywidgets_module.hpp" 7 | #include "duckdb_python/import_cache/modules/numpy_module.hpp" 8 | #include "duckdb_python/import_cache/modules/pathlib_module.hpp" 9 | #include "duckdb_python/import_cache/modules/polars_module.hpp" 10 | #include "duckdb_python/import_cache/modules/duckdb_module.hpp" 11 | #include "duckdb_python/import_cache/modules/pytz_module.hpp" 12 | #include "duckdb_python/import_cache/modules/types_module.hpp" 13 | #include "duckdb_python/import_cache/modules/typing_module.hpp" 14 | #include "duckdb_python/import_cache/modules/uuid_module.hpp" 15 | #include "duckdb_python/import_cache/modules/collections_module.hpp" -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/jupyter_progress_bar_display.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/jupyter_progress_bar_display.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 12 | #include "duckdb/common/progress_bar/progress_bar_display.hpp" 13 | #include "duckdb/common/helper.hpp" 14 | 15 | namespace duckdb { 16 | 
17 | class JupyterProgressBarDisplay : public ProgressBarDisplay { 18 | public: 19 | JupyterProgressBarDisplay(); 20 | virtual ~JupyterProgressBarDisplay() { 21 | } 22 | 23 | static unique_ptr Create(); 24 | 25 | public: 26 | void Update(double progress); 27 | void Finish(); 28 | 29 | private: 30 | void Initialize(); 31 | 32 | private: 33 | py::object progress_bar; 34 | }; 35 | 36 | } // namespace duckdb 37 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/map.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/pandas/pandas_scan.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb.hpp" 12 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 13 | #include "duckdb/parser/parsed_data/create_table_function_info.hpp" 14 | #include "duckdb/execution/execution_context.hpp" 15 | 16 | namespace duckdb { 17 | 18 | struct MapFunction : public TableFunction { 19 | 20 | public: 21 | MapFunction(); 22 | 23 | static unique_ptr MapFunctionBind(ClientContext &context, TableFunctionBindInput &input, 24 | vector &return_types, vector &names); 25 | 26 | static OperatorResultType MapFunctionExec(ExecutionContext &context, TableFunctionInput &data, DataChunk &input, 27 | DataChunk &output); 28 | }; 29 | 30 | } // namespace duckdb 31 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/numpy/numpy_bind.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 4 | #include "duckdb/common/common.hpp" 5 | 6 | namespace duckdb { 7 | 8 | struct PandasColumnBindData; 9 | class ClientContext; 10 | 
//! Derives DuckDB column types/names from a numpy-backed frame-like object 'df'
//! and fills one PandasColumnBindData per column.
struct NumpyBind {
	static void Bind(const ClientContext &config, py::handle df, vector<PandasColumnBindData> &out,
	                 vector<LogicalType> &return_types, vector<string> &names);
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/numpy/numpy_result_conversion.hpp: --------------------------------------------------------------------------------
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb_python/numpy/numpy_result_conversion.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb_python/pybind11/pybind_wrapper.hpp"
#include "duckdb_python/numpy/array_wrapper.hpp"
#include "duckdb.hpp"

namespace duckdb {

//! Accumulates query result chunks into one numpy array per column.
class NumpyResultConversion {
public:
	//! 'pandas' records whether the arrays are destined for a pandas DataFrame.
	NumpyResultConversion(const vector<LogicalType> &types, idx_t initial_capacity,
	                      const ClientProperties &client_properties, bool pandas = false);

	//! Appends one chunk, growing the arrays via Resize() when needed.
	void Append(DataChunk &chunk);

	py::object ToArray(idx_t col_idx) {
		return owned_data[col_idx].ToArray();
	}
	bool ToPandas() const {
		return pandas;
	}

private:
	void Resize(idx_t new_capacity);

private:
	// One wrapper per result column (element type lost in this dump;
	// ArrayWrapper from array_wrapper.hpp — confirm).
	vector<ArrayWrapper> owned_data;
	idx_t count;
	idx_t capacity;
	bool pandas;
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/numpy/numpy_scan.hpp: --------------------------------------------------------------------------------
#pragma once

#include "duckdb_python/pybind11/pybind_wrapper.hpp"
#include "duckdb/common/common.hpp"

namespace duckdb {

struct PandasColumnBindData;

//! Copies 'count' values starting at 'offset' out of a numpy column into a DuckDB Vector.
struct NumpyScan {
	static void Scan(PandasColumnBindData &bind_data, idx_t count, idx_t offset, Vector &out);
	//! Variant for dtype=object columns: walks raw PyObject* entries with the given
	//! byte 'stride' between elements.
	static void ScanObjectColumn(PyObject **col, idx_t stride, idx_t count, idx_t offset, Vector &out);
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/numpy/raw_array_wrapper.hpp: --------------------------------------------------------------------------------
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb_python/numpy/raw_array_wrapper.hpp
//
//
//===----------------------------------------------------------------------===//
// NOTE: banner previously read 'duckdb_python/array_wrapper.hpp'; corrected to
// this header's actual path.

#pragma once

#include "duckdb_python/pybind11/pybind_wrapper.hpp"
#include "duckdb.hpp"

namespace duckdb {

//! Owns a growable numpy array plus a raw pointer into its buffer, used while
//! converting DuckDB vectors of a single LogicalType into numpy storage.
struct RawArrayWrapper {

	explicit RawArrayWrapper(const LogicalType &type);

	py::array array;
	data_ptr_t data;   // raw pointer into 'array's buffer
	LogicalType type;
	idx_t type_width;  // bytes per element for this dtype
	idx_t count;       // number of valid elements appended so far

public:
	//! Maps a DuckDB LogicalType to the numpy dtype string used for the array.
	static string DuckDBToNumpyDtype(const LogicalType &type);
	void Initialize(idx_t capacity);
	void Resize(idx_t new_capacity);
	void Append(idx_t current_offset, Vector &input, idx_t count);
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pandas/column/pandas_numpy_column.hpp: --------------------------------------------------------------------------------
#pragma once

#include "duckdb_python/pandas/pandas_column.hpp"
#include "duckdb_python/pybind11/pybind_wrapper.hpp"

namespace duckdb {

//! PandasColumn backed by a numpy ndarray; caches the first-axis stride (bytes).
class PandasNumpyColumn : public PandasColumn {
public:
	PandasNumpyColumn(py::array array_p) : PandasColumn(PandasColumnBackend::NUMPY), array(std::move(array_p)) {
		D_ASSERT(py::hasattr(array, "strides"));
		// strides[0]: byte distance between consecutive rows
		// (cast target lost in this dump; idx_t matches the member — confirm).
		stride = array.attr("strides").attr("__getitem__")(0).cast<idx_t>();
	}

public:
	py::array array;
	idx_t stride;
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pandas/pandas_bind.hpp: --------------------------------------------------------------------------------
#pragma once

#include "duckdb_python/pybind11/pybind_wrapper.hpp"
#include "duckdb_python/pybind11/python_object_container.hpp"
#include "duckdb_python/numpy/numpy_type.hpp"
#include "duckdb/common/helper.hpp"
#include "duckdb_python/pandas/pandas_column.hpp"

namespace duckdb {

class ClientContext;

//! Keeps a numpy array referenced for the lifetime of a registration.
struct RegisteredArray {
	explicit RegisteredArray(py::array numpy_array) : numpy_array(std::move(numpy_array)) {
	}
	py::array numpy_array;
};

//! Per-column state produced by binding a pandas DataFrame for scanning.
struct PandasColumnBindData {
	NumpyType numpy_type;
	unique_ptr<PandasColumn> pandas_col;
	// Validity mask, when the column carries one (element type lost in this dump — confirm).
	unique_ptr<RegisteredArray> mask;
	//! Only for categorical types
	string internal_categorical_type;
	//! Hold ownership of objects created during scanning
	PythonObjectContainer object_str_val;
};

//! Derives DuckDB column types/names from a pandas DataFrame 'df'.
struct Pandas {
	static void Bind(const ClientContext &config, py::handle df, vector<PandasColumnBindData> &out,
	                 vector<LogicalType> &return_types, vector<string> &names);
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pandas/pandas_column.hpp: --------------------------------------------------------------------------------
#pragma once

namespace duckdb {

//! Storage backend of a bound pandas column; NUMPY is currently the only variant.
enum class PandasColumnBackend { NUMPY };

//! Polymorphic base for column storage; concrete backends identify themselves
//! through the 'backend' tag rather than RTTI.
class PandasColumn {
public:
	PandasColumn(PandasColumnBackend backend) : backend(backend) {
	}
	virtual ~PandasColumn() {
	}

public:
	PandasColumnBackend Backend() const {
		return backend;
	}

protected:
	PandasColumnBackend backend;
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/path_like.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/common.hpp" 4 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 5 | #include "duckdb/main/external_dependencies.hpp" 6 | #include "duckdb/common/types/value.hpp" 7 | 8 | namespace duckdb { 9 | 10 | struct DuckDBPyConnection; 11 | 12 | struct PathLike { 13 | static PathLike Create(const py::object &object, DuckDBPyConnection &connection); 14 | // The file(s) extracted from object 15 | vector files; 16 | shared_ptr dependency; 17 | }; 18 | 19 | } // namespace duckdb 20 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/conversions/pyconnection_default.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb_python/pyconnection/pyconnection.hpp" 4 | #include "duckdb/common/helper.hpp" 5 | 6 | using duckdb::DuckDBPyConnection; 7 | using duckdb::shared_ptr; 8 | 9 | namespace py = pybind11; 10 | 11 | namespace PYBIND11_NAMESPACE { 12 | namespace detail { 13 | 14 | template <> 15 | class type_caster> 16 | : public copyable_holder_caster> { 17 | using type = DuckDBPyConnection; 18 | using holder_caster = copyable_holder_caster>; 19 | // This is used to generate documentation on duckdb-web 20 | PYBIND11_TYPE_CASTER(shared_ptr, const_name("duckdb.DuckDBPyConnection")); 21 | 22 | bool load(handle src, bool convert) { 23 | if (py::none().is(src)) { 24 | value = DuckDBPyConnection::DefaultConnection(); 25 | return true; 26 | } 27 | if (!holder_caster::load(src, convert)) { 28 | return false; 29 | } 30 | value = std::move(holder); 31 | return true; 32 | } 33 | 34 | static handle cast(shared_ptr base, return_value_policy rvp, handle h) { 35 | return 
holder_caster::cast(base, rvp, h); 36 | } 37 | }; 38 | 39 | template <> 40 | struct is_holder_type> : std::true_type {}; 41 | 42 | } // namespace detail 43 | } // namespace PYBIND11_NAMESPACE 44 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/conversions/render_mode_enum.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/common.hpp" 4 | #include "duckdb/common/exception.hpp" 5 | #include "duckdb/common/string_util.hpp" 6 | #include "duckdb/common/box_renderer.hpp" 7 | #include "duckdb/common/enum_util.hpp" 8 | 9 | using duckdb::InvalidInputException; 10 | using duckdb::RenderMode; 11 | using duckdb::string; 12 | using duckdb::StringUtil; 13 | 14 | namespace py = pybind11; 15 | 16 | static RenderMode RenderModeFromInteger(int64_t value) { 17 | if (value == 0) { 18 | return RenderMode::ROWS; 19 | } else if (value == 1) { 20 | return RenderMode::COLUMNS; 21 | } else { 22 | throw InvalidInputException("Unrecognized type for 'render_mode'"); 23 | } 24 | } 25 | 26 | namespace PYBIND11_NAMESPACE { 27 | namespace detail { 28 | 29 | template <> 30 | struct type_caster : public type_caster_base { 31 | using base = type_caster_base; 32 | RenderMode tmp; 33 | 34 | public: 35 | bool load(handle src, bool convert) { 36 | if (base::load(src, convert)) { 37 | return true; 38 | } else if (py::isinstance(src)) { 39 | string render_mode_str = py::str(src); 40 | auto render_mode = 41 | duckdb::EnumUtil::FromString(render_mode_str.empty() ? 
"ROWS" : render_mode_str); 42 | value = &render_mode; 43 | return true; 44 | } else if (py::isinstance(src)) { 45 | tmp = RenderModeFromInteger(src.cast()); 46 | value = &tmp; 47 | return true; 48 | } 49 | return false; 50 | } 51 | 52 | static handle cast(RenderMode src, return_value_policy policy, handle parent) { 53 | return base::cast(src, policy, parent); 54 | } 55 | }; 56 | 57 | } // namespace detail 58 | } // namespace PYBIND11_NAMESPACE 59 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/dataframe.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/pybind11/dataframe.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb/common/types.hpp" 12 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 13 | 14 | namespace duckdb { 15 | 16 | class PandasDataFrame : public py::object { 17 | public: 18 | PandasDataFrame(const py::object &o) : py::object(o, borrowed_t {}) { 19 | } 20 | using py::object::object; 21 | 22 | public: 23 | static bool check_(const py::handle &object); // NOLINT 24 | static bool IsPyArrowBacked(const py::handle &df); 25 | static py::object ToArrowTable(const py::object &df); 26 | }; 27 | 28 | class PolarsDataFrame : public py::object { 29 | public: 30 | PolarsDataFrame(const py::object &o) : py::object(o, borrowed_t {}) { 31 | } 32 | using py::object::object; 33 | 34 | public: 35 | static bool IsDataFrame(const py::handle &object); 36 | static bool IsLazyFrame(const py::handle &object); 37 | static bool check_(const py::handle &object); // NOLINT 38 | }; 39 | } // namespace duckdb 40 | 41 | namespace pybind11 { 42 | namespace detail { 43 | template <> 44 | struct handle_type_name { 45 | static constexpr auto 
name = _("pandas.DataFrame"); 46 | }; 47 | } // namespace detail 48 | } // namespace pybind11 49 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/exceptions.hpp: -------------------------------------------------------------------------------- 1 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 2 | 3 | namespace py = pybind11; 4 | 5 | namespace duckdb { 6 | 7 | void RegisterExceptions(const py::module &m); 8 | 9 | } // namespace duckdb 10 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/gil_wrapper.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 4 | 5 | namespace duckdb { 6 | 7 | struct PythonGILWrapper { 8 | py::gil_scoped_acquire acquire; 9 | }; 10 | 11 | } // namespace duckdb 12 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/python_object_container.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/pybind11/python_object_container.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 12 | #include "duckdb/common/vector.hpp" 13 | #include "duckdb_python/pybind11/gil_wrapper.hpp" 14 | #include "duckdb/common/helper.hpp" 15 | 16 | namespace duckdb { 17 | 18 | //! Every Python Object Must be created through our container 19 | //! 
The Container ensures that the GIL is HOLD on Python Object Construction/Destruction/Modification 20 | class PythonObjectContainer { 21 | public: 22 | PythonObjectContainer() { 23 | } 24 | 25 | ~PythonObjectContainer() { 26 | py::gil_scoped_acquire acquire; 27 | py_obj.clear(); 28 | } 29 | 30 | void Push(py::object &&obj) { 31 | py::gil_scoped_acquire gil; 32 | PushInternal(std::move(obj)); 33 | } 34 | 35 | const py::object &LastAddedObject() { 36 | D_ASSERT(!py_obj.empty()); 37 | return py_obj.back(); 38 | } 39 | 40 | private: 41 | void PushInternal(py::object &&obj) { 42 | py_obj.emplace_back(obj); 43 | } 44 | 45 | vector py_obj; 46 | }; 47 | } // namespace duckdb 48 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/registered_py_object.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/pybind11/registered_py_object.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 11 | 12 | namespace duckdb { 13 | 14 | class RegisteredObject { 15 | public: 16 | explicit RegisteredObject(py::object obj_p) : obj(std::move(obj_p)) { 17 | } 18 | virtual ~RegisteredObject() { 19 | py::gil_scoped_acquire acquire; 20 | obj = py::none(); 21 | } 22 | 23 | py::object obj; 24 | }; 25 | 26 | } // namespace duckdb 27 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pystatement.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/pystatement.hpp 5 | // 6 | // 7 | 
//===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 12 | #include "duckdb.hpp" 13 | 14 | namespace duckdb { 15 | 16 | struct DuckDBPyStatement { 17 | public: 18 | explicit DuckDBPyStatement(unique_ptr statement); 19 | 20 | public: 21 | //! Create a copy of the wrapped statement 22 | unique_ptr GetStatement(); 23 | string Query() const; 24 | py::set NamedParameters() const; 25 | StatementType Type() const; 26 | py::list ExpectedResultType() const; 27 | 28 | public: 29 | static void Initialize(py::handle &m); 30 | 31 | private: 32 | unique_ptr statement; 33 | }; 34 | 35 | } // namespace duckdb 36 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/python_conversion.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/pyresult.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb_python/numpy/array_wrapper.hpp" 12 | #include "duckdb.hpp" 13 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 14 | #include "duckdb_python/python_objects.hpp" 15 | #include "duckdb/common/types.hpp" 16 | #include "duckdb/common/types/hugeint.hpp" 17 | 18 | #include "datetime.h" // from Python 19 | 20 | namespace duckdb { 21 | 22 | enum class PythonObjectType { 23 | Other, 24 | None, 25 | Integer, 26 | Float, 27 | Bool, 28 | Decimal, 29 | Uuid, 30 | Datetime, 31 | Date, 32 | Time, 33 | Timedelta, 34 | String, 35 | ByteArray, 36 | MemoryView, 37 | Bytes, 38 | List, 39 | Tuple, 40 | Dict, 41 | NdArray, 42 | NdDatetime, 43 | Value 44 | }; 45 | 46 | PythonObjectType GetPythonObjectType(py::handle &ele); 47 | 48 | bool TryTransformPythonNumeric(Value &res, 
//! A DependencyItem that keeps a Python object alive for as long as
//! something inside DuckDB depends on it.
//! NOTE(review): template arguments of unique_ptr/shared_ptr appear stripped
//! in this extraction (presumably unique_ptr<RegisteredObject> and
//! shared_ptr<DependencyItem>) — confirm against the repository.
class PythonDependencyItem : public DependencyItem {
public:
	explicit PythonDependencyItem(unique_ptr &&object);
	//! Acquires the GIL before releasing the held Python object (see the .cpp)
	~PythonDependencyItem() override;

public:
	//! Wrap a raw py::object in a RegisteredObject and create a dependency
	static shared_ptr Create(py::object object);
	//! Create a dependency from an already-registered object
	static shared_ptr Create(unique_ptr &&object);

public:
	//! The held object; released under the GIL on destruction
	unique_ptr object;
};
Replace(ClientContext &context, ReplacementScanInput &input, 14 | optional_ptr data); 15 | //! Try to perform a replacement, returns NULL on error 16 | static unique_ptr TryReplacementObject(const py::object &entry, const string &name, 17 | ClientContext &context, bool relation = false); 18 | //! Perform a replacement or throw if it failed 19 | static unique_ptr ReplacementObject(const py::object &entry, const string &name, ClientContext &context, 20 | bool relation = false); 21 | }; 22 | 23 | } // namespace duckdb 24 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pytype.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 4 | #include "duckdb/common/types.hpp" 5 | 6 | namespace duckdb { 7 | 8 | class PyGenericAlias : public py::object { 9 | public: 10 | using py::object::object; 11 | 12 | public: 13 | static bool check_(const py::handle &object); 14 | }; 15 | 16 | class PyUnionType : public py::object { 17 | public: 18 | using py::object::object; 19 | 20 | public: 21 | static bool check_(const py::handle &object); 22 | }; 23 | 24 | class DuckDBPyType : public enable_shared_from_this { 25 | public: 26 | explicit DuckDBPyType(LogicalType type); 27 | 28 | public: 29 | static void Initialize(py::handle &m); 30 | 31 | public: 32 | bool Equals(const shared_ptr &other) const; 33 | bool EqualsString(const string &type_str) const; 34 | shared_ptr GetAttribute(const string &name) const; 35 | py::list Children() const; 36 | string ToString() const; 37 | const LogicalType &Type() const; 38 | string GetId() const; 39 | 40 | private: 41 | private: 42 | LogicalType type; 43 | }; 44 | 45 | } // namespace duckdb 46 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pyutil.hpp: 
//! Thin static wrappers around CPython C-API macros, so the rest of the
//! codebase can use them through pybind11 handles without spreading raw
//! macro invocations everywhere.
struct PyUtil {
	//! Byte length of a bytearray object
	static idx_t PyByteArrayGetSize(py::handle &obj) {
		return PyByteArray_GET_SIZE(obj.ptr()); // NOLINT
	}

	//! Underlying Py_buffer of a memoryview object
	static Py_buffer *PyMemoryViewGetBuffer(py::handle &obj) {
		return PyMemoryView_GET_BUFFER(obj.ptr());
	}

	//! True if the unicode object is stored as compact ASCII
	static bool PyUnicodeIsCompactASCII(py::handle &obj) {
		return PyUnicode_IS_COMPACT_ASCII(obj.ptr());
	}

	//! Raw character data of a unicode object (read-only view)
	static const char *PyUnicodeData(py::handle &obj) {
		return const_char_ptr_cast(PyUnicode_DATA(obj.ptr()));
	}

	//! Raw character data of a unicode object (mutable view)
	static char *PyUnicodeDataMutable(py::handle &obj) {
		return char_ptr_cast(PyUnicode_DATA(obj.ptr()));
	}

	//! Length of the unicode object in code points (not bytes)
	static idx_t PyUnicodeGetLength(py::handle &obj) {
		return PyUnicode_GET_LENGTH(obj.ptr());
	}

	//! True if the unicode object uses CPython's compact representation
	static bool PyUnicodeIsCompact(PyCompactUnicodeObject *obj) {
		return PyUnicode_IS_COMPACT(obj);
	}

	//! True if the unicode object contains only ASCII code points
	static bool PyUnicodeIsASCII(PyCompactUnicodeObject *obj) {
		return PyUnicode_IS_ASCII(obj);
	}

	//! Storage kind (1, 2 or 4 bytes per code point) of the unicode object
	static int PyUnicodeKind(py::handle &obj) {
		return PyUnicode_KIND(obj.ptr());
	}

	//! Data pointer for a 1-byte-per-code-point (latin-1/ASCII) string
	static Py_UCS1 *PyUnicode1ByteData(py::handle &obj) {
		return PyUnicode_1BYTE_DATA(obj.ptr());
	}

	//! Data pointer for a 2-bytes-per-code-point string
	static Py_UCS2 *PyUnicode2ByteData(py::handle &obj) {
		return PyUnicode_2BYTE_DATA(obj.ptr());
	}

	//! Data pointer for a 4-bytes-per-code-point string
	static Py_UCS4 *PyUnicode4ByteData(py::handle &obj) {
		return PyUnicode_4BYTE_DATA(obj.ptr());
	}
};
#include "duckdb_python/pytype.hpp" 5 | #include "duckdb_python/pyconnection/pyconnection.hpp" 6 | 7 | namespace duckdb { 8 | 9 | class DuckDBPyTyping { 10 | public: 11 | DuckDBPyTyping() = delete; 12 | 13 | public: 14 | static void Initialize(py::module_ &m); 15 | }; 16 | 17 | } // namespace duckdb 18 | -------------------------------------------------------------------------------- /src/duckdb_py/jupyter/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_jupyter OBJECT jupyter_progress_bar_display.cpp) 3 | 4 | target_link_libraries(python_jupyter PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/jupyter/jupyter_progress_bar_display.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb_python/jupyter_progress_bar_display.hpp" 2 | #include "duckdb_python/pyconnection/pyconnection.hpp" 3 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 4 | 5 | namespace duckdb { 6 | 7 | unique_ptr JupyterProgressBarDisplay::Create() { 8 | return make_uniq(); 9 | } 10 | 11 | void JupyterProgressBarDisplay::Initialize() { 12 | auto &import_cache = *DuckDBPyConnection::ImportCache(); 13 | auto float_progress_attr = import_cache.ipywidgets.FloatProgress(); 14 | D_ASSERT(float_progress_attr.ptr() != nullptr); 15 | // Initialize the progress bar 16 | py::dict style; 17 | style["bar_color"] = "black"; 18 | progress_bar = float_progress_attr((py::arg("min") = 0, py::arg("max") = 100, py::arg("style") = style)); 19 | 20 | progress_bar.attr("layout").attr("width") = "auto"; 21 | 22 | // Display the progress bar 23 | auto display_attr = import_cache.IPython.display.display(); 24 | D_ASSERT(display_attr.ptr() != nullptr); 25 | display_attr(progress_bar); 26 | } 27 | 28 | JupyterProgressBarDisplay::JupyterProgressBarDisplay() : ProgressBarDisplay() { 
29 | // Empty, we need the GIL to initialize, which we don't have here 30 | } 31 | 32 | void JupyterProgressBarDisplay::Update(double progress) { 33 | py::gil_scoped_acquire gil; 34 | if (progress_bar.ptr() == nullptr) { 35 | // First print, we first need to initialize the display 36 | Initialize(); 37 | } 38 | progress_bar.attr("value") = py::cast(progress); 39 | } 40 | 41 | void JupyterProgressBarDisplay::Finish() { 42 | Update(100); 43 | } 44 | 45 | } // namespace duckdb 46 | -------------------------------------------------------------------------------- /src/duckdb_py/native/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_native OBJECT python_objects.cpp python_conversion.cpp) 3 | 4 | target_link_libraries(python_native PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/numpy/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library( 3 | python_numpy OBJECT 4 | type.cpp numpy_scan.cpp array_wrapper.cpp raw_array_wrapper.cpp 5 | numpy_bind.cpp numpy_result_conversion.cpp) 6 | 7 | target_link_libraries(python_numpy PRIVATE _duckdb_dependencies) 8 | -------------------------------------------------------------------------------- /src/duckdb_py/numpy/numpy_result_conversion.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb_python/numpy/array_wrapper.hpp" 2 | #include "duckdb_python/numpy/numpy_result_conversion.hpp" 3 | 4 | namespace duckdb { 5 | 6 | NumpyResultConversion::NumpyResultConversion(const vector &types, idx_t initial_capacity, 7 | const ClientProperties &client_properties, bool pandas) 8 | : count(0), capacity(0), pandas(pandas) { 9 | owned_data.reserve(types.size()); 10 | for (auto &type : types) { 11 | 
owned_data.emplace_back(type, client_properties, pandas); 12 | } 13 | Resize(initial_capacity); 14 | } 15 | 16 | void NumpyResultConversion::Resize(idx_t new_capacity) { 17 | if (capacity == 0) { 18 | for (auto &data : owned_data) { 19 | data.Initialize(new_capacity); 20 | } 21 | } else { 22 | for (auto &data : owned_data) { 23 | data.Resize(new_capacity); 24 | } 25 | } 26 | capacity = new_capacity; 27 | } 28 | 29 | void NumpyResultConversion::Append(DataChunk &chunk) { 30 | if (count + chunk.size() > capacity) { 31 | Resize(capacity * 2); 32 | } 33 | auto chunk_types = chunk.GetTypes(); 34 | auto source_offset = 0; 35 | auto source_size = chunk.size(); 36 | auto to_append = chunk.size(); 37 | for (idx_t col_idx = 0; col_idx < owned_data.size(); col_idx++) { 38 | owned_data[col_idx].Append(count, chunk.data[col_idx], source_size, source_offset, to_append); 39 | } 40 | count += to_append; 41 | #ifdef DEBUG 42 | for (auto &data : owned_data) { 43 | D_ASSERT(data.data->count == count); 44 | D_ASSERT(data.mask->count == count); 45 | } 46 | #endif 47 | } 48 | 49 | } // namespace duckdb 50 | -------------------------------------------------------------------------------- /src/duckdb_py/pandas/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_pandas OBJECT scan.cpp analyzer.cpp bind.cpp) 3 | 4 | target_link_libraries(python_pandas PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/pybind11/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_pybind11 OBJECT pybind_wrapper.cpp) 3 | 4 | target_link_libraries(python_pybind11 PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/pybind11/pybind_wrapper.cpp: 
-------------------------------------------------------------------------------- 1 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 2 | #include "duckdb/common/exception.hpp" 3 | #include "duckdb_python/pyconnection/pyconnection.hpp" 4 | 5 | namespace pybind11 { 6 | 7 | // NOLINTNEXTLINE(readability-identifier-naming) 8 | bool gil_check() { 9 | return (bool)PyGILState_Check(); 10 | } 11 | 12 | // NOLINTNEXTLINE(readability-identifier-naming) 13 | void gil_assert() { 14 | if (!gil_check()) { 15 | throw duckdb::InternalException("The GIL should be held for this operation, but it's not!"); 16 | } 17 | } 18 | 19 | // NOLINTNEXTLINE(readability-identifier-naming) 20 | bool is_list_like(handle obj) { 21 | if (isinstance(obj) || isinstance(obj)) { 22 | return false; 23 | } 24 | if (is_dict_like(obj)) { 25 | return false; 26 | } 27 | auto &import_cache = *duckdb::DuckDBPyConnection::ImportCache(); 28 | auto iterable = import_cache.collections.abc.Iterable(); 29 | return isinstance(obj, iterable); 30 | } 31 | 32 | // NOLINTNEXTLINE(readability-identifier-naming) 33 | bool is_dict_like(handle obj) { 34 | auto &import_cache = *duckdb::DuckDBPyConnection::ImportCache(); 35 | auto mapping = import_cache.collections.abc.Mapping(); 36 | return isinstance(obj, mapping); 37 | } 38 | 39 | // NOLINTNEXTLINE(readability-identifier-naming) 40 | std::string to_string(const object &obj) { 41 | return std::string(py::str(obj)); 42 | } 43 | 44 | } // namespace pybind11 45 | -------------------------------------------------------------------------------- /src/duckdb_py/pyconnection/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_connection OBJECT type_creation.cpp) 3 | 4 | target_link_libraries(python_connection PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/pyexpression/CMakeLists.txt: 
PythonDependencyItem::~PythonDependencyItem() { // NOLINT - destructors cannot throw
	// Releasing the RegisteredObject drops a Python reference, which requires
	// holding the GIL.
	py::gil_scoped_acquire gil;
	object.reset();
}
class Test3324:
    """Regression test for duckdb/duckdb#3324.

    A statement PREPAREd with ``$1``-style parameters and then EXECUTEd with a
    ``?`` placeholder must raise a BinderException rather than misbehave.
    """

    def test_3324(self, duckdb_cursor):
        # Set up a small table to prepare against.
        duckdb_cursor.execute(
            """
            create or replace table my_table as
            select 'test1' as column1, 1 as column2, 'quack' as column3
            union all
            select 'test2' as column1, 2 as column2, 'quacks' as column3
            union all
            select 'test3' as column1, 3 as column2, 'quacking' as column3
            """
        ).fetch_df()
        # Prepare a statement that uses the $1 parameter syntax.
        duckdb_cursor.execute(
            """
            prepare v1 as
            select
                column1
                , column2
                , column3
            from my_table
            where
                column1 = $1"""
        ).fetch_df()

        # Executing the prepared statement with a '?' placeholder must fail cleanly.
        with pytest.raises(duckdb.BinderException, match="Unexpected prepared parameter"):
            duckdb_cursor.execute("""execute v1(?)""", ("test1",)).fetch_df()
class Test3728:
    """Regression test for issue #3728: describing a table with ENUM columns."""

    def test_3728_describe_enum(self, duckdb_cursor):
        # In-memory database; the original problem also reproduced on
        # file-backed databases.
        con = duckdb.connect(":memory:")

        # Set up a table with one ENUM-typed column.
        for ddl in (
            "CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');",
            "CREATE TABLE person (name text, current_mood mood);",
        ):
            con.execute(ddl)

        # Reading .description used to fail with
        # "RuntimeError: Not implemented Error: unsupported type: mood"
        expected = [
            ("name", "VARCHAR", None, None, None, None, None),
            ("current_mood", "ENUM('sad', 'ok', 'happy')", None, None, None, None, None),
        ]
        assert con.table("person").execute().description == expected
class TestConnectionInterrupt:
    """Behavior of DuckDBPyConnection.interrupt()."""

    @pytest.mark.xfail(
        condition=platform.system() == "Emscripten",
        reason="threads not allowed on Emscripten",
    )
    def test_connection_interrupt(self):
        conn = duckdb.connect()

        def fire_interrupt() -> None:
            # Give the main thread a moment to start executing the query.
            time.sleep(0.1)
            conn.interrupt()

        worker = threading.Thread(target=fire_interrupt)
        worker.start()
        try:
            # The long-running query must be cancelled by the interrupt.
            with pytest.raises(duckdb.InterruptException):
                conn.execute("select count(*) from range(100000000000)").fetchall()
        finally:
            worker.join()

    def test_interrupt_closed_connection(self):
        # Interrupting an already-closed connection is an error.
        conn = duckdb.connect()
        conn.close()
        with pytest.raises(duckdb.ConnectionException):
            conn.interrupt()
class TestSimpleDBAPI:
    def test_regular_selection(self, duckdb_cursor, integers):
        """Fetching the integers fixture returns 0..9 followed by a NULL row."""
        duckdb_cursor.execute("SELECT * FROM integers")
        expected = [(i,) for i in range(10)] + [(None,)]
        assert duckdb_cursor.fetchall() == expected, "Incorrect result returned"
class TestSimpleDBAPI:
    def test_prepare(self, duckdb_cursor):
        """Prepared statements: '?' placeholders with list or tuple parameters,
        plus executemany() for bulk inserts (mirrors the sqlite3 docs example).
        """
        result = duckdb_cursor.execute("SELECT CAST(? AS INTEGER), CAST(? AS INTEGER)", ["42", "84"]).fetchall()
        assert result == [
            (
                42,
                84,
            )
        ], "Incorrect result returned"

        c = duckdb_cursor

        # from python docs
        c.execute(
            """CREATE TABLE stocks
            (date text, trans text, symbol text, qty real, price real)"""
        )
        c.execute("INSERT INTO stocks VALUES ('2006-01-05','BUY','RHAT',100,35.14)")

        # Parameters supplied as a tuple ...
        t = ("RHAT",)
        result = c.execute("SELECT COUNT(*) FROM stocks WHERE symbol=?", t).fetchone()
        assert result == (1,)

        # ... and as a list
        t = ["RHAT"]
        result = c.execute("SELECT COUNT(*) FROM stocks WHERE symbol=?", t).fetchone()
        assert result == (1,)

        # Larger example that inserts many records at a time
        purchases = [
            ("2006-03-28", "BUY", "IBM", 1000, 45.00),
            ("2006-04-05", "BUY", "MSFT", 1000, 72.00),
            ("2006-04-06", "SELL", "IBM", 500, 53.00),
        ]
        c.executemany("INSERT INTO stocks VALUES (?,?,?,?,?)", purchases)

        result = c.execute("SELECT count(*) FROM stocks").fetchone()
        assert result == (4,)
class TestType:
    """fetchdf() round-trips VARCHAR columns, including empty strings and NULLs."""

    @pytest.mark.parametrize("pandas", [NumpyPandas()])
    def test_fetchdf(self, pandas):
        con = duckdb.connect()
        for stmt in (
            "CREATE TABLE items(item VARCHAR)",
            "INSERT INTO items VALUES ('jeans'), (''), (NULL)",
        ):
            con.execute(stmt)

        actual = con.execute("SELECT item FROM items").fetchdf()
        assert isinstance(actual, pandas.core.frame.DataFrame)

        # NULL must come back as None; the empty string stays distinct from it.
        expected = pandas.DataFrame({"item": ["jeans", "", None]})

        print(actual)
        print(expected)
        pandas.testing.assert_frame_equal(actual, expected)
def check_exception(f):
    """Assert that calling *f* raises an exception (of any type).

    Helper for tests that only care that an operation fails, not about the
    specific exception type or message. Raises AssertionError when *f*
    completes without raising.
    """
    try:
        f()
    except Exception:
        # Any exception is what the caller expected; swallow it.
        return
    raise AssertionError("expected an exception, but none was raised")
class TestExplain:
    """Relation.explain() accepts strings (any case), enum values and raw ints."""

    def test_explain_basic(self, duckdb_cursor):
        assert isinstance(duckdb_cursor.sql("select 42").explain(), str)

    def test_explain_standard(self, duckdb_cursor):
        # All spellings of the STANDARD explain type are equivalent.
        for explain_type in ("standard", "STANDARD", duckdb.ExplainType.STANDARD, 0):
            res = duckdb_cursor.sql("select 42").explain(explain_type)
            assert isinstance(res, str)

    def test_explain_analyze(self, duckdb_cursor):
        # All spellings of the ANALYZE explain type are equivalent.
        for explain_type in ("analyze", "ANALYZE", duckdb.ExplainType.ANALYZE, 1):
            res = duckdb_cursor.sql("select 42").explain(explain_type)
            assert isinstance(res, str)

    def test_explain_df(self, duckdb_cursor):
        pd = pytest.importorskip("pandas")
        df = pd.DataFrame({"a": [42]})  # noqa: F841
        res = duckdb_cursor.sql("select * from df").explain("ANALYZE")
        assert isinstance(res, str)
from range(100000000000)").fetchall() 30 | except RuntimeError: 31 | # If this is not reached, we could not cancel the query before it completed 32 | # indicating that the query interruption functionality is broken 33 | assert True 34 | except KeyboardInterrupt: 35 | pytest.fail("Interrupted by user") 36 | thread.join() 37 | -------------------------------------------------------------------------------- /tests/fast/api/test_relation_to_view.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | 6 | class TestRelationToView: 7 | def test_values_to_view(self, duckdb_cursor): 8 | rel = duckdb_cursor.values(["test", "this is a long string"]) 9 | res = rel.fetchall() 10 | assert res == [("test", "this is a long string")] 11 | 12 | rel.to_view("vw1") 13 | 14 | view = duckdb_cursor.table("vw1") 15 | res = view.fetchall() 16 | assert res == [("test", "this is a long string")] 17 | 18 | def test_relation_to_view(self, duckdb_cursor): 19 | rel = duckdb_cursor.sql("select 'test', 'this is a long string'") 20 | 21 | res = rel.fetchall() 22 | assert res == [("test", "this is a long string")] 23 | 24 | rel.to_view("vw1") 25 | 26 | view = duckdb_cursor.table("vw1") 27 | res = view.fetchall() 28 | assert res == [("test", "this is a long string")] 29 | 30 | def test_registered_relation(self, duckdb_cursor): 31 | rel = duckdb_cursor.sql("select 'test', 'this is a long string'") 32 | 33 | con = duckdb.connect() 34 | # Register on a different connection is not allowed 35 | with pytest.raises( 36 | duckdb.InvalidInputException, 37 | match="was created by another Connection and can therefore not be used by this Connection", 38 | ): 39 | con.register("cross_connection", rel) 40 | 41 | # Register on the same connection just creates a view 42 | duckdb_cursor.register("same_connection", rel) 43 | view = duckdb_cursor.table("same_connection") 44 | res = view.fetchall() 45 | assert res == [("test", "this is a long 
string")] 46 | -------------------------------------------------------------------------------- /tests/fast/api/test_with_propagating_exceptions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | 6 | class TestWithPropagatingExceptions: 7 | def test_with(self): 8 | # Should propagate exception raised in the 'with duckdb.connect() ..' 9 | with pytest.raises(duckdb.ParserException, match=r"syntax error at or near *"), duckdb.connect() as con: 10 | con.execute("invalid") 11 | 12 | # Does not raise an exception 13 | with duckdb.connect() as con: 14 | con.execute("select 1") 15 | -------------------------------------------------------------------------------- /tests/fast/arrow/data/arrow_table: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-python/HEAD/tests/fast/arrow/data/arrow_table -------------------------------------------------------------------------------- /tests/fast/arrow/data/unsigned.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-python/HEAD/tests/fast/arrow/data/unsigned.parquet -------------------------------------------------------------------------------- /tests/fast/arrow/data/userdata1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-python/HEAD/tests/fast/arrow/data/userdata1.parquet -------------------------------------------------------------------------------- /tests/fast/arrow/test_10795.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | pyarrow = pytest.importorskip("pyarrow") 6 | 7 | 8 | @pytest.mark.parametrize("arrow_large_buffer_size", [True, False]) 9 | def test_10795(arrow_large_buffer_size): 10 | conn = duckdb.connect() 
11 | conn.sql(f"set arrow_large_buffer_size={arrow_large_buffer_size}") 12 | arrow = conn.sql("select map(['non-inlined string', 'test', 'duckdb'], [42, 1337, 123]) as map").to_arrow_table() 13 | assert arrow.to_pydict() == {"map": [[("non-inlined string", 42), ("test", 1337), ("duckdb", 123)]]} 14 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_12384.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | import duckdb 6 | 7 | pa = pytest.importorskip("pyarrow") 8 | 9 | 10 | def test_10795(): 11 | arrow_filename = Path(__file__).parent / "data" / "arrow_table" 12 | with pa.memory_map(str(arrow_filename), "r") as source: 13 | reader = pa.ipc.RecordBatchFileReader(source) 14 | taxi_fhvhv_arrow = reader.read_all() 15 | con = duckdb.connect(database=":memory:") 16 | con.execute("SET TimeZone='UTC';") 17 | con.register("taxi_fhvhv", taxi_fhvhv_arrow) 18 | res = con.execute(""" 19 | SELECT PULocationID, pickup_datetime 20 | FROM taxi_fhvhv 21 | WHERE pickup_datetime >= '2023-01-01T00:00:00-05:00' AND PULocationID = 244 22 | """).fetchall() 23 | 24 | assert len(res) == 3685 25 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_14344.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | 3 | import pytest 4 | 5 | pa = pytest.importorskip("pyarrow") 6 | 7 | 8 | def test_14344(duckdb_cursor): 9 | my_table = pa.Table.from_pydict({"foo": pa.array([hashlib.sha256(b"foo").digest()], type=pa.binary())}) # noqa: F841 10 | my_table2 = pa.Table.from_pydict( # noqa: F841 11 | {"foo": pa.array([hashlib.sha256(b"foo").digest()], type=pa.binary()), "a": ["123"]} 12 | ) 13 | 14 | res = duckdb_cursor.sql( 15 | """ 16 | SELECT 17 | my_table2.* EXCLUDE (foo) 18 | FROM 19 | my_table 20 | LEFT JOIN 21 | my_table2 22 | USING (foo) 23 | """ 24 
| ).fetchall() 25 | assert res == [("123",)] 26 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_2426.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | pytest.importorskip("pyarrow") 6 | 7 | try: 8 | can_run = True 9 | except Exception: 10 | can_run = False 11 | 12 | 13 | class Test2426: 14 | def test_2426(self, duckdb_cursor): 15 | if not can_run: 16 | return 17 | 18 | con = duckdb.connect() 19 | con.execute("Create Table test (a integer)") 20 | 21 | for i in range(1024): 22 | for _j in range(2): 23 | con.execute("Insert Into test values ('" + str(i) + "')") 24 | con.execute("Insert Into test values ('5000')") 25 | con.execute("Insert Into test values ('6000')") 26 | sql = """ 27 | SELECT a, COUNT(*) AS repetitions 28 | FROM test 29 | GROUP BY a 30 | """ 31 | 32 | result_df = con.execute(sql).df() 33 | 34 | arrow_table = con.execute(sql).fetch_arrow_table() 35 | 36 | arrow_df = arrow_table.to_pandas() 37 | assert result_df["repetitions"].sum() == arrow_df["repetitions"].sum() 38 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_5547.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from pandas.testing import assert_frame_equal 4 | 5 | import duckdb 6 | 7 | pa = pytest.importorskip("pyarrow") 8 | 9 | 10 | def test_5547(): 11 | num_rows = 2**17 + 1 12 | 13 | tbl = pa.Table.from_pandas( 14 | pd.DataFrame.from_records( 15 | [ 16 | { 17 | "id": i, 18 | "nested": { 19 | "a": i, 20 | }, 21 | } 22 | for i in range(num_rows) 23 | ] 24 | ) 25 | ) 26 | 27 | con = duckdb.connect() 28 | expected = tbl.to_pandas() 29 | result = con.execute( 30 | """ 31 | SELECT * FROM tbl 32 | """ 33 | ).df() 34 | 35 | assert_frame_equal(expected, result) 36 | 37 | con.close() 38 | 
-------------------------------------------------------------------------------- /tests/fast/arrow/test_6584.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor 2 | 3 | import pytest 4 | 5 | import duckdb 6 | 7 | pyarrow = pytest.importorskip("pyarrow") 8 | 9 | 10 | def f(cur, i, data): 11 | cur.execute(f"create table t_{i} as select * from data") 12 | return cur.execute(f"select * from t_{i}").fetch_arrow_table() 13 | 14 | 15 | def test_6584(): 16 | pool = ThreadPoolExecutor(max_workers=2) 17 | data = pyarrow.Table.from_pydict({"a": [1, 2, 3]}) 18 | c = duckdb.connect() 19 | futures = [] 20 | for i in range(2): 21 | fut = pool.submit(f, c.cursor(), i, data) 22 | futures.append(fut) 23 | 24 | for fut in futures: 25 | arrow_res = fut.result() 26 | assert data.equals(arrow_res) 27 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_6796.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from conftest import ArrowPandas, NumpyPandas 3 | 4 | import duckdb 5 | 6 | pyarrow = pytest.importorskip("pyarrow") 7 | 8 | 9 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 10 | def test_6796(pandas): 11 | conn = duckdb.connect() 12 | input_df = pandas.DataFrame({"foo": ["bar"]}) 13 | conn.register("input_df", input_df) 14 | 15 | query = """ 16 | select * from input_df 17 | union all 18 | select * from input_df 19 | """ 20 | 21 | # fetching directly into Pandas works 22 | res_df = conn.execute(query).fetch_df() 23 | res_arrow = conn.execute(query).fetch_arrow_table() # noqa: F841 24 | 25 | df_arrow_table = pyarrow.Table.from_pandas(res_df) # noqa: F841 26 | 27 | result_1 = conn.execute("select * from df_arrow_table order by all").fetchall() 28 | 29 | result_2 = conn.execute("select * from res_arrow order by all").fetchall() 30 | 31 | assert result_1 == result_2 32 | 
-------------------------------------------------------------------------------- /tests/fast/arrow/test_7699.py: -------------------------------------------------------------------------------- 1 | import string 2 | 3 | import pytest 4 | 5 | pa = pytest.importorskip("pyarrow") 6 | pq = pytest.importorskip("pyarrow.parquet") 7 | pl = pytest.importorskip("polars") 8 | 9 | 10 | class Test7699: 11 | def test_7699(self, duckdb_cursor): 12 | pl_tbl = pl.DataFrame( 13 | { 14 | "col1": pl.Series([string.ascii_uppercase[ix + 10] for ix in list(range(2)) + list(range(3))]).cast( 15 | pl.Categorical 16 | ), 17 | } 18 | ) 19 | 20 | nickname = "df1234" 21 | duckdb_cursor.register(nickname, pl_tbl) 22 | 23 | rel = duckdb_cursor.sql("select * from df1234") 24 | res = rel.fetchall() 25 | assert res == [("K",), ("L",), ("K",), ("L",), ("M",)] 26 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_8522.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | 3 | import pytest 4 | 5 | pa = pytest.importorskip("pyarrow") 6 | 7 | 8 | # Reconstruct filters when pushing down into arrow scan 9 | # arrow supports timestamp_tz with different units than US, we only support US 10 | # so we have to convert ConstantValues back to their native unit when pushing the filter 11 | # expression containing them down to pyarrow 12 | class Test8522: 13 | def test_8522(self, duckdb_cursor): 14 | t_us = pa.Table.from_arrays( # noqa: F841 15 | arrays=[pa.array([dt.datetime(2022, 1, 1)])], 16 | schema=pa.schema([pa.field("time", pa.timestamp("us", tz="UTC"))]), 17 | ) 18 | 19 | t_ms = pa.Table.from_arrays( # noqa: F841 20 | arrays=[pa.array([dt.datetime(2022, 1, 1)])], 21 | schema=pa.schema([pa.field("time", pa.timestamp("ms", tz="UTC"))]), 22 | ) 23 | 24 | expected = duckdb_cursor.sql("FROM t_us").filter("time>='2022-01-01'").fetchall() 25 | assert len(expected) == 1 26 | 27 | actual = 
duckdb_cursor.sql("FROM t_ms").filter("time>='2022-01-01'").fetchall() 28 | assert actual == expected 29 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_9443.py: -------------------------------------------------------------------------------- 1 | from datetime import time 2 | from pathlib import PurePosixPath 3 | 4 | import pytest 5 | 6 | pq = pytest.importorskip("pyarrow.parquet") 7 | pa = pytest.importorskip("pyarrow") 8 | 9 | 10 | class Test9443: 11 | def test_9443(self, tmp_path, duckdb_cursor): 12 | arrow_table = pa.Table.from_pylist( 13 | [ 14 | {"col1": time(1, 2, 3)}, 15 | ] 16 | ) # col1: time64[us] 17 | 18 | print(arrow_table) 19 | 20 | temp_file = str(PurePosixPath(tmp_path.as_posix()) / "test9443.parquet") 21 | pq.write_table(arrow_table, temp_file) 22 | 23 | sql = f'SELECT * FROM "{temp_file}"' 24 | 25 | duckdb_cursor.execute(sql) 26 | duckdb_cursor.fetch_record_batch() 27 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_batch_index.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | pa = pytest.importorskip("pyarrow") 6 | 7 | 8 | class TestArrowBatchIndex: 9 | def test_arrow_batch_index(self, duckdb_cursor): 10 | con = duckdb.connect() 11 | df = con.execute("SELECT * FROM range(10000000) t(i)").df() 12 | arrow_tbl = pa.Table.from_pandas(df) # noqa: F841 13 | 14 | con.execute("CREATE TABLE tbl AS SELECT * FROM arrow_tbl") 15 | 16 | result = con.execute("SELECT * FROM tbl LIMIT 5").fetchall() 17 | assert [x[0] for x in result] == [0, 1, 2, 3, 4] 18 | 19 | result = con.execute("SELECT * FROM tbl LIMIT 5 OFFSET 777778").fetchall() 20 | assert [x[0] for x in result] == [777778, 777779, 777780, 777781, 777782] 21 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_binary_view.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | pa = pytest.importorskip("pyarrow") 6 | 7 | 8 | class TestArrowBinaryView: 9 | def test_arrow_binary_view(self, duckdb_cursor): 10 | con = duckdb.connect() 11 | tab = pa.table({"x": pa.array([b"abc", b"thisisaverybigbinaryyaymorethanfifteen", None], pa.binary_view())}) 12 | assert con.execute("FROM tab").fetchall() == [(b"abc",), (b"thisisaverybigbinaryyaymorethanfifteen",), (None,)] 13 | # By default we won't export a view 14 | assert not con.execute("FROM tab").fetch_arrow_table().equals(tab) 15 | # We do the binary view from 1.4 onwards 16 | con.execute("SET arrow_output_version = 1.4") 17 | assert con.execute("FROM tab").fetch_arrow_table().equals(tab) 18 | 19 | assert con.execute("FROM tab where x = 'thisisaverybigbinaryyaymorethanfifteen'").fetchall() == [ 20 | (b"thisisaverybigbinaryyaymorethanfifteen",) 21 | ] 22 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_case_sensitive.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | pa = pytest.importorskip("pyarrow") 4 | 5 | 6 | class TestArrowCaseSensitive: 7 | def test_arrow_case_sensitive(self, duckdb_cursor): 8 | data = (pa.array([1], type=pa.int32()), pa.array([1000], type=pa.int32())) 9 | arrow_table = pa.Table.from_arrays([data[0], data[1]], ["A1", "a1"]) 10 | 11 | duckdb_cursor.register("arrow_tbl", arrow_table) 12 | assert duckdb_cursor.table("arrow_tbl").columns == ["A1", "a1_1"] 13 | assert duckdb_cursor.execute("select A1 from arrow_tbl;").fetchall() == [(1,)] 14 | assert duckdb_cursor.execute("select a1_1 from arrow_tbl;").fetchall() == [(1000,)] 15 | assert arrow_table.column_names == ["A1", "a1"] 16 | 17 | def test_arrow_case_sensitive_repeated(self, duckdb_cursor): 18 | data = (pa.array([1], type=pa.int32()), pa.array([1000], type=pa.int32())) 19 | 
arrow_table = pa.Table.from_arrays([data[0], data[1], data[1]], ["A1", "a1_1", "a1"]) 20 | 21 | duckdb_cursor.register("arrow_tbl", arrow_table) 22 | assert duckdb_cursor.table("arrow_tbl").columns == ["A1", "a1_1", "a1_2"] 23 | assert arrow_table.column_names == ["A1", "a1_1", "a1"] 24 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_decimal256.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal 2 | 3 | import pytest 4 | 5 | import duckdb 6 | 7 | pa = pytest.importorskip("pyarrow") 8 | 9 | 10 | class TestArrowDecimal256: 11 | def test_decimal_256_throws(self, duckdb_cursor): 12 | with duckdb.connect() as conn: 13 | pa_decimal256 = pa.Table.from_pylist( # noqa: F841 14 | [{"data": Decimal("100.00")} for _ in range(4)], 15 | pa.schema([("data", pa.decimal256(12, 4))]), 16 | ) 17 | with pytest.raises( 18 | duckdb.NotImplementedException, match="Unsupported Internal Arrow Type for Decimal d:12,4,256" 19 | ): 20 | conn.execute("select * from pa_decimal256;").fetch_arrow_table().to_pylist() 21 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_fixed_binary.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | pa = pytest.importorskip("pyarrow") 4 | 5 | 6 | class TestArrowFixedBinary: 7 | def test_arrow_fixed_binary(self, duckdb_cursor): 8 | ids = [ 9 | None, 10 | b"\x66\x4d\xf4\xae\xb1\x5c\xb0\x4a\xdd\x5d\x1d\x54", 11 | b"\x66\x4d\xf4\xf0\xa3\xfc\xec\x5b\x26\x81\x4e\x1d", 12 | ] 13 | 14 | id_array = pa.array(ids, type=pa.binary(12)) 15 | arrow_table = pa.Table.from_arrays([id_array], names=["id"]) # noqa: F841 16 | res = duckdb_cursor.sql( 17 | """ 18 | SELECT lower(hex(id)) as id FROM arrow_table 19 | """ 20 | ).fetchall() 21 | assert res == [(None,), ("664df4aeb15cb04add5d1d54",), ("664df4f0a3fcec5b26814e1d",)] 22 | 
-------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_ipc.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | pa = pytest.importorskip("pyarrow") 6 | 7 | ipc = pytest.importorskip("pyarrow.ipc") 8 | 9 | 10 | def get_record_batch(): 11 | data = [pa.array([1, 2, 3, 4]), pa.array(["foo", "bar", "baz", None]), pa.array([True, None, False, True])] 12 | return pa.record_batch(data, names=["f0", "f1", "f2"]) 13 | 14 | 15 | class TestArrowIPCExtension: 16 | # Only thing we can test in core is that it suggests the 17 | # instalation and loading of the extension 18 | def test_single_buffer(self, duckdb_cursor): 19 | batch = get_record_batch() 20 | sink = pa.BufferOutputStream() 21 | 22 | with ipc.new_stream(sink, batch.schema) as writer: 23 | for _ in range(5): # Write 5 batches into one stream 24 | writer.write_batch(batch) 25 | 26 | buffer = sink.getvalue() 27 | 28 | with pa.BufferReader(buffer) as buf_reader: # Use pyarrow.BufferReader 29 | stream = ipc.MessageReader.open_stream(buf_reader) 30 | # This fails 31 | with pytest.raises( 32 | duckdb.Error, match="The nanoarrow community extension is needed to read the Arrow IPC protocol" 33 | ): 34 | duckdb_cursor.from_arrow(stream).fetchall() 35 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_union.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | pyarrow = pytest.importorskip("pyarrow") 4 | 5 | 6 | def test_nested(duckdb_cursor): 7 | res = run(duckdb_cursor, "select 42::UNION(name VARCHAR, attr UNION(age INT, veteran BOOL)) as res") 8 | assert pyarrow.types.is_union(res.type) 9 | assert res.value.value == pyarrow.scalar(42, type=pyarrow.int32()) 10 | 11 | 12 | def test_union_contains_nested_data(duckdb_cursor): 13 | _ = pytest.importorskip("pyarrow", minversion="11") 
14 | res = run(duckdb_cursor, "select ['hello']::UNION(first_name VARCHAR, middle_names VARCHAR[]) as res") 15 | assert pyarrow.types.is_union(res.type) 16 | assert res.value == pyarrow.scalar(["hello"], type=pyarrow.list_(pyarrow.string())) 17 | 18 | 19 | def test_unions_inside_lists_structs_maps(duckdb_cursor): 20 | res = run(duckdb_cursor, "select [union_value(name := 'Frank')] as res") 21 | assert pyarrow.types.is_list(res.type) 22 | assert pyarrow.types.is_union(res.type.value_type) 23 | assert res[0].value == pyarrow.scalar("Frank", type=pyarrow.string()) 24 | 25 | 26 | def test_unions_with_struct(duckdb_cursor): 27 | duckdb_cursor.execute( 28 | """ 29 | CREATE TABLE tbl (a UNION(a STRUCT(a INT, b BOOL))) 30 | """ 31 | ) 32 | duckdb_cursor.execute( 33 | """ 34 | INSERT INTO tbl VALUES ({'a': 42, 'b': true}) 35 | """ 36 | ) 37 | 38 | rel = duckdb_cursor.table("tbl") 39 | arrow = rel.fetch_arrow_table() # noqa: F841 40 | 41 | duckdb_cursor.execute("create table other as select * from arrow") 42 | rel2 = duckdb_cursor.table("other") 43 | res = rel2.fetchall() 44 | assert res == [({"a": 42, "b": True},)] 45 | 46 | 47 | def run(conn, query): 48 | return conn.sql(query).fetch_arrow_table().columns[0][0] 49 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_binary_type.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | try: 4 | import pyarrow as pa 5 | 6 | can_run = True 7 | except Exception: 8 | can_run = False 9 | 10 | 11 | def create_binary_table(type): 12 | schema = pa.schema([("data", type)]) 13 | inputs = [pa.array([b"foo", b"bar", b"baz"], type=type)] 14 | return pa.Table.from_arrays(inputs, schema=schema) 15 | 16 | 17 | class TestArrowBinary: 18 | def test_binary_types(self, duckdb_cursor): 19 | if not can_run: 20 | return 21 | 22 | # Fixed Size Binary 23 | arrow_table = create_binary_table(pa.binary(3)) 24 | rel = duckdb.from_arrow(arrow_table) 
25 | res = rel.execute().fetchall() 26 | assert res == [(b"foo",), (b"bar",), (b"baz",)] 27 | 28 | # Normal Binary 29 | arrow_table = create_binary_table(pa.binary()) 30 | rel = duckdb.from_arrow(arrow_table) 31 | res = rel.execute().fetchall() 32 | assert res == [(b"foo",), (b"bar",), (b"baz",)] 33 | 34 | # Large Binary 35 | arrow_table = create_binary_table(pa.large_binary()) 36 | rel = duckdb.from_arrow(arrow_table) 37 | res = rel.execute().fetchall() 38 | assert res == [(b"foo",), (b"bar",), (b"baz",)] 39 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_buffer_size_option.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | from duckdb.sqltypes import VARCHAR 5 | 6 | pa = pytest.importorskip("pyarrow") 7 | 8 | 9 | class TestArrowBufferSize: 10 | def test_arrow_buffer_size(self): 11 | con = duckdb.connect() 12 | 13 | # All small string 14 | res = con.query("select 'bla'").fetch_arrow_table() 15 | assert res[0][0].type == pa.string() 16 | res = con.query("select 'bla'").fetch_record_batch() 17 | assert res.schema[0].type == pa.string() 18 | 19 | # All Large String 20 | con.execute("SET arrow_large_buffer_size=True") 21 | res = con.query("select 'bla'").fetch_arrow_table() 22 | assert res[0][0].type == pa.large_string() 23 | res = con.query("select 'bla'").fetch_record_batch() 24 | assert res.schema[0].type == pa.large_string() 25 | 26 | # All small string again 27 | con.execute("SET arrow_large_buffer_size=False") 28 | res = con.query("select 'bla'").fetch_arrow_table() 29 | assert res[0][0].type == pa.string() 30 | res = con.query("select 'bla'").fetch_record_batch() 31 | assert res.schema[0].type == pa.string() 32 | 33 | def test_arrow_buffer_size_udf(self): 34 | def just_return(x): 35 | return x 36 | 37 | con = duckdb.connect() 38 | con.create_function("just_return", just_return, [VARCHAR], VARCHAR, type="arrow") 39 | 40 | res = 
con.query("select just_return('bla')").fetch_arrow_table() 41 | 42 | assert res[0][0].type == pa.string() 43 | 44 | # All Large String 45 | con.execute("SET arrow_large_buffer_size=True") 46 | 47 | res = con.query("select just_return('bla')").fetch_arrow_table() 48 | assert res[0][0].type == pa.large_string() 49 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_date.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | try: 4 | import pyarrow as pa 5 | 6 | can_run = True 7 | except Exception: 8 | can_run = False 9 | 10 | 11 | class TestArrowDate: 12 | def test_date_types(self, duckdb_cursor): 13 | if not can_run: 14 | return 15 | 16 | data = (pa.array([1000 * 60 * 60 * 24], type=pa.date64()), pa.array([1], type=pa.date32())) 17 | arrow_table = pa.Table.from_arrays([data[0], data[1]], ["a", "b"]) 18 | rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() 19 | assert rel["a"] == arrow_table["b"] 20 | assert rel["b"] == arrow_table["b"] 21 | 22 | def test_date_null(self, duckdb_cursor): 23 | if not can_run: 24 | return 25 | data = (pa.array([None], type=pa.date64()), pa.array([None], type=pa.date32())) 26 | arrow_table = pa.Table.from_arrays([data[0], data[1]], ["a", "b"]) 27 | rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() 28 | assert rel["a"] == arrow_table["b"] 29 | assert rel["b"] == arrow_table["b"] 30 | 31 | def test_max_date(self, duckdb_cursor): 32 | if not can_run: 33 | return 34 | data = (pa.array([2147483647], type=pa.date32()), pa.array([2147483647], type=pa.date32())) 35 | result = pa.Table.from_arrays([data[0], data[1]], ["a", "b"]) 36 | data = ( 37 | pa.array([2147483647 * (1000 * 60 * 60 * 24)], type=pa.date64()), 38 | pa.array([2147483647], type=pa.date32()), 39 | ) 40 | arrow_table = pa.Table.from_arrays([data[0], data[1]], ["a", "b"]) 41 | rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() 42 | assert rel["a"] == 
result["a"] 43 | assert rel["b"] == result["b"] 44 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_large_string.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | try: 4 | import pyarrow as pa 5 | 6 | can_run = True 7 | except Exception: 8 | can_run = False 9 | 10 | 11 | class TestArrowLargeString: 12 | def test_large_string_type(self, duckdb_cursor): 13 | if not can_run: 14 | return 15 | 16 | schema = pa.schema([("data", pa.large_string())]) 17 | inputs = [pa.array(["foo", "baaaar", "b"], type=pa.large_string())] 18 | arrow_table = pa.Table.from_arrays(inputs, schema=schema) 19 | 20 | rel = duckdb.from_arrow(arrow_table) 21 | res = rel.execute().fetchall() 22 | assert res == [("foo",), ("baaaar",), ("b",)] 23 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_multiple_reads.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import duckdb 4 | 5 | try: 6 | import pyarrow 7 | import pyarrow.parquet 8 | 9 | can_run = True 10 | except Exception: 11 | can_run = False 12 | 13 | 14 | class TestArrowReads: 15 | def test_multiple_queries_same_relation(self, duckdb_cursor): 16 | if not can_run: 17 | return 18 | parquet_filename = str(Path(__file__).parent / "data" / "userdata1.parquet") 19 | userdata_parquet_table = pyarrow.parquet.read_table(parquet_filename) 20 | userdata_parquet_table.validate(full=True) 21 | rel = duckdb.from_arrow(userdata_parquet_table) 22 | assert rel.aggregate("(avg(salary))::INT").execute().fetchone()[0] == 149005 23 | assert rel.aggregate("(avg(salary))::INT").execute().fetchone()[0] == 149005 24 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_projection_pushdown.py: -------------------------------------------------------------------------------- 1 | 
import pytest


class TestArrowProjectionPushdown:
    def test_projection_pushdown_no_filter(self, duckdb_cursor):
        """Summing a single column works through every Arrow scan path.

        Covers three replacement scans: an Arrow Table (uses projection
        pushdown), a RecordBatch (no pushdown), and a pyarrow Dataset.
        """
        pytest.importorskip("pyarrow")
        ds = pytest.importorskip("pyarrow.dataset")

        duckdb_cursor.execute(
            """
            CREATE TABLE test (a INTEGER, b INTEGER, c INTEGER)
            """
        )
        # Include an all-NULL row so the scan also has NULLs to handle.
        duckdb_cursor.execute(
            """
            INSERT INTO test VALUES
            (1,2,3),
            (10,20,30),
            (100,200,300),
            (NULL,NULL,NULL)
            """
        )
        arrow_table = duckdb_cursor.table("test").fetch_arrow_table()

        expected = [(333,)]
        assert duckdb_cursor.execute("SELECT sum(c) FROM arrow_table").fetchall() == expected

        # RecordBatch does not use projection pushdown, test that this also still works
        record_batch = arrow_table.to_batches()[0]  # noqa: F841
        assert duckdb_cursor.execute("SELECT sum(c) FROM record_batch").fetchall() == expected

        arrow_dataset = ds.dataset(arrow_table)  # noqa: F841
        assert duckdb_cursor.execute("SELECT sum(c) FROM arrow_dataset").fetchall() == expected


# ---- tests/fast/arrow/test_view.py ----
from pathlib import Path

import pytest

pa = pytest.importorskip("pyarrow")
pq = pytest.importorskip("pyarrow.parquet")


class TestArrowView:
    def test_arrow_view(self, duckdb_cursor):
        """Registering an Arrow table as a view makes it queryable by name."""
        parquet_filename = str(Path(__file__).parent / "data" / "userdata1.parquet")
        userdata_parquet_table = pq.read_table(parquet_filename)
        userdata_parquet_table.validate(full=True)

        duckdb_cursor.from_arrow(userdata_parquet_table).create_view("arrow_view")

        assert duckdb_cursor.execute("PRAGMA show_tables").fetchone() == ("arrow_view",)
        avg_salary = duckdb_cursor.execute("select avg(salary)::INT from arrow_view").fetchone()[0]
        assert avg_salary == 149005
-------------------------------------------------------------------------------- /tests/fast/data/binary_string.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-python/HEAD/tests/fast/data/binary_string.parquet -------------------------------------------------------------------------------- /tests/fast/data/category.csv: -------------------------------------------------------------------------------- 1 | CATEGORY_ID|NAME|LAST_UPDATE 2 | 1|Action|2006-02-15 04:46:27 3 | 2|Animation|2006-02-15 04:46:27 4 | 3|Children|2006-02-15 04:46:27 5 | 4|Classics|2006-02-15 04:46:27 6 | 5|Comedy|2006-02-15 04:46:27 7 | 6|Documentary|2006-02-15 04:46:27 8 | 7|Drama|2006-02-15 04:46:27 9 | 8|Family|2006-02-15 04:46:27 10 | 9|Foreign|2006-02-15 04:46:27 11 | 10|Games|2006-02-15 04:46:27 12 | 11|Horror|2006-02-15 04:46:27 13 | 12|Music|2006-02-15 04:46:27 14 | 13|New|2006-02-15 04:46:27 15 | 14|Sci-Fi|2006-02-15 04:46:27 16 | 15|Sports|2006-02-15 04:46:27 17 | 16|Travel|2006-02-15 04:46:27 18 | -------------------------------------------------------------------------------- /tests/fast/data/datetime.csv: -------------------------------------------------------------------------------- 1 | a,b,t,d,ts 2 | 123,TEST2,12:12:12,2000-01-01,2000-01-01 12:12:00 3 | 345,TEST2,14:15:30,2002-02-02,2002-02-02 14:15:00 4 | 346,TEST2,15:16:17,2004-12-13,2004-12-13 15:16:00 5 | -------------------------------------------------------------------------------- /tests/fast/data/example.json: -------------------------------------------------------------------------------- 1 | {"id":1,"name":"O Brother, Where Art Thou?"} 2 | {"id":2,"name":"Home for the Holidays"} 3 | {"id":3,"name":"The Firm"} 4 | {"id":4,"name":"Broadcast News"} 5 | {"id":5,"name":"Raising Arizona"} -------------------------------------------------------------------------------- /tests/fast/data/integers.csv: 
-------------------------------------------------------------------------------- 1 | 1;10;0 2 | 2;50;30 -------------------------------------------------------------------------------- /tests/fast/data/nullpadding.csv: -------------------------------------------------------------------------------- 1 | # this file has a bunch of gunk at the top 2 | one,two,three,four 3 | 1,a,alice 4 | 2,b,bob -------------------------------------------------------------------------------- /tests/fast/data/problematic.csv: -------------------------------------------------------------------------------- 1 | a|b|c 2 | 1|1|1 3 | 1|1|1 4 | 1|1|1 5 | 1|1|1 6 | 1|1|1 7 | 1|1|1 8 | not_a_number|also_not_a_number|definitely_not_a_number -------------------------------------------------------------------------------- /tests/fast/data/quote_escape.csv: -------------------------------------------------------------------------------- 1 | 123|TEST6|text1 2 | 345|TEST6|"text""2""text" 3 | "567"|TEST6|text3 4 | -------------------------------------------------------------------------------- /tests/fast/data/tz.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-python/HEAD/tests/fast/data/tz.parquet -------------------------------------------------------------------------------- /tests/fast/data/unquote_without_delimiter.csv: -------------------------------------------------------------------------------- 1 | "AAA"BB -------------------------------------------------------------------------------- /tests/fast/pandas/test_bug2281.py: -------------------------------------------------------------------------------- 1 | import io 2 | 3 | import pandas as pd 4 | 5 | 6 | class TestPandasStringNull: 7 | def test_pandas_string_null(self, duckdb_cursor): 8 | csv = """what,is_control,is_test 9 | ,0,0 10 | foo,1,0""" 11 | df = pd.read_csv(io.StringIO(csv)) 12 | duckdb_cursor.register("c", df) 13 | 
duckdb_cursor.execute("select what, count(*) from c group by what") 14 | duckdb_cursor.fetchdf() 15 | assert True # Should not crash ^^ 16 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_bug5922.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from conftest import ArrowPandas, NumpyPandas 3 | 4 | import duckdb 5 | 6 | 7 | class TestPandasAcceptFloat16: 8 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 9 | def test_pandas_accept_float16(self, duckdb_cursor, pandas): 10 | df = pandas.DataFrame({"col": [1, 2, 3]}) 11 | df16 = df.astype({"col": "float16"}) # noqa: F841 12 | con = duckdb.connect() 13 | con.execute("CREATE TABLE tbl AS SELECT * FROM df16") 14 | con.execute("select * from tbl") 15 | df_result = con.fetchdf() 16 | df32 = df.astype({"col": "float32"}) 17 | assert (df32["col"] == df_result["col"]).all() 18 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_column_order.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | 4 | class TestColumnOrder: 5 | def test_column_order(self, duckdb_cursor): 6 | to_execute = """ 7 | CREATE OR REPLACE TABLE t1 AS ( 8 | SELECT NULL AS col1, 9 | NULL::TIMESTAMPTZ AS timepoint, 10 | NULL::DATE AS date, 11 | ); 12 | SELECT timepoint, date, col1 FROM t1; 13 | """ 14 | df = duckdb.execute(to_execute).fetchdf() 15 | cols = list(df.columns) 16 | assert cols == ["timepoint", "date", "col1"] 17 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_copy_on_write.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pytest 4 | 5 | import duckdb 6 | 7 | # https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html 8 | pandas = pytest.importorskip("pandas", "1.5", 
reason="copy_on_write does not exist in earlier versions") 9 | 10 | 11 | # Make sure the variable get's properly reset even in case of error 12 | @pytest.fixture(autouse=True) 13 | def scoped_copy_on_write_setting(): 14 | old_value = pandas.options.mode.copy_on_write 15 | pandas.options.mode.copy_on_write = True 16 | yield 17 | # Reset it at the end of the function 18 | pandas.options.mode.copy_on_write = old_value 19 | return 20 | 21 | 22 | def convert_to_result(col): 23 | return [(x,) for x in col] 24 | 25 | 26 | class TestCopyOnWrite: 27 | @pytest.mark.parametrize( 28 | "col", 29 | [ 30 | ["a", "b", "this is a long string"], 31 | [1.2334, None, 234.12], 32 | [123234, -213123, 2324234], 33 | [datetime.date(1990, 12, 7), None, datetime.date(1940, 1, 13)], 34 | [datetime.datetime(2012, 6, 21, 13, 23, 45, 328), None], 35 | ], 36 | ) 37 | def test_copy_on_write(self, col): 38 | assert pandas.options.mode.copy_on_write 39 | con = duckdb.connect() 40 | df_in = pandas.DataFrame( # noqa: F841 41 | { 42 | "numbers": col, 43 | } 44 | ) 45 | rel = con.sql("select * from df_in") 46 | res = rel.fetchall() 47 | print(res) 48 | expected = convert_to_result(col) 49 | assert res == expected 50 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_create_table_from_pandas.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from conftest import ArrowPandas, NumpyPandas 3 | 4 | import duckdb 5 | 6 | 7 | def assert_create(internal_data, expected_result, data_type, pandas): 8 | conn = duckdb.connect() 9 | df_in = pandas.DataFrame(data=internal_data, dtype=data_type) # noqa: F841 10 | 11 | conn.execute("CREATE TABLE t AS SELECT * FROM df_in") 12 | 13 | result = conn.execute("SELECT * FROM t").fetchall() 14 | assert result == expected_result 15 | 16 | 17 | def assert_create_register(internal_data, expected_result, data_type, pandas): 18 | conn = duckdb.connect() 19 | df_in = 
pandas.DataFrame(data=internal_data, dtype=data_type) 20 | conn.register("dataframe", df_in) 21 | conn.execute("CREATE TABLE t AS SELECT * FROM dataframe") 22 | 23 | result = conn.execute("SELECT * FROM t").fetchall() 24 | assert result == expected_result 25 | 26 | 27 | class TestCreateTableFromPandas: 28 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 29 | def test_integer_create_table(self, duckdb_cursor, pandas): 30 | # TODO: This should work with other data types e.g., int8... # noqa: TD002, TD003 31 | data_types = ["Int8", "Int16", "Int32", "Int64"] 32 | internal_data = [1, 2, 3, 4] 33 | expected_result = [(1,), (2,), (3,), (4,)] 34 | for data_type in data_types: 35 | print(data_type) 36 | assert_create_register(internal_data, expected_result, data_type, pandas) 37 | assert_create(internal_data, expected_result, data_type, pandas) 38 | 39 | # TODO: Also test other data types # noqa: TD002, TD003 40 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_date_as_datetime.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pandas as pd 4 | 5 | import duckdb 6 | 7 | 8 | def run_checks(df): 9 | assert type(df["d"][0]) is datetime.date 10 | assert df["d"][0] == datetime.date(1992, 7, 30) 11 | assert pd.isnull(df["d"][1]) 12 | 13 | 14 | def test_date_as_datetime(): 15 | con = duckdb.connect() 16 | con.execute("create table t (d date)") 17 | con.execute("insert into t values ('1992-07-30'), (NULL)") 18 | 19 | # Connection Methods 20 | run_checks(con.execute("Select * from t").df(date_as_object=True)) 21 | run_checks(con.execute("Select * from t").fetchdf(date_as_object=True)) 22 | run_checks(con.execute("Select * from t").fetch_df_chunk(date_as_object=True)) 23 | run_checks(con.execute("Select * from t").fetch_df(date_as_object=True)) 24 | 25 | # Relation Methods 26 | rel = con.table("t") 27 | 
run_checks(rel.df(date_as_object=True)) 28 | run_checks(rel.to_df(date_as_object=True)) 29 | 30 | # Result Methods 31 | run_checks(rel.query("t_1", "select * from t_1").df(date_as_object=True)) 32 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_implicit_pandas_scan.py: -------------------------------------------------------------------------------- 1 | # simple DB API testcase 2 | 3 | import pandas as pd 4 | import pytest 5 | from conftest import ArrowPandas, NumpyPandas 6 | from packaging.version import Version 7 | 8 | import duckdb 9 | 10 | numpy_nullable_df = pd.DataFrame([{"COL1": "val1", "CoL2": 1.05}, {"COL1": "val4", "CoL2": 17}]) 11 | 12 | try: 13 | from pandas.compat import pa_version_under7p0 14 | 15 | pyarrow_dtypes_enabled = not pa_version_under7p0 16 | except Exception: 17 | pyarrow_dtypes_enabled = False 18 | 19 | if Version(pd.__version__) >= Version("2.0.0") and pyarrow_dtypes_enabled: 20 | pyarrow_df = numpy_nullable_df.convert_dtypes(dtype_backend="pyarrow") 21 | else: 22 | # dtype_backend is not supported in pandas < 2.0.0 23 | pyarrow_df = numpy_nullable_df 24 | 25 | 26 | class TestImplicitPandasScan: 27 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 28 | def test_local_pandas_scan(self, duckdb_cursor, pandas): 29 | con = duckdb.connect() 30 | df = pandas.DataFrame([{"COL1": "val1", "CoL2": 1.05}, {"COL1": "val3", "CoL2": 17}]) # noqa: F841 31 | r1 = con.execute("select * from df").fetchdf() 32 | assert r1["COL1"][0] == "val1" 33 | assert r1["COL1"][1] == "val3" 34 | assert r1["CoL2"][0] == 1.05 35 | assert r1["CoL2"][1] == 17 36 | 37 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 38 | def test_global_pandas_scan(self, duckdb_cursor, pandas): 39 | con = duckdb.connect() 40 | r1 = con.execute(f"select * from {pandas.backend}_df").fetchdf() 41 | assert r1["COL1"][0] == "val1" 42 | assert r1["COL1"][1] == "val4" 43 | assert r1["CoL2"][0] == 1.05 
44 | assert r1["CoL2"][1] == 17 45 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_import_cache.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from conftest import ArrowPandas, NumpyPandas 3 | 4 | import duckdb 5 | 6 | 7 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 8 | def test_import_cache_explicit_dtype(pandas): 9 | df = pandas.DataFrame( # noqa: F841 10 | { 11 | "id": [1, 2, 3], 12 | "value": pandas.Series(["123.123", pandas.NaT, pandas.NA], dtype=pandas.StringDtype(storage="python")), 13 | } 14 | ) 15 | con = duckdb.connect() 16 | result_df = con.query("select id, value from df").df() 17 | 18 | assert result_df["value"][1] is None 19 | assert result_df["value"][2] is None 20 | 21 | 22 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 23 | def test_import_cache_implicit_dtype(pandas): 24 | df = pandas.DataFrame({"id": [1, 2, 3], "value": pandas.Series(["123.123", pandas.NaT, pandas.NA])}) # noqa: F841 25 | con = duckdb.connect() 26 | result_df = con.query("select id, value from df").df() 27 | 28 | assert result_df["value"][1] is None 29 | assert result_df["value"][2] is None 30 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_issue_1767.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pytest 4 | from conftest import ArrowPandas, NumpyPandas 5 | 6 | import duckdb 7 | 8 | 9 | # Join from pandas not matching identical strings #1767 10 | class TestIssue1767: 11 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 12 | def test_unicode_join_pandas(self, duckdb_cursor, pandas): 13 | A = pandas.DataFrame({"key": ["a", "п"]}) 14 | B = pandas.DataFrame({"key": ["a", "п"]}) 15 | con = duckdb.connect(":memory:") 16 | arrow = con.register("A", A).register("B", B) 17 
| q = arrow.query("""SELECT key FROM "A" FULL JOIN "B" USING ("key") ORDER BY key""") 18 | result = q.df() 19 | 20 | d = {"key": ["a", "п"]} 21 | df = pandas.DataFrame(data=d) 22 | pandas.testing.assert_frame_equal(result, df) 23 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_limit.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from conftest import ArrowPandas, NumpyPandas 3 | 4 | import duckdb 5 | 6 | 7 | class TestLimitPandas: 8 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 9 | def test_limit_df(self, duckdb_cursor, pandas): 10 | df_in = pandas.DataFrame( 11 | { 12 | "numbers": [1, 2, 3, 4, 5], 13 | } 14 | ) 15 | limit_df = duckdb.limit(df_in, 2) 16 | assert len(limit_df.execute().fetchall()) == 2 17 | 18 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 19 | def test_aggregate_df(self, duckdb_cursor, pandas): 20 | df_in = pandas.DataFrame( 21 | { 22 | "numbers": [1, 2, 2, 2], 23 | } 24 | ) 25 | aggregate_df = duckdb.aggregate(df_in, "count(numbers)", "numbers").order("all") 26 | assert aggregate_df.execute().fetchall() == [(1,), (3,)] 27 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_pandas_df_none.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | 4 | class TestPandasDFNone: 5 | # This used to decrease the ref count of None 6 | def test_none_deref(self): 7 | con = duckdb.connect() 8 | df = con.sql("select NULL::VARCHAR as a from range(1000000)").df() # noqa: F841 9 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_pandas_enum.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | 4 | import duckdb 5 | 6 | 7 | class TestPandasEnum: 8 | def test_3480(self, 
duckdb_cursor): 9 | duckdb_cursor.execute( 10 | """ 11 | create type cat as enum ('marie', 'duchess', 'toulouse'); 12 | create table tab ( 13 | cat cat, 14 | amt int 15 | ); 16 | """ 17 | ) 18 | df = duckdb_cursor.query("SELECT * FROM tab LIMIT 0;").to_df() 19 | assert df["cat"].cat.categories.equals(pd.Index(["marie", "duchess", "toulouse"])) 20 | duckdb_cursor.execute("DROP TABLE tab") 21 | duckdb_cursor.execute("DROP TYPE cat") 22 | 23 | def test_3479(self, duckdb_cursor): 24 | duckdb_cursor.execute( 25 | """ 26 | create type cat as enum ('marie', 'duchess', 'toulouse'); 27 | create table tab ( 28 | cat cat, 29 | amt int 30 | ); 31 | """ 32 | ) 33 | 34 | df = pd.DataFrame( 35 | { 36 | "cat2": pd.Series(["duchess", "toulouse", "marie", None, "berlioz", "o_malley"], dtype="category"), 37 | "amt": [1, 2, 3, 4, 5, 6], 38 | } 39 | ) 40 | duckdb_cursor.register("df", df) 41 | with pytest.raises( 42 | duckdb.ConversionException, 43 | match="Type UINT8 with value 0 can't be cast because the value is out of range for the destination " 44 | "type UINT8", 45 | ): 46 | duckdb_cursor.execute("INSERT INTO tab SELECT * FROM df;") 47 | 48 | assert duckdb_cursor.execute("select * from tab").fetchall() == [] 49 | duckdb_cursor.execute("DROP TABLE tab") 50 | duckdb_cursor.execute("DROP TYPE cat") 51 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_pandas_limit.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | 4 | class TestPandasLimit: 5 | def test_pandas_limit(self, duckdb_cursor): 6 | con = duckdb.connect() 7 | df = con.execute("select * from range(10000000) tbl(i)").df() # noqa: F841 8 | 9 | con.execute("SET threads=8") 10 | 11 | limit_df = con.execute("SELECT * FROM df WHERE i=334 OR i>9967864 LIMIT 5").df() 12 | assert list(limit_df["i"]) == [334, 9967865, 9967866, 9967867, 9967868] 13 | 
# ---- tests/fast/pandas/test_pandas_string.py ----
import numpy
import pandas as pd

import duckdb


class TestPandasString:
    def test_pandas_string(self, duckdb_cursor):
        """Plain object strings (and StringDtype, when available) round-trip through DuckDB."""
        values = numpy.array(["foo", "bar", "baz"])
        # StringDtype is only available in pandas >= 1.0.0
        has_string_dtype = hasattr(pd, "StringDtype")

        # https://pandas.pydata.org/pandas-docs/stable/user_guide/text.html
        df_in = pd.DataFrame({"object": pd.Series(values, dtype="object")})
        if has_string_dtype:
            df_in["string"] = pd.Series(values, dtype=pd.StringDtype())

        df_out = duckdb.query_df(df_in, "data", "SELECT * FROM data").df()

        assert numpy.all(df_out["object"] == values)
        if has_string_dtype:
            assert numpy.all(df_out["string"] == values)

    def test_bug_2467(self, duckdb_cursor):
        """Copying a large string column into a DuckDB table must not lose rows."""
        repeat = 1_000_000
        df = pd.DataFrame({"city": ["Amsterdam", "New York", "London"] * repeat})

        con = duckdb.connect()
        con.register("df", df)
        con.execute(
            """
            CREATE TABLE t1 AS SELECT * FROM df
            """
        )
        result = con.execute(
            """
            SELECT count(*) from t1
            """
        ).fetchall()
        assert result == [(3000000,)]


# ---- tests/fast/pandas/test_pandas_timestamp.py ----
from datetime import datetime

import pandas
import pytest
from conftest import pandas_2_or_higher

import duckdb


@pytest.mark.parametrize("timezone", ["UTC", "CET", "Asia/Kathmandu"])
@pytest.mark.skipif(not pandas_2_or_higher(), reason="Pandas <2.0.0 does not support timezones in the metadata string")
def test_run_pandas_with_tz(timezone):
    """A tz-aware pandas timestamp survives a from_df/df round trip unchanged."""
    con = duckdb.connect()
    con.execute(f"SET TimeZone = '{timezone}'")

    series = pandas.Series(
        data=[pandas.Timestamp(year=2022, month=1, day=1, hour=10, minute=15, tz=timezone, unit="us")],
        dtype=f"datetime64[us, {timezone}]",
    )
    df = pandas.DataFrame({"timestamp": series})

    duck_df = con.from_df(df).df()
    assert duck_df["timestamp"][0] == df["timestamp"][0]


def test_timestamp_conversion(duckdb_cursor):
    """Binding a tz-aware datetime parameter matches tz-aware pandas timestamps."""
    tzinfo = pandas.Timestamp("2024-01-01 00:00:00+0100", tz="Europe/Copenhagen").tzinfo
    ts_df = pandas.DataFrame(  # noqa: F841
        {
            "ts": [
                pandas.Timestamp("2024-01-01 00:00:00+0100", tz=tzinfo),
                pandas.Timestamp("2024-01-02 00:00:00+0100", tz=tzinfo),
            ]
        }
    )

    query = """
    select
        *
    from ts_df
    where ts = $notationtime
    """
    params_zoneinfo = {"notationtime": datetime(2024, 1, 1, tzinfo=tzinfo)}

    duckdb_cursor.execute("set TimeZone = 'Europe/Copenhagen'")
    rows = duckdb_cursor.execute(query, parameters=params_zoneinfo).fetchall()
    assert rows[0][0] == datetime(2024, 1, 1, tzinfo=tzinfo)


# ---- tests/fast/pandas/test_pandas_update.py ----
import pandas as pd

import duckdb


class TestPandasUpdateList:
    def test_pandas_update_list(self, duckdb_cursor):
        """An UPDATE on a list column fetches back correctly as a DataFrame."""
        con = duckdb.connect(":memory:")
        con.execute("create table t (l int[])")
        con.execute("insert into t values ([1, 2]), ([3,4])")
        con.execute("update t set l = [5, 6]")

        expected = pd.DataFrame({"l": [[5, 6], [5, 6]]})
        res = con.execute("select * from t").fetchdf()
        pd.testing.assert_frame_equal(expected, res)
-------------------------------------------------------------------------------- /tests/fast/pandas/test_partitioned_pandas_scan.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import pandas as pd 3 | 4 | import duckdb 5 | 6 | 7 | class TestPartitionedPandasScan: 8 | def test_parallel_pandas(self, duckdb_cursor): 9 | con = duckdb.connect() 10 | df = pd.DataFrame({"i": numpy.arange(10000000)}) 11 | 12 | con.register("df", df) 13 | 14 | seq_results = con.execute("SELECT SUM(i) FROM df").fetchall() 15 | 16 | con.execute("PRAGMA threads=4") 17 | parallel_results = con.execute("SELECT SUM(i) FROM df").fetchall() 18 | 19 | assert seq_results[0][0] == 49999995000000 20 | assert parallel_results[0][0] == 49999995000000 21 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_progress_bar.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import pandas as pd 3 | 4 | import duckdb 5 | 6 | 7 | class TestProgressBarPandas: 8 | def test_progress_pandas_single(self, duckdb_cursor): 9 | con = duckdb.connect() 10 | df = pd.DataFrame({"i": numpy.arange(10000000)}) 11 | 12 | con.register("df", df) 13 | con.register("df_2", df) 14 | con.execute("PRAGMA progress_bar_time=1") 15 | con.execute("PRAGMA disable_print_progress_bar") 16 | result = con.execute("SELECT SUM(df.i) FROM df inner join df_2 on (df.i = df_2.i)").fetchall() 17 | assert result[0][0] == 49999995000000 18 | 19 | def test_progress_pandas_parallel(self, duckdb_cursor): 20 | con = duckdb.connect() 21 | df = pd.DataFrame({"i": numpy.arange(10000000)}) 22 | 23 | con.register("df", df) 24 | con.register("df_2", df) 25 | con.execute("PRAGMA progress_bar_time=1") 26 | con.execute("PRAGMA disable_print_progress_bar") 27 | con.execute("PRAGMA threads=4") 28 | parallel_results = con.execute("SELECT SUM(df.i) FROM df inner join df_2 on (df.i = df_2.i)").fetchall() 29 
| assert parallel_results[0][0] == 49999995000000 30 | 31 | def test_progress_pandas_empty(self, duckdb_cursor): 32 | con = duckdb.connect() 33 | df = pd.DataFrame({"i": []}) 34 | con.register("df", df) 35 | con.execute("PRAGMA progress_bar_time=1") 36 | con.execute("PRAGMA disable_print_progress_bar") 37 | result = con.execute("SELECT SUM(df.i) from df").fetchall() 38 | assert result[0][0] is None 39 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_pyarrow_projection_pushdown.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from conftest import pandas_supports_arrow_backend 3 | 4 | import duckdb 5 | 6 | pa = pytest.importorskip("pyarrow") 7 | ds = pytest.importorskip("pyarrow.dataset") 8 | _ = pytest.importorskip("pandas", "2.0.0") 9 | 10 | 11 | @pytest.mark.skipif(not pandas_supports_arrow_backend(), reason="pandas does not support the 'pyarrow' backend") 12 | class TestArrowDFProjectionPushdown: 13 | def test_projection_pushdown_no_filter(self, duckdb_cursor): 14 | duckdb_conn = duckdb.connect() 15 | duckdb_conn.execute("CREATE TABLE test (a INTEGER, b INTEGER, c INTEGER)") 16 | duckdb_conn.execute("INSERT INTO test VALUES (1,1,1),(10,10,10),(100,10,100),(NULL,NULL,NULL)") 17 | duck_tbl = duckdb_conn.table("test") 18 | arrow_table = duck_tbl.df().convert_dtypes(dtype_backend="pyarrow") 19 | duckdb_conn.register("testarrowtable", arrow_table) 20 | assert duckdb_conn.execute("SELECT sum(a) FROM testarrowtable").fetchall() == [(111,)] 21 | -------------------------------------------------------------------------------- /tests/fast/relational_api/test_groupings.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | 6 | @pytest.fixture 7 | def con(): 8 | conn = duckdb.connect() 9 | conn.execute( 10 | """ 11 | create table tbl as (SELECT * FROM (VALUES 12 | (1, 'a', 12), 13 | 
(1, 'a', 10), 14 | (2, 'b', 5), 15 | (2, 'a', 7), 16 | (3, 'a', 5), 17 | (5, 'c', 2) 18 | ) AS tbl(a, b, c)) 19 | """ 20 | ) 21 | return conn 22 | 23 | 24 | class TestGroupings: 25 | def test_basic_grouping(self, con): 26 | rel = con.table("tbl").sum("a", "b") 27 | res = rel.fetchall() 28 | assert res == [(7,), (2,), (5,)] 29 | 30 | rel = con.sql("select sum(a) from tbl GROUP BY b") 31 | res2 = rel.fetchall() 32 | assert res == res2 33 | 34 | def test_cubed(self, con): 35 | rel = con.table("tbl").sum("a", "CUBE (b)").order("ALL") 36 | res = rel.fetchall() 37 | assert res == [(2,), (5,), (7,), (14,)] 38 | 39 | rel = con.sql("select sum(a) from tbl GROUP BY CUBE (b) ORDER BY ALL") 40 | res2 = rel.fetchall() 41 | assert res == res2 42 | 43 | def test_rollup(self, con): 44 | rel = con.table("tbl").sum("a", "ROLLUP (b, c)").order("ALL") 45 | res = rel.fetchall() 46 | assert res == [(1,), (1,), (2,), (2,), (2,), (3,), (5,), (5,), (7,), (14,)] 47 | 48 | rel = con.sql("select sum(a) from tbl GROUP BY ROLLUP (b, c) ORDER BY ALL") 49 | res2 = rel.fetchall() 50 | assert res == res2 51 | -------------------------------------------------------------------------------- /tests/fast/relational_api/test_pivot.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from pathlib import Path 3 | 4 | 5 | class TestPivot: 6 | def test_pivot_issue_14600(self, duckdb_cursor): 7 | duckdb_cursor.sql( 8 | "create table input_data as select unnest(['u','v','w']) as a, unnest(['x','y','z']) as b, unnest([1,2,3]) as c;" # noqa: E501 9 | ) 10 | pivot_1 = duckdb_cursor.query("pivot input_data on a using max(c) group by b;") 11 | pivot_2 = duckdb_cursor.query("pivot input_data on b using max(c) group by a;") 12 | pivot_1.create("pivot_1") 13 | pivot_2.create("pivot_2") 14 | pivot_1_tbl = duckdb_cursor.table("pivot_1") 15 | pivot_2_tbl = duckdb_cursor.table("pivot_2") 16 | assert set(pivot_1.columns) == set(pivot_1_tbl.columns) 17 | assert 
set(pivot_2.columns) == set(pivot_2_tbl.columns) 18 | 19 | def test_pivot_issue_14601(self, duckdb_cursor): 20 | duckdb_cursor.sql( 21 | "create table input_data as select unnest(['u','v','w']) as a, unnest(['x','y','z']) as b, unnest([1,2,3]) as c;" # noqa: E501 22 | ) 23 | pivot_1 = duckdb_cursor.query("pivot input_data on a using max(c) group by b;") 24 | pivot_1.create("pivot_1") 25 | export_dir = tempfile.mkdtemp() 26 | duckdb_cursor.query(f"EXPORT DATABASE '{export_dir}'") 27 | assert "CREATE TYPE" not in (Path(export_dir) / "schema.sql").read_text() 28 | -------------------------------------------------------------------------------- /tests/fast/relational_api/test_rapi_functions.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | 4 | class TestRAPIFunctions: 5 | def test_rapi_str_print(self, duckdb_cursor): 6 | res = duckdb_cursor.query("select 42::INT AS a, 84::BIGINT AS b") 7 | assert str(res) is not None 8 | res.show() 9 | 10 | def test_rapi_relation_sql_query(self): 11 | res = duckdb.table_function("range", [10]) 12 | assert res.sql_query() == 'SELECT * FROM "range"(10)' 13 | -------------------------------------------------------------------------------- /tests/fast/relational_api/test_table_function.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | import duckdb 6 | 7 | script_path = Path(__file__).parent 8 | 9 | 10 | class TestTableFunction: 11 | def test_table_function(self, duckdb_cursor): 12 | path = str(script_path / ".." 
/ "data/integers.csv") 13 | rel = duckdb_cursor.table_function("read_csv", [path]) 14 | res = rel.fetchall() 15 | assert res == [(1, 10, 0), (2, 50, 30)] 16 | 17 | # Provide only a string as argument, should error, needs a list 18 | with pytest.raises(duckdb.InvalidInputException, match=r"'params' has to be a list of parameters"): 19 | rel = duckdb_cursor.table_function("read_csv", path) 20 | -------------------------------------------------------------------------------- /tests/fast/spark/test_spark_arrow_table.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | _ = pytest.importorskip("duckdb.experimental.spark") 4 | pa = pytest.importorskip("pyarrow") 5 | from spark_namespace import USE_ACTUAL_SPARK 6 | from spark_namespace.sql.dataframe import DataFrame 7 | 8 | 9 | class TestArrowTable: 10 | @pytest.mark.skipif( 11 | USE_ACTUAL_SPARK and not hasattr(DataFrame, "toArrow"), 12 | reason="toArrow is only introduced in PySpark 4.0.0", 13 | ) 14 | def test_spark_to_arrow_table(self, spark): 15 | if USE_ACTUAL_SPARK: 16 | return 17 | data = [ 18 | ("firstRowFirstColumn",), 19 | ("2ndRowFirstColumn",), 20 | ] 21 | df = spark.createDataFrame(data, ["firstColumn"]) 22 | arrow_table = df.toArrow() 23 | assert arrow_table.num_columns == 1 24 | assert arrow_table.num_rows == 2 25 | assert arrow_table.column_names == ["firstColumn"] 26 | -------------------------------------------------------------------------------- /tests/fast/spark/test_spark_except.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | _ = pytest.importorskip("duckdb.experimental.spark") 4 | 5 | from duckdb.experimental.spark.sql.types import Row 6 | 7 | 8 | @pytest.fixture 9 | def df(spark): 10 | return spark.createDataFrame([("a", 1), ("a", 1), ("a", 1), ("a", 2), ("b", 3), ("c", 4)], ["C1", "C2"]) 11 | 12 | 13 | @pytest.fixture 14 | def df2(spark): 15 | return spark.createDataFrame([("a", 
1), ("b", 3)], ["C1", "C2"]) 16 | 17 | 18 | class TestDataFrameIntersect: 19 | def test_exceptAll(self, spark, df, df2): 20 | df3 = df.exceptAll(df2).sort(*df.columns) 21 | res = df3.collect() 22 | 23 | assert res == [ 24 | Row(C1="a", C2=1), 25 | Row(C1="a", C2=1), 26 | Row(C1="a", C2=2), 27 | Row(C1="c", C2=4), 28 | ] 29 | -------------------------------------------------------------------------------- /tests/fast/spark/test_spark_function_concat_ws.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | _ = pytest.importorskip("duckdb.experimental.spark") 4 | from spark_namespace.sql.functions import col, concat_ws 5 | from spark_namespace.sql.types import Row 6 | 7 | 8 | class TestReplaceEmpty: 9 | def test_replace_empty(self, spark): 10 | data = [ 11 | ("firstRowFirstColumn", "firstRowSecondColumn"), 12 | ("2ndRowFirstColumn", "2ndRowSecondColumn"), 13 | ] 14 | df = spark.createDataFrame(data, ["firstColumn", "secondColumn"]) 15 | df = df.withColumn("concatted", concat_ws(" ", col("firstColumn"), col("secondColumn"))) 16 | res = df.select("concatted").collect() 17 | assert res == [ 18 | Row(concatted="firstRowFirstColumn firstRowSecondColumn"), 19 | Row(concatted="2ndRowFirstColumn 2ndRowSecondColumn"), 20 | ] 21 | -------------------------------------------------------------------------------- /tests/fast/spark/test_spark_functions_base64.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | _ = pytest.importorskip("duckdb.experimental.spark") 4 | 5 | from spark_namespace.sql import functions as F 6 | 7 | 8 | class TestSparkFunctionsBase64: 9 | def test_base64(self, spark): 10 | data = [ 11 | ("quack",), 12 | ] 13 | res = ( 14 | spark.createDataFrame(data, ["firstColumn"]) 15 | .withColumn("encoded_value", F.base64(F.col("firstColumn"))) 16 | .select("encoded_value") 17 | .collect() 18 | ) 19 | assert res[0].encoded_value == "cXVhY2s=" 20 | 21 | def 
test_base64ColString(self, spark): 22 | data = [ 23 | ("quack",), 24 | ] 25 | res = ( 26 | spark.createDataFrame(data, ["firstColumn"]) 27 | .withColumn("encoded_value", F.base64("firstColumn")) 28 | .select("encoded_value") 29 | .collect() 30 | ) 31 | assert res[0].encoded_value == "cXVhY2s=" 32 | 33 | def test_unbase64(self, spark): 34 | data = [ 35 | ("cXVhY2s=",), 36 | ] 37 | res = ( 38 | spark.createDataFrame(data, ["firstColumn"]) 39 | .withColumn("decoded_value", F.unbase64(F.col("firstColumn"))) 40 | .select("decoded_value") 41 | .collect() 42 | ) 43 | assert res[0].decoded_value == b"quack" 44 | -------------------------------------------------------------------------------- /tests/fast/spark/test_spark_functions_dataframe.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | _ = pytest.importorskip("duckdb.experimental.spark") 4 | from spark_namespace.sql import functions as F 5 | 6 | 7 | class TestSparkFunctionsArray: 8 | def test_broadcast(self, spark): 9 | data = [ 10 | ([1, 2, 2], 2), 11 | ([2, 4, 5], 3), 12 | ] 13 | 14 | df = spark.createDataFrame(data, ["firstColumn", "secondColumn"]) 15 | df_broadcast = F.broadcast(df) 16 | 17 | assert df.collect() == df_broadcast.collect() 18 | -------------------------------------------------------------------------------- /tests/fast/spark/test_spark_functions_expr.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from spark_namespace.sql import functions as F 3 | from spark_namespace.sql.types import Row 4 | 5 | _ = pytest.importorskip("duckdb.experimental.spark") 6 | 7 | 8 | class TestSparkFunctionsExpr: 9 | def test_expr(self, spark): 10 | df = spark.createDataFrame([["Alice"], ["Bob"]], ["name"]) 11 | res = df.select("name", F.expr("length(name)").alias("str_len")).collect() 12 | 13 | assert res == [ 14 | Row(name="Alice", str_len=5), 15 | Row(name="Bob", str_len=3), 16 | ] 17 | 
# ===================== tests/fast/spark/test_spark_functions_hash.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")
from spark_namespace.sql import functions as F


class TestSparkFunctionsHash:
    """Checks the Spark-compatible hashing functions against known digests."""

    def test_md5(self, spark):
        df = spark.createDataFrame([("quack",)], ["firstColumn"])
        hashed = df.withColumn("hashed_value", F.md5(F.col("firstColumn")))
        res = hashed.select("hashed_value").collect()
        assert res[0].hashed_value == "cfaf278e8f522c72644cee2a753d2845"

    def test_sha256(self, spark):
        df = spark.createDataFrame([("quack",)], ["firstColumn"])
        hashed = df.withColumn("hashed_value", F.sha2(F.col("firstColumn"), 256))
        res = hashed.select("hashed_value").collect()
        assert res[0].hashed_value == "82d928273d067d774889d5df4249aaf73c0b04c64f04d6ed001441ce87a0853c"

# ===================== tests/fast/spark/test_spark_intersect.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")

from duckdb.experimental.spark.sql.types import Row


@pytest.fixture
def df(spark):
    return spark.createDataFrame([("a", 1), ("a", 1), ("b", 3), ("c", 4)], ["C1", "C2"])


@pytest.fixture
def df2(spark):
    return spark.createDataFrame([("a", 1), ("a", 1), ("b", 3)], ["C1", "C2"])


class TestDataFrameIntersect:
    def test_intersect(self, spark, df, df2):
        # Plain intersect removes duplicates.
        res = df.intersect(df2).sort(df.C1).collect()
        assert res == [
            Row(C1="a", C2=1),
            Row(C1="b", C2=3),
        ]

    def test_intersect_all(self, spark, df, df2):
        # intersectAll keeps duplicate rows that occur in both inputs.
        res = df.intersectAll(df2).sort(df.C1).collect()
        assert res == [
            Row(C1="a", C2=1),
            Row(C1="a", C2=1),
            Row(C1="b", C2=3),
        ]

# ===================== tests/fast/spark/test_spark_limit.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")

from spark_namespace.sql.types import (
    Row,
)


class TestDataFrameLimit:
    def test_dataframe_limit(self, spark):
        df = spark.sql("select * from range(100000)")
        res = df.limit(10).collect()
        assert res == [Row(range=i) for i in range(10)]

# ===================== tests/fast/spark/test_spark_readcsv.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")

from spark_namespace import USE_ACTUAL_SPARK
from spark_namespace.sql.types import Row


class TestSparkReadCSV:
    def test_read_csv(self, spark, tmp_path):
        file_path = tmp_path / "basic.csv"
        file_path.write_text("1,2\n3,4\n5,6\n")
        res = spark.read.csv(file_path.as_posix()).collect()

        expected_res = sorted([Row(column0=1, column1=2), Row(column0=3, column1=4), Row(column0=5, column1=6)])
        if USE_ACTUAL_SPARK:
            # Convert all values to strings as this is how Spark reads them by default
            expected_res = [Row(column0=str(row.column0), column1=str(row.column1)) for row in expected_res]
        assert sorted(res) == expected_res
# ===================== tests/fast/spark/test_spark_readjson.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")


from spark_namespace.sql.types import Row


class TestSparkReadJson:
    def test_read_json(self, duckdb_cursor, spark, tmp_path):
        # FIX: this file holds JSON, so name it basic.json — the old name
        # "basic.parquet" (copy-pasted from the parquet test) was misleading.
        file_path = (tmp_path / "basic.json").as_posix()
        duckdb_cursor.execute(f"COPY (select 42 a, true b, 'this is a long string' c) to '{file_path}' (FORMAT JSON)")
        df = spark.read.json(file_path)
        res = df.collect()
        assert res == [Row(a=42, b=True, c="this is a long string")]

# ===================== tests/fast/spark/test_spark_readparquet.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")


from spark_namespace.sql.types import Row


class TestSparkReadParquet:
    def test_read_parquet(self, duckdb_cursor, spark, tmp_path):
        file_path = tmp_path / "basic.parquet"
        file_path = file_path.as_posix()
        duckdb_cursor.execute(
            f"COPY (select 42 a, true b, 'this is a long string' c) to '{file_path}' (FORMAT PARQUET)"
        )
        df = spark.read.parquet(file_path)
        res = df.collect()
        assert res == [Row(a=42, b=True, c="this is a long string")]

# ===================== tests/fast/spark/test_spark_runtime_config.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")

from spark_namespace import USE_ACTUAL_SPARK


class TestSparkRuntimeConfig:
    def test_spark_runtime_config(self, spark):
        # This fetches the internal runtime config from the session
        spark.conf  # noqa: B018

    @pytest.mark.skipif(
        USE_ACTUAL_SPARK, reason="Getting an error with our local PySpark setup. Unclear why but not a priority."
    )
    def test_spark_runtime_config_set(self, spark):
        # Set Config
        with pytest.raises(NotImplementedError):
            spark.conf.set("spark.executor.memory", "5g")

    @pytest.mark.skip(reason="RuntimeConfig is not implemented yet")
    def test_spark_runtime_config_get(self, spark):
        # Get a Spark Config
        with pytest.raises(KeyError):
            spark.conf.get("spark.sql.shuffle.partitions")

# ===================== tests/fast/spark/test_spark_to_parquet.py =====================
import os

import pytest

_ = pytest.importorskip("duckdb.experimental.spark")


@pytest.fixture
def df(spark):
    simpleData = (
        ("Java", 4000, 5),
        ("Python", 4600, 10),
        ("Scala", 4100, 15),
        ("Scala", 4500, 15),
        ("PHP", 3000, 20),
    )
    columns = ["CourseName", "fee", "discount"]
    dataframe = spark.createDataFrame(data=simpleData, schema=columns)
    return dataframe


class TestSparkToParquet:
    def test_basic_to_parquet(self, df, spark, tmp_path):
        temp_file_name = os.path.join(tmp_path, "temp_file.parquet")  # noqa: PTH118

        df.write.parquet(temp_file_name)

        csv_rel = spark.read.parquet(temp_file_name)

        # Round-trip: what was written must read back identically.
        assert sorted(df.collect()) == sorted(csv_rel.collect())

    def test_compressed_to_parquet(self, df, spark, tmp_path):
        temp_file_name = os.path.join(tmp_path, "temp_file.parquet")  # noqa: PTH118

        df.write.parquet(temp_file_name, compression="ZSTD")

        csv_rel = spark.read.parquet(temp_file_name)

        assert sorted(df.collect()) == sorted(csv_rel.collect())
# ===================== tests/fast/spark/test_spark_udf.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")


class TestSparkUDF:
    def test_udf_register(self, spark):
        def to_upper_fn(s: str) -> str:
            return s.upper()

        spark.udf.register("to_upper_fn", to_upper_fn)
        assert spark.sql("select to_upper_fn('quack') as vl").collect()[0].vl == "QUACK"

# ===================== tests/fast/spark/test_spark_union_by_name.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")


from spark_namespace.sql.types import (
    Row,
)


@pytest.fixture
def df1(spark):
    data = [("James", 34), ("Michael", 56), ("Robert", 30), ("Maria", 24)]
    return spark.createDataFrame(data=data, schema=["name", "id"])


@pytest.fixture
def df2(spark):
    data2 = [(34, "James"), (45, "Maria"), (45, "Jen"), (34, "Jeff")]
    return spark.createDataFrame(data=data2, schema=["id", "name"])


class TestDataFrameUnion:
    def test_union_by_name(self, df1, df2):
        # Columns are matched by name, not by position.
        res = df1.unionByName(df2).collect()
        assert res == [
            Row(name="James", id=34),
            Row(name="Michael", id=56),
            Row(name="Robert", id=30),
            Row(name="Maria", id=24),
            Row(name="James", id=34),
            Row(name="Maria", id=45),
            Row(name="Jen", id=45),
            Row(name="Jeff", id=34),
        ]

    def test_union_by_name_allow_missing_cols(self, df1, df2):
        # Missing columns are filled with NULL when allowMissingColumns=True.
        res = df1.unionByName(df2.drop("id"), allowMissingColumns=True).collect()
        assert res == [
            Row(name="James", id=34),
            Row(name="Michael", id=56),
            Row(name="Robert", id=30),
            Row(name="Maria", id=24),
            Row(name="James", id=None),
            Row(name="Maria", id=None),
            Row(name="Jen", id=None),
            Row(name="Jeff", id=None),
        ]

# ===================== tests/fast/test_ambiguous_prepare.py =====================
import duckdb


class TestAmbiguousPrepare:
    def test_bool(self, duckdb_cursor):
        con = duckdb.connect()
        row = con.execute("select ?, ?, ?", (True, 42, [1, 2, 3])).fetchall()[0]
        assert row[0]
        assert row[1] == 42
        assert row[2] == [1, 2, 3]

# ===================== tests/fast/test_case_alias.py =====================
import pytest
from conftest import ArrowPandas, NumpyPandas

import duckdb


class TestCaseAlias:
    @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()])
    def test_case_alias(self, duckdb_cursor, pandas):
        con = duckdb.connect(":memory:")

        df = pandas.DataFrame([{"COL1": "val1", "CoL2": 1.05}, {"COL1": "val3", "CoL2": 17}])

        def check(result):
            # Result columns keep the dataframe's original casing no matter
            # how the query spells the names.
            assert result["COL1"][0] == "val1"
            assert result["COL1"][1] == "val3"
            assert result["CoL2"][0] == 1.05
            assert result["CoL2"][1] == 17

        check(con.from_df(df).query("df", "select * from df").df())
        check(con.from_df(df).query("df", "select COL1, COL2 from df").df())
        check(con.from_df(df).query("df", "select COL1, COL2 from df ORDER BY COL1").df())
        check(con.from_df(df).query("df", "select COL1, COL2 from df GROUP BY COL1, COL2 ORDER BY COL1").df())

# ===================== tests/fast/test_context_manager.py =====================
import duckdb


class TestContextManager:
    def test_context_manager(self, duckdb_cursor):
        with duckdb.connect(database=":memory:", read_only=False) as con:
            assert con.execute("select 1").fetchall() == [(1,)]

# ===================== tests/fast/test_duckdb_api.py =====================
import sys

import duckdb


def test_duckdb_api():
    res = duckdb.execute("SELECT name, value FROM duckdb_settings() WHERE name == 'duckdb_api'")
    py_version = f"{sys.version_info.major}.{sys.version_info.minor}"
    assert res.fetchall() == [("duckdb_api", f"python/{py_version}")]

# ===================== tests/fast/test_insert.py =====================
import pytest
from conftest import ArrowPandas, NumpyPandas

import duckdb


class TestInsert:
    @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()])
    def test_insert(self, pandas):
        expected_df = pandas.DataFrame({"i": [1, 2, 3], "j": ["one", "two", "three"]})
        # connect to an in-memory temporary database and get a cursor
        conn = duckdb.connect()
        cursor = conn.cursor()
        conn.execute("CREATE TABLE test (i INTEGER, j STRING)")
        rel = conn.table("test")
        for row in ([1, "one"], [2, "two"], [3, "three"]):
            rel.insert(row)
        roundtrip = cursor.table("test").project("CAST(i as BIGINT)i, j").to_df()
        pandas.testing.assert_frame_equal(roundtrip, expected_df)

    def test_insert_with_schema(self, duckdb_cursor):
        duckdb_cursor.sql("create schema not_main")
        duckdb_cursor.sql("create table not_main.tbl as select * from range(10)")

        res = duckdb_cursor.table("not_main.tbl").fetchall()
        assert len(res) == 10

        duckdb_cursor.table("not_main.tbl").insert([42])
        res2 = duckdb_cursor.table("not_main.tbl").fetchall()
        assert len(res2) == 11
        assert (42,) in res2

# ===================== tests/fast/test_json_logging.py =====================
import json

import pytest

import duckdb


def _parse_json_func(error_prefix: str):
    """Helper to check that the error message is indeed parsable json."""

    def parse_func(exception) -> bool:
        msg = exception.args[0]
        assert msg.startswith(error_prefix)
        json_str = msg.split(error_prefix, 1)[1]
        try:
            json.loads(json_str)
        except Exception:
            return False
        return True

    return parse_func


def _json_connection():
    # Fresh connection with JSON-formatted error messages turned on.
    conn = duckdb.connect()
    conn.execute("SET errors_as_json='true'")
    return conn


def test_json_syntax_error():
    conn = _json_connection()
    with pytest.raises(duckdb.ParserException, match="SYNTAX_ERROR", check=_parse_json_func("Parser Error: ")):
        conn.execute("syntax error")


def test_json_catalog_error():
    conn = _json_connection()
    with pytest.raises(duckdb.CatalogException, match="MISSING_ENTRY", check=_parse_json_func("Catalog Error: ")):
        conn.execute("SELECT * FROM nonexistent_table")


def test_json_syntax_error_extract_statements():
    conn = _json_connection()
    with pytest.raises(duckdb.ParserException, match="SYNTAX_ERROR", check=_parse_json_func("Parser Error: ")):
        conn.extract_statements("syntax error")


def test_json_syntax_error_get_table_names():
    conn = _json_connection()
    with pytest.raises(duckdb.ParserException, match="SYNTAX_ERROR", check=_parse_json_func("Parser Error: ")):
        conn.get_table_names("syntax error")

# ===================== tests/fast/test_metatransaction.py =====================
import pytest

pd = pytest.importorskip("pandas")
np = pytest.importorskip("numpy")

NUMBER_OF_ROWS = 200000
NUMBER_OF_COLUMNS = 1


class TestMetaTransaction:
    def test_fetchmany(self, duckdb_cursor):
        duckdb_cursor.execute("CREATE SEQUENCE id_seq")
        column_names = ",\n".join([f"column_{i} FLOAT" for i in range(1, NUMBER_OF_COLUMNS + 1)])
        # Create a table containing a sequence
        duckdb_cursor.execute(
            f"""
            CREATE TABLE my_table (
                id INTEGER DEFAULT nextval('id_seq'),
                {column_names}
            )
            """
        )

        for i in range(20):
            # Then insert a large amount of tuples, triggering a parallel execution
            data = np.random.rand(NUMBER_OF_ROWS, NUMBER_OF_COLUMNS)
            columns = [f"Column_{i + 1}" for i in range(NUMBER_OF_COLUMNS)]
            df = pd.DataFrame(data, columns=columns)
            df_columns = ", ".join(df.columns)
            # This gets executed in parallel, causing NextValFunction to be called in parallel
            # stressing the MetaTransaction::Get concurrency
            duckdb_cursor.execute(f"INSERT INTO my_table ({df_columns}) SELECT * FROM df")
            print(f"inserted {i}")
        duckdb_cursor.commit()

# ===================== tests/fast/test_module.py =====================
import duckdb


class TestModule:
    def test_paramstyle(self):
        assert duckdb.paramstyle == "qmark"

    def test_threadsafety(self):
        assert duckdb.threadsafety == 1

    def test_apilevel(self):
        assert duckdb.apilevel == "2.0"

# ===================== tests/fast/test_multi_statement.py =====================
import contextlib
import shutil
from pathlib import Path

import duckdb


class TestMultiStatement:
    def test_multi_statement(self, duckdb_cursor):
        con = duckdb.connect(":memory:")

        # An empty statement is a no-op.
        con.execute("")

        # Several statements in a single execute(); results come from the last.
        con.execute(
            """
            CREATE TABLE integers(i integer);
            insert into integers select * from range(10);
            select * from integers;
            """
        )
        assert [x[0] for x in con.fetchall()] == list(range(10))

        # Round-trip EXPORT/IMPORT DATABASE through a directory in the CWD.
        export_location = Path.cwd() / "duckdb_pytest_dir_export"
        with contextlib.suppress(Exception):
            shutil.rmtree(export_location)
        con.execute("CREATE TABLE integers2(i INTEGER)")
        con.execute("INSERT INTO integers2 VALUES (1), (5), (7), (1928)")
        con.execute(f"EXPORT DATABASE '{export_location}'")
        # reset connection
        con = duckdb.connect(":memory:")
        con.execute(f"IMPORT DATABASE '{export_location}'")
        integers = [x[0] for x in con.execute("SELECT * FROM integers").fetchall()]
        integers2 = [x[0] for x in con.execute("SELECT * FROM integers2").fetchall()]
        assert integers == list(range(10))
        assert integers2 == [1, 5, 7, 1928]
        shutil.rmtree(export_location)
/tests/fast/test_string_annotation.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # we need typing.Union in our import cache 4 | from typing import Union # noqa: F401 5 | 6 | import pytest 7 | 8 | 9 | def make_annotated_function(type: str): 10 | def test_base() -> None: 11 | return None 12 | 13 | import types 14 | 15 | test_function = types.FunctionType( 16 | test_base.__code__, test_base.__globals__, test_base.__name__, test_base.__defaults__, test_base.__closure__ 17 | ) 18 | # Add the 'type' string as return_annotation 19 | test_function.__annotations__ = {"return": type} 20 | return test_function 21 | 22 | 23 | def python_version_lower_than_3_10(): 24 | if sys.version_info[1] < 10: 25 | return True 26 | return False 27 | 28 | 29 | class TestStringAnnotation: 30 | @pytest.mark.skipif( 31 | python_version_lower_than_3_10(), reason="inspect.signature(eval_str=True) only supported since 3.10 and higher" 32 | ) 33 | @pytest.mark.parametrize( 34 | ("input", "expected"), 35 | [ 36 | ("str", "VARCHAR"), 37 | ("list[str]", "VARCHAR[]"), 38 | ("dict[str, str]", "MAP(VARCHAR, VARCHAR)"), 39 | ("dict[Union[str, bool], str]", "MAP(UNION(u1 VARCHAR, u2 BOOLEAN), VARCHAR)"), 40 | ], 41 | ) 42 | def test_string_annotations(self, duckdb_cursor, input, expected): 43 | from inspect import signature 44 | 45 | func = make_annotated_function(input) 46 | sig = signature(func) 47 | assert sig.return_annotation.__class__ is str 48 | 49 | duckdb_cursor.create_function("foo", func) 50 | rel = duckdb_cursor.sql("select foo()") 51 | assert rel.types == [expected] 52 | -------------------------------------------------------------------------------- /tests/fast/test_tf.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | tf = pytest.importorskip("tensorflow") 6 | 7 | 8 | def test_tf(): 9 | con = duckdb.connect() 10 | 11 | con.execute("create table t( a 
integer, b integer)") 12 | con.execute("insert into t values (1,2), (3,4)") 13 | 14 | # Test from connection 15 | duck_tf = con.execute("select * from t").tf() 16 | duck_numpy = con.sql("select * from t").fetchnumpy() 17 | tf.math.equal(duck_tf["a"], tf.convert_to_tensor(duck_numpy["a"])) 18 | tf.math.equal(duck_tf["b"], tf.convert_to_tensor(duck_numpy["b"])) 19 | 20 | # Test from relation 21 | duck_tf = con.sql("select * from t").tf() 22 | tf.math.equal(duck_tf["a"], tf.convert_to_tensor(duck_numpy["a"])) 23 | tf.math.equal(duck_tf["b"], tf.convert_to_tensor(duck_numpy["b"])) 24 | 25 | # Test all Numeric Types 26 | numeric_types = ["TINYINT", "SMALLINT", "BIGINT", "HUGEINT", "FLOAT", "DOUBLE", "DECIMAL(4,1)", "UTINYINT"] 27 | 28 | for supported_type in numeric_types: 29 | con = duckdb.connect() 30 | con.execute(f"create table t( a {supported_type} , b {supported_type})") 31 | con.execute("insert into t values (1,2), (3,4)") 32 | duck_tf = con.sql("select * from t").tf() 33 | duck_numpy = con.sql("select * from t").fetchnumpy() 34 | tf.math.equal(duck_tf["a"], tf.convert_to_tensor(duck_numpy["a"])) 35 | tf.math.equal(duck_tf["b"], tf.convert_to_tensor(duck_numpy["b"])) 36 | -------------------------------------------------------------------------------- /tests/fast/test_transaction.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | 4 | class TestConnectionTransaction: 5 | def test_transaction(self, duckdb_cursor): 6 | con = duckdb.connect() 7 | con.execute("create table t (i integer)") 8 | con.execute("insert into t values (1)") 9 | 10 | con.begin() 11 | con.execute("insert into t values (1)") 12 | assert con.execute("select count (*) from t").fetchone()[0] == 2 13 | con.rollback() 14 | assert con.execute("select count (*) from t").fetchone()[0] == 1 15 | con.begin() 16 | con.execute("insert into t values (1)") 17 | assert con.execute("select count (*) from t").fetchone()[0] == 2 18 | con.commit() 19 | 
assert con.execute("select count (*) from t").fetchone()[0] == 2 20 | -------------------------------------------------------------------------------- /tests/fast/test_type_explicit.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | import duckdb.sqltypes as duckdb_types 3 | 4 | 5 | class TestMap: 6 | def test_array_list_tuple_ambiguity(self): 7 | con = duckdb.connect() 8 | res = con.sql("SELECT $arg", params={"arg": (1, 2)}).fetchall()[0][0] 9 | assert res == [1, 2] 10 | 11 | # By using an explicit duckdb.Value with an array type, we should convert the input as an array 12 | # and get an array (tuple) back 13 | typ = duckdb.array_type(duckdb_types.BIGINT, 2) 14 | val = duckdb.Value((1, 2), typ) 15 | res = con.sql("SELECT $arg", params={"arg": val}).fetchall()[0][0] 16 | assert res == (1, 2) 17 | 18 | val = duckdb.Value([3, 4], typ) 19 | res = con.sql("SELECT $arg", params={"arg": val}).fetchall()[0][0] 20 | assert res == (3, 4) 21 | -------------------------------------------------------------------------------- /tests/fast/test_unicode.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pandas as pd 4 | 5 | import duckdb 6 | 7 | 8 | class TestUnicode: 9 | def test_unicode_pandas_scan(self, duckdb_cursor): 10 | con = duckdb.connect(database=":memory:", read_only=False) 11 | test_df = pd.DataFrame.from_dict({"i": [1, 2, 3], "j": ["a", "c", "ë"]}) 12 | con.register("test_df_view", test_df) 13 | con.execute("SELECT i, j, LENGTH(j) FROM test_df_view").fetchall() 14 | -------------------------------------------------------------------------------- /tests/fast/test_union.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | 4 | class TestUnion: 5 | def test_union_by_all(self): 6 | connection = duckdb.connect() 7 | 8 | connection.execute( 9 | """ 10 | create table tbl1 as select * from 
(VALUES 11 | (1, 2, 3, 4), 12 | (2, 3, 4, 5), 13 | (3, 4, 5, 6)) as tbl(A, B, C, D) 14 | """ 15 | ) 16 | connection.execute( 17 | """ 18 | create table tbl2 as select * from (VALUES 19 | (11, 12, 13, 14, 15), 20 | (12, 13, 14, 15, 16), 21 | (13, 14, 15, 16, 17)) as tbl (A, B, C, D, E) 22 | """ 23 | ) 24 | 25 | query = """ 26 | select 27 | * 28 | from 29 | ( 30 | select A, B, C, D, 0 as E from tbl1 31 | ) 32 | union all ( 33 | select * from tbl2 34 | ) order by all 35 | """ 36 | res = connection.sql(query).fetchall() 37 | assert res == [ 38 | (1, 2, 3, 4, 0), 39 | (2, 3, 4, 5, 0), 40 | (3, 4, 5, 6, 0), 41 | (11, 12, 13, 14, 15), 42 | (12, 13, 14, 15, 16), 43 | (13, 14, 15, 16, 17), 44 | ] 45 | 46 | df_1 = connection.execute("FROM tbl1").df() # noqa: F841 47 | df_2 = connection.execute("FROM tbl2").df() # noqa: F841 48 | 49 | query = """ 50 | select 51 | * 52 | from 53 | ( 54 | select A, B, C, D, 0 as E from df_1 55 | ) 56 | union all ( 57 | select * from df_2 58 | ) order by all 59 | """ 60 | res = connection.sql(query).fetchall() 61 | assert res == [ 62 | (1, 2, 3, 4, 0), 63 | (2, 3, 4, 5, 0), 64 | (3, 4, 5, 6, 0), 65 | (11, 12, 13, 14, 15), 66 | (12, 13, 14, 15, 16), 67 | (13, 14, 15, 16, 17), 68 | ] 69 | -------------------------------------------------------------------------------- /tests/fast/test_version.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import duckdb 4 | 5 | 6 | def test_version(): 7 | assert duckdb.__version__ != "0.0.0" 8 | 9 | 10 | def test_formatted_python_version(): 11 | formatted_python_version = f"{sys.version_info.major}.{sys.version_info.minor}" 12 | assert duckdb.__formatted_python_version__ == formatted_python_version 13 | -------------------------------------------------------------------------------- /tests/fast/types/test_blob.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | 4 | class TestBlob: 5 | def 
test_blob(self, duckdb_cursor): 6 | duckdb_cursor.execute("SELECT BLOB 'hello'") 7 | results = duckdb_cursor.fetchall() 8 | assert results[0][0] == b"hello" 9 | 10 | duckdb_cursor.execute("SELECT BLOB 'hello' AS a") 11 | results = duckdb_cursor.fetchnumpy() 12 | assert results["a"] == numpy.array([b"hello"], dtype=object) 13 | -------------------------------------------------------------------------------- /tests/fast/types/test_boolean.py: -------------------------------------------------------------------------------- 1 | class TestBoolean: 2 | def test_bool(self, duckdb_cursor): 3 | duckdb_cursor.execute("SELECT TRUE") 4 | results = duckdb_cursor.fetchall() 5 | assert results[0][0] 6 | -------------------------------------------------------------------------------- /tests/fast/types/test_datetime_date.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import duckdb 4 | 5 | 6 | class TestDateTimeDate: 7 | def test_date_infinity(self): 8 | con = duckdb.connect() 9 | # Positive infinity 10 | con.execute("SELECT 'infinity'::DATE") 11 | result = con.fetchall() 12 | # datetime.date.max 13 | assert result == [(datetime.date(9999, 12, 31),)] 14 | 15 | con.execute("SELECT '-infinity'::DATE") 16 | result = con.fetchall() 17 | # datetime.date.min 18 | assert result == [(datetime.date(1, 1, 1),)] 19 | 20 | def test_date_infinity_roundtrip(self): 21 | con = duckdb.connect() 22 | 23 | # positive infinity 24 | con.execute("select $1, $1 = 'infinity'::DATE", [datetime.date.max]) 25 | res = con.fetchall() 26 | assert res == [(datetime.date.max, False)] 27 | 28 | # negative infinity 29 | con.execute("select $1, $1 = '-infinity'::DATE", [datetime.date.min]) 30 | res = con.fetchall() 31 | assert res == [(datetime.date.min, False)] 32 | -------------------------------------------------------------------------------- /tests/fast/types/test_decimal.py: 
class TestDecimal:
    """DECIMAL results: decimal.Decimal via fetchall, float arrays via fetchnumpy."""

    def test_decimal(self, duckdb_cursor):
        duckdb_cursor.execute(
            "SELECT 1.2::DECIMAL(4,1), 100.3::DECIMAL(9,1), 320938.4298::DECIMAL(18,4), 49082094824.904820482094::DECIMAL(30,12), NULL::DECIMAL"  # noqa: E501
        )
        # One DECIMAL per physical width (int16/int32/int64/hugeint) plus NULL.
        expected_row = (
            Decimal("1.2"),
            Decimal("100.3"),
            Decimal("320938.4298"),
            Decimal("49082094824.904820482094"),
            None,
        )
        assert duckdb_cursor.fetchall() == [expected_row]

    def test_decimal_numpy(self, duckdb_cursor):
        duckdb_cursor.execute(
            "SELECT 1.2::DECIMAL(4,1) AS a, 100.3::DECIMAL(9,1) AS b, 320938.4298::DECIMAL(18,4) AS c, 49082094824.904820482094::DECIMAL(30,12) AS d"  # noqa: E501
        )
        expected = {
            "a": numpy.array([1.2]),
            "b": numpy.array([100.3]),
            "c": numpy.array([320938.4298]),
            "d": numpy.array([49082094824.904820482094]),
        }
        assert duckdb_cursor.fetchnumpy() == expected
def check_result(duckdb_cursor, value, type):
    """Cast *value* to the SQL *type* and assert it round-trips unchanged."""
    duckdb_cursor.execute(f"SELECT {value}::{type}")
    (row,) = duckdb_cursor.fetchall()
    assert row[0] == value


class TestNumeric:
    def test_numeric_results(self, duckdb_cursor):
        # The value 1 is exactly representable in every tested type.
        for sql_type in ("TINYINT", "SMALLINT", "FLOAT"):
            check_result(duckdb_cursor, 1, sql_type)
class TestTimeTz:
    """A timezone-aware time in a pandas frame round-trips as an aware TIMETZ."""

    def test_time_tz(self, duckdb_cursor):
        df = pandas.DataFrame({"col1": [time(1, 2, 3, tzinfo=timezone.utc)]})  # noqa: F841

        sql = "SELECT * FROM df"
        duckdb_cursor.execute(sql)

        expected = datetime.time(1, 2, 3, tzinfo=datetime.timezone.utc)
        assert duckdb_cursor.fetchall() == [(expected,)]
class TestUDFTransactionality:
    """Interaction between streaming results and UDF registration."""

    # Known bug pinned by xfail: registering a UDF closes open stream results,
    # but a later fetchone() does not report the closure as expected.
    @pytest.mark.xfail(reason="fetchone() does not realize the stream result was closed before completion")
    def test_type_coverage(self, duckdb_cursor):
        # Start consuming a streaming result before the UDF is registered;
        # the statement order (fetch, then create_function) is the point of the test.
        rel = duckdb_cursor.sql("select * from range(4096)")
        res = rel.fetchone()
        assert res == (0,)

        def my_func(x: str) -> int:
            return int(x)

        # Registering the UDF invalidates the still-open stream result above.
        duckdb_cursor.create_function("test", my_func)

        # Expected behavior: continuing to fetch raises "result closed".
        with pytest.raises(duckdb.InvalidInputException, match="result closed"):
            res = rel.fetchone()
from .. import USE_ACTUAL_SPARK

# Re-export the catalog API from whichever backend this test run targets:
# DuckDB's Spark-compatible shim by default, real PySpark when opted in.
if not USE_ACTUAL_SPARK:
    from duckdb.experimental.spark.sql.catalog import *
else:
    from pyspark.sql.catalog import *
from .. import USE_ACTUAL_SPARK

# Re-export the sql.types API from whichever backend this test run targets:
# DuckDB's Spark-compatible shim by default, real PySpark when opted in.
if not USE_ACTUAL_SPARK:
    from duckdb.experimental.spark.sql.types import *
else:
    from pyspark.sql.types import *