├── .clang-format ├── .clang-tidy ├── .clangd ├── .codecov.yml ├── .editorconfig ├── .githooks └── post-checkout ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── config.yml └── workflows │ ├── InternalIssuesCreateMirror.yml │ ├── InternalIssuesUpdateMirror.yml │ ├── cleanup_pypi.yml │ ├── code_quality.yml │ ├── coverage.yml │ ├── on_pr.yml │ ├── on_push.yml │ ├── packaging.yml │ ├── packaging_sdist.yml │ ├── packaging_wheels.yml │ ├── release.yml │ ├── submodule_auto_pr.yml │ ├── submodule_sanity.yml │ └── targeted_test.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── _duckdb-stubs ├── __init__.pyi ├── _func.pyi └── _sqltypes.pyi ├── adbc_driver_duckdb ├── __init__.py └── dbapi.py ├── cmake ├── compiler_launcher.cmake └── duckdb_loader.cmake ├── duckdb ├── __init__.py ├── _dbapi_type_object.py ├── _version.py ├── bytes_io_wrapper.py ├── experimental │ ├── __init__.py │ └── spark │ │ ├── LICENSE │ │ ├── __init__.py │ │ ├── _globals.py │ │ ├── _typing.py │ │ ├── conf.py │ │ ├── context.py │ │ ├── errors │ │ ├── __init__.py │ │ ├── error_classes.py │ │ ├── exceptions │ │ │ ├── __init__.py │ │ │ └── base.py │ │ └── utils.py │ │ ├── exception.py │ │ └── sql │ │ ├── __init__.py │ │ ├── _typing.py │ │ ├── catalog.py │ │ ├── column.py │ │ ├── conf.py │ │ ├── dataframe.py │ │ ├── functions.py │ │ ├── group.py │ │ ├── readwriter.py │ │ ├── session.py │ │ ├── streaming.py │ │ ├── type_utils.py │ │ ├── types.py │ │ └── udf.py ├── filesystem.py ├── func │ └── __init__.py ├── functional │ └── __init__.py ├── polars_io.py ├── py.typed ├── query_graph │ └── __main__.py ├── sqltypes │ └── __init__.py ├── typing │ └── __init__.py ├── udf.py └── value │ ├── __init__.py │ └── constant │ └── __init__.py ├── duckdb_packaging ├── __init__.py ├── _versioning.py ├── build_backend.py ├── pypi_cleanup.py └── setuptools_scm_version.py ├── pyodide.md ├── pyproject.toml ├── scripts ├── cache_data.json 
├── connection_methods.json ├── connection_wrapper_methods.json ├── generate_connection_code.py ├── generate_connection_methods.py ├── generate_connection_stubs.py ├── generate_connection_wrapper_methods.py ├── generate_connection_wrapper_stubs.py ├── generate_import_cache_cpp.py ├── generate_import_cache_json.py ├── get_cpp_methods.py ├── imports.py ├── install_spark_in_cibuildwheels_linux_container.sh └── regenerate_python_stubs.sh ├── src └── duckdb_py │ ├── CMakeLists.txt │ ├── arrow │ ├── CMakeLists.txt │ ├── arrow_array_stream.cpp │ ├── arrow_export_utils.cpp │ └── pyarrow_filter_pushdown.cpp │ ├── common │ ├── CMakeLists.txt │ └── exceptions.cpp │ ├── dataframe.cpp │ ├── duckdb_python.cpp │ ├── functional │ ├── CMakeLists.txt │ └── functional.cpp │ ├── importer.cpp │ ├── include │ └── duckdb_python │ │ ├── arrow │ │ ├── arrow_array_stream.hpp │ │ ├── arrow_export_utils.hpp │ │ └── pyarrow_filter_pushdown.hpp │ │ ├── conversions │ │ └── optional_wrapper.hpp │ │ ├── expression │ │ └── pyexpression.hpp │ │ ├── filesystem_object.hpp │ │ ├── functional.hpp │ │ ├── import_cache │ │ ├── importer.hpp │ │ ├── modules │ │ │ ├── collections_module.hpp │ │ │ ├── datetime_module.hpp │ │ │ ├── decimal_module.hpp │ │ │ ├── duckdb_module.hpp │ │ │ ├── ipython_module.hpp │ │ │ ├── ipywidgets_module.hpp │ │ │ ├── numpy_module.hpp │ │ │ ├── pandas_module.hpp │ │ │ ├── pathlib_module.hpp │ │ │ ├── polars_module.hpp │ │ │ ├── pyarrow_module.hpp │ │ │ ├── pytz_module.hpp │ │ │ ├── types_module.hpp │ │ │ ├── typing_module.hpp │ │ │ └── uuid_module.hpp │ │ ├── python_import_cache.hpp │ │ ├── python_import_cache_item.hpp │ │ └── python_import_cache_modules.hpp │ │ ├── jupyter_progress_bar_display.hpp │ │ ├── map.hpp │ │ ├── numpy │ │ ├── array_wrapper.hpp │ │ ├── numpy_bind.hpp │ │ ├── numpy_result_conversion.hpp │ │ ├── numpy_scan.hpp │ │ ├── numpy_type.hpp │ │ └── raw_array_wrapper.hpp │ │ ├── pandas │ │ ├── column │ │ │ └── pandas_numpy_column.hpp │ │ ├── pandas_analyzer.hpp │ │ 
├── pandas_bind.hpp │ │ ├── pandas_column.hpp │ │ └── pandas_scan.hpp │ │ ├── path_like.hpp │ │ ├── pybind11 │ │ ├── conversions │ │ │ ├── exception_handling_enum.hpp │ │ │ ├── explain_enum.hpp │ │ │ ├── null_handling_enum.hpp │ │ │ ├── pyconnection_default.hpp │ │ │ ├── python_csv_line_terminator_enum.hpp │ │ │ ├── python_udf_type_enum.hpp │ │ │ └── render_mode_enum.hpp │ │ ├── dataframe.hpp │ │ ├── exceptions.hpp │ │ ├── gil_wrapper.hpp │ │ ├── pybind_wrapper.hpp │ │ ├── python_object_container.hpp │ │ └── registered_py_object.hpp │ │ ├── pyconnection │ │ └── pyconnection.hpp │ │ ├── pyfilesystem.hpp │ │ ├── pyrelation.hpp │ │ ├── pyresult.hpp │ │ ├── pystatement.hpp │ │ ├── python_conversion.hpp │ │ ├── python_dependency.hpp │ │ ├── python_objects.hpp │ │ ├── python_replacement_scan.hpp │ │ ├── pytype.hpp │ │ ├── pyutil.hpp │ │ └── typing.hpp │ ├── jupyter │ ├── CMakeLists.txt │ └── jupyter_progress_bar_display.cpp │ ├── map.cpp │ ├── native │ ├── CMakeLists.txt │ ├── python_conversion.cpp │ └── python_objects.cpp │ ├── numpy │ ├── CMakeLists.txt │ ├── array_wrapper.cpp │ ├── numpy_bind.cpp │ ├── numpy_result_conversion.cpp │ ├── numpy_scan.cpp │ ├── raw_array_wrapper.cpp │ └── type.cpp │ ├── pandas │ ├── CMakeLists.txt │ ├── analyzer.cpp │ ├── bind.cpp │ └── scan.cpp │ ├── path_like.cpp │ ├── pybind11 │ ├── CMakeLists.txt │ └── pybind_wrapper.cpp │ ├── pyconnection.cpp │ ├── pyconnection │ ├── CMakeLists.txt │ └── type_creation.cpp │ ├── pyexpression.cpp │ ├── pyexpression │ ├── CMakeLists.txt │ └── initialize.cpp │ ├── pyfilesystem.cpp │ ├── pyrelation.cpp │ ├── pyrelation │ ├── CMakeLists.txt │ └── initialize.cpp │ ├── pyresult.cpp │ ├── pystatement.cpp │ ├── python_dependency.cpp │ ├── python_import_cache.cpp │ ├── python_replacement_scan.cpp │ ├── python_udf.cpp │ └── typing │ ├── CMakeLists.txt │ ├── pytype.cpp │ └── typing.cpp └── tests ├── conftest.py ├── coverage └── test_pandas_categorical_coverage.py ├── extensions ├── json │ ├── data │ │ └── 
example.json │ └── test_read_json.py ├── test_extensions_loading.py └── test_httpfs.py ├── fast ├── adbc │ ├── test_adbc.py │ ├── test_connection_get_info.py │ └── test_statement_bind.py ├── api │ ├── test_3324.py │ ├── test_3654.py │ ├── test_3728.py │ ├── test_6315.py │ ├── test_attribute_getter.py │ ├── test_config.py │ ├── test_connection_close.py │ ├── test_connection_interrupt.py │ ├── test_cursor.py │ ├── test_dbapi00.py │ ├── test_dbapi01.py │ ├── test_dbapi04.py │ ├── test_dbapi05.py │ ├── test_dbapi07.py │ ├── test_dbapi08.py │ ├── test_dbapi09.py │ ├── test_dbapi10.py │ ├── test_dbapi11.py │ ├── test_dbapi12.py │ ├── test_dbapi13.py │ ├── test_dbapi_fetch.py │ ├── test_duckdb_connection.py │ ├── test_duckdb_execute.py │ ├── test_duckdb_query.py │ ├── test_explain.py │ ├── test_fsspec.py │ ├── test_insert_into.py │ ├── test_join.py │ ├── test_native_tz.py │ ├── test_query_interrupt.py │ ├── test_query_progress.py │ ├── test_read_csv.py │ ├── test_relation_to_view.py │ ├── test_streaming_result.py │ ├── test_to_csv.py │ ├── test_to_parquet.py │ └── test_with_propagating_exceptions.py ├── arrow │ ├── data │ │ ├── arrow_table │ │ ├── unsigned.parquet │ │ └── userdata1.parquet │ ├── parquet_write_roundtrip.py │ ├── test_10795.py │ ├── test_12384.py │ ├── test_14344.py │ ├── test_2426.py │ ├── test_5547.py │ ├── test_6584.py │ ├── test_6796.py │ ├── test_7652.py │ ├── test_7699.py │ ├── test_8522.py │ ├── test_9443.py │ ├── test_arrow_batch_index.py │ ├── test_arrow_binary_view.py │ ├── test_arrow_case_sensitive.py │ ├── test_arrow_decimal256.py │ ├── test_arrow_decimal_32_64.py │ ├── test_arrow_extensions.py │ ├── test_arrow_fetch.py │ ├── test_arrow_fetch_recordbatch.py │ ├── test_arrow_fixed_binary.py │ ├── test_arrow_ipc.py │ ├── test_arrow_list.py │ ├── test_arrow_offsets.py │ ├── test_arrow_pycapsule.py │ ├── test_arrow_recordbatchreader.py │ ├── test_arrow_replacement_scan.py │ ├── test_arrow_run_end_encoding.py │ ├── test_arrow_scanner.py │ ├── 
test_arrow_string_view.py │ ├── test_arrow_types.py │ ├── test_arrow_union.py │ ├── test_arrow_version_format.py │ ├── test_binary_type.py │ ├── test_buffer_size_option.py │ ├── test_dataset.py │ ├── test_date.py │ ├── test_dictionary_arrow.py │ ├── test_filter_pushdown.py │ ├── test_integration.py │ ├── test_interval.py │ ├── test_large_offsets.py │ ├── test_large_string.py │ ├── test_multiple_reads.py │ ├── test_nested_arrow.py │ ├── test_parallel.py │ ├── test_polars.py │ ├── test_progress.py │ ├── test_projection_pushdown.py │ ├── test_time.py │ ├── test_timestamp_timezone.py │ ├── test_timestamps.py │ ├── test_tpch.py │ ├── test_unregister.py │ └── test_view.py ├── data │ ├── binary_string.parquet │ ├── category.csv │ ├── datetime.csv │ ├── example.json │ ├── integers.csv │ ├── nullpadding.csv │ ├── problematic.csv │ ├── quote_escape.csv │ ├── stress_test.csv │ ├── tz.parquet │ └── unquote_without_delimiter.csv ├── numpy │ └── test_numpy_new_path.py ├── pandas │ ├── test_2304.py │ ├── test_append_df.py │ ├── test_bug2281.py │ ├── test_bug5922.py │ ├── test_column_order.py │ ├── test_copy_on_write.py │ ├── test_create_table_from_pandas.py │ ├── test_date_as_datetime.py │ ├── test_datetime_time.py │ ├── test_datetime_timestamp.py │ ├── test_df_analyze.py │ ├── test_df_object_resolution.py │ ├── test_df_recursive_nested.py │ ├── test_fetch_df_chunk.py │ ├── test_fetch_nested.py │ ├── test_implicit_pandas_scan.py │ ├── test_import_cache.py │ ├── test_issue_1767.py │ ├── test_limit.py │ ├── test_pandas_arrow.py │ ├── test_pandas_category.py │ ├── test_pandas_df_none.py │ ├── test_pandas_enum.py │ ├── test_pandas_limit.py │ ├── test_pandas_na.py │ ├── test_pandas_object.py │ ├── test_pandas_string.py │ ├── test_pandas_timestamp.py │ ├── test_pandas_types.py │ ├── test_pandas_unregister.py │ ├── test_pandas_update.py │ ├── test_parallel_pandas_scan.py │ ├── test_partitioned_pandas_scan.py │ ├── test_progress_bar.py │ ├── test_pyarrow_projection_pushdown.py │ ├── 
test_same_name.py │ ├── test_stride.py │ ├── test_timedelta.py │ └── test_timestamp.py ├── relational_api │ ├── test_groupings.py │ ├── test_joins.py │ ├── test_pivot.py │ ├── test_rapi_aggregations.py │ ├── test_rapi_close.py │ ├── test_rapi_description.py │ ├── test_rapi_functions.py │ ├── test_rapi_query.py │ ├── test_rapi_windows.py │ └── test_table_function.py ├── spark │ ├── test_replace_column_value.py │ ├── test_replace_empty_value.py │ ├── test_spark_arrow_table.py │ ├── test_spark_catalog.py │ ├── test_spark_column.py │ ├── test_spark_dataframe.py │ ├── test_spark_dataframe_sort.py │ ├── test_spark_drop_duplicates.py │ ├── test_spark_except.py │ ├── test_spark_filter.py │ ├── test_spark_function_concat_ws.py │ ├── test_spark_functions_array.py │ ├── test_spark_functions_base64.py │ ├── test_spark_functions_dataframe.py │ ├── test_spark_functions_date.py │ ├── test_spark_functions_expr.py │ ├── test_spark_functions_hash.py │ ├── test_spark_functions_hex.py │ ├── test_spark_functions_miscellaneous.py │ ├── test_spark_functions_null.py │ ├── test_spark_functions_numeric.py │ ├── test_spark_functions_sort.py │ ├── test_spark_functions_string.py │ ├── test_spark_group_by.py │ ├── test_spark_intersect.py │ ├── test_spark_join.py │ ├── test_spark_limit.py │ ├── test_spark_order_by.py │ ├── test_spark_pandas_dataframe.py │ ├── test_spark_readcsv.py │ ├── test_spark_readjson.py │ ├── test_spark_readparquet.py │ ├── test_spark_runtime_config.py │ ├── test_spark_session.py │ ├── test_spark_to_csv.py │ ├── test_spark_to_parquet.py │ ├── test_spark_transform.py │ ├── test_spark_types.py │ ├── test_spark_udf.py │ ├── test_spark_union.py │ ├── test_spark_union_by_name.py │ ├── test_spark_with_column.py │ ├── test_spark_with_column_renamed.py │ ├── test_spark_with_columns.py │ └── test_spark_with_columns_renamed.py ├── sqlite │ └── test_types.py ├── test_alex_multithread.py ├── test_all_types.py ├── test_ambiguous_prepare.py ├── test_case_alias.py ├── 
test_context_manager.py ├── test_duckdb_api.py ├── test_expression.py ├── test_filesystem.py ├── test_get_table_names.py ├── test_import_export.py ├── test_insert.py ├── test_json_logging.py ├── test_many_con_same_file.py ├── test_map.py ├── test_metatransaction.py ├── test_module.py ├── test_multi_statement.py ├── test_multithread.py ├── test_non_default_conn.py ├── test_parameter_list.py ├── test_parquet.py ├── test_pypi_cleanup.py ├── test_pytorch.py ├── test_relation.py ├── test_relation_dependency_leak.py ├── test_replacement_scan.py ├── test_result.py ├── test_runtime_error.py ├── test_sql_expression.py ├── test_string_annotation.py ├── test_tf.py ├── test_transaction.py ├── test_type.py ├── test_type_explicit.py ├── test_unicode.py ├── test_union.py ├── test_value.py ├── test_version.py ├── test_versioning.py ├── test_windows_abs_path.py ├── types │ ├── test_blob.py │ ├── test_boolean.py │ ├── test_datetime_date.py │ ├── test_datetime_datetime.py │ ├── test_decimal.py │ ├── test_hugeint.py │ ├── test_nan.py │ ├── test_nested.py │ ├── test_null.py │ ├── test_numeric.py │ ├── test_numpy.py │ ├── test_object_int.py │ ├── test_time_tz.py │ └── test_unsigned.py └── udf │ ├── test_null_filtering.py │ ├── test_remove_function.py │ ├── test_scalar.py │ ├── test_scalar_arrow.py │ ├── test_scalar_native.py │ └── test_transactionality.py ├── slow └── test_h2oai_arrow.py └── spark_namespace ├── __init__.py ├── errors.py └── sql ├── __init__.py ├── catalog.py ├── column.py ├── dataframe.py ├── functions.py └── types.py /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: LLVM 3 | SortIncludes: false 4 | TabWidth: 4 5 | IndentWidth: 4 6 | ColumnLimit: 120 7 | AllowShortFunctionsOnASingleLine: false 8 | --- 9 | UseTab: ForIndentation 10 | DerivePointerAlignment: false 11 | PointerAlignment: Right 12 | AlignConsecutiveMacros: true 13 | AlignTrailingComments: true 14 | AllowAllArgumentsOnNextLine: true 15 
| AllowAllConstructorInitializersOnNextLine: true 16 | AllowAllParametersOfDeclarationOnNextLine: true 17 | AlignAfterOpenBracket: Align 18 | SpaceBeforeCpp11BracedList: true 19 | SpaceBeforeCtorInitializerColon: true 20 | SpaceBeforeInheritanceColon: true 21 | SpacesInAngles: false 22 | SpacesInCStyleCastParentheses: false 23 | SpacesInConditionalStatement: false 24 | AllowShortLambdasOnASingleLine: Inline 25 | AllowShortLoopsOnASingleLine: false 26 | AlwaysBreakTemplateDeclarations: Yes 27 | IncludeBlocks: Regroup 28 | Language: Cpp 29 | AccessModifierOffset: -4 30 | --- 31 | Language: Java 32 | SpaceAfterCStyleCast: true 33 | --- 34 | -------------------------------------------------------------------------------- /.clangd: -------------------------------------------------------------------------------- 1 | CompileFlags: 2 | CompilationDatabase: build/clangd 3 | Add: -Wno-unqualified-std-cast-call 4 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | coverage: 3 | precision: 2 4 | round: down 5 | range: "0...100" 6 | status: 7 | project: 8 | default: 9 | # advanced settings 10 | if_not_found: success 11 | if_ci_failed: failure 12 | informational: true 13 | only_pulls: false 14 | patch: 15 | default: 16 | branches: 17 | - main 18 | if_not_found: success 19 | if_ci_failed: error 20 | informational: true 21 | only_pulls: true 22 | paths: 23 | - "src" -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | insert_final_newline = true 7 | trim_trailing_whitespace = true 8 | 9 | [*.{py,pyi}] 10 | indent_style = space 11 | indent_size = 4 12 | 13 | [*.{c,cpp,h,hpp}] 14 | indent_style = tab 15 | tab_width = 4 16 | indent_size = tab 
17 | max_line_length = 120 18 | x-soft-wrap-text = true 19 | x-soft-wrap-mode = CharacterWidth 20 | x-soft-wrap-limit = 120 21 | x-show-invisibles = false 22 | x-show-spaces = false 23 | 24 | [Makefile] 25 | indent_style = tab 26 | tab_width = 4 27 | indent_size = tab 28 | x-soft-wrap-text = false 29 | -------------------------------------------------------------------------------- /.githooks/post-checkout: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | git submodule update --init --recursive 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Feature Request 4 | url: https://github.com/duckdb/duckdb-python/discussions/new?category=ideas&title=Feature%20Request:%20...&labels=feature&body=Why%20do%20you%20want%20this%20feature%3F 5 | about: Submit feature requests here 6 | - name: Discussions 7 | url: https://github.com/duckdb/duckdb-python/discussions 8 | about: Please ask and answer general questions here. 
9 | -------------------------------------------------------------------------------- /.github/workflows/code_quality.yml: -------------------------------------------------------------------------------- 1 | name: Code Quality Checks 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | git_ref: 6 | type: string 7 | description: Git ref of the DuckDB python package 8 | required: false 9 | workflow_call: 10 | inputs: 11 | git_ref: 12 | type: string 13 | description: Git ref of the DuckDB python package 14 | required: false 15 | 16 | defaults: 17 | run: 18 | shell: bash 19 | 20 | jobs: 21 | run_checks: 22 | name: Run linting, formatting and static type checker 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v4 26 | with: 27 | ref: ${{ inputs.git_ref }} 28 | fetch-depth: 0 29 | persist-credentials: false 30 | 31 | - name: Install Astral UV 32 | uses: astral-sh/setup-uv@v7 33 | with: 34 | version: "0.9.0" 35 | python-version: 3.9 36 | 37 | - name: pre-commit (cache) 38 | uses: actions/cache@v4 39 | with: 40 | path: ~/.cache/pre-commit 41 | key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} 42 | 43 | - name: pre-commit (--all-files) 44 | run: | 45 | uvx pre-commit run --show-diff-on-failure --color=always --all-files 46 | -------------------------------------------------------------------------------- /.github/workflows/on_pr.yml: -------------------------------------------------------------------------------- 1 | name: Tests and builds on PR 2 | on: 3 | pull_request: 4 | branches: 5 | - main 6 | - v*.*-* 7 | types: [opened, reopened, ready_for_review, converted_to_draft, synchronize] 8 | paths-ignore: 9 | - '**.md' 10 | - 'LICENSE' 11 | - '.editorconfig' 12 | - 'scripts/**' 13 | - '.github//**' 14 | - '!.github/workflows/on_push.yml' 15 | - '!.github/workflows/coverage.yml' 16 | 17 | concurrency: 18 | group: ${{ github.workflow }}-${{ github.ref }} 19 | cancel-in-progress: true 20 | 21 | jobs: 22 | submodule_sanity_guard: 23 | name: Make sure 
submodule is in a sane state 24 | uses: ./.github/workflows/submodule_sanity.yml 25 | 26 | code_quality: 27 | name: Code-quality checks 28 | needs: submodule_sanity_guard 29 | uses: ./.github/workflows/code_quality.yml 30 | 31 | packaging_test: 32 | name: Build a minimal set of packages and run all tests on them 33 | needs: code_quality 34 | # Skip packaging tests for draft PRs 35 | if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }} 36 | uses: ./.github/workflows/packaging.yml 37 | with: 38 | minimal: true 39 | testsuite: all 40 | duckdb-sha: ${{ github.base_ref }} 41 | 42 | coverage_test: 43 | name: Run coverage tests 44 | needs: code_quality 45 | # Only run coverage test for draft PRs 46 | if: ${{ github.event_name == 'pull_request' && github.event.pull_request.draft == true }} 47 | uses: ./.github/workflows/coverage.yml 48 | with: 49 | duckdb_git_ref: ${{ github.base_ref }} 50 | testsuite: all 51 | -------------------------------------------------------------------------------- /.github/workflows/on_push.yml: -------------------------------------------------------------------------------- 1 | name: Tests and coverage on push 2 | on: 3 | push: 4 | branches-ignore: 5 | - main 6 | - v*.*-* 7 | paths-ignore: 8 | - '**.md' 9 | - 'LICENSE' 10 | - '.editorconfig' 11 | - 'scripts/**' 12 | - '.github//**' 13 | - '!.github/workflows/on_push.yml' 14 | - '!.github/workflows/coverage.yml' 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.ref }} 18 | cancel-in-progress: true 19 | 20 | jobs: 21 | code_quality: 22 | name: Code-quality checks 23 | uses: ./.github/workflows/code_quality.yml 24 | 25 | test: 26 | name: Run coverage tests 27 | needs: code_quality 28 | uses: ./.github/workflows/coverage.yml 29 | with: 30 | git_ref: ${{ github.ref }} 31 | testsuite: fast 32 | -------------------------------------------------------------------------------- /.github/workflows/submodule_sanity.yml: 
-------------------------------------------------------------------------------- 1 | name: Check DuckDB submodule sanity 2 | on: 3 | workflow_call: 4 | workflow_dispatch: 5 | jobs: 6 | submodule_sanity: 7 | name: Make sure submodule is in a sane state 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout DuckDB Python 11 | uses: actions/checkout@v4 12 | with: 13 | fetch-depth: 0 14 | 15 | - name: Verify submodule origin 16 | shell: bash 17 | run: | 18 | set -eux 19 | git submodule update --init 20 | cd external/duckdb 21 | remote_count=$(git remote | wc -l) 22 | if [[ $remote_count -gt 1 ]]; then 23 | echo "::error::Multiple remotes found - only origin allowed" 24 | git remote -v 25 | fi 26 | origin_url=$(git remote get-url origin) 27 | if [[ "$origin_url" != "https://github.com/duckdb/duckdb"* ]]; then 28 | echo "::error::Submodule origin has been tampered with: $origin_url" 29 | exit 1 30 | fi 31 | 32 | - name: Disallow changes to .gitmodules in PRs and pushes 33 | if: ${{ github.event_name == 'pull_request' || github.event_name == 'push' }} 34 | shell: bash 35 | run: | 36 | set -eux 37 | before=${{ github.event_name == 'push' && github.event.before || format('origin/{0}', github.base_ref) }} 38 | after=${{ github.event_name == 'push' && github.event.after || github.head_ref }} 39 | if git diff --name-only $before...$after | grep -q "^\.gitmodules$"; then 40 | echo "::error::.gitmodules may not be modified. If you see a reason to update, please discuss with the maintainers." 41 | exit 1 42 | fi 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #==============================================================================# 2 | # This file specifies intentionally untracked files that git should ignore. 
3 | #==============================================================================# 4 | 5 | #==============================================================================# 6 | # General 7 | #==============================================================================# 8 | # Temp files created by most text editors. 9 | *~ 10 | # Merge files created by git. 11 | *.orig 12 | # vim swap files 13 | .*.sw? 14 | .sw? 15 | #OS X specific files. 16 | .DS_store 17 | 18 | #==============================================================================# 19 | # Build artifacts 20 | #==============================================================================# 21 | *.o 22 | *.lo 23 | *.la 24 | *.lai 25 | *.lib 26 | *.slo 27 | *.cuo 28 | *.pdf 29 | *.swp 30 | a.out 31 | *.so 32 | *.dylib 33 | *.dll 34 | 35 | build 36 | .build_debug/* 37 | .build_release/* 38 | distribute/* 39 | *.testbin 40 | *.bin 41 | cmake_build 42 | .cmake_build 43 | cmake-build-debug 44 | cmake-build-release 45 | cmake-build-relwithdebinfo 46 | duckdb_packaging/duckdb_version.txt 47 | test.db 48 | 49 | #==============================================================================# 50 | # Python 51 | #==============================================================================# 52 | 53 | *.pyc 54 | .venv 55 | uv.lock 56 | dist 57 | duckdb.egg-info 58 | .eggs 59 | .pytest_cache 60 | .coverage 61 | duckdb_build 62 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "external/duckdb"] 2 | path = external/duckdb 3 | url = https://github.com/duckdb/duckdb.git 4 | branch = main 5 | [submodule] 6 | recurse = true 7 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 
3 | rev: v4.4.0 4 | hooks: 5 | - id: check-yaml 6 | args: ["--allow-multiple-documents"] 7 | - id: check-toml 8 | - id: check-added-large-files 9 | - id: detect-private-key 10 | - id: check-merge-conflict 11 | - id: forbid-new-submodules 12 | 13 | - repo: https://github.com/astral-sh/ruff-pre-commit 14 | # Ruff version. 15 | rev: v0.13.3 16 | hooks: 17 | # Run the linter. 18 | - id: ruff-check 19 | # Run the formatter. 20 | - id: ruff-format 21 | 22 | - repo: https://github.com/pre-commit/mirrors-clang-format 23 | rev: v21.1.2 # pick the version of clang-format you want 24 | hooks: 25 | - id: clang-format 26 | files: \.(c|cpp|cc|h|hpp|cxx|hxx)$ 27 | 28 | - repo: https://github.com/cheshirekow/cmake-format-precommit 29 | rev: v0.6.13 30 | hooks: 31 | - id: cmake-format 32 | 33 | - repo: https://github.com/pre-commit/mirrors-mypy 34 | rev: v1.18.2 35 | hooks: 36 | - id: mypy 37 | entry: mypy 38 | files: ^(duckdb/|_duckdb-stubs/) 39 | exclude: ^duckdb/(experimental|query_graph)/ 40 | additional_dependencies: [ numpy, polars ] 41 | 42 | - repo: local 43 | hooks: 44 | - id: post-checkout-submodules 45 | name: Update submodule post-checkout 46 | entry: .githooks/post-checkout 47 | language: script 48 | stages: [ post-checkout ] 49 | pass_filenames: false 50 | always_run: true 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018-2025 Stichting DuckDB Foundation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and 
# cmake/compiler_launcher.cmake
include_guard(GLOBAL) # only include once
include(CMakeParseArguments)

# ────────────────────────────────────────────
# setup_compiler_launcher_if_available()
#
# Function to look for ccache and sccache to speed up builds, if available
# ────────────────────────────────────────────
function(setup_compiler_launcher_if_available)
  # Only pick a launcher when the user / toolchain file has not already set one;
  # an explicit CMAKE_C_COMPILER_LAUNCHER always wins.
  if(NOT DEFINED CMAKE_C_COMPILER_LAUNCHER)
    # find_program caches its result in COMPILER_LAUNCHER, so the second
    # lookup below (for C++) is effectively free.
    find_program(COMPILER_LAUNCHER NAMES ccache sccache)
    if(COMPILER_LAUNCHER)
      message(STATUS "Using ${COMPILER_LAUNCHER} as C compiler launcher")
      # FORCE so the choice is recorded in the cache for subsequent
      # configure runs as well.
      set(CMAKE_C_COMPILER_LAUNCHER
          "${COMPILER_LAUNCHER}"
          CACHE STRING "" FORCE)
    endif()
  endif()

  # Same logic for the C++ compiler, handled independently so that a
  # user-provided CXX launcher does not block the C one (and vice versa).
  if(NOT DEFINED CMAKE_CXX_COMPILER_LAUNCHER)
    find_program(COMPILER_LAUNCHER NAMES ccache sccache)
    if(COMPILER_LAUNCHER)
      message(STATUS "Using ${COMPILER_LAUNCHER} as C++ compiler launcher")
      set(CMAKE_CXX_COMPILER_LAUNCHER
          "${COMPILER_LAUNCHER}"
          CACHE STRING "" FORCE)
    endif()
  endif()
endfunction()
# ----------------------------------------------------------------------
# Version API
#
# We provide three symbols:
# - duckdb.__version__: The version of this package
# - duckdb.__duckdb_version__: The version of duckdb that is bundled
# - duckdb.version(): A human-readable version string containing both of the above
# ----------------------------------------------------------------------
from importlib.metadata import version as _dist_version

import _duckdb

__version__: str = _dist_version("duckdb")
"""Version of the DuckDB Python Package."""

__duckdb_version__: str = _duckdb.__version__
"""Version of DuckDB that is bundled."""


def version() -> str:
    """Human-friendly formatted version string of both the distribution package and the bundled DuckDB engine."""
    # Use the exported __duckdb_version__ symbol rather than reaching back
    # into _duckdb, so the bundled-engine version is resolved in exactly one
    # place and stays consistent with the module-level attribute above.
    return f"{__version__} (with duckdb {__duckdb_version__})"
import spark # noqa: D104 2 | 3 | __all__ = [ 4 | "spark", 5 | ] 6 | -------------------------------------------------------------------------------- /duckdb/experimental/spark/__init__.py: -------------------------------------------------------------------------------- 1 | from .conf import SparkConf # noqa: D104 2 | from .context import SparkContext 3 | from .exception import ContributionsAcceptedError 4 | from .sql import DataFrame, SparkSession 5 | 6 | __all__ = ["ContributionsAcceptedError", "DataFrame", "SparkConf", "SparkContext", "SparkSession"] 7 | -------------------------------------------------------------------------------- /duckdb/experimental/spark/_typing.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
class SparkConf:
    """Stub of PySpark's ``SparkConf``.

    Configuration objects are not supported by DuckDB's Spark API: the
    constructor raises :class:`NotImplementedError`, and every other method
    raises :class:`ContributionsAcceptedError` to signal that contributions
    implementing it are welcome.
    """

    def __init__(self) -> None:
        """Constructing a ``SparkConf`` is not supported."""
        raise NotImplementedError

    def contains(self, key: str) -> bool:
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def getAll(self) -> list[tuple[str, str]]:
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def set(self, key: str, value: str) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def setAll(self, pairs: list[tuple[str, str]]) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def setAppName(self, value: str) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def setExecutorEnv(
        self, key: Optional[str] = None, value: Optional[str] = None, pairs: Optional[list[tuple[str, str]]] = None
    ) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def setIfMissing(self, key: str, value: str) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def setMaster(self, value: str) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def setSparkHome(self, value: str) -> "SparkConf":
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError

    def toDebugString(self) -> str:
        """Not implemented; contributions are accepted."""
        raise ContributionsAcceptedError
class ContributionsAcceptedError(NotImplementedError):
    """This method is not planned to be implemented, if you would like to implement this method
    or show your interest in this method to other members of the community,
    feel free to open up a PR or a Discussion over on https://github.com/duckdb/duckdb.
    """  # noqa: D205

    def __init__(self, message: Optional[str] = None) -> None:
        """Build the exception text from the class docstring.

        The docstring doubles as the user-facing explanation; an optional
        caller-supplied *message* is prepended to it.
        """
        text = self.__class__.__doc__
        if message:
            text = f"{message}\n{text}"
        super().__init__(text)
NotImplementedError 13 | 14 | def isModifiable(self, key: str) -> bool: # noqa: D102 15 | raise NotImplementedError 16 | 17 | def unset(self, key: str) -> None: # noqa: D102 18 | raise NotImplementedError 19 | 20 | def get(self, key: str, default: Union[Optional[str], _NoValueType] = _NoValue) -> str: # noqa: D102 21 | raise NotImplementedError 22 | 23 | 24 | __all__ = ["RuntimeConfig"] 25 | -------------------------------------------------------------------------------- /duckdb/experimental/spark/sql/streaming.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Optional, Union # noqa: D100 2 | 3 | from .types import StructType 4 | 5 | if TYPE_CHECKING: 6 | from .dataframe import DataFrame 7 | from .session import SparkSession 8 | 9 | PrimitiveType = Union[bool, float, int, str] 10 | OptionalPrimitiveType = Optional[PrimitiveType] 11 | 12 | 13 | class DataStreamWriter: # noqa: D101 14 | def __init__(self, dataframe: "DataFrame") -> None: # noqa: D107 15 | self.dataframe = dataframe 16 | 17 | def toTable(self, table_name: str) -> None: # noqa: D102 18 | # Should we register the dataframe or create a table from the contents? 
class UDFRegistration:
    """Registers Python callables as SQL functions on a ``SparkSession``.

    Mirrors ``pyspark.sql.UDFRegistration``; only plain Python UDF
    registration is supported (the Java variants raise).
    """

    def __init__(self, sparkSession: "SparkSession") -> None:
        """Bind this registration helper to *sparkSession*."""
        self.sparkSession = sparkSession

    def register(
        self,
        name: str,
        f: Union[Callable[..., Any], "UserDefinedFunctionLike"],
        returnType: Optional["DataTypeOrString"] = None,
    ) -> "UserDefinedFunctionLike":
        """Register *f* under *name* so it can be called from SQL.

        The callable is forwarded to DuckDB's ``create_function``;
        *returnType*, when given, becomes the SQL return type.

        Returns:
            The registered callable, so the result can be used directly
            (the signature previously promised this but returned ``None``).
        """
        self.sparkSession.conn.create_function(name, f, return_type=returnType)
        return f

    def registerJavaFunction(
        self,
        name: str,
        javaClassName: str,
        returnType: Optional["DataTypeOrString"] = None,
    ) -> None:
        """Not supported: DuckDB has no JVM; always raises."""
        raise NotImplementedError

    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:
        """Not supported: DuckDB has no JVM; always raises."""
        raise NotImplementedError
-------------------------------------------------------------------------------- /duckdb/filesystem.py: -------------------------------------------------------------------------------- 1 | """In-memory filesystem to store ephemeral dependencies. 2 | 3 | Warning: Not for external use. May change at any moment. Likely to be made internal. 4 | """ 5 | 6 | from __future__ import annotations 7 | 8 | import io 9 | import typing 10 | 11 | from fsspec import AbstractFileSystem 12 | from fsspec.implementations.memory import MemoryFile, MemoryFileSystem 13 | 14 | from .bytes_io_wrapper import BytesIOWrapper 15 | 16 | 17 | class ModifiedMemoryFileSystem(MemoryFileSystem): 18 | """In-memory filesystem implementation that uses its own protocol.""" 19 | 20 | protocol = ("DUCKDB_INTERNAL_OBJECTSTORE",) 21 | # defer to the original implementation that doesn't hardcode the protocol 22 | _strip_protocol: typing.Callable[[str], str] = classmethod(AbstractFileSystem._strip_protocol.__func__) # type: ignore[assignment] 23 | 24 | def add_file(self, obj: io.IOBase | BytesIOWrapper | object, path: str) -> None: 25 | """Add a file to the filesystem.""" 26 | if not (hasattr(obj, "read") and hasattr(obj, "seek")): 27 | msg = "Can not read from a non file-like object" 28 | raise TypeError(msg) 29 | if isinstance(obj, io.TextIOBase): 30 | # Wrap this so that we can return a bytes object from 'read' 31 | obj = BytesIOWrapper(obj) 32 | path = self._strip_protocol(path) 33 | self.store[path] = MemoryFile(self, path, obj.read()) 34 | -------------------------------------------------------------------------------- /duckdb/func/__init__.py: -------------------------------------------------------------------------------- 1 | from _duckdb._func import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType # noqa: D104 2 | 3 | __all__ = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"] 4 | 
-------------------------------------------------------------------------------- /duckdb/functional/__init__.py: -------------------------------------------------------------------------------- 1 | """DuckDB function constants and types. DEPRECATED: please use `duckdb.func` instead.""" 2 | 3 | import warnings 4 | 5 | from duckdb.func import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType 6 | 7 | __all__ = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"] 8 | 9 | warnings.warn( 10 | "`duckdb.functional` is deprecated and will be removed in a future version. Please use `duckdb.func` instead.", 11 | DeprecationWarning, 12 | stacklevel=2, 13 | ) 14 | -------------------------------------------------------------------------------- /duckdb/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /duckdb/sqltypes/__init__.py: -------------------------------------------------------------------------------- 1 | """DuckDB's SQL types.""" 2 | 3 | from _duckdb._sqltypes import ( 4 | BIGINT, 5 | BIT, 6 | BLOB, 7 | BOOLEAN, 8 | DATE, 9 | DOUBLE, 10 | FLOAT, 11 | HUGEINT, 12 | INTEGER, 13 | INTERVAL, 14 | SMALLINT, 15 | SQLNULL, 16 | TIME, 17 | TIME_TZ, 18 | TIMESTAMP, 19 | TIMESTAMP_MS, 20 | TIMESTAMP_NS, 21 | TIMESTAMP_S, 22 | TIMESTAMP_TZ, 23 | TINYINT, 24 | UBIGINT, 25 | UHUGEINT, 26 | UINTEGER, 27 | USMALLINT, 28 | UTINYINT, 29 | UUID, 30 | VARCHAR, 31 | DuckDBPyType, 32 | ) 33 | 34 | __all__ = [ 35 | "BIGINT", 36 | "BIT", 37 | "BLOB", 38 | "BOOLEAN", 39 | "DATE", 40 | "DOUBLE", 41 | "FLOAT", 42 | "HUGEINT", 43 | "INTEGER", 44 | "INTERVAL", 45 | "SMALLINT", 46 | "SQLNULL", 47 | "TIME", 48 | "TIMESTAMP", 49 | "TIMESTAMP_MS", 50 | "TIMESTAMP_NS", 51 | "TIMESTAMP_S", 52 | "TIMESTAMP_TZ", 53 | "TIME_TZ", 54 | "TINYINT", 55 | "UBIGINT", 56 | "UHUGEINT", 57 | "UINTEGER", 58 | "USMALLINT", 59 | "UTINYINT", 60 | 
def vectorized(func: typing.Callable[..., typing.Any]) -> typing.Callable[..., typing.Any]:
    """Decorate a function so DuckDB treats it as a vectorized (Arrow) UDF.

    Returns a copy of ``func`` whose parameter annotations are all
    ``pyarrow.lib.ChunkedArray``; DuckDB inspects these annotations to infer
    that the function should be provided with pyarrow arrays and is expected
    to produce pyarrow array(s) as output.  ``func`` itself is left untouched.
    """
    import types
    from inspect import signature

    # Shallow-copy the function so the original's annotations stay intact.
    new_func = types.FunctionType(func.__code__, func.__globals__, func.__name__, func.__defaults__, func.__closure__)
    # FunctionType() does not carry these attributes over.  In particular,
    # dropping __kwdefaults__ would break calls relying on keyword-only
    # defaults of the decorated function; the rest preserves introspection.
    new_func.__kwdefaults__ = func.__kwdefaults__
    new_func.__doc__ = func.__doc__
    new_func.__qualname__ = func.__qualname__
    new_func.__module__ = func.__module__
    new_func.__dict__.update(func.__dict__)

    import pyarrow as pa

    # Re-annotate every parameter as a ChunkedArray so DuckDB picks the
    # vectorized (Arrow) UDF path.
    new_func.__annotations__ = {param: pa.lib.ChunkedArray for param in signature(func).parameters}
    return new_func
#!/bin/sh
# Install Java and download Spark inside the cibuildwheel Linux container.
# The cibuildwheels manylinux image runs CentOS, hence yum.
# Fail fast and echo commands: without `set -e` a failed `cd` or `wget`
# was silently ignored and later steps ran in the wrong directory.
set -eux

yum install -y java-11 wget

# -p makes the script idempotent when re-run in the same container.
mkdir -p spark_installation
cd spark_installation
wget https://blobs.duckdb.org/ci/spark-3.5.3-bin-hadoop3.tgz
tar -xvzf spark-3.5.3-bin-hadoop3.tgz
mv spark-3.5.3-bin-hadoop3 spark
18 | # If you get particularly sick of this then there's a skeleton of 19 | # a solution in https://stackoverflow.com/a/36510671/5264127 20 | # but it might be overengineering things... 21 | 22 | 23 | rm -rf "${OUTPUT_DIR}" 24 | 25 | 26 | stubgen \ 27 | --verbose \ 28 | --package duckdb \ 29 | --output "${OUTPUT_DIR}" 30 | 31 | 32 | # We need this while `duckdb` is a single file module and not a package. 33 | # If `duckdb` becomes a proper package, this can be removed. 34 | mv "${OUTPUT_DIR}/duckdb.pyi" "${OUTPUT_DIR}/__init__.pyi" 35 | 36 | add_header() ( 37 | { set +x; } 2>/dev/null 38 | cat - "$1" > "$1.tmp" < &types, const vector &names, const py::list &batches, 21 | ClientProperties &options) { 22 | py::gil_scoped_acquire acquire; 23 | 24 | auto pyarrow_lib_module = py::module::import("pyarrow").attr("lib"); 25 | auto from_batches_func = pyarrow_lib_module.attr("Table").attr("from_batches"); 26 | auto schema_import_func = pyarrow_lib_module.attr("Schema").attr("_import_from_c"); 27 | ArrowSchema schema; 28 | ArrowConverter::ToArrowSchema(&schema, types, names, options); 29 | auto schema_obj = schema_import_func(reinterpret_cast(&schema)); 30 | 31 | return py::cast(from_batches_func(batches, schema_obj)); 32 | } 33 | 34 | } // namespace pyarrow 35 | 36 | } // namespace duckdb 37 | -------------------------------------------------------------------------------- /src/duckdb_py/common/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_common OBJECT exceptions.cpp) 3 | 4 | target_link_libraries(python_common PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/functional/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_functional OBJECT functional.cpp) 3 | 4 | 
target_link_libraries(python_functional PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/functional/functional.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb_python/functional.hpp" 2 | 3 | namespace duckdb { 4 | 5 | void DuckDBPyFunctional::Initialize(py::module_ &parent) { 6 | auto m = parent.def_submodule("_func", "This module contains classes and methods related to functions and udf"); 7 | 8 | py::enum_(m, "PythonUDFType") 9 | .value("NATIVE", duckdb::PythonUDFType::NATIVE) 10 | .value("ARROW", duckdb::PythonUDFType::ARROW) 11 | .export_values(); 12 | 13 | py::enum_(m, "FunctionNullHandling") 14 | .value("DEFAULT", duckdb::FunctionNullHandling::DEFAULT_NULL_HANDLING) 15 | .value("SPECIAL", duckdb::FunctionNullHandling::SPECIAL_HANDLING) 16 | .export_values(); 17 | } 18 | 19 | } // namespace duckdb 20 | -------------------------------------------------------------------------------- /src/duckdb_py/importer.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb_python/import_cache/importer.hpp" 2 | #include "duckdb_python/import_cache/python_import_cache.hpp" 3 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 4 | #include "duckdb_python/pyconnection/pyconnection.hpp" 5 | 6 | namespace duckdb { 7 | 8 | py::handle PythonImporter::Import(stack> &hierarchy, bool load) { 9 | auto &import_cache = *DuckDBPyConnection::ImportCache(); 10 | py::handle source(nullptr); 11 | while (!hierarchy.empty()) { 12 | // From top to bottom, import them 13 | auto &item = hierarchy.top(); 14 | hierarchy.pop(); 15 | source = item.get().Load(import_cache, source, load); 16 | if (!source) { 17 | // If load is false, or the module load fails and is not required, we return early 18 | break; 19 | } 20 | } 21 | return source; 22 | } 23 | 24 | } // namespace duckdb 25 | 
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/arrow/arrow_export_utils.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 4 | 5 | namespace duckdb { 6 | 7 | namespace pyarrow { 8 | 9 | py::object ToArrowTable(const vector &types, const vector &names, const py::list &batches, 10 | ClientProperties &options); 11 | 12 | } // namespace pyarrow 13 | 14 | } // namespace duckdb 15 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/arrow/pyarrow_filter_pushdown.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/arrow/pyarrow_filter_pushdown.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb/common/arrow/arrow_wrapper.hpp" 12 | #include "duckdb/function/table/arrow/arrow_duck_schema.hpp" 13 | #include "duckdb/common/unordered_map.hpp" 14 | #include "duckdb/planner/table_filter.hpp" 15 | #include "duckdb/main/client_properties.hpp" 16 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 17 | 18 | namespace duckdb { 19 | 20 | struct PyArrowFilterPushdown { 21 | static py::object TransformFilter(TableFilterSet &filter_collection, unordered_map &columns, 22 | unordered_map filter_to_col, const ClientProperties &config, 23 | const ArrowTableSchema &arrow_table); 24 | }; 25 | 26 | } // namespace duckdb 27 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/conversions/optional_wrapper.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 
"duckdb_python/pyconnection.hpp" 4 | #include "duckdb/common/helper.hpp" 5 | 6 | using duckdb::Optional; 7 | 8 | namespace py = pybind11; 9 | 10 | namespace PYBIND11_NAMESPACE { 11 | namespace detail { 12 | 13 | template 14 | struct type_caster> : public type_caster_base> { 15 | using base = type_caster_base>; 16 | using child = type_caster_base; 17 | Optional tmp; 18 | 19 | public: 20 | bool load(handle src, bool convert) { 21 | if (base::load(src, convert)) { 22 | return true; 23 | } else if (child::load(src, convert)) { 24 | return true; 25 | } 26 | return false; 27 | } 28 | 29 | static handle cast(Optional src, return_value_policy policy, handle parent) { 30 | return base::cast(src, policy, parent); 31 | } 32 | }; 33 | 34 | } // namespace detail 35 | } // namespace PYBIND11_NAMESPACE 36 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/filesystem_object.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/filesystem_object.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | #include "duckdb_python/pybind11/registered_py_object.hpp" 11 | #include "duckdb_python/pyfilesystem.hpp" 12 | 13 | namespace duckdb { 14 | 15 | class FileSystemObject : public RegisteredObject { 16 | public: 17 | explicit FileSystemObject(py::object fs, vector filenames_p) 18 | : RegisteredObject(std::move(fs)), filenames(std::move(filenames_p)) { 19 | } 20 | ~FileSystemObject() override { 21 | py::gil_scoped_acquire acquire; 22 | // Assert that the 'obj' is a filesystem 23 | D_ASSERT(py::isinstance(obj, DuckDBPyConnection::ImportCache()->duckdb.filesystem.ModifiedMemoryFileSystem())); 24 | for (auto &file : filenames) { 25 | obj.attr("delete")(file); 26 | } 27 | } 28 | 29 | vector 
// Static-only holder for the registration of the `_func` submodule
// (UDF-related enums) on the `_duckdb` extension module.
class DuckDBPyFunctional {
public:
	// Not instantiable: this class only groups the static registration helper.
	DuckDBPyFunctional() = delete;

public:
	// Registers the `_func` submodule and its enums on the given parent module.
	static void Initialize(py::module_ &m);
};
// Import-cache entry for the `collections.abc` submodule and the members
// DuckDB looks up on it.
struct CollectionsAbcCacheItem : public PythonImportCacheItem {

public:
	static constexpr const char *Name = "collections.abc";

public:
	CollectionsAbcCacheItem()
	    : PythonImportCacheItem("collections.abc"), Iterable("Iterable", this), Mapping("Mapping", this) {
	}
	~CollectionsAbcCacheItem() override {
	}

	// `collections.abc.Iterable`
	PythonImportCacheItem Iterable;
	// `collections.abc.Mapping`
	PythonImportCacheItem Mapping;
};

// Import-cache entry for the top-level `collections` module.
struct CollectionsCacheItem : public PythonImportCacheItem {

public:
	static constexpr const char *Name = "collections";

public:
	CollectionsCacheItem() : PythonImportCacheItem("collections"), abc() {
	}
	~CollectionsCacheItem() override {
	}

	// Nested cache for the `collections.abc` submodule.
	CollectionsAbcCacheItem abc;
};
// Import-cache entry for the `decimal` module; caches the `Decimal` class,
// which the bindings use when converting DECIMAL values.
struct DecimalCacheItem : public PythonImportCacheItem {

public:
	static constexpr const char *Name = "decimal";

public:
	DecimalCacheItem() : PythonImportCacheItem("decimal"), Decimal("Decimal", this) {
	}
	~DecimalCacheItem() override {
	}

	// `decimal.Decimal`
	PythonImportCacheItem Decimal;
};
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct IpythonDisplayCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | IpythonDisplayCacheItem(optional_ptr parent) 27 | : PythonImportCacheItem("display", parent), display("display", this), HTML("HTML", this) { 28 | } 29 | ~IpythonDisplayCacheItem() override { 30 | } 31 | 32 | PythonImportCacheItem display; 33 | PythonImportCacheItem HTML; 34 | }; 35 | 36 | struct IpythonCacheItem : public PythonImportCacheItem { 37 | 38 | public: 39 | static constexpr const char *Name = "IPython"; 40 | 41 | public: 42 | IpythonCacheItem() : PythonImportCacheItem("IPython"), get_ipython("get_ipython", this), display(this) { 43 | } 44 | ~IpythonCacheItem() override { 45 | } 46 | 47 | PythonImportCacheItem get_ipython; 48 | IpythonDisplayCacheItem display; 49 | 50 | protected: 51 | bool IsRequired() const override final { 52 | return false; 53 | } 54 | }; 55 | 56 | } // namespace duckdb 57 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/ipywidgets_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/ipywidgets_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct IpywidgetsCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "ipywidgets"; 27 | 28 | public: 29 | IpywidgetsCacheItem() : PythonImportCacheItem("ipywidgets"), FloatProgress("FloatProgress", this) { 30 | } 31 | ~IpywidgetsCacheItem() override { 32 | } 33 | 34 | PythonImportCacheItem FloatProgress; 35 | 36 | protected: 37 | bool IsRequired() const override final { 38 | return false; 39 | } 40 | }; 41 | 42 | } // namespace duckdb 43 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/pathlib_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/pathlib_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct PathlibCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "pathlib"; 27 | 28 | public: 29 | PathlibCacheItem() : PythonImportCacheItem("pathlib"), Path("Path", this) { 30 | } 31 | ~PathlibCacheItem() override { 32 | } 33 | 34 | PythonImportCacheItem Path; 35 | 36 | protected: 37 | bool IsRequired() const override final { 38 | return false; 39 | } 40 | }; 41 | 42 | } // namespace duckdb 43 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/polars_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/polars_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct PolarsCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "polars"; 27 | 28 | public: 29 | PolarsCacheItem() : PythonImportCacheItem("polars"), DataFrame("DataFrame", this), LazyFrame("LazyFrame", this) { 30 | } 31 | ~PolarsCacheItem() override { 32 | } 33 | 34 | PythonImportCacheItem DataFrame; 35 | PythonImportCacheItem LazyFrame; 36 | 37 | protected: 38 | bool IsRequired() const override final { 39 | return false; 40 | } 41 | }; 42 | 43 | } // namespace duckdb 44 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/pytz_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/pytz_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct PytzCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "pytz"; 27 | 28 | public: 29 | PytzCacheItem() : PythonImportCacheItem("pytz"), timezone("timezone", this) { 30 | } 31 | ~PytzCacheItem() override { 32 | } 33 | 34 | PythonImportCacheItem timezone; 35 | }; 36 | 37 | } // namespace duckdb 38 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/types_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/types_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct TypesCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "types"; 27 | 28 | public: 29 | TypesCacheItem() 30 | : PythonImportCacheItem("types"), UnionType("UnionType", this), GenericAlias("GenericAlias", this), 31 | BuiltinFunctionType("BuiltinFunctionType", this) { 32 | } 33 | ~TypesCacheItem() override { 34 | } 35 | 36 | PythonImportCacheItem UnionType; 37 | PythonImportCacheItem GenericAlias; 38 | PythonImportCacheItem BuiltinFunctionType; 39 | }; 40 | 41 | } // namespace duckdb 42 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/typing_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/typing_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct TypingCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "typing"; 27 | 28 | public: 29 | TypingCacheItem() : PythonImportCacheItem("typing"), Union("Union", this), get_origin("get_origin", this) { 30 | } 31 | ~TypingCacheItem() override { 32 | } 33 | 34 | PythonImportCacheItem Union; 35 | PythonImportCacheItem get_origin; 36 | }; 37 | 38 | } // namespace duckdb 39 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/modules/uuid_module.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/modules/uuid_module.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/import_cache/python_import_cache_item.hpp" 13 | 14 | //! Note: This class is generated using scripts. 15 | //! If you need to add a new object to the cache you must: 16 | //! 1. adjust scripts/imports.py 17 | //! 2. run python scripts/generate_import_cache_json.py 18 | //! 3. run python scripts/generate_import_cache_cpp.py 19 | //! 4. 
run pre-commit to fix formatting errors 20 | 21 | namespace duckdb { 22 | 23 | struct UuidCacheItem : public PythonImportCacheItem { 24 | 25 | public: 26 | static constexpr const char *Name = "uuid"; 27 | 28 | public: 29 | UuidCacheItem() : PythonImportCacheItem("uuid"), UUID("UUID", this) { 30 | } 31 | ~UuidCacheItem() override { 32 | } 33 | 34 | PythonImportCacheItem UUID; 35 | }; 36 | 37 | } // namespace duckdb 38 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/python_import_cache.hpp: -------------------------------------------------------------------------------- 1 | 2 | //===----------------------------------------------------------------------===// 3 | // DuckDB 4 | // 5 | // duckdb_python/import_cache/python_import_cache.hpp 6 | // 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #pragma once 11 | 12 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 13 | #include "duckdb.hpp" 14 | #include "duckdb/common/vector.hpp" 15 | #include "duckdb_python/import_cache/python_import_cache_modules.hpp" 16 | 17 | namespace duckdb { 18 | 19 | struct PythonImportCache { 20 | public: 21 | explicit PythonImportCache() { 22 | } 23 | ~PythonImportCache(); 24 | 25 | public: 26 | PyarrowCacheItem pyarrow; 27 | PandasCacheItem pandas; 28 | DatetimeCacheItem datetime; 29 | DecimalCacheItem decimal; 30 | IpythonCacheItem IPython; 31 | IpywidgetsCacheItem ipywidgets; 32 | NumpyCacheItem numpy; 33 | PathlibCacheItem pathlib; 34 | PolarsCacheItem polars; 35 | DuckdbCacheItem duckdb; 36 | PytzCacheItem pytz; 37 | TypesCacheItem types; 38 | TypingCacheItem typing; 39 | UuidCacheItem uuid; 40 | CollectionsCacheItem collections; 41 | 42 | public: 43 | py::handle AddCache(py::object item); 44 | 45 | private: 46 | vector owned_objects; 47 | }; 48 | 49 | } // namespace duckdb 50 | 
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/import_cache/python_import_cache_modules.hpp: -------------------------------------------------------------------------------- 1 | #include "duckdb_python/import_cache/modules/pyarrow_module.hpp" 2 | #include "duckdb_python/import_cache/modules/pandas_module.hpp" 3 | #include "duckdb_python/import_cache/modules/datetime_module.hpp" 4 | #include "duckdb_python/import_cache/modules/decimal_module.hpp" 5 | #include "duckdb_python/import_cache/modules/ipython_module.hpp" 6 | #include "duckdb_python/import_cache/modules/ipywidgets_module.hpp" 7 | #include "duckdb_python/import_cache/modules/numpy_module.hpp" 8 | #include "duckdb_python/import_cache/modules/pathlib_module.hpp" 9 | #include "duckdb_python/import_cache/modules/polars_module.hpp" 10 | #include "duckdb_python/import_cache/modules/duckdb_module.hpp" 11 | #include "duckdb_python/import_cache/modules/pytz_module.hpp" 12 | #include "duckdb_python/import_cache/modules/types_module.hpp" 13 | #include "duckdb_python/import_cache/modules/typing_module.hpp" 14 | #include "duckdb_python/import_cache/modules/uuid_module.hpp" 15 | #include "duckdb_python/import_cache/modules/collections_module.hpp" -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/jupyter_progress_bar_display.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/jupyter_progress_bar_display.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 12 | #include "duckdb/common/progress_bar/progress_bar_display.hpp" 13 | #include "duckdb/common/helper.hpp" 14 | 15 | namespace duckdb { 16 | 
17 | class JupyterProgressBarDisplay : public ProgressBarDisplay { 18 | public: 19 | JupyterProgressBarDisplay(); 20 | virtual ~JupyterProgressBarDisplay() { 21 | } 22 | 23 | static unique_ptr Create(); 24 | 25 | public: 26 | void Update(double progress); 27 | void Finish(); 28 | 29 | private: 30 | void Initialize(); 31 | 32 | private: 33 | py::object progress_bar; 34 | }; 35 | 36 | } // namespace duckdb 37 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/map.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/pandas/pandas_scan.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb.hpp" 12 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 13 | #include "duckdb/parser/parsed_data/create_table_function_info.hpp" 14 | #include "duckdb/execution/execution_context.hpp" 15 | 16 | namespace duckdb { 17 | 18 | struct MapFunction : public TableFunction { 19 | 20 | public: 21 | MapFunction(); 22 | 23 | static unique_ptr MapFunctionBind(ClientContext &context, TableFunctionBindInput &input, 24 | vector &return_types, vector &names); 25 | 26 | static OperatorResultType MapFunctionExec(ExecutionContext &context, TableFunctionInput &data, DataChunk &input, 27 | DataChunk &output); 28 | }; 29 | 30 | } // namespace duckdb 31 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/numpy/numpy_bind.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 4 | #include "duckdb/common/common.hpp" 5 | 6 | namespace duckdb { 7 | 8 | struct PandasColumnBindData; 9 | class ClientContext; 10 | 
//! Derives DuckDB column types/names from a numpy-backed frame-like object 'df'
//! and fills one PandasColumnBindData per column.
struct NumpyBind {
	static void Bind(const ClientContext &config, py::handle df, vector<PandasColumnBindData> &out,
	                 vector<LogicalType> &return_types, vector<string> &names);
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/numpy/numpy_result_conversion.hpp: --------------------------------------------------------------------------------
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb_python/numpy/numpy_result_conversion.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb_python/pybind11/pybind_wrapper.hpp"
#include "duckdb_python/numpy/array_wrapper.hpp"
#include "duckdb.hpp"

namespace duckdb {

//! Accumulates query result chunks into one numpy array per column.
class NumpyResultConversion {
public:
	//! 'pandas' records whether the arrays are destined for a pandas DataFrame.
	NumpyResultConversion(const vector<LogicalType> &types, idx_t initial_capacity,
	                      const ClientProperties &client_properties, bool pandas = false);

	//! Appends one chunk, growing the arrays via Resize() when needed.
	void Append(DataChunk &chunk);

	py::object ToArray(idx_t col_idx) {
		return owned_data[col_idx].ToArray();
	}
	bool ToPandas() const {
		return pandas;
	}

private:
	void Resize(idx_t new_capacity);

private:
	// One wrapper per result column (element type lost in this dump;
	// ArrayWrapper from array_wrapper.hpp — confirm).
	vector<ArrayWrapper> owned_data;
	idx_t count;
	idx_t capacity;
	bool pandas;
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/numpy/numpy_scan.hpp: --------------------------------------------------------------------------------
#pragma once

#include "duckdb_python/pybind11/pybind_wrapper.hpp"
#include "duckdb/common/common.hpp"

namespace duckdb {

struct PandasColumnBindData;

//! Copies 'count' values starting at 'offset' out of a numpy column into a DuckDB Vector.
struct NumpyScan {
	static void Scan(PandasColumnBindData &bind_data, idx_t count, idx_t offset, Vector &out);
	//! Variant for dtype=object columns: walks raw PyObject* entries with the given
	//! byte 'stride' between elements.
	static void ScanObjectColumn(PyObject **col, idx_t stride, idx_t count, idx_t offset, Vector &out);
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/numpy/raw_array_wrapper.hpp: --------------------------------------------------------------------------------
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb_python/numpy/raw_array_wrapper.hpp
//
//
//===----------------------------------------------------------------------===//
// NOTE: banner previously read 'duckdb_python/array_wrapper.hpp'; corrected to
// this header's actual path.

#pragma once

#include "duckdb_python/pybind11/pybind_wrapper.hpp"
#include "duckdb.hpp"

namespace duckdb {

//! Owns a growable numpy array plus a raw pointer into its buffer, used while
//! converting DuckDB vectors of a single LogicalType into numpy storage.
struct RawArrayWrapper {

	explicit RawArrayWrapper(const LogicalType &type);

	py::array array;
	data_ptr_t data;   // raw pointer into 'array's buffer
	LogicalType type;
	idx_t type_width;  // bytes per element for this dtype
	idx_t count;       // number of valid elements appended so far

public:
	//! Maps a DuckDB LogicalType to the numpy dtype string used for the array.
	static string DuckDBToNumpyDtype(const LogicalType &type);
	void Initialize(idx_t capacity);
	void Resize(idx_t new_capacity);
	void Append(idx_t current_offset, Vector &input, idx_t count);
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pandas/column/pandas_numpy_column.hpp: --------------------------------------------------------------------------------
#pragma once

#include "duckdb_python/pandas/pandas_column.hpp"
#include "duckdb_python/pybind11/pybind_wrapper.hpp"

namespace duckdb {

//! PandasColumn backed by a numpy ndarray; caches the first-axis stride (bytes).
class PandasNumpyColumn : public PandasColumn {
public:
	PandasNumpyColumn(py::array array_p) : PandasColumn(PandasColumnBackend::NUMPY), array(std::move(array_p)) {
		D_ASSERT(py::hasattr(array, "strides"));
		// strides[0]: byte distance between consecutive rows
		// (cast target lost in this dump; idx_t matches the member — confirm).
		stride = array.attr("strides").attr("__getitem__")(0).cast<idx_t>();
	}

public:
	py::array array;
	idx_t stride;
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pandas/pandas_bind.hpp: --------------------------------------------------------------------------------
#pragma once

#include "duckdb_python/pybind11/pybind_wrapper.hpp"
#include "duckdb_python/pybind11/python_object_container.hpp"
#include "duckdb_python/numpy/numpy_type.hpp"
#include "duckdb/common/helper.hpp"
#include "duckdb_python/pandas/pandas_column.hpp"

namespace duckdb {

class ClientContext;

//! Keeps a numpy array referenced for the lifetime of a registration.
struct RegisteredArray {
	explicit RegisteredArray(py::array numpy_array) : numpy_array(std::move(numpy_array)) {
	}
	py::array numpy_array;
};

//! Per-column state produced by binding a pandas DataFrame for scanning.
struct PandasColumnBindData {
	NumpyType numpy_type;
	unique_ptr<PandasColumn> pandas_col;
	// Validity mask, when the column carries one (element type lost in this dump — confirm).
	unique_ptr<RegisteredArray> mask;
	//! Only for categorical types
	string internal_categorical_type;
	//! Hold ownership of objects created during scanning
	PythonObjectContainer object_str_val;
};

//! Derives DuckDB column types/names from a pandas DataFrame 'df'.
struct Pandas {
	static void Bind(const ClientContext &config, py::handle df, vector<PandasColumnBindData> &out,
	                 vector<LogicalType> &return_types, vector<string> &names);
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pandas/pandas_column.hpp: --------------------------------------------------------------------------------
#pragma once

namespace duckdb {

//! Storage backend of a bound pandas column; NUMPY is currently the only variant.
enum class PandasColumnBackend { NUMPY };

//! Polymorphic base for column storage; concrete backends identify themselves
//! through the 'backend' tag rather than RTTI.
class PandasColumn {
public:
	PandasColumn(PandasColumnBackend backend) : backend(backend) {
	}
	virtual ~PandasColumn() {
	}

public:
	PandasColumnBackend Backend() const {
		return backend;
	}

protected:
	PandasColumnBackend backend;
};

} // namespace duckdb
-------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/path_like.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/common.hpp" 4 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 5 | #include "duckdb/main/external_dependencies.hpp" 6 | #include "duckdb/common/types/value.hpp" 7 | 8 | namespace duckdb { 9 | 10 | struct DuckDBPyConnection; 11 | 12 | struct PathLike { 13 | static PathLike Create(const py::object &object, DuckDBPyConnection &connection); 14 | // The file(s) extracted from object 15 | vector files; 16 | shared_ptr dependency; 17 | }; 18 | 19 | } // namespace duckdb 20 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/conversions/pyconnection_default.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb_python/pyconnection/pyconnection.hpp" 4 | #include "duckdb/common/helper.hpp" 5 | 6 | using duckdb::DuckDBPyConnection; 7 | using duckdb::shared_ptr; 8 | 9 | namespace py = pybind11; 10 | 11 | namespace PYBIND11_NAMESPACE { 12 | namespace detail { 13 | 14 | template <> 15 | class type_caster> 16 | : public copyable_holder_caster> { 17 | using type = DuckDBPyConnection; 18 | using holder_caster = copyable_holder_caster>; 19 | // This is used to generate documentation on duckdb-web 20 | PYBIND11_TYPE_CASTER(shared_ptr, const_name("duckdb.DuckDBPyConnection")); 21 | 22 | bool load(handle src, bool convert) { 23 | if (py::none().is(src)) { 24 | value = DuckDBPyConnection::DefaultConnection(); 25 | return true; 26 | } 27 | if (!holder_caster::load(src, convert)) { 28 | return false; 29 | } 30 | value = std::move(holder); 31 | return true; 32 | } 33 | 34 | static handle cast(shared_ptr base, return_value_policy rvp, handle h) { 35 | return 
holder_caster::cast(base, rvp, h); 36 | } 37 | }; 38 | 39 | template <> 40 | struct is_holder_type> : std::true_type {}; 41 | 42 | } // namespace detail 43 | } // namespace PYBIND11_NAMESPACE 44 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/conversions/render_mode_enum.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/common.hpp" 4 | #include "duckdb/common/exception.hpp" 5 | #include "duckdb/common/string_util.hpp" 6 | #include "duckdb/common/box_renderer.hpp" 7 | #include "duckdb/common/enum_util.hpp" 8 | 9 | using duckdb::InvalidInputException; 10 | using duckdb::RenderMode; 11 | using duckdb::string; 12 | using duckdb::StringUtil; 13 | 14 | namespace py = pybind11; 15 | 16 | static RenderMode RenderModeFromInteger(int64_t value) { 17 | if (value == 0) { 18 | return RenderMode::ROWS; 19 | } else if (value == 1) { 20 | return RenderMode::COLUMNS; 21 | } else { 22 | throw InvalidInputException("Unrecognized type for 'render_mode'"); 23 | } 24 | } 25 | 26 | namespace PYBIND11_NAMESPACE { 27 | namespace detail { 28 | 29 | template <> 30 | struct type_caster : public type_caster_base { 31 | using base = type_caster_base; 32 | RenderMode tmp; 33 | 34 | public: 35 | bool load(handle src, bool convert) { 36 | if (base::load(src, convert)) { 37 | return true; 38 | } else if (py::isinstance(src)) { 39 | string render_mode_str = py::str(src); 40 | auto render_mode = 41 | duckdb::EnumUtil::FromString(render_mode_str.empty() ? 
"ROWS" : render_mode_str); 42 | value = &render_mode; 43 | return true; 44 | } else if (py::isinstance(src)) { 45 | tmp = RenderModeFromInteger(src.cast()); 46 | value = &tmp; 47 | return true; 48 | } 49 | return false; 50 | } 51 | 52 | static handle cast(RenderMode src, return_value_policy policy, handle parent) { 53 | return base::cast(src, policy, parent); 54 | } 55 | }; 56 | 57 | } // namespace detail 58 | } // namespace PYBIND11_NAMESPACE 59 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/dataframe.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/pybind11/dataframe.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb/common/types.hpp" 12 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 13 | 14 | namespace duckdb { 15 | 16 | class PandasDataFrame : public py::object { 17 | public: 18 | PandasDataFrame(const py::object &o) : py::object(o, borrowed_t {}) { 19 | } 20 | using py::object::object; 21 | 22 | public: 23 | static bool check_(const py::handle &object); // NOLINT 24 | static bool IsPyArrowBacked(const py::handle &df); 25 | static py::object ToArrowTable(const py::object &df); 26 | }; 27 | 28 | class PolarsDataFrame : public py::object { 29 | public: 30 | PolarsDataFrame(const py::object &o) : py::object(o, borrowed_t {}) { 31 | } 32 | using py::object::object; 33 | 34 | public: 35 | static bool IsDataFrame(const py::handle &object); 36 | static bool IsLazyFrame(const py::handle &object); 37 | static bool check_(const py::handle &object); // NOLINT 38 | }; 39 | } // namespace duckdb 40 | 41 | namespace pybind11 { 42 | namespace detail { 43 | template <> 44 | struct handle_type_name { 45 | static constexpr auto 
name = _("pandas.DataFrame"); 46 | }; 47 | } // namespace detail 48 | } // namespace pybind11 49 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/exceptions.hpp: -------------------------------------------------------------------------------- 1 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 2 | 3 | namespace py = pybind11; 4 | 5 | namespace duckdb { 6 | 7 | void RegisterExceptions(const py::module &m); 8 | 9 | } // namespace duckdb 10 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/gil_wrapper.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 4 | 5 | namespace duckdb { 6 | 7 | struct PythonGILWrapper { 8 | py::gil_scoped_acquire acquire; 9 | }; 10 | 11 | } // namespace duckdb 12 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/python_object_container.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/pybind11/python_object_container.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 12 | #include "duckdb/common/vector.hpp" 13 | #include "duckdb_python/pybind11/gil_wrapper.hpp" 14 | #include "duckdb/common/helper.hpp" 15 | 16 | namespace duckdb { 17 | 18 | //! Every Python Object Must be created through our container 19 | //! 
The Container ensures that the GIL is HOLD on Python Object Construction/Destruction/Modification 20 | class PythonObjectContainer { 21 | public: 22 | PythonObjectContainer() { 23 | } 24 | 25 | ~PythonObjectContainer() { 26 | py::gil_scoped_acquire acquire; 27 | py_obj.clear(); 28 | } 29 | 30 | void Push(py::object &&obj) { 31 | py::gil_scoped_acquire gil; 32 | PushInternal(std::move(obj)); 33 | } 34 | 35 | const py::object &LastAddedObject() { 36 | D_ASSERT(!py_obj.empty()); 37 | return py_obj.back(); 38 | } 39 | 40 | private: 41 | void PushInternal(py::object &&obj) { 42 | py_obj.emplace_back(obj); 43 | } 44 | 45 | vector py_obj; 46 | }; 47 | } // namespace duckdb 48 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pybind11/registered_py_object.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/pybind11/registered_py_object.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 11 | 12 | namespace duckdb { 13 | 14 | class RegisteredObject { 15 | public: 16 | explicit RegisteredObject(py::object obj_p) : obj(std::move(obj_p)) { 17 | } 18 | virtual ~RegisteredObject() { 19 | py::gil_scoped_acquire acquire; 20 | obj = py::none(); 21 | } 22 | 23 | py::object obj; 24 | }; 25 | 26 | } // namespace duckdb 27 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pystatement.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/pystatement.hpp 5 | // 6 | // 7 | 
//===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 12 | #include "duckdb.hpp" 13 | 14 | namespace duckdb { 15 | 16 | struct DuckDBPyStatement { 17 | public: 18 | explicit DuckDBPyStatement(unique_ptr statement); 19 | 20 | public: 21 | //! Create a copy of the wrapped statement 22 | unique_ptr GetStatement(); 23 | string Query() const; 24 | py::set NamedParameters() const; 25 | StatementType Type() const; 26 | py::list ExpectedResultType() const; 27 | 28 | public: 29 | static void Initialize(py::handle &m); 30 | 31 | private: 32 | unique_ptr statement; 33 | }; 34 | 35 | } // namespace duckdb 36 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/python_conversion.hpp: -------------------------------------------------------------------------------- 1 | //===----------------------------------------------------------------------===// 2 | // DuckDB 3 | // 4 | // duckdb_python/pyresult.hpp 5 | // 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #pragma once 10 | 11 | #include "duckdb_python/numpy/array_wrapper.hpp" 12 | #include "duckdb.hpp" 13 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 14 | #include "duckdb_python/python_objects.hpp" 15 | #include "duckdb/common/types.hpp" 16 | #include "duckdb/common/types/hugeint.hpp" 17 | 18 | #include "datetime.h" // from Python 19 | 20 | namespace duckdb { 21 | 22 | enum class PythonObjectType { 23 | Other, 24 | None, 25 | Integer, 26 | Float, 27 | Bool, 28 | Decimal, 29 | Uuid, 30 | Datetime, 31 | Date, 32 | Time, 33 | Timedelta, 34 | String, 35 | ByteArray, 36 | MemoryView, 37 | Bytes, 38 | List, 39 | Tuple, 40 | Dict, 41 | NdArray, 42 | NdDatetime, 43 | Value 44 | }; 45 | 46 | PythonObjectType GetPythonObjectType(py::handle &ele); 47 | 48 | bool TryTransformPythonNumeric(Value &res, 
//! A DependencyItem that keeps a Python object alive for as long as
//! something inside DuckDB depends on it.
//! NOTE(review): template arguments of unique_ptr/shared_ptr appear stripped
//! in this extraction (presumably unique_ptr<RegisteredObject> and
//! shared_ptr<DependencyItem>) — confirm against the repository.
class PythonDependencyItem : public DependencyItem {
public:
	explicit PythonDependencyItem(unique_ptr &&object);
	//! Acquires the GIL before releasing the held Python object (see the .cpp)
	~PythonDependencyItem() override;

public:
	//! Wrap a raw py::object in a RegisteredObject and create a dependency
	static shared_ptr Create(py::object object);
	//! Create a dependency from an already-registered object
	static shared_ptr Create(unique_ptr &&object);

public:
	//! The held object; released under the GIL on destruction
	unique_ptr object;
};
Replace(ClientContext &context, ReplacementScanInput &input, 14 | optional_ptr data); 15 | //! Try to perform a replacement, returns NULL on error 16 | static unique_ptr TryReplacementObject(const py::object &entry, const string &name, 17 | ClientContext &context, bool relation = false); 18 | //! Perform a replacement or throw if it failed 19 | static unique_ptr ReplacementObject(const py::object &entry, const string &name, ClientContext &context, 20 | bool relation = false); 21 | }; 22 | 23 | } // namespace duckdb 24 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pytype.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 4 | #include "duckdb/common/types.hpp" 5 | 6 | namespace duckdb { 7 | 8 | class PyGenericAlias : public py::object { 9 | public: 10 | using py::object::object; 11 | 12 | public: 13 | static bool check_(const py::handle &object); 14 | }; 15 | 16 | class PyUnionType : public py::object { 17 | public: 18 | using py::object::object; 19 | 20 | public: 21 | static bool check_(const py::handle &object); 22 | }; 23 | 24 | class DuckDBPyType : public enable_shared_from_this { 25 | public: 26 | explicit DuckDBPyType(LogicalType type); 27 | 28 | public: 29 | static void Initialize(py::handle &m); 30 | 31 | public: 32 | bool Equals(const shared_ptr &other) const; 33 | bool EqualsString(const string &type_str) const; 34 | shared_ptr GetAttribute(const string &name) const; 35 | py::list Children() const; 36 | string ToString() const; 37 | const LogicalType &Type() const; 38 | string GetId() const; 39 | 40 | private: 41 | private: 42 | LogicalType type; 43 | }; 44 | 45 | } // namespace duckdb 46 | -------------------------------------------------------------------------------- /src/duckdb_py/include/duckdb_python/pyutil.hpp: 
//! Thin static wrappers around CPython C-API macros, so the rest of the
//! codebase can use them through pybind11 handles without spreading raw
//! macro invocations everywhere.
struct PyUtil {
	//! Byte length of a bytearray object
	static idx_t PyByteArrayGetSize(py::handle &obj) {
		return PyByteArray_GET_SIZE(obj.ptr()); // NOLINT
	}

	//! Underlying Py_buffer of a memoryview object
	static Py_buffer *PyMemoryViewGetBuffer(py::handle &obj) {
		return PyMemoryView_GET_BUFFER(obj.ptr());
	}

	//! True if the unicode object is stored as compact ASCII
	static bool PyUnicodeIsCompactASCII(py::handle &obj) {
		return PyUnicode_IS_COMPACT_ASCII(obj.ptr());
	}

	//! Raw character data of a unicode object (read-only view)
	static const char *PyUnicodeData(py::handle &obj) {
		return const_char_ptr_cast(PyUnicode_DATA(obj.ptr()));
	}

	//! Raw character data of a unicode object (mutable view)
	static char *PyUnicodeDataMutable(py::handle &obj) {
		return char_ptr_cast(PyUnicode_DATA(obj.ptr()));
	}

	//! Length of the unicode object in code points (not bytes)
	static idx_t PyUnicodeGetLength(py::handle &obj) {
		return PyUnicode_GET_LENGTH(obj.ptr());
	}

	//! True if the unicode object uses CPython's compact representation
	static bool PyUnicodeIsCompact(PyCompactUnicodeObject *obj) {
		return PyUnicode_IS_COMPACT(obj);
	}

	//! True if the unicode object contains only ASCII code points
	static bool PyUnicodeIsASCII(PyCompactUnicodeObject *obj) {
		return PyUnicode_IS_ASCII(obj);
	}

	//! Storage kind (1, 2 or 4 bytes per code point) of the unicode object
	static int PyUnicodeKind(py::handle &obj) {
		return PyUnicode_KIND(obj.ptr());
	}

	//! Data pointer for a 1-byte-per-code-point (latin-1/ASCII) string
	static Py_UCS1 *PyUnicode1ByteData(py::handle &obj) {
		return PyUnicode_1BYTE_DATA(obj.ptr());
	}

	//! Data pointer for a 2-bytes-per-code-point string
	static Py_UCS2 *PyUnicode2ByteData(py::handle &obj) {
		return PyUnicode_2BYTE_DATA(obj.ptr());
	}

	//! Data pointer for a 4-bytes-per-code-point string
	static Py_UCS4 *PyUnicode4ByteData(py::handle &obj) {
		return PyUnicode_4BYTE_DATA(obj.ptr());
	}
};
#include "duckdb_python/pytype.hpp" 5 | #include "duckdb_python/pyconnection/pyconnection.hpp" 6 | 7 | namespace duckdb { 8 | 9 | class DuckDBPyTyping { 10 | public: 11 | DuckDBPyTyping() = delete; 12 | 13 | public: 14 | static void Initialize(py::module_ &m); 15 | }; 16 | 17 | } // namespace duckdb 18 | -------------------------------------------------------------------------------- /src/duckdb_py/jupyter/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_jupyter OBJECT jupyter_progress_bar_display.cpp) 3 | 4 | target_link_libraries(python_jupyter PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/jupyter/jupyter_progress_bar_display.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb_python/jupyter_progress_bar_display.hpp" 2 | #include "duckdb_python/pyconnection/pyconnection.hpp" 3 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 4 | 5 | namespace duckdb { 6 | 7 | unique_ptr JupyterProgressBarDisplay::Create() { 8 | return make_uniq(); 9 | } 10 | 11 | void JupyterProgressBarDisplay::Initialize() { 12 | auto &import_cache = *DuckDBPyConnection::ImportCache(); 13 | auto float_progress_attr = import_cache.ipywidgets.FloatProgress(); 14 | D_ASSERT(float_progress_attr.ptr() != nullptr); 15 | // Initialize the progress bar 16 | py::dict style; 17 | style["bar_color"] = "black"; 18 | progress_bar = float_progress_attr((py::arg("min") = 0, py::arg("max") = 100, py::arg("style") = style)); 19 | 20 | progress_bar.attr("layout").attr("width") = "auto"; 21 | 22 | // Display the progress bar 23 | auto display_attr = import_cache.IPython.display.display(); 24 | D_ASSERT(display_attr.ptr() != nullptr); 25 | display_attr(progress_bar); 26 | } 27 | 28 | JupyterProgressBarDisplay::JupyterProgressBarDisplay() : ProgressBarDisplay() { 
29 | // Empty, we need the GIL to initialize, which we don't have here 30 | } 31 | 32 | void JupyterProgressBarDisplay::Update(double progress) { 33 | py::gil_scoped_acquire gil; 34 | if (progress_bar.ptr() == nullptr) { 35 | // First print, we first need to initialize the display 36 | Initialize(); 37 | } 38 | progress_bar.attr("value") = py::cast(progress); 39 | } 40 | 41 | void JupyterProgressBarDisplay::Finish() { 42 | Update(100); 43 | } 44 | 45 | } // namespace duckdb 46 | -------------------------------------------------------------------------------- /src/duckdb_py/native/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_native OBJECT python_objects.cpp python_conversion.cpp) 3 | 4 | target_link_libraries(python_native PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/numpy/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library( 3 | python_numpy OBJECT 4 | type.cpp numpy_scan.cpp array_wrapper.cpp raw_array_wrapper.cpp 5 | numpy_bind.cpp numpy_result_conversion.cpp) 6 | 7 | target_link_libraries(python_numpy PRIVATE _duckdb_dependencies) 8 | -------------------------------------------------------------------------------- /src/duckdb_py/numpy/numpy_result_conversion.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb_python/numpy/array_wrapper.hpp" 2 | #include "duckdb_python/numpy/numpy_result_conversion.hpp" 3 | 4 | namespace duckdb { 5 | 6 | NumpyResultConversion::NumpyResultConversion(const vector &types, idx_t initial_capacity, 7 | const ClientProperties &client_properties, bool pandas) 8 | : count(0), capacity(0), pandas(pandas) { 9 | owned_data.reserve(types.size()); 10 | for (auto &type : types) { 11 | 
owned_data.emplace_back(type, client_properties, pandas); 12 | } 13 | Resize(initial_capacity); 14 | } 15 | 16 | void NumpyResultConversion::Resize(idx_t new_capacity) { 17 | if (capacity == 0) { 18 | for (auto &data : owned_data) { 19 | data.Initialize(new_capacity); 20 | } 21 | } else { 22 | for (auto &data : owned_data) { 23 | data.Resize(new_capacity); 24 | } 25 | } 26 | capacity = new_capacity; 27 | } 28 | 29 | void NumpyResultConversion::Append(DataChunk &chunk) { 30 | if (count + chunk.size() > capacity) { 31 | Resize(capacity * 2); 32 | } 33 | auto chunk_types = chunk.GetTypes(); 34 | auto source_offset = 0; 35 | auto source_size = chunk.size(); 36 | auto to_append = chunk.size(); 37 | for (idx_t col_idx = 0; col_idx < owned_data.size(); col_idx++) { 38 | owned_data[col_idx].Append(count, chunk.data[col_idx], source_size, source_offset, to_append); 39 | } 40 | count += to_append; 41 | #ifdef DEBUG 42 | for (auto &data : owned_data) { 43 | D_ASSERT(data.data->count == count); 44 | D_ASSERT(data.mask->count == count); 45 | } 46 | #endif 47 | } 48 | 49 | } // namespace duckdb 50 | -------------------------------------------------------------------------------- /src/duckdb_py/pandas/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_pandas OBJECT scan.cpp analyzer.cpp bind.cpp) 3 | 4 | target_link_libraries(python_pandas PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/pybind11/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_pybind11 OBJECT pybind_wrapper.cpp) 3 | 4 | target_link_libraries(python_pybind11 PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/pybind11/pybind_wrapper.cpp: 
-------------------------------------------------------------------------------- 1 | #include "duckdb_python/pybind11/pybind_wrapper.hpp" 2 | #include "duckdb/common/exception.hpp" 3 | #include "duckdb_python/pyconnection/pyconnection.hpp" 4 | 5 | namespace pybind11 { 6 | 7 | // NOLINTNEXTLINE(readability-identifier-naming) 8 | bool gil_check() { 9 | return (bool)PyGILState_Check(); 10 | } 11 | 12 | // NOLINTNEXTLINE(readability-identifier-naming) 13 | void gil_assert() { 14 | if (!gil_check()) { 15 | throw duckdb::InternalException("The GIL should be held for this operation, but it's not!"); 16 | } 17 | } 18 | 19 | // NOLINTNEXTLINE(readability-identifier-naming) 20 | bool is_list_like(handle obj) { 21 | if (isinstance(obj) || isinstance(obj)) { 22 | return false; 23 | } 24 | if (is_dict_like(obj)) { 25 | return false; 26 | } 27 | auto &import_cache = *duckdb::DuckDBPyConnection::ImportCache(); 28 | auto iterable = import_cache.collections.abc.Iterable(); 29 | return isinstance(obj, iterable); 30 | } 31 | 32 | // NOLINTNEXTLINE(readability-identifier-naming) 33 | bool is_dict_like(handle obj) { 34 | auto &import_cache = *duckdb::DuckDBPyConnection::ImportCache(); 35 | auto mapping = import_cache.collections.abc.Mapping(); 36 | return isinstance(obj, mapping); 37 | } 38 | 39 | // NOLINTNEXTLINE(readability-identifier-naming) 40 | std::string to_string(const object &obj) { 41 | return std::string(py::str(obj)); 42 | } 43 | 44 | } // namespace pybind11 45 | -------------------------------------------------------------------------------- /src/duckdb_py/pyconnection/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this is used for clang-tidy checks 2 | add_library(python_connection OBJECT type_creation.cpp) 3 | 4 | target_link_libraries(python_connection PRIVATE _duckdb_dependencies) 5 | -------------------------------------------------------------------------------- /src/duckdb_py/pyexpression/CMakeLists.txt: 
PythonDependencyItem::~PythonDependencyItem() { // NOLINT - destructors cannot throw
	// Releasing the RegisteredObject drops a Python reference, which requires
	// holding the GIL.
	py::gil_scoped_acquire gil;
	object.reset();
}
class Test3324:
    """Regression test for duckdb/duckdb#3324.

    A statement PREPAREd with ``$1``-style parameters and then EXECUTEd with a
    ``?`` placeholder must raise a BinderException rather than misbehave.
    """

    def test_3324(self, duckdb_cursor):
        # Set up a small table to prepare against.
        duckdb_cursor.execute(
            """
            create or replace table my_table as
            select 'test1' as column1, 1 as column2, 'quack' as column3
            union all
            select 'test2' as column1, 2 as column2, 'quacks' as column3
            union all
            select 'test3' as column1, 3 as column2, 'quacking' as column3
            """
        ).fetch_df()
        # Prepare a statement that uses the $1 parameter syntax.
        duckdb_cursor.execute(
            """
            prepare v1 as
            select
                column1
                , column2
                , column3
            from my_table
            where
                column1 = $1"""
        ).fetch_df()

        # Executing the prepared statement with a '?' placeholder must fail cleanly.
        with pytest.raises(duckdb.BinderException, match="Unexpected prepared parameter"):
            duckdb_cursor.execute("""execute v1(?)""", ("test1",)).fetch_df()
class Test3728:
    """Regression test for issue #3728: describing a table with ENUM columns."""

    def test_3728_describe_enum(self, duckdb_cursor):
        # In-memory database; the original problem also reproduced on
        # file-backed databases.
        con = duckdb.connect(":memory:")

        # Set up a table with one ENUM-typed column.
        for ddl in (
            "CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');",
            "CREATE TABLE person (name text, current_mood mood);",
        ):
            con.execute(ddl)

        # Reading .description used to fail with
        # "RuntimeError: Not implemented Error: unsupported type: mood"
        expected = [
            ("name", "VARCHAR", None, None, None, None, None),
            ("current_mood", "ENUM('sad', 'ok', 'happy')", None, None, None, None, None),
        ]
        assert con.table("person").execute().description == expected
class TestConnectionInterrupt:
    """Behavior of DuckDBPyConnection.interrupt()."""

    @pytest.mark.xfail(
        condition=platform.system() == "Emscripten",
        reason="threads not allowed on Emscripten",
    )
    def test_connection_interrupt(self):
        conn = duckdb.connect()

        def fire_interrupt() -> None:
            # Give the main thread a moment to start executing the query.
            time.sleep(0.1)
            conn.interrupt()

        worker = threading.Thread(target=fire_interrupt)
        worker.start()
        try:
            # The long-running query must be cancelled by the interrupt.
            with pytest.raises(duckdb.InterruptException):
                conn.execute("select count(*) from range(100000000000)").fetchall()
        finally:
            worker.join()

    def test_interrupt_closed_connection(self):
        # Interrupting an already-closed connection is an error.
        conn = duckdb.connect()
        conn.close()
        with pytest.raises(duckdb.ConnectionException):
            conn.interrupt()
class TestSimpleDBAPI:
    def test_regular_selection(self, duckdb_cursor, integers):
        """Fetching the integers fixture returns 0..9 followed by a NULL row."""
        duckdb_cursor.execute("SELECT * FROM integers")
        expected = [(i,) for i in range(10)] + [(None,)]
        assert duckdb_cursor.fetchall() == expected, "Incorrect result returned"
class TestSimpleDBAPI:
    def test_prepare(self, duckdb_cursor):
        """Prepared statements: '?' placeholders with list or tuple parameters,
        plus executemany() for bulk inserts (mirrors the sqlite3 docs example).
        """
        result = duckdb_cursor.execute("SELECT CAST(? AS INTEGER), CAST(? AS INTEGER)", ["42", "84"]).fetchall()
        assert result == [
            (
                42,
                84,
            )
        ], "Incorrect result returned"

        c = duckdb_cursor

        # from python docs
        c.execute(
            """CREATE TABLE stocks
            (date text, trans text, symbol text, qty real, price real)"""
        )
        c.execute("INSERT INTO stocks VALUES ('2006-01-05','BUY','RHAT',100,35.14)")

        # Parameters supplied as a tuple ...
        t = ("RHAT",)
        result = c.execute("SELECT COUNT(*) FROM stocks WHERE symbol=?", t).fetchone()
        assert result == (1,)

        # ... and as a list
        t = ["RHAT"]
        result = c.execute("SELECT COUNT(*) FROM stocks WHERE symbol=?", t).fetchone()
        assert result == (1,)

        # Larger example that inserts many records at a time
        purchases = [
            ("2006-03-28", "BUY", "IBM", 1000, 45.00),
            ("2006-04-05", "BUY", "MSFT", 1000, 72.00),
            ("2006-04-06", "SELL", "IBM", 500, 53.00),
        ]
        c.executemany("INSERT INTO stocks VALUES (?,?,?,?,?)", purchases)

        result = c.execute("SELECT count(*) FROM stocks").fetchone()
        assert result == (4,)
class TestType:
    """fetchdf() round-trips VARCHAR columns, including empty strings and NULLs."""

    @pytest.mark.parametrize("pandas", [NumpyPandas()])
    def test_fetchdf(self, pandas):
        con = duckdb.connect()
        for stmt in (
            "CREATE TABLE items(item VARCHAR)",
            "INSERT INTO items VALUES ('jeans'), (''), (NULL)",
        ):
            con.execute(stmt)

        actual = con.execute("SELECT item FROM items").fetchdf()
        assert isinstance(actual, pandas.core.frame.DataFrame)

        # NULL must come back as None; the empty string stays distinct from it.
        expected = pandas.DataFrame({"item": ["jeans", "", None]})

        print(actual)
        print(expected)
        pandas.testing.assert_frame_equal(actual, expected)
def check_exception(f):
    """Assert that calling *f* raises an exception (of any type).

    Helper for tests that only care that an operation fails, not about the
    specific exception type or message. Raises AssertionError when *f*
    completes without raising.
    """
    try:
        f()
    except Exception:
        # Any exception is what the caller expected; swallow it.
        return
    raise AssertionError("expected an exception, but none was raised")
class TestExplain:
    """Relation.explain() accepts strings (any case), enum values and raw ints."""

    def test_explain_basic(self, duckdb_cursor):
        assert isinstance(duckdb_cursor.sql("select 42").explain(), str)

    def test_explain_standard(self, duckdb_cursor):
        # All spellings of the STANDARD explain type are equivalent.
        for explain_type in ("standard", "STANDARD", duckdb.ExplainType.STANDARD, 0):
            res = duckdb_cursor.sql("select 42").explain(explain_type)
            assert isinstance(res, str)

    def test_explain_analyze(self, duckdb_cursor):
        # All spellings of the ANALYZE explain type are equivalent.
        for explain_type in ("analyze", "ANALYZE", duckdb.ExplainType.ANALYZE, 1):
            res = duckdb_cursor.sql("select 42").explain(explain_type)
            assert isinstance(res, str)

    def test_explain_df(self, duckdb_cursor):
        pd = pytest.importorskip("pandas")
        df = pd.DataFrame({"a": [42]})  # noqa: F841
        res = duckdb_cursor.sql("select * from df").explain("ANALYZE")
        assert isinstance(res, str)
from range(100000000000)").fetchall() 30 | except RuntimeError: 31 | # If this is not reached, we could not cancel the query before it completed 32 | # indicating that the query interruption functionality is broken 33 | assert True 34 | except KeyboardInterrupt: 35 | pytest.fail("Interrupted by user") 36 | thread.join() 37 | -------------------------------------------------------------------------------- /tests/fast/api/test_relation_to_view.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | 6 | class TestRelationToView: 7 | def test_values_to_view(self, duckdb_cursor): 8 | rel = duckdb_cursor.values(["test", "this is a long string"]) 9 | res = rel.fetchall() 10 | assert res == [("test", "this is a long string")] 11 | 12 | rel.to_view("vw1") 13 | 14 | view = duckdb_cursor.table("vw1") 15 | res = view.fetchall() 16 | assert res == [("test", "this is a long string")] 17 | 18 | def test_relation_to_view(self, duckdb_cursor): 19 | rel = duckdb_cursor.sql("select 'test', 'this is a long string'") 20 | 21 | res = rel.fetchall() 22 | assert res == [("test", "this is a long string")] 23 | 24 | rel.to_view("vw1") 25 | 26 | view = duckdb_cursor.table("vw1") 27 | res = view.fetchall() 28 | assert res == [("test", "this is a long string")] 29 | 30 | def test_registered_relation(self, duckdb_cursor): 31 | rel = duckdb_cursor.sql("select 'test', 'this is a long string'") 32 | 33 | con = duckdb.connect() 34 | # Register on a different connection is not allowed 35 | with pytest.raises( 36 | duckdb.InvalidInputException, 37 | match="was created by another Connection and can therefore not be used by this Connection", 38 | ): 39 | con.register("cross_connection", rel) 40 | 41 | # Register on the same connection just creates a view 42 | duckdb_cursor.register("same_connection", rel) 43 | view = duckdb_cursor.table("same_connection") 44 | res = view.fetchall() 45 | assert res == [("test", "this is a long 
string")] 46 | -------------------------------------------------------------------------------- /tests/fast/api/test_with_propagating_exceptions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | 6 | class TestWithPropagatingExceptions: 7 | def test_with(self): 8 | # Should propagate exception raised in the 'with duckdb.connect() ..' 9 | with pytest.raises(duckdb.ParserException, match=r"syntax error at or near *"), duckdb.connect() as con: 10 | con.execute("invalid") 11 | 12 | # Does not raise an exception 13 | with duckdb.connect() as con: 14 | con.execute("select 1") 15 | -------------------------------------------------------------------------------- /tests/fast/arrow/data/arrow_table: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-python/HEAD/tests/fast/arrow/data/arrow_table -------------------------------------------------------------------------------- /tests/fast/arrow/data/unsigned.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-python/HEAD/tests/fast/arrow/data/unsigned.parquet -------------------------------------------------------------------------------- /tests/fast/arrow/data/userdata1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-python/HEAD/tests/fast/arrow/data/userdata1.parquet -------------------------------------------------------------------------------- /tests/fast/arrow/test_10795.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | pyarrow = pytest.importorskip("pyarrow") 6 | 7 | 8 | @pytest.mark.parametrize("arrow_large_buffer_size", [True, False]) 9 | def test_10795(arrow_large_buffer_size): 10 | conn = duckdb.connect() 
11 | conn.sql(f"set arrow_large_buffer_size={arrow_large_buffer_size}") 12 | arrow = conn.sql("select map(['non-inlined string', 'test', 'duckdb'], [42, 1337, 123]) as map").to_arrow_table() 13 | assert arrow.to_pydict() == {"map": [[("non-inlined string", 42), ("test", 1337), ("duckdb", 123)]]} 14 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_12384.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | import duckdb 6 | 7 | pa = pytest.importorskip("pyarrow") 8 | 9 | 10 | def test_10795(): 11 | arrow_filename = Path(__file__).parent / "data" / "arrow_table" 12 | with pa.memory_map(str(arrow_filename), "r") as source: 13 | reader = pa.ipc.RecordBatchFileReader(source) 14 | taxi_fhvhv_arrow = reader.read_all() 15 | con = duckdb.connect(database=":memory:") 16 | con.execute("SET TimeZone='UTC';") 17 | con.register("taxi_fhvhv", taxi_fhvhv_arrow) 18 | res = con.execute(""" 19 | SELECT PULocationID, pickup_datetime 20 | FROM taxi_fhvhv 21 | WHERE pickup_datetime >= '2023-01-01T00:00:00-05:00' AND PULocationID = 244 22 | """).fetchall() 23 | 24 | assert len(res) == 3685 25 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_14344.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | 3 | import pytest 4 | 5 | pa = pytest.importorskip("pyarrow") 6 | 7 | 8 | def test_14344(duckdb_cursor): 9 | my_table = pa.Table.from_pydict({"foo": pa.array([hashlib.sha256(b"foo").digest()], type=pa.binary())}) # noqa: F841 10 | my_table2 = pa.Table.from_pydict( # noqa: F841 11 | {"foo": pa.array([hashlib.sha256(b"foo").digest()], type=pa.binary()), "a": ["123"]} 12 | ) 13 | 14 | res = duckdb_cursor.sql( 15 | """ 16 | SELECT 17 | my_table2.* EXCLUDE (foo) 18 | FROM 19 | my_table 20 | LEFT JOIN 21 | my_table2 22 | USING (foo) 23 | """ 24 
| ).fetchall() 25 | assert res == [("123",)] 26 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_2426.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | pytest.importorskip("pyarrow") 6 | 7 | try: 8 | can_run = True 9 | except Exception: 10 | can_run = False 11 | 12 | 13 | class Test2426: 14 | def test_2426(self, duckdb_cursor): 15 | if not can_run: 16 | return 17 | 18 | con = duckdb.connect() 19 | con.execute("Create Table test (a integer)") 20 | 21 | for i in range(1024): 22 | for _j in range(2): 23 | con.execute("Insert Into test values ('" + str(i) + "')") 24 | con.execute("Insert Into test values ('5000')") 25 | con.execute("Insert Into test values ('6000')") 26 | sql = """ 27 | SELECT a, COUNT(*) AS repetitions 28 | FROM test 29 | GROUP BY a 30 | """ 31 | 32 | result_df = con.execute(sql).df() 33 | 34 | arrow_table = con.execute(sql).fetch_arrow_table() 35 | 36 | arrow_df = arrow_table.to_pandas() 37 | assert result_df["repetitions"].sum() == arrow_df["repetitions"].sum() 38 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_5547.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from pandas.testing import assert_frame_equal 4 | 5 | import duckdb 6 | 7 | pa = pytest.importorskip("pyarrow") 8 | 9 | 10 | def test_5547(): 11 | num_rows = 2**17 + 1 12 | 13 | tbl = pa.Table.from_pandas( 14 | pd.DataFrame.from_records( 15 | [ 16 | { 17 | "id": i, 18 | "nested": { 19 | "a": i, 20 | }, 21 | } 22 | for i in range(num_rows) 23 | ] 24 | ) 25 | ) 26 | 27 | con = duckdb.connect() 28 | expected = tbl.to_pandas() 29 | result = con.execute( 30 | """ 31 | SELECT * FROM tbl 32 | """ 33 | ).df() 34 | 35 | assert_frame_equal(expected, result) 36 | 37 | con.close() 38 | 
-------------------------------------------------------------------------------- /tests/fast/arrow/test_6584.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor 2 | 3 | import pytest 4 | 5 | import duckdb 6 | 7 | pyarrow = pytest.importorskip("pyarrow") 8 | 9 | 10 | def f(cur, i, data): 11 | cur.execute(f"create table t_{i} as select * from data") 12 | return cur.execute(f"select * from t_{i}").fetch_arrow_table() 13 | 14 | 15 | def test_6584(): 16 | pool = ThreadPoolExecutor(max_workers=2) 17 | data = pyarrow.Table.from_pydict({"a": [1, 2, 3]}) 18 | c = duckdb.connect() 19 | futures = [] 20 | for i in range(2): 21 | fut = pool.submit(f, c.cursor(), i, data) 22 | futures.append(fut) 23 | 24 | for fut in futures: 25 | arrow_res = fut.result() 26 | assert data.equals(arrow_res) 27 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_6796.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from conftest import ArrowPandas, NumpyPandas 3 | 4 | import duckdb 5 | 6 | pyarrow = pytest.importorskip("pyarrow") 7 | 8 | 9 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 10 | def test_6796(pandas): 11 | conn = duckdb.connect() 12 | input_df = pandas.DataFrame({"foo": ["bar"]}) 13 | conn.register("input_df", input_df) 14 | 15 | query = """ 16 | select * from input_df 17 | union all 18 | select * from input_df 19 | """ 20 | 21 | # fetching directly into Pandas works 22 | res_df = conn.execute(query).fetch_df() 23 | res_arrow = conn.execute(query).fetch_arrow_table() # noqa: F841 24 | 25 | df_arrow_table = pyarrow.Table.from_pandas(res_df) # noqa: F841 26 | 27 | result_1 = conn.execute("select * from df_arrow_table order by all").fetchall() 28 | 29 | result_2 = conn.execute("select * from res_arrow order by all").fetchall() 30 | 31 | assert result_1 == result_2 32 | 
-------------------------------------------------------------------------------- /tests/fast/arrow/test_7699.py: -------------------------------------------------------------------------------- 1 | import string 2 | 3 | import pytest 4 | 5 | pa = pytest.importorskip("pyarrow") 6 | pq = pytest.importorskip("pyarrow.parquet") 7 | pl = pytest.importorskip("polars") 8 | 9 | 10 | class Test7699: 11 | def test_7699(self, duckdb_cursor): 12 | pl_tbl = pl.DataFrame( 13 | { 14 | "col1": pl.Series([string.ascii_uppercase[ix + 10] for ix in list(range(2)) + list(range(3))]).cast( 15 | pl.Categorical 16 | ), 17 | } 18 | ) 19 | 20 | nickname = "df1234" 21 | duckdb_cursor.register(nickname, pl_tbl) 22 | 23 | rel = duckdb_cursor.sql("select * from df1234") 24 | res = rel.fetchall() 25 | assert res == [("K",), ("L",), ("K",), ("L",), ("M",)] 26 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_8522.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | 3 | import pytest 4 | 5 | pa = pytest.importorskip("pyarrow") 6 | 7 | 8 | # Reconstruct filters when pushing down into arrow scan 9 | # arrow supports timestamp_tz with different units than US, we only support US 10 | # so we have to convert ConstantValues back to their native unit when pushing the filter 11 | # expression containing them down to pyarrow 12 | class Test8522: 13 | def test_8522(self, duckdb_cursor): 14 | t_us = pa.Table.from_arrays( # noqa: F841 15 | arrays=[pa.array([dt.datetime(2022, 1, 1)])], 16 | schema=pa.schema([pa.field("time", pa.timestamp("us", tz="UTC"))]), 17 | ) 18 | 19 | t_ms = pa.Table.from_arrays( # noqa: F841 20 | arrays=[pa.array([dt.datetime(2022, 1, 1)])], 21 | schema=pa.schema([pa.field("time", pa.timestamp("ms", tz="UTC"))]), 22 | ) 23 | 24 | expected = duckdb_cursor.sql("FROM t_us").filter("time>='2022-01-01'").fetchall() 25 | assert len(expected) == 1 26 | 27 | actual = 
duckdb_cursor.sql("FROM t_ms").filter("time>='2022-01-01'").fetchall() 28 | assert actual == expected 29 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_9443.py: -------------------------------------------------------------------------------- 1 | from datetime import time 2 | from pathlib import PurePosixPath 3 | 4 | import pytest 5 | 6 | pq = pytest.importorskip("pyarrow.parquet") 7 | pa = pytest.importorskip("pyarrow") 8 | 9 | 10 | class Test9443: 11 | def test_9443(self, tmp_path, duckdb_cursor): 12 | arrow_table = pa.Table.from_pylist( 13 | [ 14 | {"col1": time(1, 2, 3)}, 15 | ] 16 | ) # col1: time64[us] 17 | 18 | print(arrow_table) 19 | 20 | temp_file = str(PurePosixPath(tmp_path.as_posix()) / "test9443.parquet") 21 | pq.write_table(arrow_table, temp_file) 22 | 23 | sql = f'SELECT * FROM "{temp_file}"' 24 | 25 | duckdb_cursor.execute(sql) 26 | duckdb_cursor.fetch_record_batch() 27 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_batch_index.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | pa = pytest.importorskip("pyarrow") 6 | 7 | 8 | class TestArrowBatchIndex: 9 | def test_arrow_batch_index(self, duckdb_cursor): 10 | con = duckdb.connect() 11 | df = con.execute("SELECT * FROM range(10000000) t(i)").df() 12 | arrow_tbl = pa.Table.from_pandas(df) # noqa: F841 13 | 14 | con.execute("CREATE TABLE tbl AS SELECT * FROM arrow_tbl") 15 | 16 | result = con.execute("SELECT * FROM tbl LIMIT 5").fetchall() 17 | assert [x[0] for x in result] == [0, 1, 2, 3, 4] 18 | 19 | result = con.execute("SELECT * FROM tbl LIMIT 5 OFFSET 777778").fetchall() 20 | assert [x[0] for x in result] == [777778, 777779, 777780, 777781, 777782] 21 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_binary_view.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | pa = pytest.importorskip("pyarrow") 6 | 7 | 8 | class TestArrowBinaryView: 9 | def test_arrow_binary_view(self, duckdb_cursor): 10 | con = duckdb.connect() 11 | tab = pa.table({"x": pa.array([b"abc", b"thisisaverybigbinaryyaymorethanfifteen", None], pa.binary_view())}) 12 | assert con.execute("FROM tab").fetchall() == [(b"abc",), (b"thisisaverybigbinaryyaymorethanfifteen",), (None,)] 13 | # By default we won't export a view 14 | assert not con.execute("FROM tab").fetch_arrow_table().equals(tab) 15 | # We do the binary view from 1.4 onwards 16 | con.execute("SET arrow_output_version = 1.4") 17 | assert con.execute("FROM tab").fetch_arrow_table().equals(tab) 18 | 19 | assert con.execute("FROM tab where x = 'thisisaverybigbinaryyaymorethanfifteen'").fetchall() == [ 20 | (b"thisisaverybigbinaryyaymorethanfifteen",) 21 | ] 22 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_case_sensitive.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | pa = pytest.importorskip("pyarrow") 4 | 5 | 6 | class TestArrowCaseSensitive: 7 | def test_arrow_case_sensitive(self, duckdb_cursor): 8 | data = (pa.array([1], type=pa.int32()), pa.array([1000], type=pa.int32())) 9 | arrow_table = pa.Table.from_arrays([data[0], data[1]], ["A1", "a1"]) 10 | 11 | duckdb_cursor.register("arrow_tbl", arrow_table) 12 | assert duckdb_cursor.table("arrow_tbl").columns == ["A1", "a1_1"] 13 | assert duckdb_cursor.execute("select A1 from arrow_tbl;").fetchall() == [(1,)] 14 | assert duckdb_cursor.execute("select a1_1 from arrow_tbl;").fetchall() == [(1000,)] 15 | assert arrow_table.column_names == ["A1", "a1"] 16 | 17 | def test_arrow_case_sensitive_repeated(self, duckdb_cursor): 18 | data = (pa.array([1], type=pa.int32()), pa.array([1000], type=pa.int32())) 19 | 
arrow_table = pa.Table.from_arrays([data[0], data[1], data[1]], ["A1", "a1_1", "a1"]) 20 | 21 | duckdb_cursor.register("arrow_tbl", arrow_table) 22 | assert duckdb_cursor.table("arrow_tbl").columns == ["A1", "a1_1", "a1_2"] 23 | assert arrow_table.column_names == ["A1", "a1_1", "a1"] 24 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_decimal256.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal 2 | 3 | import pytest 4 | 5 | import duckdb 6 | 7 | pa = pytest.importorskip("pyarrow") 8 | 9 | 10 | class TestArrowDecimal256: 11 | def test_decimal_256_throws(self, duckdb_cursor): 12 | with duckdb.connect() as conn: 13 | pa_decimal256 = pa.Table.from_pylist( # noqa: F841 14 | [{"data": Decimal("100.00")} for _ in range(4)], 15 | pa.schema([("data", pa.decimal256(12, 4))]), 16 | ) 17 | with pytest.raises( 18 | duckdb.NotImplementedException, match="Unsupported Internal Arrow Type for Decimal d:12,4,256" 19 | ): 20 | conn.execute("select * from pa_decimal256;").fetch_arrow_table().to_pylist() 21 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_fixed_binary.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | pa = pytest.importorskip("pyarrow") 4 | 5 | 6 | class TestArrowFixedBinary: 7 | def test_arrow_fixed_binary(self, duckdb_cursor): 8 | ids = [ 9 | None, 10 | b"\x66\x4d\xf4\xae\xb1\x5c\xb0\x4a\xdd\x5d\x1d\x54", 11 | b"\x66\x4d\xf4\xf0\xa3\xfc\xec\x5b\x26\x81\x4e\x1d", 12 | ] 13 | 14 | id_array = pa.array(ids, type=pa.binary(12)) 15 | arrow_table = pa.Table.from_arrays([id_array], names=["id"]) # noqa: F841 16 | res = duckdb_cursor.sql( 17 | """ 18 | SELECT lower(hex(id)) as id FROM arrow_table 19 | """ 20 | ).fetchall() 21 | assert res == [(None,), ("664df4aeb15cb04add5d1d54",), ("664df4f0a3fcec5b26814e1d",)] 22 | 
-------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_ipc.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | pa = pytest.importorskip("pyarrow") 6 | 7 | ipc = pytest.importorskip("pyarrow.ipc") 8 | 9 | 10 | def get_record_batch(): 11 | data = [pa.array([1, 2, 3, 4]), pa.array(["foo", "bar", "baz", None]), pa.array([True, None, False, True])] 12 | return pa.record_batch(data, names=["f0", "f1", "f2"]) 13 | 14 | 15 | class TestArrowIPCExtension: 16 | # Only thing we can test in core is that it suggests the 17 | # instalation and loading of the extension 18 | def test_single_buffer(self, duckdb_cursor): 19 | batch = get_record_batch() 20 | sink = pa.BufferOutputStream() 21 | 22 | with ipc.new_stream(sink, batch.schema) as writer: 23 | for _ in range(5): # Write 5 batches into one stream 24 | writer.write_batch(batch) 25 | 26 | buffer = sink.getvalue() 27 | 28 | with pa.BufferReader(buffer) as buf_reader: # Use pyarrow.BufferReader 29 | stream = ipc.MessageReader.open_stream(buf_reader) 30 | # This fails 31 | with pytest.raises( 32 | duckdb.Error, match="The nanoarrow community extension is needed to read the Arrow IPC protocol" 33 | ): 34 | duckdb_cursor.from_arrow(stream).fetchall() 35 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_arrow_union.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | pyarrow = pytest.importorskip("pyarrow") 4 | 5 | 6 | def test_nested(duckdb_cursor): 7 | res = run(duckdb_cursor, "select 42::UNION(name VARCHAR, attr UNION(age INT, veteran BOOL)) as res") 8 | assert pyarrow.types.is_union(res.type) 9 | assert res.value.value == pyarrow.scalar(42, type=pyarrow.int32()) 10 | 11 | 12 | def test_union_contains_nested_data(duckdb_cursor): 13 | _ = pytest.importorskip("pyarrow", minversion="11") 
14 | res = run(duckdb_cursor, "select ['hello']::UNION(first_name VARCHAR, middle_names VARCHAR[]) as res") 15 | assert pyarrow.types.is_union(res.type) 16 | assert res.value == pyarrow.scalar(["hello"], type=pyarrow.list_(pyarrow.string())) 17 | 18 | 19 | def test_unions_inside_lists_structs_maps(duckdb_cursor): 20 | res = run(duckdb_cursor, "select [union_value(name := 'Frank')] as res") 21 | assert pyarrow.types.is_list(res.type) 22 | assert pyarrow.types.is_union(res.type.value_type) 23 | assert res[0].value == pyarrow.scalar("Frank", type=pyarrow.string()) 24 | 25 | 26 | def test_unions_with_struct(duckdb_cursor): 27 | duckdb_cursor.execute( 28 | """ 29 | CREATE TABLE tbl (a UNION(a STRUCT(a INT, b BOOL))) 30 | """ 31 | ) 32 | duckdb_cursor.execute( 33 | """ 34 | INSERT INTO tbl VALUES ({'a': 42, 'b': true}) 35 | """ 36 | ) 37 | 38 | rel = duckdb_cursor.table("tbl") 39 | arrow = rel.fetch_arrow_table() # noqa: F841 40 | 41 | duckdb_cursor.execute("create table other as select * from arrow") 42 | rel2 = duckdb_cursor.table("other") 43 | res = rel2.fetchall() 44 | assert res == [({"a": 42, "b": True},)] 45 | 46 | 47 | def run(conn, query): 48 | return conn.sql(query).fetch_arrow_table().columns[0][0] 49 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_binary_type.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | try: 4 | import pyarrow as pa 5 | 6 | can_run = True 7 | except Exception: 8 | can_run = False 9 | 10 | 11 | def create_binary_table(type): 12 | schema = pa.schema([("data", type)]) 13 | inputs = [pa.array([b"foo", b"bar", b"baz"], type=type)] 14 | return pa.Table.from_arrays(inputs, schema=schema) 15 | 16 | 17 | class TestArrowBinary: 18 | def test_binary_types(self, duckdb_cursor): 19 | if not can_run: 20 | return 21 | 22 | # Fixed Size Binary 23 | arrow_table = create_binary_table(pa.binary(3)) 24 | rel = duckdb.from_arrow(arrow_table) 
25 | res = rel.execute().fetchall() 26 | assert res == [(b"foo",), (b"bar",), (b"baz",)] 27 | 28 | # Normal Binary 29 | arrow_table = create_binary_table(pa.binary()) 30 | rel = duckdb.from_arrow(arrow_table) 31 | res = rel.execute().fetchall() 32 | assert res == [(b"foo",), (b"bar",), (b"baz",)] 33 | 34 | # Large Binary 35 | arrow_table = create_binary_table(pa.large_binary()) 36 | rel = duckdb.from_arrow(arrow_table) 37 | res = rel.execute().fetchall() 38 | assert res == [(b"foo",), (b"bar",), (b"baz",)] 39 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_buffer_size_option.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | from duckdb.sqltypes import VARCHAR 5 | 6 | pa = pytest.importorskip("pyarrow") 7 | 8 | 9 | class TestArrowBufferSize: 10 | def test_arrow_buffer_size(self): 11 | con = duckdb.connect() 12 | 13 | # All small string 14 | res = con.query("select 'bla'").fetch_arrow_table() 15 | assert res[0][0].type == pa.string() 16 | res = con.query("select 'bla'").fetch_record_batch() 17 | assert res.schema[0].type == pa.string() 18 | 19 | # All Large String 20 | con.execute("SET arrow_large_buffer_size=True") 21 | res = con.query("select 'bla'").fetch_arrow_table() 22 | assert res[0][0].type == pa.large_string() 23 | res = con.query("select 'bla'").fetch_record_batch() 24 | assert res.schema[0].type == pa.large_string() 25 | 26 | # All small string again 27 | con.execute("SET arrow_large_buffer_size=False") 28 | res = con.query("select 'bla'").fetch_arrow_table() 29 | assert res[0][0].type == pa.string() 30 | res = con.query("select 'bla'").fetch_record_batch() 31 | assert res.schema[0].type == pa.string() 32 | 33 | def test_arrow_buffer_size_udf(self): 34 | def just_return(x): 35 | return x 36 | 37 | con = duckdb.connect() 38 | con.create_function("just_return", just_return, [VARCHAR], VARCHAR, type="arrow") 39 | 40 | res = 
con.query("select just_return('bla')").fetch_arrow_table() 41 | 42 | assert res[0][0].type == pa.string() 43 | 44 | # All Large String 45 | con.execute("SET arrow_large_buffer_size=True") 46 | 47 | res = con.query("select just_return('bla')").fetch_arrow_table() 48 | assert res[0][0].type == pa.large_string() 49 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_date.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | try: 4 | import pyarrow as pa 5 | 6 | can_run = True 7 | except Exception: 8 | can_run = False 9 | 10 | 11 | class TestArrowDate: 12 | def test_date_types(self, duckdb_cursor): 13 | if not can_run: 14 | return 15 | 16 | data = (pa.array([1000 * 60 * 60 * 24], type=pa.date64()), pa.array([1], type=pa.date32())) 17 | arrow_table = pa.Table.from_arrays([data[0], data[1]], ["a", "b"]) 18 | rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() 19 | assert rel["a"] == arrow_table["b"] 20 | assert rel["b"] == arrow_table["b"] 21 | 22 | def test_date_null(self, duckdb_cursor): 23 | if not can_run: 24 | return 25 | data = (pa.array([None], type=pa.date64()), pa.array([None], type=pa.date32())) 26 | arrow_table = pa.Table.from_arrays([data[0], data[1]], ["a", "b"]) 27 | rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() 28 | assert rel["a"] == arrow_table["b"] 29 | assert rel["b"] == arrow_table["b"] 30 | 31 | def test_max_date(self, duckdb_cursor): 32 | if not can_run: 33 | return 34 | data = (pa.array([2147483647], type=pa.date32()), pa.array([2147483647], type=pa.date32())) 35 | result = pa.Table.from_arrays([data[0], data[1]], ["a", "b"]) 36 | data = ( 37 | pa.array([2147483647 * (1000 * 60 * 60 * 24)], type=pa.date64()), 38 | pa.array([2147483647], type=pa.date32()), 39 | ) 40 | arrow_table = pa.Table.from_arrays([data[0], data[1]], ["a", "b"]) 41 | rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() 42 | assert rel["a"] == 
result["a"] 43 | assert rel["b"] == result["b"] 44 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_large_string.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | try: 4 | import pyarrow as pa 5 | 6 | can_run = True 7 | except Exception: 8 | can_run = False 9 | 10 | 11 | class TestArrowLargeString: 12 | def test_large_string_type(self, duckdb_cursor): 13 | if not can_run: 14 | return 15 | 16 | schema = pa.schema([("data", pa.large_string())]) 17 | inputs = [pa.array(["foo", "baaaar", "b"], type=pa.large_string())] 18 | arrow_table = pa.Table.from_arrays(inputs, schema=schema) 19 | 20 | rel = duckdb.from_arrow(arrow_table) 21 | res = rel.execute().fetchall() 22 | assert res == [("foo",), ("baaaar",), ("b",)] 23 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_multiple_reads.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import duckdb 4 | 5 | try: 6 | import pyarrow 7 | import pyarrow.parquet 8 | 9 | can_run = True 10 | except Exception: 11 | can_run = False 12 | 13 | 14 | class TestArrowReads: 15 | def test_multiple_queries_same_relation(self, duckdb_cursor): 16 | if not can_run: 17 | return 18 | parquet_filename = str(Path(__file__).parent / "data" / "userdata1.parquet") 19 | userdata_parquet_table = pyarrow.parquet.read_table(parquet_filename) 20 | userdata_parquet_table.validate(full=True) 21 | rel = duckdb.from_arrow(userdata_parquet_table) 22 | assert rel.aggregate("(avg(salary))::INT").execute().fetchone()[0] == 149005 23 | assert rel.aggregate("(avg(salary))::INT").execute().fetchone()[0] == 149005 24 | -------------------------------------------------------------------------------- /tests/fast/arrow/test_projection_pushdown.py: -------------------------------------------------------------------------------- 1 | 
import pytest


class TestArrowProjectionPushdown:
    def test_projection_pushdown_no_filter(self, duckdb_cursor):
        """Summing a single column works through every Arrow scan path.

        Covers three replacement scans: an Arrow Table (uses projection
        pushdown), a RecordBatch (no pushdown), and a pyarrow Dataset.
        """
        pytest.importorskip("pyarrow")
        ds = pytest.importorskip("pyarrow.dataset")

        duckdb_cursor.execute(
            """
            CREATE TABLE test (a INTEGER, b INTEGER, c INTEGER)
            """
        )
        # Include an all-NULL row so the scan also has NULLs to handle.
        duckdb_cursor.execute(
            """
            INSERT INTO test VALUES
            (1,2,3),
            (10,20,30),
            (100,200,300),
            (NULL,NULL,NULL)
            """
        )
        arrow_table = duckdb_cursor.table("test").fetch_arrow_table()

        expected = [(333,)]
        assert duckdb_cursor.execute("SELECT sum(c) FROM arrow_table").fetchall() == expected

        # RecordBatch does not use projection pushdown, test that this also still works
        record_batch = arrow_table.to_batches()[0]  # noqa: F841
        assert duckdb_cursor.execute("SELECT sum(c) FROM record_batch").fetchall() == expected

        arrow_dataset = ds.dataset(arrow_table)  # noqa: F841
        assert duckdb_cursor.execute("SELECT sum(c) FROM arrow_dataset").fetchall() == expected


# ---- tests/fast/arrow/test_view.py ----
from pathlib import Path

import pytest

pa = pytest.importorskip("pyarrow")
pq = pytest.importorskip("pyarrow.parquet")


class TestArrowView:
    def test_arrow_view(self, duckdb_cursor):
        """Registering an Arrow table as a view makes it queryable by name."""
        parquet_filename = str(Path(__file__).parent / "data" / "userdata1.parquet")
        userdata_parquet_table = pq.read_table(parquet_filename)
        userdata_parquet_table.validate(full=True)

        duckdb_cursor.from_arrow(userdata_parquet_table).create_view("arrow_view")

        assert duckdb_cursor.execute("PRAGMA show_tables").fetchone() == ("arrow_view",)
        avg_salary = duckdb_cursor.execute("select avg(salary)::INT from arrow_view").fetchone()[0]
        assert avg_salary == 149005
-------------------------------------------------------------------------------- /tests/fast/data/binary_string.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-python/HEAD/tests/fast/data/binary_string.parquet -------------------------------------------------------------------------------- /tests/fast/data/category.csv: -------------------------------------------------------------------------------- 1 | CATEGORY_ID|NAME|LAST_UPDATE 2 | 1|Action|2006-02-15 04:46:27 3 | 2|Animation|2006-02-15 04:46:27 4 | 3|Children|2006-02-15 04:46:27 5 | 4|Classics|2006-02-15 04:46:27 6 | 5|Comedy|2006-02-15 04:46:27 7 | 6|Documentary|2006-02-15 04:46:27 8 | 7|Drama|2006-02-15 04:46:27 9 | 8|Family|2006-02-15 04:46:27 10 | 9|Foreign|2006-02-15 04:46:27 11 | 10|Games|2006-02-15 04:46:27 12 | 11|Horror|2006-02-15 04:46:27 13 | 12|Music|2006-02-15 04:46:27 14 | 13|New|2006-02-15 04:46:27 15 | 14|Sci-Fi|2006-02-15 04:46:27 16 | 15|Sports|2006-02-15 04:46:27 17 | 16|Travel|2006-02-15 04:46:27 18 | -------------------------------------------------------------------------------- /tests/fast/data/datetime.csv: -------------------------------------------------------------------------------- 1 | a,b,t,d,ts 2 | 123,TEST2,12:12:12,2000-01-01,2000-01-01 12:12:00 3 | 345,TEST2,14:15:30,2002-02-02,2002-02-02 14:15:00 4 | 346,TEST2,15:16:17,2004-12-13,2004-12-13 15:16:00 5 | -------------------------------------------------------------------------------- /tests/fast/data/example.json: -------------------------------------------------------------------------------- 1 | {"id":1,"name":"O Brother, Where Art Thou?"} 2 | {"id":2,"name":"Home for the Holidays"} 3 | {"id":3,"name":"The Firm"} 4 | {"id":4,"name":"Broadcast News"} 5 | {"id":5,"name":"Raising Arizona"} -------------------------------------------------------------------------------- /tests/fast/data/integers.csv: 
-------------------------------------------------------------------------------- 1 | 1;10;0 2 | 2;50;30 -------------------------------------------------------------------------------- /tests/fast/data/nullpadding.csv: -------------------------------------------------------------------------------- 1 | # this file has a bunch of gunk at the top 2 | one,two,three,four 3 | 1,a,alice 4 | 2,b,bob -------------------------------------------------------------------------------- /tests/fast/data/problematic.csv: -------------------------------------------------------------------------------- 1 | a|b|c 2 | 1|1|1 3 | 1|1|1 4 | 1|1|1 5 | 1|1|1 6 | 1|1|1 7 | 1|1|1 8 | not_a_number|also_not_a_number|definitely_not_a_number -------------------------------------------------------------------------------- /tests/fast/data/quote_escape.csv: -------------------------------------------------------------------------------- 1 | 123|TEST6|text1 2 | 345|TEST6|"text""2""text" 3 | "567"|TEST6|text3 4 | -------------------------------------------------------------------------------- /tests/fast/data/tz.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-python/HEAD/tests/fast/data/tz.parquet -------------------------------------------------------------------------------- /tests/fast/data/unquote_without_delimiter.csv: -------------------------------------------------------------------------------- 1 | "AAA"BB -------------------------------------------------------------------------------- /tests/fast/pandas/test_bug2281.py: -------------------------------------------------------------------------------- 1 | import io 2 | 3 | import pandas as pd 4 | 5 | 6 | class TestPandasStringNull: 7 | def test_pandas_string_null(self, duckdb_cursor): 8 | csv = """what,is_control,is_test 9 | ,0,0 10 | foo,1,0""" 11 | df = pd.read_csv(io.StringIO(csv)) 12 | duckdb_cursor.register("c", df) 13 | 
duckdb_cursor.execute("select what, count(*) from c group by what") 14 | duckdb_cursor.fetchdf() 15 | assert True # Should not crash ^^ 16 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_bug5922.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from conftest import ArrowPandas, NumpyPandas 3 | 4 | import duckdb 5 | 6 | 7 | class TestPandasAcceptFloat16: 8 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 9 | def test_pandas_accept_float16(self, duckdb_cursor, pandas): 10 | df = pandas.DataFrame({"col": [1, 2, 3]}) 11 | df16 = df.astype({"col": "float16"}) # noqa: F841 12 | con = duckdb.connect() 13 | con.execute("CREATE TABLE tbl AS SELECT * FROM df16") 14 | con.execute("select * from tbl") 15 | df_result = con.fetchdf() 16 | df32 = df.astype({"col": "float32"}) 17 | assert (df32["col"] == df_result["col"]).all() 18 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_column_order.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | 4 | class TestColumnOrder: 5 | def test_column_order(self, duckdb_cursor): 6 | to_execute = """ 7 | CREATE OR REPLACE TABLE t1 AS ( 8 | SELECT NULL AS col1, 9 | NULL::TIMESTAMPTZ AS timepoint, 10 | NULL::DATE AS date, 11 | ); 12 | SELECT timepoint, date, col1 FROM t1; 13 | """ 14 | df = duckdb.execute(to_execute).fetchdf() 15 | cols = list(df.columns) 16 | assert cols == ["timepoint", "date", "col1"] 17 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_copy_on_write.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pytest 4 | 5 | import duckdb 6 | 7 | # https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html 8 | pandas = pytest.importorskip("pandas", "1.5", 
reason="copy_on_write does not exist in earlier versions") 9 | 10 | 11 | # Make sure the variable get's properly reset even in case of error 12 | @pytest.fixture(autouse=True) 13 | def scoped_copy_on_write_setting(): 14 | old_value = pandas.options.mode.copy_on_write 15 | pandas.options.mode.copy_on_write = True 16 | yield 17 | # Reset it at the end of the function 18 | pandas.options.mode.copy_on_write = old_value 19 | return 20 | 21 | 22 | def convert_to_result(col): 23 | return [(x,) for x in col] 24 | 25 | 26 | class TestCopyOnWrite: 27 | @pytest.mark.parametrize( 28 | "col", 29 | [ 30 | ["a", "b", "this is a long string"], 31 | [1.2334, None, 234.12], 32 | [123234, -213123, 2324234], 33 | [datetime.date(1990, 12, 7), None, datetime.date(1940, 1, 13)], 34 | [datetime.datetime(2012, 6, 21, 13, 23, 45, 328), None], 35 | ], 36 | ) 37 | def test_copy_on_write(self, col): 38 | assert pandas.options.mode.copy_on_write 39 | con = duckdb.connect() 40 | df_in = pandas.DataFrame( # noqa: F841 41 | { 42 | "numbers": col, 43 | } 44 | ) 45 | rel = con.sql("select * from df_in") 46 | res = rel.fetchall() 47 | print(res) 48 | expected = convert_to_result(col) 49 | assert res == expected 50 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_create_table_from_pandas.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from conftest import ArrowPandas, NumpyPandas 3 | 4 | import duckdb 5 | 6 | 7 | def assert_create(internal_data, expected_result, data_type, pandas): 8 | conn = duckdb.connect() 9 | df_in = pandas.DataFrame(data=internal_data, dtype=data_type) # noqa: F841 10 | 11 | conn.execute("CREATE TABLE t AS SELECT * FROM df_in") 12 | 13 | result = conn.execute("SELECT * FROM t").fetchall() 14 | assert result == expected_result 15 | 16 | 17 | def assert_create_register(internal_data, expected_result, data_type, pandas): 18 | conn = duckdb.connect() 19 | df_in = 
pandas.DataFrame(data=internal_data, dtype=data_type) 20 | conn.register("dataframe", df_in) 21 | conn.execute("CREATE TABLE t AS SELECT * FROM dataframe") 22 | 23 | result = conn.execute("SELECT * FROM t").fetchall() 24 | assert result == expected_result 25 | 26 | 27 | class TestCreateTableFromPandas: 28 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 29 | def test_integer_create_table(self, duckdb_cursor, pandas): 30 | # TODO: This should work with other data types e.g., int8... # noqa: TD002, TD003 31 | data_types = ["Int8", "Int16", "Int32", "Int64"] 32 | internal_data = [1, 2, 3, 4] 33 | expected_result = [(1,), (2,), (3,), (4,)] 34 | for data_type in data_types: 35 | print(data_type) 36 | assert_create_register(internal_data, expected_result, data_type, pandas) 37 | assert_create(internal_data, expected_result, data_type, pandas) 38 | 39 | # TODO: Also test other data types # noqa: TD002, TD003 40 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_date_as_datetime.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pandas as pd 4 | 5 | import duckdb 6 | 7 | 8 | def run_checks(df): 9 | assert type(df["d"][0]) is datetime.date 10 | assert df["d"][0] == datetime.date(1992, 7, 30) 11 | assert pd.isnull(df["d"][1]) 12 | 13 | 14 | def test_date_as_datetime(): 15 | con = duckdb.connect() 16 | con.execute("create table t (d date)") 17 | con.execute("insert into t values ('1992-07-30'), (NULL)") 18 | 19 | # Connection Methods 20 | run_checks(con.execute("Select * from t").df(date_as_object=True)) 21 | run_checks(con.execute("Select * from t").fetchdf(date_as_object=True)) 22 | run_checks(con.execute("Select * from t").fetch_df_chunk(date_as_object=True)) 23 | run_checks(con.execute("Select * from t").fetch_df(date_as_object=True)) 24 | 25 | # Relation Methods 26 | rel = con.table("t") 27 | 
run_checks(rel.df(date_as_object=True)) 28 | run_checks(rel.to_df(date_as_object=True)) 29 | 30 | # Result Methods 31 | run_checks(rel.query("t_1", "select * from t_1").df(date_as_object=True)) 32 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_implicit_pandas_scan.py: -------------------------------------------------------------------------------- 1 | # simple DB API testcase 2 | 3 | import pandas as pd 4 | import pytest 5 | from conftest import ArrowPandas, NumpyPandas 6 | from packaging.version import Version 7 | 8 | import duckdb 9 | 10 | numpy_nullable_df = pd.DataFrame([{"COL1": "val1", "CoL2": 1.05}, {"COL1": "val4", "CoL2": 17}]) 11 | 12 | try: 13 | from pandas.compat import pa_version_under7p0 14 | 15 | pyarrow_dtypes_enabled = not pa_version_under7p0 16 | except Exception: 17 | pyarrow_dtypes_enabled = False 18 | 19 | if Version(pd.__version__) >= Version("2.0.0") and pyarrow_dtypes_enabled: 20 | pyarrow_df = numpy_nullable_df.convert_dtypes(dtype_backend="pyarrow") 21 | else: 22 | # dtype_backend is not supported in pandas < 2.0.0 23 | pyarrow_df = numpy_nullable_df 24 | 25 | 26 | class TestImplicitPandasScan: 27 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 28 | def test_local_pandas_scan(self, duckdb_cursor, pandas): 29 | con = duckdb.connect() 30 | df = pandas.DataFrame([{"COL1": "val1", "CoL2": 1.05}, {"COL1": "val3", "CoL2": 17}]) # noqa: F841 31 | r1 = con.execute("select * from df").fetchdf() 32 | assert r1["COL1"][0] == "val1" 33 | assert r1["COL1"][1] == "val3" 34 | assert r1["CoL2"][0] == 1.05 35 | assert r1["CoL2"][1] == 17 36 | 37 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 38 | def test_global_pandas_scan(self, duckdb_cursor, pandas): 39 | con = duckdb.connect() 40 | r1 = con.execute(f"select * from {pandas.backend}_df").fetchdf() 41 | assert r1["COL1"][0] == "val1" 42 | assert r1["COL1"][1] == "val4" 43 | assert r1["CoL2"][0] == 1.05 
44 | assert r1["CoL2"][1] == 17 45 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_import_cache.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from conftest import ArrowPandas, NumpyPandas 3 | 4 | import duckdb 5 | 6 | 7 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 8 | def test_import_cache_explicit_dtype(pandas): 9 | df = pandas.DataFrame( # noqa: F841 10 | { 11 | "id": [1, 2, 3], 12 | "value": pandas.Series(["123.123", pandas.NaT, pandas.NA], dtype=pandas.StringDtype(storage="python")), 13 | } 14 | ) 15 | con = duckdb.connect() 16 | result_df = con.query("select id, value from df").df() 17 | 18 | assert result_df["value"][1] is None 19 | assert result_df["value"][2] is None 20 | 21 | 22 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 23 | def test_import_cache_implicit_dtype(pandas): 24 | df = pandas.DataFrame({"id": [1, 2, 3], "value": pandas.Series(["123.123", pandas.NaT, pandas.NA])}) # noqa: F841 25 | con = duckdb.connect() 26 | result_df = con.query("select id, value from df").df() 27 | 28 | assert result_df["value"][1] is None 29 | assert result_df["value"][2] is None 30 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_issue_1767.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pytest 4 | from conftest import ArrowPandas, NumpyPandas 5 | 6 | import duckdb 7 | 8 | 9 | # Join from pandas not matching identical strings #1767 10 | class TestIssue1767: 11 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 12 | def test_unicode_join_pandas(self, duckdb_cursor, pandas): 13 | A = pandas.DataFrame({"key": ["a", "п"]}) 14 | B = pandas.DataFrame({"key": ["a", "п"]}) 15 | con = duckdb.connect(":memory:") 16 | arrow = con.register("A", A).register("B", B) 17 
| q = arrow.query("""SELECT key FROM "A" FULL JOIN "B" USING ("key") ORDER BY key""") 18 | result = q.df() 19 | 20 | d = {"key": ["a", "п"]} 21 | df = pandas.DataFrame(data=d) 22 | pandas.testing.assert_frame_equal(result, df) 23 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_limit.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from conftest import ArrowPandas, NumpyPandas 3 | 4 | import duckdb 5 | 6 | 7 | class TestLimitPandas: 8 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 9 | def test_limit_df(self, duckdb_cursor, pandas): 10 | df_in = pandas.DataFrame( 11 | { 12 | "numbers": [1, 2, 3, 4, 5], 13 | } 14 | ) 15 | limit_df = duckdb.limit(df_in, 2) 16 | assert len(limit_df.execute().fetchall()) == 2 17 | 18 | @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()]) 19 | def test_aggregate_df(self, duckdb_cursor, pandas): 20 | df_in = pandas.DataFrame( 21 | { 22 | "numbers": [1, 2, 2, 2], 23 | } 24 | ) 25 | aggregate_df = duckdb.aggregate(df_in, "count(numbers)", "numbers").order("all") 26 | assert aggregate_df.execute().fetchall() == [(1,), (3,)] 27 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_pandas_df_none.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | 4 | class TestPandasDFNone: 5 | # This used to decrease the ref count of None 6 | def test_none_deref(self): 7 | con = duckdb.connect() 8 | df = con.sql("select NULL::VARCHAR as a from range(1000000)").df() # noqa: F841 9 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_pandas_enum.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | 4 | import duckdb 5 | 6 | 7 | class TestPandasEnum: 8 | def test_3480(self, 
duckdb_cursor): 9 | duckdb_cursor.execute( 10 | """ 11 | create type cat as enum ('marie', 'duchess', 'toulouse'); 12 | create table tab ( 13 | cat cat, 14 | amt int 15 | ); 16 | """ 17 | ) 18 | df = duckdb_cursor.query("SELECT * FROM tab LIMIT 0;").to_df() 19 | assert df["cat"].cat.categories.equals(pd.Index(["marie", "duchess", "toulouse"])) 20 | duckdb_cursor.execute("DROP TABLE tab") 21 | duckdb_cursor.execute("DROP TYPE cat") 22 | 23 | def test_3479(self, duckdb_cursor): 24 | duckdb_cursor.execute( 25 | """ 26 | create type cat as enum ('marie', 'duchess', 'toulouse'); 27 | create table tab ( 28 | cat cat, 29 | amt int 30 | ); 31 | """ 32 | ) 33 | 34 | df = pd.DataFrame( 35 | { 36 | "cat2": pd.Series(["duchess", "toulouse", "marie", None, "berlioz", "o_malley"], dtype="category"), 37 | "amt": [1, 2, 3, 4, 5, 6], 38 | } 39 | ) 40 | duckdb_cursor.register("df", df) 41 | with pytest.raises( 42 | duckdb.ConversionException, 43 | match="Type UINT8 with value 0 can't be cast because the value is out of range for the destination " 44 | "type UINT8", 45 | ): 46 | duckdb_cursor.execute("INSERT INTO tab SELECT * FROM df;") 47 | 48 | assert duckdb_cursor.execute("select * from tab").fetchall() == [] 49 | duckdb_cursor.execute("DROP TABLE tab") 50 | duckdb_cursor.execute("DROP TYPE cat") 51 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_pandas_limit.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | 4 | class TestPandasLimit: 5 | def test_pandas_limit(self, duckdb_cursor): 6 | con = duckdb.connect() 7 | df = con.execute("select * from range(10000000) tbl(i)").df() # noqa: F841 8 | 9 | con.execute("SET threads=8") 10 | 11 | limit_df = con.execute("SELECT * FROM df WHERE i=334 OR i>9967864 LIMIT 5").df() 12 | assert list(limit_df["i"]) == [334, 9967865, 9967866, 9967867, 9967868] 13 | 
# ---- tests/fast/pandas/test_pandas_string.py ----
import numpy
import pandas as pd

import duckdb


class TestPandasString:
    def test_pandas_string(self, duckdb_cursor):
        """Plain object strings (and StringDtype, when available) round-trip through DuckDB."""
        values = numpy.array(["foo", "bar", "baz"])
        # StringDtype is only available in pandas >= 1.0.0
        has_string_dtype = hasattr(pd, "StringDtype")

        # https://pandas.pydata.org/pandas-docs/stable/user_guide/text.html
        df_in = pd.DataFrame({"object": pd.Series(values, dtype="object")})
        if has_string_dtype:
            df_in["string"] = pd.Series(values, dtype=pd.StringDtype())

        df_out = duckdb.query_df(df_in, "data", "SELECT * FROM data").df()

        assert numpy.all(df_out["object"] == values)
        if has_string_dtype:
            assert numpy.all(df_out["string"] == values)

    def test_bug_2467(self, duckdb_cursor):
        """Copying a large string column into a DuckDB table must not lose rows."""
        repeat = 1_000_000
        df = pd.DataFrame({"city": ["Amsterdam", "New York", "London"] * repeat})

        con = duckdb.connect()
        con.register("df", df)
        con.execute(
            """
            CREATE TABLE t1 AS SELECT * FROM df
            """
        )
        result = con.execute(
            """
            SELECT count(*) from t1
            """
        ).fetchall()
        assert result == [(3000000,)]


# ---- tests/fast/pandas/test_pandas_timestamp.py ----
from datetime import datetime

import pandas
import pytest
from conftest import pandas_2_or_higher

import duckdb


@pytest.mark.parametrize("timezone", ["UTC", "CET", "Asia/Kathmandu"])
@pytest.mark.skipif(not pandas_2_or_higher(), reason="Pandas <2.0.0 does not support timezones in the metadata string")
def test_run_pandas_with_tz(timezone):
    """A tz-aware pandas timestamp survives a from_df/df round trip unchanged."""
    con = duckdb.connect()
    con.execute(f"SET TimeZone = '{timezone}'")

    series = pandas.Series(
        data=[pandas.Timestamp(year=2022, month=1, day=1, hour=10, minute=15, tz=timezone, unit="us")],
        dtype=f"datetime64[us, {timezone}]",
    )
    df = pandas.DataFrame({"timestamp": series})

    duck_df = con.from_df(df).df()
    assert duck_df["timestamp"][0] == df["timestamp"][0]


def test_timestamp_conversion(duckdb_cursor):
    """Binding a tz-aware datetime parameter matches tz-aware pandas timestamps."""
    tzinfo = pandas.Timestamp("2024-01-01 00:00:00+0100", tz="Europe/Copenhagen").tzinfo
    ts_df = pandas.DataFrame(  # noqa: F841
        {
            "ts": [
                pandas.Timestamp("2024-01-01 00:00:00+0100", tz=tzinfo),
                pandas.Timestamp("2024-01-02 00:00:00+0100", tz=tzinfo),
            ]
        }
    )

    query = """
    select
        *
    from ts_df
    where ts = $notationtime
    """
    params_zoneinfo = {"notationtime": datetime(2024, 1, 1, tzinfo=tzinfo)}

    duckdb_cursor.execute("set TimeZone = 'Europe/Copenhagen'")
    rows = duckdb_cursor.execute(query, parameters=params_zoneinfo).fetchall()
    assert rows[0][0] == datetime(2024, 1, 1, tzinfo=tzinfo)


# ---- tests/fast/pandas/test_pandas_update.py ----
import pandas as pd

import duckdb


class TestPandasUpdateList:
    def test_pandas_update_list(self, duckdb_cursor):
        """An UPDATE on a list column fetches back correctly as a DataFrame."""
        con = duckdb.connect(":memory:")
        con.execute("create table t (l int[])")
        con.execute("insert into t values ([1, 2]), ([3,4])")
        con.execute("update t set l = [5, 6]")

        expected = pd.DataFrame({"l": [[5, 6], [5, 6]]})
        res = con.execute("select * from t").fetchdf()
        pd.testing.assert_frame_equal(expected, res)
-------------------------------------------------------------------------------- /tests/fast/pandas/test_partitioned_pandas_scan.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import pandas as pd 3 | 4 | import duckdb 5 | 6 | 7 | class TestPartitionedPandasScan: 8 | def test_parallel_pandas(self, duckdb_cursor): 9 | con = duckdb.connect() 10 | df = pd.DataFrame({"i": numpy.arange(10000000)}) 11 | 12 | con.register("df", df) 13 | 14 | seq_results = con.execute("SELECT SUM(i) FROM df").fetchall() 15 | 16 | con.execute("PRAGMA threads=4") 17 | parallel_results = con.execute("SELECT SUM(i) FROM df").fetchall() 18 | 19 | assert seq_results[0][0] == 49999995000000 20 | assert parallel_results[0][0] == 49999995000000 21 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_progress_bar.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import pandas as pd 3 | 4 | import duckdb 5 | 6 | 7 | class TestProgressBarPandas: 8 | def test_progress_pandas_single(self, duckdb_cursor): 9 | con = duckdb.connect() 10 | df = pd.DataFrame({"i": numpy.arange(10000000)}) 11 | 12 | con.register("df", df) 13 | con.register("df_2", df) 14 | con.execute("PRAGMA progress_bar_time=1") 15 | con.execute("PRAGMA disable_print_progress_bar") 16 | result = con.execute("SELECT SUM(df.i) FROM df inner join df_2 on (df.i = df_2.i)").fetchall() 17 | assert result[0][0] == 49999995000000 18 | 19 | def test_progress_pandas_parallel(self, duckdb_cursor): 20 | con = duckdb.connect() 21 | df = pd.DataFrame({"i": numpy.arange(10000000)}) 22 | 23 | con.register("df", df) 24 | con.register("df_2", df) 25 | con.execute("PRAGMA progress_bar_time=1") 26 | con.execute("PRAGMA disable_print_progress_bar") 27 | con.execute("PRAGMA threads=4") 28 | parallel_results = con.execute("SELECT SUM(df.i) FROM df inner join df_2 on (df.i = df_2.i)").fetchall() 29 
| assert parallel_results[0][0] == 49999995000000 30 | 31 | def test_progress_pandas_empty(self, duckdb_cursor): 32 | con = duckdb.connect() 33 | df = pd.DataFrame({"i": []}) 34 | con.register("df", df) 35 | con.execute("PRAGMA progress_bar_time=1") 36 | con.execute("PRAGMA disable_print_progress_bar") 37 | result = con.execute("SELECT SUM(df.i) from df").fetchall() 38 | assert result[0][0] is None 39 | -------------------------------------------------------------------------------- /tests/fast/pandas/test_pyarrow_projection_pushdown.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from conftest import pandas_supports_arrow_backend 3 | 4 | import duckdb 5 | 6 | pa = pytest.importorskip("pyarrow") 7 | ds = pytest.importorskip("pyarrow.dataset") 8 | _ = pytest.importorskip("pandas", "2.0.0") 9 | 10 | 11 | @pytest.mark.skipif(not pandas_supports_arrow_backend(), reason="pandas does not support the 'pyarrow' backend") 12 | class TestArrowDFProjectionPushdown: 13 | def test_projection_pushdown_no_filter(self, duckdb_cursor): 14 | duckdb_conn = duckdb.connect() 15 | duckdb_conn.execute("CREATE TABLE test (a INTEGER, b INTEGER, c INTEGER)") 16 | duckdb_conn.execute("INSERT INTO test VALUES (1,1,1),(10,10,10),(100,10,100),(NULL,NULL,NULL)") 17 | duck_tbl = duckdb_conn.table("test") 18 | arrow_table = duck_tbl.df().convert_dtypes(dtype_backend="pyarrow") 19 | duckdb_conn.register("testarrowtable", arrow_table) 20 | assert duckdb_conn.execute("SELECT sum(a) FROM testarrowtable").fetchall() == [(111,)] 21 | -------------------------------------------------------------------------------- /tests/fast/relational_api/test_groupings.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | 6 | @pytest.fixture 7 | def con(): 8 | conn = duckdb.connect() 9 | conn.execute( 10 | """ 11 | create table tbl as (SELECT * FROM (VALUES 12 | (1, 'a', 12), 13 | 
(1, 'a', 10), 14 | (2, 'b', 5), 15 | (2, 'a', 7), 16 | (3, 'a', 5), 17 | (5, 'c', 2) 18 | ) AS tbl(a, b, c)) 19 | """ 20 | ) 21 | return conn 22 | 23 | 24 | class TestGroupings: 25 | def test_basic_grouping(self, con): 26 | rel = con.table("tbl").sum("a", "b") 27 | res = rel.fetchall() 28 | assert res == [(7,), (2,), (5,)] 29 | 30 | rel = con.sql("select sum(a) from tbl GROUP BY b") 31 | res2 = rel.fetchall() 32 | assert res == res2 33 | 34 | def test_cubed(self, con): 35 | rel = con.table("tbl").sum("a", "CUBE (b)").order("ALL") 36 | res = rel.fetchall() 37 | assert res == [(2,), (5,), (7,), (14,)] 38 | 39 | rel = con.sql("select sum(a) from tbl GROUP BY CUBE (b) ORDER BY ALL") 40 | res2 = rel.fetchall() 41 | assert res == res2 42 | 43 | def test_rollup(self, con): 44 | rel = con.table("tbl").sum("a", "ROLLUP (b, c)").order("ALL") 45 | res = rel.fetchall() 46 | assert res == [(1,), (1,), (2,), (2,), (2,), (3,), (5,), (5,), (7,), (14,)] 47 | 48 | rel = con.sql("select sum(a) from tbl GROUP BY ROLLUP (b, c) ORDER BY ALL") 49 | res2 = rel.fetchall() 50 | assert res == res2 51 | -------------------------------------------------------------------------------- /tests/fast/relational_api/test_pivot.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from pathlib import Path 3 | 4 | 5 | class TestPivot: 6 | def test_pivot_issue_14600(self, duckdb_cursor): 7 | duckdb_cursor.sql( 8 | "create table input_data as select unnest(['u','v','w']) as a, unnest(['x','y','z']) as b, unnest([1,2,3]) as c;" # noqa: E501 9 | ) 10 | pivot_1 = duckdb_cursor.query("pivot input_data on a using max(c) group by b;") 11 | pivot_2 = duckdb_cursor.query("pivot input_data on b using max(c) group by a;") 12 | pivot_1.create("pivot_1") 13 | pivot_2.create("pivot_2") 14 | pivot_1_tbl = duckdb_cursor.table("pivot_1") 15 | pivot_2_tbl = duckdb_cursor.table("pivot_2") 16 | assert set(pivot_1.columns) == set(pivot_1_tbl.columns) 17 | assert 
set(pivot_2.columns) == set(pivot_2_tbl.columns) 18 | 19 | def test_pivot_issue_14601(self, duckdb_cursor): 20 | duckdb_cursor.sql( 21 | "create table input_data as select unnest(['u','v','w']) as a, unnest(['x','y','z']) as b, unnest([1,2,3]) as c;" # noqa: E501 22 | ) 23 | pivot_1 = duckdb_cursor.query("pivot input_data on a using max(c) group by b;") 24 | pivot_1.create("pivot_1") 25 | export_dir = tempfile.mkdtemp() 26 | duckdb_cursor.query(f"EXPORT DATABASE '{export_dir}'") 27 | assert "CREATE TYPE" not in (Path(export_dir) / "schema.sql").read_text() 28 | -------------------------------------------------------------------------------- /tests/fast/relational_api/test_rapi_functions.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | 4 | class TestRAPIFunctions: 5 | def test_rapi_str_print(self, duckdb_cursor): 6 | res = duckdb_cursor.query("select 42::INT AS a, 84::BIGINT AS b") 7 | assert str(res) is not None 8 | res.show() 9 | 10 | def test_rapi_relation_sql_query(self): 11 | res = duckdb.table_function("range", [10]) 12 | assert res.sql_query() == 'SELECT * FROM "range"(10)' 13 | -------------------------------------------------------------------------------- /tests/fast/relational_api/test_table_function.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | import duckdb 6 | 7 | script_path = Path(__file__).parent 8 | 9 | 10 | class TestTableFunction: 11 | def test_table_function(self, duckdb_cursor): 12 | path = str(script_path / ".." 
/ "data/integers.csv") 13 | rel = duckdb_cursor.table_function("read_csv", [path]) 14 | res = rel.fetchall() 15 | assert res == [(1, 10, 0), (2, 50, 30)] 16 | 17 | # Provide only a string as argument, should error, needs a list 18 | with pytest.raises(duckdb.InvalidInputException, match=r"'params' has to be a list of parameters"): 19 | rel = duckdb_cursor.table_function("read_csv", path) 20 | -------------------------------------------------------------------------------- /tests/fast/spark/test_spark_arrow_table.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | _ = pytest.importorskip("duckdb.experimental.spark") 4 | pa = pytest.importorskip("pyarrow") 5 | from spark_namespace import USE_ACTUAL_SPARK 6 | from spark_namespace.sql.dataframe import DataFrame 7 | 8 | 9 | class TestArrowTable: 10 | @pytest.mark.skipif( 11 | USE_ACTUAL_SPARK and not hasattr(DataFrame, "toArrow"), 12 | reason="toArrow is only introduced in PySpark 4.0.0", 13 | ) 14 | def test_spark_to_arrow_table(self, spark): 15 | if USE_ACTUAL_SPARK: 16 | return 17 | data = [ 18 | ("firstRowFirstColumn",), 19 | ("2ndRowFirstColumn",), 20 | ] 21 | df = spark.createDataFrame(data, ["firstColumn"]) 22 | arrow_table = df.toArrow() 23 | assert arrow_table.num_columns == 1 24 | assert arrow_table.num_rows == 2 25 | assert arrow_table.column_names == ["firstColumn"] 26 | -------------------------------------------------------------------------------- /tests/fast/spark/test_spark_except.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | _ = pytest.importorskip("duckdb.experimental.spark") 4 | 5 | from duckdb.experimental.spark.sql.types import Row 6 | 7 | 8 | @pytest.fixture 9 | def df(spark): 10 | return spark.createDataFrame([("a", 1), ("a", 1), ("a", 1), ("a", 2), ("b", 3), ("c", 4)], ["C1", "C2"]) 11 | 12 | 13 | @pytest.fixture 14 | def df2(spark): 15 | return spark.createDataFrame([("a", 
1), ("b", 3)], ["C1", "C2"]) 16 | 17 | 18 | class TestDataFrameIntersect: 19 | def test_exceptAll(self, spark, df, df2): 20 | df3 = df.exceptAll(df2).sort(*df.columns) 21 | res = df3.collect() 22 | 23 | assert res == [ 24 | Row(C1="a", C2=1), 25 | Row(C1="a", C2=1), 26 | Row(C1="a", C2=2), 27 | Row(C1="c", C2=4), 28 | ] 29 | -------------------------------------------------------------------------------- /tests/fast/spark/test_spark_function_concat_ws.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | _ = pytest.importorskip("duckdb.experimental.spark") 4 | from spark_namespace.sql.functions import col, concat_ws 5 | from spark_namespace.sql.types import Row 6 | 7 | 8 | class TestReplaceEmpty: 9 | def test_replace_empty(self, spark): 10 | data = [ 11 | ("firstRowFirstColumn", "firstRowSecondColumn"), 12 | ("2ndRowFirstColumn", "2ndRowSecondColumn"), 13 | ] 14 | df = spark.createDataFrame(data, ["firstColumn", "secondColumn"]) 15 | df = df.withColumn("concatted", concat_ws(" ", col("firstColumn"), col("secondColumn"))) 16 | res = df.select("concatted").collect() 17 | assert res == [ 18 | Row(concatted="firstRowFirstColumn firstRowSecondColumn"), 19 | Row(concatted="2ndRowFirstColumn 2ndRowSecondColumn"), 20 | ] 21 | -------------------------------------------------------------------------------- /tests/fast/spark/test_spark_functions_base64.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | _ = pytest.importorskip("duckdb.experimental.spark") 4 | 5 | from spark_namespace.sql import functions as F 6 | 7 | 8 | class TestSparkFunctionsBase64: 9 | def test_base64(self, spark): 10 | data = [ 11 | ("quack",), 12 | ] 13 | res = ( 14 | spark.createDataFrame(data, ["firstColumn"]) 15 | .withColumn("encoded_value", F.base64(F.col("firstColumn"))) 16 | .select("encoded_value") 17 | .collect() 18 | ) 19 | assert res[0].encoded_value == "cXVhY2s=" 20 | 21 | def 
test_base64ColString(self, spark): 22 | data = [ 23 | ("quack",), 24 | ] 25 | res = ( 26 | spark.createDataFrame(data, ["firstColumn"]) 27 | .withColumn("encoded_value", F.base64("firstColumn")) 28 | .select("encoded_value") 29 | .collect() 30 | ) 31 | assert res[0].encoded_value == "cXVhY2s=" 32 | 33 | def test_unbase64(self, spark): 34 | data = [ 35 | ("cXVhY2s=",), 36 | ] 37 | res = ( 38 | spark.createDataFrame(data, ["firstColumn"]) 39 | .withColumn("decoded_value", F.unbase64(F.col("firstColumn"))) 40 | .select("decoded_value") 41 | .collect() 42 | ) 43 | assert res[0].decoded_value == b"quack" 44 | -------------------------------------------------------------------------------- /tests/fast/spark/test_spark_functions_dataframe.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | _ = pytest.importorskip("duckdb.experimental.spark") 4 | from spark_namespace.sql import functions as F 5 | 6 | 7 | class TestSparkFunctionsArray: 8 | def test_broadcast(self, spark): 9 | data = [ 10 | ([1, 2, 2], 2), 11 | ([2, 4, 5], 3), 12 | ] 13 | 14 | df = spark.createDataFrame(data, ["firstColumn", "secondColumn"]) 15 | df_broadcast = F.broadcast(df) 16 | 17 | assert df.collect() == df_broadcast.collect() 18 | -------------------------------------------------------------------------------- /tests/fast/spark/test_spark_functions_expr.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from spark_namespace.sql import functions as F 3 | from spark_namespace.sql.types import Row 4 | 5 | _ = pytest.importorskip("duckdb.experimental.spark") 6 | 7 | 8 | class TestSparkFunctionsExpr: 9 | def test_expr(self, spark): 10 | df = spark.createDataFrame([["Alice"], ["Bob"]], ["name"]) 11 | res = df.select("name", F.expr("length(name)").alias("str_len")).collect() 12 | 13 | assert res == [ 14 | Row(name="Alice", str_len=5), 15 | Row(name="Bob", str_len=3), 16 | ] 17 | 
# ===================== tests/fast/spark/test_spark_functions_hash.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")
from spark_namespace.sql import functions as F


class TestSparkFunctionsHash:
    """Checks the Spark-compatible hashing functions against known digests."""

    def test_md5(self, spark):
        df = spark.createDataFrame([("quack",)], ["firstColumn"])
        hashed = df.withColumn("hashed_value", F.md5(F.col("firstColumn")))
        res = hashed.select("hashed_value").collect()
        assert res[0].hashed_value == "cfaf278e8f522c72644cee2a753d2845"

    def test_sha256(self, spark):
        df = spark.createDataFrame([("quack",)], ["firstColumn"])
        hashed = df.withColumn("hashed_value", F.sha2(F.col("firstColumn"), 256))
        res = hashed.select("hashed_value").collect()
        assert res[0].hashed_value == "82d928273d067d774889d5df4249aaf73c0b04c64f04d6ed001441ce87a0853c"

# ===================== tests/fast/spark/test_spark_intersect.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")

from duckdb.experimental.spark.sql.types import Row


@pytest.fixture
def df(spark):
    return spark.createDataFrame([("a", 1), ("a", 1), ("b", 3), ("c", 4)], ["C1", "C2"])


@pytest.fixture
def df2(spark):
    return spark.createDataFrame([("a", 1), ("a", 1), ("b", 3)], ["C1", "C2"])


class TestDataFrameIntersect:
    def test_intersect(self, spark, df, df2):
        # Plain intersect removes duplicates.
        res = df.intersect(df2).sort(df.C1).collect()
        assert res == [
            Row(C1="a", C2=1),
            Row(C1="b", C2=3),
        ]

    def test_intersect_all(self, spark, df, df2):
        # intersectAll keeps duplicate rows that occur in both inputs.
        res = df.intersectAll(df2).sort(df.C1).collect()
        assert res == [
            Row(C1="a", C2=1),
            Row(C1="a", C2=1),
            Row(C1="b", C2=3),
        ]

# ===================== tests/fast/spark/test_spark_limit.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")

from spark_namespace.sql.types import (
    Row,
)


class TestDataFrameLimit:
    def test_dataframe_limit(self, spark):
        df = spark.sql("select * from range(100000)")
        res = df.limit(10).collect()
        assert res == [Row(range=i) for i in range(10)]

# ===================== tests/fast/spark/test_spark_readcsv.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")

from spark_namespace import USE_ACTUAL_SPARK
from spark_namespace.sql.types import Row


class TestSparkReadCSV:
    def test_read_csv(self, spark, tmp_path):
        file_path = tmp_path / "basic.csv"
        file_path.write_text("1,2\n3,4\n5,6\n")
        res = spark.read.csv(file_path.as_posix()).collect()

        expected_res = sorted([Row(column0=1, column1=2), Row(column0=3, column1=4), Row(column0=5, column1=6)])
        if USE_ACTUAL_SPARK:
            # Convert all values to strings as this is how Spark reads them by default
            expected_res = [Row(column0=str(row.column0), column1=str(row.column1)) for row in expected_res]
        assert sorted(res) == expected_res
# ===================== tests/fast/spark/test_spark_readjson.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")


from spark_namespace.sql.types import Row


class TestSparkReadJson:
    def test_read_json(self, duckdb_cursor, spark, tmp_path):
        # FIX: this file holds JSON, so name it basic.json — the old name
        # "basic.parquet" (copy-pasted from the parquet test) was misleading.
        file_path = (tmp_path / "basic.json").as_posix()
        duckdb_cursor.execute(f"COPY (select 42 a, true b, 'this is a long string' c) to '{file_path}' (FORMAT JSON)")
        df = spark.read.json(file_path)
        res = df.collect()
        assert res == [Row(a=42, b=True, c="this is a long string")]

# ===================== tests/fast/spark/test_spark_readparquet.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")


from spark_namespace.sql.types import Row


class TestSparkReadParquet:
    def test_read_parquet(self, duckdb_cursor, spark, tmp_path):
        file_path = tmp_path / "basic.parquet"
        file_path = file_path.as_posix()
        duckdb_cursor.execute(
            f"COPY (select 42 a, true b, 'this is a long string' c) to '{file_path}' (FORMAT PARQUET)"
        )
        df = spark.read.parquet(file_path)
        res = df.collect()
        assert res == [Row(a=42, b=True, c="this is a long string")]

# ===================== tests/fast/spark/test_spark_runtime_config.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")

from spark_namespace import USE_ACTUAL_SPARK


class TestSparkRuntimeConfig:
    def test_spark_runtime_config(self, spark):
        # This fetches the internal runtime config from the session
        spark.conf  # noqa: B018

    @pytest.mark.skipif(
        USE_ACTUAL_SPARK, reason="Getting an error with our local PySpark setup. Unclear why but not a priority."
    )
    def test_spark_runtime_config_set(self, spark):
        # Set Config
        with pytest.raises(NotImplementedError):
            spark.conf.set("spark.executor.memory", "5g")

    @pytest.mark.skip(reason="RuntimeConfig is not implemented yet")
    def test_spark_runtime_config_get(self, spark):
        # Get a Spark Config
        with pytest.raises(KeyError):
            spark.conf.get("spark.sql.shuffle.partitions")

# ===================== tests/fast/spark/test_spark_to_parquet.py =====================
import os

import pytest

_ = pytest.importorskip("duckdb.experimental.spark")


@pytest.fixture
def df(spark):
    simpleData = (
        ("Java", 4000, 5),
        ("Python", 4600, 10),
        ("Scala", 4100, 15),
        ("Scala", 4500, 15),
        ("PHP", 3000, 20),
    )
    columns = ["CourseName", "fee", "discount"]
    dataframe = spark.createDataFrame(data=simpleData, schema=columns)
    return dataframe


class TestSparkToParquet:
    def test_basic_to_parquet(self, df, spark, tmp_path):
        temp_file_name = os.path.join(tmp_path, "temp_file.parquet")  # noqa: PTH118

        df.write.parquet(temp_file_name)

        csv_rel = spark.read.parquet(temp_file_name)

        # Round-trip: what was written must read back identically.
        assert sorted(df.collect()) == sorted(csv_rel.collect())

    def test_compressed_to_parquet(self, df, spark, tmp_path):
        temp_file_name = os.path.join(tmp_path, "temp_file.parquet")  # noqa: PTH118

        df.write.parquet(temp_file_name, compression="ZSTD")

        csv_rel = spark.read.parquet(temp_file_name)

        assert sorted(df.collect()) == sorted(csv_rel.collect())
# ===================== tests/fast/spark/test_spark_udf.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")


class TestSparkUDF:
    def test_udf_register(self, spark):
        def to_upper_fn(s: str) -> str:
            return s.upper()

        spark.udf.register("to_upper_fn", to_upper_fn)
        assert spark.sql("select to_upper_fn('quack') as vl").collect()[0].vl == "QUACK"

# ===================== tests/fast/spark/test_spark_union_by_name.py =====================
import pytest

_ = pytest.importorskip("duckdb.experimental.spark")


from spark_namespace.sql.types import (
    Row,
)


@pytest.fixture
def df1(spark):
    data = [("James", 34), ("Michael", 56), ("Robert", 30), ("Maria", 24)]
    return spark.createDataFrame(data=data, schema=["name", "id"])


@pytest.fixture
def df2(spark):
    data2 = [(34, "James"), (45, "Maria"), (45, "Jen"), (34, "Jeff")]
    return spark.createDataFrame(data=data2, schema=["id", "name"])


class TestDataFrameUnion:
    def test_union_by_name(self, df1, df2):
        # Columns are matched by name, not by position.
        res = df1.unionByName(df2).collect()
        assert res == [
            Row(name="James", id=34),
            Row(name="Michael", id=56),
            Row(name="Robert", id=30),
            Row(name="Maria", id=24),
            Row(name="James", id=34),
            Row(name="Maria", id=45),
            Row(name="Jen", id=45),
            Row(name="Jeff", id=34),
        ]

    def test_union_by_name_allow_missing_cols(self, df1, df2):
        # Missing columns are filled with NULL when allowMissingColumns=True.
        res = df1.unionByName(df2.drop("id"), allowMissingColumns=True).collect()
        assert res == [
            Row(name="James", id=34),
            Row(name="Michael", id=56),
            Row(name="Robert", id=30),
            Row(name="Maria", id=24),
            Row(name="James", id=None),
            Row(name="Maria", id=None),
            Row(name="Jen", id=None),
            Row(name="Jeff", id=None),
        ]

# ===================== tests/fast/test_ambiguous_prepare.py =====================
import duckdb


class TestAmbiguousPrepare:
    def test_bool(self, duckdb_cursor):
        con = duckdb.connect()
        row = con.execute("select ?, ?, ?", (True, 42, [1, 2, 3])).fetchall()[0]
        assert row[0]
        assert row[1] == 42
        assert row[2] == [1, 2, 3]

# ===================== tests/fast/test_case_alias.py =====================
import pytest
from conftest import ArrowPandas, NumpyPandas

import duckdb


class TestCaseAlias:
    @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()])
    def test_case_alias(self, duckdb_cursor, pandas):
        con = duckdb.connect(":memory:")

        df = pandas.DataFrame([{"COL1": "val1", "CoL2": 1.05}, {"COL1": "val3", "CoL2": 17}])

        def check(result):
            # Result columns keep the dataframe's original casing no matter
            # how the query spells the names.
            assert result["COL1"][0] == "val1"
            assert result["COL1"][1] == "val3"
            assert result["CoL2"][0] == 1.05
            assert result["CoL2"][1] == 17

        check(con.from_df(df).query("df", "select * from df").df())
        check(con.from_df(df).query("df", "select COL1, COL2 from df").df())
        check(con.from_df(df).query("df", "select COL1, COL2 from df ORDER BY COL1").df())
        check(con.from_df(df).query("df", "select COL1, COL2 from df GROUP BY COL1, COL2 ORDER BY COL1").df())

# ===================== tests/fast/test_context_manager.py =====================
import duckdb


class TestContextManager:
    def test_context_manager(self, duckdb_cursor):
        with duckdb.connect(database=":memory:", read_only=False) as con:
            assert con.execute("select 1").fetchall() == [(1,)]

# ===================== tests/fast/test_duckdb_api.py =====================
import sys

import duckdb


def test_duckdb_api():
    res = duckdb.execute("SELECT name, value FROM duckdb_settings() WHERE name == 'duckdb_api'")
    py_version = f"{sys.version_info.major}.{sys.version_info.minor}"
    assert res.fetchall() == [("duckdb_api", f"python/{py_version}")]

# ===================== tests/fast/test_insert.py =====================
import pytest
from conftest import ArrowPandas, NumpyPandas

import duckdb


class TestInsert:
    @pytest.mark.parametrize("pandas", [NumpyPandas(), ArrowPandas()])
    def test_insert(self, pandas):
        expected_df = pandas.DataFrame({"i": [1, 2, 3], "j": ["one", "two", "three"]})
        # connect to an in-memory temporary database and get a cursor
        conn = duckdb.connect()
        cursor = conn.cursor()
        conn.execute("CREATE TABLE test (i INTEGER, j STRING)")
        rel = conn.table("test")
        for row in ([1, "one"], [2, "two"], [3, "three"]):
            rel.insert(row)
        roundtrip = cursor.table("test").project("CAST(i as BIGINT)i, j").to_df()
        pandas.testing.assert_frame_equal(roundtrip, expected_df)

    def test_insert_with_schema(self, duckdb_cursor):
        duckdb_cursor.sql("create schema not_main")
        duckdb_cursor.sql("create table not_main.tbl as select * from range(10)")

        res = duckdb_cursor.table("not_main.tbl").fetchall()
        assert len(res) == 10

        duckdb_cursor.table("not_main.tbl").insert([42])
        res2 = duckdb_cursor.table("not_main.tbl").fetchall()
        assert len(res2) == 11
        assert (42,) in res2

# ===================== tests/fast/test_json_logging.py =====================
import json

import pytest

import duckdb


def _parse_json_func(error_prefix: str):
    """Helper to check that the error message is indeed parsable json."""

    def parse_func(exception) -> bool:
        msg = exception.args[0]
        assert msg.startswith(error_prefix)
        json_str = msg.split(error_prefix, 1)[1]
        try:
            json.loads(json_str)
        except Exception:
            return False
        return True

    return parse_func


def _json_connection():
    # Fresh connection with JSON-formatted error messages turned on.
    conn = duckdb.connect()
    conn.execute("SET errors_as_json='true'")
    return conn


def test_json_syntax_error():
    conn = _json_connection()
    with pytest.raises(duckdb.ParserException, match="SYNTAX_ERROR", check=_parse_json_func("Parser Error: ")):
        conn.execute("syntax error")


def test_json_catalog_error():
    conn = _json_connection()
    with pytest.raises(duckdb.CatalogException, match="MISSING_ENTRY", check=_parse_json_func("Catalog Error: ")):
        conn.execute("SELECT * FROM nonexistent_table")


def test_json_syntax_error_extract_statements():
    conn = _json_connection()
    with pytest.raises(duckdb.ParserException, match="SYNTAX_ERROR", check=_parse_json_func("Parser Error: ")):
        conn.extract_statements("syntax error")


def test_json_syntax_error_get_table_names():
    conn = _json_connection()
    with pytest.raises(duckdb.ParserException, match="SYNTAX_ERROR", check=_parse_json_func("Parser Error: ")):
        conn.get_table_names("syntax error")

# ===================== tests/fast/test_metatransaction.py =====================
import pytest

pd = pytest.importorskip("pandas")
np = pytest.importorskip("numpy")

NUMBER_OF_ROWS = 200000
NUMBER_OF_COLUMNS = 1


class TestMetaTransaction:
    def test_fetchmany(self, duckdb_cursor):
        duckdb_cursor.execute("CREATE SEQUENCE id_seq")
        column_names = ",\n".join([f"column_{i} FLOAT" for i in range(1, NUMBER_OF_COLUMNS + 1)])
        # Create a table containing a sequence
        duckdb_cursor.execute(
            f"""
            CREATE TABLE my_table (
                id INTEGER DEFAULT nextval('id_seq'),
                {column_names}
            )
            """
        )

        for i in range(20):
            # Then insert a large amount of tuples, triggering a parallel execution
            data = np.random.rand(NUMBER_OF_ROWS, NUMBER_OF_COLUMNS)
            columns = [f"Column_{i + 1}" for i in range(NUMBER_OF_COLUMNS)]
            df = pd.DataFrame(data, columns=columns)
            df_columns = ", ".join(df.columns)
            # This gets executed in parallel, causing NextValFunction to be called in parallel
            # stressing the MetaTransaction::Get concurrency
            duckdb_cursor.execute(f"INSERT INTO my_table ({df_columns}) SELECT * FROM df")
            print(f"inserted {i}")
        duckdb_cursor.commit()

# ===================== tests/fast/test_module.py =====================
import duckdb


class TestModule:
    def test_paramstyle(self):
        assert duckdb.paramstyle == "qmark"

    def test_threadsafety(self):
        assert duckdb.threadsafety == 1

    def test_apilevel(self):
        assert duckdb.apilevel == "2.0"

# ===================== tests/fast/test_multi_statement.py =====================
import contextlib
import shutil
from pathlib import Path

import duckdb


class TestMultiStatement:
    def test_multi_statement(self, duckdb_cursor):
        con = duckdb.connect(":memory:")

        # An empty statement is a no-op.
        con.execute("")

        # Several statements in a single execute(); results come from the last.
        con.execute(
            """
            CREATE TABLE integers(i integer);
            insert into integers select * from range(10);
            select * from integers;
            """
        )
        assert [x[0] for x in con.fetchall()] == list(range(10))

        # Round-trip EXPORT/IMPORT DATABASE through a directory in the CWD.
        export_location = Path.cwd() / "duckdb_pytest_dir_export"
        with contextlib.suppress(Exception):
            shutil.rmtree(export_location)
        con.execute("CREATE TABLE integers2(i INTEGER)")
        con.execute("INSERT INTO integers2 VALUES (1), (5), (7), (1928)")
        con.execute(f"EXPORT DATABASE '{export_location}'")
        # reset connection
        con = duckdb.connect(":memory:")
        con.execute(f"IMPORT DATABASE '{export_location}'")
        integers = [x[0] for x in con.execute("SELECT * FROM integers").fetchall()]
        integers2 = [x[0] for x in con.execute("SELECT * FROM integers2").fetchall()]
        assert integers == list(range(10))
        assert integers2 == [1, 5, 7, 1928]
        shutil.rmtree(export_location)
/tests/fast/test_string_annotation.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # we need typing.Union in our import cache 4 | from typing import Union # noqa: F401 5 | 6 | import pytest 7 | 8 | 9 | def make_annotated_function(type: str): 10 | def test_base() -> None: 11 | return None 12 | 13 | import types 14 | 15 | test_function = types.FunctionType( 16 | test_base.__code__, test_base.__globals__, test_base.__name__, test_base.__defaults__, test_base.__closure__ 17 | ) 18 | # Add the 'type' string as return_annotation 19 | test_function.__annotations__ = {"return": type} 20 | return test_function 21 | 22 | 23 | def python_version_lower_than_3_10(): 24 | if sys.version_info[1] < 10: 25 | return True 26 | return False 27 | 28 | 29 | class TestStringAnnotation: 30 | @pytest.mark.skipif( 31 | python_version_lower_than_3_10(), reason="inspect.signature(eval_str=True) only supported since 3.10 and higher" 32 | ) 33 | @pytest.mark.parametrize( 34 | ("input", "expected"), 35 | [ 36 | ("str", "VARCHAR"), 37 | ("list[str]", "VARCHAR[]"), 38 | ("dict[str, str]", "MAP(VARCHAR, VARCHAR)"), 39 | ("dict[Union[str, bool], str]", "MAP(UNION(u1 VARCHAR, u2 BOOLEAN), VARCHAR)"), 40 | ], 41 | ) 42 | def test_string_annotations(self, duckdb_cursor, input, expected): 43 | from inspect import signature 44 | 45 | func = make_annotated_function(input) 46 | sig = signature(func) 47 | assert sig.return_annotation.__class__ is str 48 | 49 | duckdb_cursor.create_function("foo", func) 50 | rel = duckdb_cursor.sql("select foo()") 51 | assert rel.types == [expected] 52 | -------------------------------------------------------------------------------- /tests/fast/test_tf.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import duckdb 4 | 5 | tf = pytest.importorskip("tensorflow") 6 | 7 | 8 | def test_tf(): 9 | con = duckdb.connect() 10 | 11 | con.execute("create table t( a 
integer, b integer)") 12 | con.execute("insert into t values (1,2), (3,4)") 13 | 14 | # Test from connection 15 | duck_tf = con.execute("select * from t").tf() 16 | duck_numpy = con.sql("select * from t").fetchnumpy() 17 | tf.math.equal(duck_tf["a"], tf.convert_to_tensor(duck_numpy["a"])) 18 | tf.math.equal(duck_tf["b"], tf.convert_to_tensor(duck_numpy["b"])) 19 | 20 | # Test from relation 21 | duck_tf = con.sql("select * from t").tf() 22 | tf.math.equal(duck_tf["a"], tf.convert_to_tensor(duck_numpy["a"])) 23 | tf.math.equal(duck_tf["b"], tf.convert_to_tensor(duck_numpy["b"])) 24 | 25 | # Test all Numeric Types 26 | numeric_types = ["TINYINT", "SMALLINT", "BIGINT", "HUGEINT", "FLOAT", "DOUBLE", "DECIMAL(4,1)", "UTINYINT"] 27 | 28 | for supported_type in numeric_types: 29 | con = duckdb.connect() 30 | con.execute(f"create table t( a {supported_type} , b {supported_type})") 31 | con.execute("insert into t values (1,2), (3,4)") 32 | duck_tf = con.sql("select * from t").tf() 33 | duck_numpy = con.sql("select * from t").fetchnumpy() 34 | tf.math.equal(duck_tf["a"], tf.convert_to_tensor(duck_numpy["a"])) 35 | tf.math.equal(duck_tf["b"], tf.convert_to_tensor(duck_numpy["b"])) 36 | -------------------------------------------------------------------------------- /tests/fast/test_transaction.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | 4 | class TestConnectionTransaction: 5 | def test_transaction(self, duckdb_cursor): 6 | con = duckdb.connect() 7 | con.execute("create table t (i integer)") 8 | con.execute("insert into t values (1)") 9 | 10 | con.begin() 11 | con.execute("insert into t values (1)") 12 | assert con.execute("select count (*) from t").fetchone()[0] == 2 13 | con.rollback() 14 | assert con.execute("select count (*) from t").fetchone()[0] == 1 15 | con.begin() 16 | con.execute("insert into t values (1)") 17 | assert con.execute("select count (*) from t").fetchone()[0] == 2 18 | con.commit() 19 | 
assert con.execute("select count (*) from t").fetchone()[0] == 2 20 | -------------------------------------------------------------------------------- /tests/fast/test_type_explicit.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | import duckdb.sqltypes as duckdb_types 3 | 4 | 5 | class TestMap: 6 | def test_array_list_tuple_ambiguity(self): 7 | con = duckdb.connect() 8 | res = con.sql("SELECT $arg", params={"arg": (1, 2)}).fetchall()[0][0] 9 | assert res == [1, 2] 10 | 11 | # By using an explicit duckdb.Value with an array type, we should convert the input as an array 12 | # and get an array (tuple) back 13 | typ = duckdb.array_type(duckdb_types.BIGINT, 2) 14 | val = duckdb.Value((1, 2), typ) 15 | res = con.sql("SELECT $arg", params={"arg": val}).fetchall()[0][0] 16 | assert res == (1, 2) 17 | 18 | val = duckdb.Value([3, 4], typ) 19 | res = con.sql("SELECT $arg", params={"arg": val}).fetchall()[0][0] 20 | assert res == (3, 4) 21 | -------------------------------------------------------------------------------- /tests/fast/test_unicode.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pandas as pd 4 | 5 | import duckdb 6 | 7 | 8 | class TestUnicode: 9 | def test_unicode_pandas_scan(self, duckdb_cursor): 10 | con = duckdb.connect(database=":memory:", read_only=False) 11 | test_df = pd.DataFrame.from_dict({"i": [1, 2, 3], "j": ["a", "c", "ë"]}) 12 | con.register("test_df_view", test_df) 13 | con.execute("SELECT i, j, LENGTH(j) FROM test_df_view").fetchall() 14 | -------------------------------------------------------------------------------- /tests/fast/test_union.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | 4 | class TestUnion: 5 | def test_union_by_all(self): 6 | connection = duckdb.connect() 7 | 8 | connection.execute( 9 | """ 10 | create table tbl1 as select * from 
(VALUES 11 | (1, 2, 3, 4), 12 | (2, 3, 4, 5), 13 | (3, 4, 5, 6)) as tbl(A, B, C, D) 14 | """ 15 | ) 16 | connection.execute( 17 | """ 18 | create table tbl2 as select * from (VALUES 19 | (11, 12, 13, 14, 15), 20 | (12, 13, 14, 15, 16), 21 | (13, 14, 15, 16, 17)) as tbl (A, B, C, D, E) 22 | """ 23 | ) 24 | 25 | query = """ 26 | select 27 | * 28 | from 29 | ( 30 | select A, B, C, D, 0 as E from tbl1 31 | ) 32 | union all ( 33 | select * from tbl2 34 | ) order by all 35 | """ 36 | res = connection.sql(query).fetchall() 37 | assert res == [ 38 | (1, 2, 3, 4, 0), 39 | (2, 3, 4, 5, 0), 40 | (3, 4, 5, 6, 0), 41 | (11, 12, 13, 14, 15), 42 | (12, 13, 14, 15, 16), 43 | (13, 14, 15, 16, 17), 44 | ] 45 | 46 | df_1 = connection.execute("FROM tbl1").df() # noqa: F841 47 | df_2 = connection.execute("FROM tbl2").df() # noqa: F841 48 | 49 | query = """ 50 | select 51 | * 52 | from 53 | ( 54 | select A, B, C, D, 0 as E from df_1 55 | ) 56 | union all ( 57 | select * from df_2 58 | ) order by all 59 | """ 60 | res = connection.sql(query).fetchall() 61 | assert res == [ 62 | (1, 2, 3, 4, 0), 63 | (2, 3, 4, 5, 0), 64 | (3, 4, 5, 6, 0), 65 | (11, 12, 13, 14, 15), 66 | (12, 13, 14, 15, 16), 67 | (13, 14, 15, 16, 17), 68 | ] 69 | -------------------------------------------------------------------------------- /tests/fast/test_version.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import duckdb 4 | 5 | 6 | def test_version(): 7 | assert duckdb.__version__ != "0.0.0" 8 | 9 | 10 | def test_formatted_python_version(): 11 | formatted_python_version = f"{sys.version_info.major}.{sys.version_info.minor}" 12 | assert duckdb.__formatted_python_version__ == formatted_python_version 13 | -------------------------------------------------------------------------------- /tests/fast/types/test_blob.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | 4 | class TestBlob: 5 | def 
test_blob(self, duckdb_cursor): 6 | duckdb_cursor.execute("SELECT BLOB 'hello'") 7 | results = duckdb_cursor.fetchall() 8 | assert results[0][0] == b"hello" 9 | 10 | duckdb_cursor.execute("SELECT BLOB 'hello' AS a") 11 | results = duckdb_cursor.fetchnumpy() 12 | assert results["a"] == numpy.array([b"hello"], dtype=object) 13 | -------------------------------------------------------------------------------- /tests/fast/types/test_boolean.py: -------------------------------------------------------------------------------- 1 | class TestBoolean: 2 | def test_bool(self, duckdb_cursor): 3 | duckdb_cursor.execute("SELECT TRUE") 4 | results = duckdb_cursor.fetchall() 5 | assert results[0][0] 6 | -------------------------------------------------------------------------------- /tests/fast/types/test_datetime_date.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import duckdb 4 | 5 | 6 | class TestDateTimeDate: 7 | def test_date_infinity(self): 8 | con = duckdb.connect() 9 | # Positive infinity 10 | con.execute("SELECT 'infinity'::DATE") 11 | result = con.fetchall() 12 | # datetime.date.max 13 | assert result == [(datetime.date(9999, 12, 31),)] 14 | 15 | con.execute("SELECT '-infinity'::DATE") 16 | result = con.fetchall() 17 | # datetime.date.min 18 | assert result == [(datetime.date(1, 1, 1),)] 19 | 20 | def test_date_infinity_roundtrip(self): 21 | con = duckdb.connect() 22 | 23 | # positive infinity 24 | con.execute("select $1, $1 = 'infinity'::DATE", [datetime.date.max]) 25 | res = con.fetchall() 26 | assert res == [(datetime.date.max, False)] 27 | 28 | # negative infinity 29 | con.execute("select $1, $1 = '-infinity'::DATE", [datetime.date.min]) 30 | res = con.fetchall() 31 | assert res == [(datetime.date.min, False)] 32 | -------------------------------------------------------------------------------- /tests/fast/types/test_decimal.py: 
class TestDecimal:
    """DECIMAL results: decimal.Decimal via fetchall, float arrays via fetchnumpy."""

    def test_decimal(self, duckdb_cursor):
        duckdb_cursor.execute(
            "SELECT 1.2::DECIMAL(4,1), 100.3::DECIMAL(9,1), 320938.4298::DECIMAL(18,4), 49082094824.904820482094::DECIMAL(30,12), NULL::DECIMAL"  # noqa: E501
        )
        # One DECIMAL per physical width (int16/int32/int64/hugeint) plus NULL.
        expected_row = (
            Decimal("1.2"),
            Decimal("100.3"),
            Decimal("320938.4298"),
            Decimal("49082094824.904820482094"),
            None,
        )
        assert duckdb_cursor.fetchall() == [expected_row]

    def test_decimal_numpy(self, duckdb_cursor):
        duckdb_cursor.execute(
            "SELECT 1.2::DECIMAL(4,1) AS a, 100.3::DECIMAL(9,1) AS b, 320938.4298::DECIMAL(18,4) AS c, 49082094824.904820482094::DECIMAL(30,12) AS d"  # noqa: E501
        )
        expected = {
            "a": numpy.array([1.2]),
            "b": numpy.array([100.3]),
            "c": numpy.array([320938.4298]),
            "d": numpy.array([49082094824.904820482094]),
        }
        assert duckdb_cursor.fetchnumpy() == expected
def check_result(duckdb_cursor, value, type):
    """Cast *value* to the SQL *type* and assert it round-trips unchanged."""
    duckdb_cursor.execute(f"SELECT {value}::{type}")
    (row,) = duckdb_cursor.fetchall()
    assert row[0] == value


class TestNumeric:
    def test_numeric_results(self, duckdb_cursor):
        # The value 1 is exactly representable in every tested type.
        for sql_type in ("TINYINT", "SMALLINT", "FLOAT"):
            check_result(duckdb_cursor, 1, sql_type)
class TestTimeTz:
    """A timezone-aware time in a pandas frame round-trips as an aware TIMETZ."""

    def test_time_tz(self, duckdb_cursor):
        df = pandas.DataFrame({"col1": [time(1, 2, 3, tzinfo=timezone.utc)]})  # noqa: F841

        sql = "SELECT * FROM df"
        duckdb_cursor.execute(sql)

        expected = datetime.time(1, 2, 3, tzinfo=datetime.timezone.utc)
        assert duckdb_cursor.fetchall() == [(expected,)]
class TestUDFTransactionality:
    """Interaction between streaming results and UDF registration."""

    # Known bug pinned by xfail: registering a UDF closes open stream results,
    # but a later fetchone() does not report the closure as expected.
    @pytest.mark.xfail(reason="fetchone() does not realize the stream result was closed before completion")
    def test_type_coverage(self, duckdb_cursor):
        # Start consuming a streaming result before the UDF is registered;
        # the statement order (fetch, then create_function) is the point of the test.
        rel = duckdb_cursor.sql("select * from range(4096)")
        res = rel.fetchone()
        assert res == (0,)

        def my_func(x: str) -> int:
            return int(x)

        # Registering the UDF invalidates the still-open stream result above.
        duckdb_cursor.create_function("test", my_func)

        # Expected behavior: continuing to fetch raises "result closed".
        with pytest.raises(duckdb.InvalidInputException, match="result closed"):
            res = rel.fetchone()
from .. import USE_ACTUAL_SPARK

# Re-export the catalog API from whichever backend this test run targets:
# DuckDB's Spark-compatible shim by default, real PySpark when opted in.
if not USE_ACTUAL_SPARK:
    from duckdb.experimental.spark.sql.catalog import *
else:
    from pyspark.sql.catalog import *
from .. import USE_ACTUAL_SPARK

# Re-export the sql.types API from whichever backend this test run targets:
# DuckDB's Spark-compatible shim by default, real PySpark when opted in.
if not USE_ACTUAL_SPARK:
    from duckdb.experimental.spark.sql.types import *
else:
    from pyspark.sql.types import *