├── src ├── vcpkg.json ├── README.md ├── include │ ├── httpfs_extension.hpp │ ├── hash_functions.hpp │ ├── httpfs_curl_client.hpp │ ├── httpfs_client.hpp │ ├── crypto.hpp │ ├── http_metadata_cache.hpp │ ├── create_secret_functions.hpp │ ├── hffs.hpp │ └── http_state.hpp ├── httpfs_client_wasm.cpp ├── CMakeLists.txt ├── httpfs_config.py ├── hash_functions.cpp └── http_state.cpp ├── vcpkg.json ├── data └── secrets │ ├── httpfs │ ├── s3_config_secret_v1_1_2.duckdb_secret │ ├── s3_config_secret_v1_1_3.duckdb_secret │ ├── s3_config_secret_v_1_0_0.duckdb_secret │ ├── s3_secret_chain_v_1_0_0.duckdb_secret │ ├── s3_secret_chain_v_1_1_2.duckdb_secret │ └── s3_secret_chain_v_1_1_3.duckdb_secret │ └── README.md ├── .gitmodules ├── Makefile ├── test ├── sql │ ├── copy │ │ ├── csv │ │ │ ├── test_url_with_plus.test │ │ │ ├── test_sniff_httpfs.test │ │ │ ├── parallel │ │ │ │ ├── test_parallel_csv.test │ │ │ │ └── csv_parallel_httpfs.test │ │ │ ├── test_12314.test_slow │ │ │ ├── test_csv_remote.test_slow │ │ │ ├── test_csv_httpfs.test_slow │ │ │ ├── glob │ │ │ │ └── copy_csv_glob_s3.test │ │ │ ├── test_csv_httpfs_prepared.test │ │ │ └── test_csv_remote.test │ │ ├── test_remote_head_forbidden.test │ │ ├── parquet │ │ │ ├── delta_byte_array_length_mismatch.test │ │ │ ├── snowflake_lineitem.test │ │ │ ├── parquet_5968.test │ │ │ ├── delta_byte_array_multiple_pages.test │ │ │ ├── parquet_boolean_page.test_slow │ │ │ ├── parquet_http_prefetch.test │ │ │ ├── parquet_encryption_mbedtls_openssl.test │ │ │ ├── parquet_2102.test_slow │ │ │ ├── parquet_encryption_httpfs.test │ │ │ ├── test_yellow_cab.test_slow │ │ │ ├── test_parquet_remote.test │ │ │ └── test_parquet_remote_foreign_files.test │ │ ├── s3 │ │ │ ├── s3_presigned_read.test │ │ │ ├── http_log.test │ │ │ ├── http_secret.test │ │ │ ├── s3_presigned_read.test_slow │ │ │ ├── csv_s3_file_size_bytes.test │ │ │ ├── hive_partitioned_write_s3.test_slow │ │ │ ├── upload_large_file.test_slow │ │ │ ├── glob_s3_paging.test_slow │ │ │ ├── upload_large_json_file.test_slow │ │ │ ├── parquet_s3_tpcds.test_slow │ │ │ ├── parquet_s3_tpch.test_slow │ │ │ ├── upload_file_parallel.test_slow │ │ │ ├── metadata_cache.test │ │ │ ├── download_config.test │ │ │ ├── http_proxy.test │ │ │ ├── s3_hive_partition.test │ │ │ └── upload_small_file.test │ │ ├── encryption │ │ │ └── different_aes_engines.test │ │ └── no_head_on_write.test │ ├── httpfs │ │ ├── internal_issue_2490.test │ │ └── hffs.test │ ├── crypto │ │ └── test_openssl_crypto.test │ ├── json │ │ └── table │ │ │ ├── read_json.test │ │ │ ├── read_json_objects.test │ │ │ └── read_json_auto.test_slow │ ├── attach │ │ ├── attach_remote.test │ │ ├── attach_httpfs.test │ │ ├── attach_s3.test │ │ └── attach_s3_tpch.test_slow │ ├── secret │ │ ├── test_secret_type.test │ │ ├── secret_s3_requester_pays.test │ │ ├── secret_aws.test │ │ ├── secret_refresh_attach.test │ │ ├── gcs_oauth.test │ │ └── secret_refresh.test │ ├── curl_client │ │ └── test_load_other_extensions.test │ ├── httpfs_client │ │ └── httpfs_client_implementation.test │ ├── storage │ │ ├── invalid_unicode_scrambled.test_slow │ │ ├── external_file_cache │ │ │ ├── external_file_cache_read_blob.test_slow │ │ │ └── external_file_cache_httpfs.test │ │ └── encryption │ │ │ └── temp_files │ │ │ └── encrypted_out_of_core.test_slow │ ├── extensions │ │ └── version_is_valid_httpfs.test │ ├── metadata_stats.test │ ├── secrets │ │ ├── secret_types_function.test │ │ ├── create_secret_invalid_map.test │ │ ├── create_secret_hffs.test │ │ ├── persistent_key_value_secret.test │ │ ├── 
create_secret_gcs.test_slow │ │ ├── create_secret_persistence_error_handling.test │ │ ├── create_secret_scope_matching.test │ │ ├── create_secret_non_writable_persistent_dir.test │ │ ├── create_secret_overwriting.test │ │ ├── create_secret_cascading.test_slow │ │ ├── create_secret_defaults.test │ │ ├── create_secret_settings.test │ │ ├── create_secret_r2.test │ │ ├── secret_compatibility_httpfs.test │ │ ├── create_secret_r2_serialization.test │ │ ├── create_secret_minio.test │ │ ├── create_secret.test_slow │ │ ├── create_secret_name_conflicts.test │ │ ├── create_secret_binding.test │ │ ├── create_secret_s3_serialization.test │ │ ├── create_secret_transactional.test │ │ └── create_secret_storage_backends.test │ ├── settings │ │ └── test_disabled_file_system_httpfs.test │ ├── full_file_download_fallback.test │ ├── test_headers_parsed.test │ ├── delete │ │ └── test_issue_1834.test_slow │ └── logging │ │ ├── http_logging.test │ │ └── file_system_logging.test ├── extension │ ├── duckdb_extension_settings.test │ ├── autoloading_load_only.test │ ├── autoloading_current_setting.test │ ├── autoloading_reset_setting.test │ ├── autoloading_filesystems.test │ └── autoloading_base.test └── README.md ├── extension_config.cmake ├── scripts ├── install_s3_test_server.sh ├── set_s3_test_server_variables.sh ├── generate_presigned_url.sh ├── run_s3_test_server.sh ├── run_squid.sh └── minio_s3.yml ├── .clang-format ├── LICENSE ├── .github └── workflows │ ├── MainDistributionPipeline.yml │ └── IntegrationTests.yml └── CMakeLists.txt /src/vcpkg.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": [ 3 | "openssl" 4 | ] 5 | } -------------------------------------------------------------------------------- /vcpkg.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": [ 3 | "openssl", 4 | "curl" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /src/README.md: -------------------------------------------------------------------------------- 1 | Documentation on S3 tests setup can be found [in the duckdb/duckdb repository](https://github.com/duckdb/duckdb/blob/main/test/sql/copy/s3/README.md) 2 | -------------------------------------------------------------------------------- /data/secrets/httpfs/s3_config_secret_v1_1_2.duckdb_secret: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-httpfs/HEAD/data/secrets/httpfs/s3_config_secret_v1_1_2.duckdb_secret -------------------------------------------------------------------------------- /data/secrets/httpfs/s3_config_secret_v1_1_3.duckdb_secret: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-httpfs/HEAD/data/secrets/httpfs/s3_config_secret_v1_1_3.duckdb_secret -------------------------------------------------------------------------------- /data/secrets/httpfs/s3_config_secret_v_1_0_0.duckdb_secret: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-httpfs/HEAD/data/secrets/httpfs/s3_config_secret_v_1_0_0.duckdb_secret -------------------------------------------------------------------------------- /data/secrets/httpfs/s3_secret_chain_v_1_0_0.duckdb_secret: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/duckdb/duckdb-httpfs/HEAD/data/secrets/httpfs/s3_secret_chain_v_1_0_0.duckdb_secret -------------------------------------------------------------------------------- /data/secrets/httpfs/s3_secret_chain_v_1_1_2.duckdb_secret: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-httpfs/HEAD/data/secrets/httpfs/s3_secret_chain_v_1_1_2.duckdb_secret -------------------------------------------------------------------------------- /data/secrets/httpfs/s3_secret_chain_v_1_1_3.duckdb_secret: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duckdb/duckdb-httpfs/HEAD/data/secrets/httpfs/s3_secret_chain_v_1_1_3.duckdb_secret -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "duckdb"] 2 | path = duckdb 3 | url = https://github.com/duckdb/duckdb.git 4 | [submodule "extension-ci-tools"] 5 | path = extension-ci-tools 6 | url = https://github.com/duckdb/extension-ci-tools.git 7 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) 2 | 3 | # Configuration of extension 4 | EXT_NAME=httpfs 5 | EXT_CONFIG=${PROJ_DIR}extension_config.cmake 6 | 7 | # Include the Makefile from extension-ci-tools 8 | include extension-ci-tools/makefiles/duckdb_extension.Makefile 9 | -------------------------------------------------------------------------------- /src/include/httpfs_extension.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | namespace duckdb { 6 | 7 | class HttpfsExtension : public Extension { 8 | public: 9 | void Load(ExtensionLoader &loader) override; 10 | std::string Name() override; 11 | std::string Version() const override; 12 | }; 13 | 14 | } // namespace duckdb 15 | -------------------------------------------------------------------------------- /test/sql/copy/csv/test_url_with_plus.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/csv/test_url_with_plus.test 2 | # description: Tests url with plus 3 | # group: [csv] 4 | 5 | require httpfs 6 | 7 | statement ok 8 | PRAGMA enable_verification 9 | 10 | statement ok 11 | FROM read_csv('https://d37ci6vzurychx.cloudfront.net/misc/taxi+_zone_lookup.csv'); 12 | -------------------------------------------------------------------------------- /extension_config.cmake: -------------------------------------------------------------------------------- 1 | # This file is included by DuckDB's build system. 
It specifies which extensions to load 2 | 3 | ################# HTTPFS 4 | duckdb_extension_load(json) 5 | duckdb_extension_load(parquet) 6 | 7 | duckdb_extension_load(httpfs 8 | SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} 9 | INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/src/include 10 | ) 11 | -------------------------------------------------------------------------------- /test/sql/copy/test_remote_head_forbidden.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/test_remote_head_forbidden.test 2 | # description: Test Force download with server that doesn't want to give us the head 3 | # group: [copy] 4 | 5 | require httpfs 6 | 7 | require json 8 | 9 | statement ok 10 | FROM read_json('https://api.spring.io/projects/spring-boot/generations') 11 | -------------------------------------------------------------------------------- /test/sql/httpfs/internal_issue_2490.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/httpfs/internal_issue_2490.test 2 | # description: Internal issue 2490 - Wrong URL encoding leads to 404 for redirects with httplib v0.14.3 3 | # group: [httpfs] 4 | 5 | require httpfs 6 | 7 | require parquet 8 | 9 | statement ok 10 | FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/us+er+da+ta.parquet' LIMIT 1; 11 | -------------------------------------------------------------------------------- /test/sql/copy/parquet/delta_byte_array_length_mismatch.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/parquet/delta_byte_array_length_mismatch.test 2 | # description: Test reading a Parquet file with a delta byte array length mismatch 3 | # group: [parquet] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | statement ok 10 | SELECT * FROM parquet_scan('https://github.com/duckdb/duckdb-data/releases/download/v1.0/delta_byte_array_length_mismatch.parquet') 11 | -------------------------------------------------------------------------------- /test/sql/copy/parquet/snowflake_lineitem.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/parquet/snowflake_lineitem.test 2 | # description: Test parquet file exported from snowflake 3 | # group: [parquet] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | statement ok 10 | CREATE TABLE snowflake_lineitem AS FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/snowflake_lineitem_export.parquet' 11 | -------------------------------------------------------------------------------- /test/sql/crypto/test_openssl_crypto.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/crypto/test_openssl_crypto.test 2 | # description: Test the openssl based crypto util 3 | # group: [crypto] 4 | 5 | require httpfs 6 | 7 | statement ok 8 | ATTACH '__TEST_DIR__/test_write_only.db' as enc (ENCRYPTION_KEY 'abcde', ENCRYPTION_CIPHER 'GCM'); 9 | 10 | statement ok 11 | CREATE TABLE enc.test AS SELECT 1 as a; 12 | 13 | query I 14 | FROM enc.test 15 | ---- 16 | 1 -------------------------------------------------------------------------------- /scripts/install_s3_test_server.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Note: needs sudo 3 | 4 | unamestr=$(uname) 5 | if [[ "$unamestr" == 'Linux' ]]; then 6 | apt-get install -y docker.io 7 | fi 8 | 9 | docker --version 10 | echo '127.0.0.1 duckdb-minio.com' >> /etc/hosts
11 | echo '127.0.0.1 test-bucket.duckdb-minio.com' >> /etc/hosts 12 | echo '127.0.0.1 test-bucket-2.duckdb-minio.com' >> /etc/hosts 13 | echo '127.0.0.1 test-bucket-public.duckdb-minio.com' >> /etc/hosts -------------------------------------------------------------------------------- /test/sql/json/table/read_json.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/json/table/read_json.test 2 | # description: Read json files straight to columnar data 3 | # group: [table] 4 | 5 | require json 6 | 7 | require httpfs 8 | 9 | query II 10 | select * from read_json_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/example_rn.ndjson'); 11 | ---- 12 | 1 O Brother, Where Art Thou? 13 | 2 Home for the Holidays 14 | 3 The Firm 15 | 4 Broadcast News 16 | 5 Raising Arizona 17 | -------------------------------------------------------------------------------- /test/sql/attach/attach_remote.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/attach/attach_remote.test 2 | # description: Test attaching of remote database 3 | # group: [attach] 4 | 5 | require httpfs 6 | 7 | statement error 8 | ATTACH 'https://duckdb.org/non_existing.db' AS db2 (READ_ONLY) 9 | ---- 10 | 11 | statement error 12 | ATTACH 'https://duckdb.org/non_existing.db' AS db2 13 | ---- 14 | 15 | statement error 16 | ATTACH 'https://duckdb.org/non_existing.db' AS db2 (READ_WRITE) 17 | ---- 18 | -------------------------------------------------------------------------------- /test/sql/copy/csv/test_sniff_httpfs.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/csv/test_sniff_httpfs.test 2 | # description: Test sniff_csv functions over httpfs with auto-detection on compression 3 | # group: [csv] 4 | 5 | require httpfs 6 | 7 | statement ok 8 | PRAGMA enable_verification 9 | 10 | statement ok 11 | from sniff_csv('https://github.com/duckdb/duckdb/raw/main/data/csv/who.csv.gz'); 12 | 13 | statement ok 14 | from sniff_csv('https://github.com/duckdb/duckdb/raw/main/data/csv/who.csv.gz?v=1'); 15 | -------------------------------------------------------------------------------- /scripts/set_s3_test_server_variables.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Run this script with 'source' or the shorthand: '.': 4 | # i.e: source scripts/set_s3_test_server_variables.sh 5 | 6 | # Enable the S3 tests to run 7 | export S3_TEST_SERVER_AVAILABLE=1 8 | 9 | export AWS_DEFAULT_REGION=eu-west-1 10 | export AWS_ACCESS_KEY_ID=minio_duckdb_user 11 | export AWS_SECRET_ACCESS_KEY=minio_duckdb_user_password 12 | export DUCKDB_S3_ENDPOINT=duckdb-minio.com:9000 13 | export DUCKDB_S3_USE_SSL=false 14 | -------------------------------------------------------------------------------- /test/sql/secret/test_secret_type.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secret/test_secret_type.test 2 | # description: Test the secret types added by this extension 3 | # group: [secret] 4 | 5 | require httpfs 6 | 7 | statement ok 8 | LOAD httpfs 9 | 10 | statement ok 11 | PRAGMA enable_verification 12 | 13 | query II 14 | SELECT type, default_provider from duckdb_secret_types() where extension='httpfs' order by type; 15 | ---- 16 | aws config 17 | gcs config 18 | huggingface config 19 | r2 config 20 | s3 config 21 | 
-------------------------------------------------------------------------------- /data/secrets/README.md: -------------------------------------------------------------------------------- 1 | # Test secrets 2 | DuckDB only allows persistent secrets with the x00 permission (e.g. 600 or 700). Therefore, to use these 3 | secrets, the permissions need to be set before running any tests that use them. 4 | 5 | The recommended way to add tests that touch these persistent secret files is to put them behind a 6 | ```shell 7 | require-env TEST_PERSISTENT_SECRETS_AVAILABLE 8 | ``` 9 | statement, which ensures the tests only run in CI jobs where the permissions are set correctly. 10 | 11 | -------------------------------------------------------------------------------- /test/sql/curl_client/test_load_other_extensions.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/curl_client/test_load_other_extensions.test 2 | # description: When using the curl client, test loading other extensions 3 | # group: [curl_client] 4 | 5 | require httpfs 6 | 7 | # Do not ignore 'HTTP' error messages! 8 | set ignore_error_messages 9 | 10 | statement ok 11 | SET httpfs_client_implementation='curl'; 12 | 13 | statement error 14 | INSTALL non_existent_extension; 15 | ---- 16 | :.*HTTP Error: Failed to download extension.* 17 | -------------------------------------------------------------------------------- /test/sql/httpfs_client/httpfs_client_implementation.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/httpfs_client/httpfs_client_implementation.test 2 | # description: Tests basic values for httpfs_client_implementation 3 | # group: [httpfs_client] 4 | 5 | require httpfs 6 | 7 | statement ok 8 | set httpfs_client_implementation = 'default'; 9 | 10 | statement ok 11 | set httpfs_client_implementation = 'httplib'; 12 | 13 | statement error 14 | set httpfs_client_implementation = 'something else'; 15 | ---- 16 | Unsupported option for httpfs_client_implementation 17 | -------------------------------------------------------------------------------- /src/include/hash_functions.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/helper.hpp" 4 | 5 | namespace duckdb { 6 | 7 | typedef unsigned char hash_bytes[32]; 8 | typedef unsigned char hash_str[64]; 9 | 10 | void sha256(const char *in, size_t in_len, hash_bytes &out); 11 | 12 | void hmac256(const std::string &message, const char *secret, size_t secret_len, hash_bytes &out); 13 | 14 | void hmac256(std::string message, hash_bytes secret, hash_bytes &out); 15 | 16 | void hex256(hash_bytes &in, hash_str &out); 17 | 18 | } // namespace duckdb 19 | -------------------------------------------------------------------------------- /test/sql/storage/invalid_unicode_scrambled.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/storage/invalid_unicode_scrambled.test_slow 2 | # description: Issue #1650 - "invalid unicode detected in segment statistics" when inserting structs with strings and NULL values 3 | # group: [storage] 4 | 5 | require httpfs 6 | 7 | require parquet 8 | 9 | statement ok 10 | create or replace table blah as (with 11 | us as (select distinct * from 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/invalid_unicode_scrambled.parquet') select Address from 12 | us); 13 | 14 | 15 |
-------------------------------------------------------------------------------- /test/sql/copy/parquet/parquet_5968.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/parquet/parquet_5968.test 2 | # description: Issue #5968: Segmentation fault on reading parquet file 3 | # group: [parquet] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | statement ok 10 | CREATE TABLE issue_5968 AS FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/issue_5968.parquet'; 11 | 12 | query I 13 | SELECT COUNT(*) FROM issue_5968 14 | ---- 15 | 2028587 16 | 17 | query I 18 | SELECT * FROM issue_5968 LIMIT 5 19 | ---- 20 | B00001 21 | B00001 22 | B00009 23 | B00009 24 | B00009 25 | -------------------------------------------------------------------------------- /test/sql/extensions/version_is_valid_httpfs.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/extensions/version_is_valid_httpfs.test 2 | # description: Test version metadata on load 3 | # group: [extensions] 4 | 5 | require-env LOCAL_EXTENSION_REPO 6 | 7 | require httpfs 8 | 9 | statement ok 10 | SET autoinstall_known_extensions=true; 11 | 12 | statement ok 13 | SET autoload_known_extensions=true; 14 | 15 | statement ok 16 | SET enable_server_cert_verification = true; 17 | 18 | query I 19 | SELECT count(*) FROM duckdb_extensions() WHERE extension_version != '' AND extension_name == 'httpfs'; 20 | ---- 21 | 1 22 | -------------------------------------------------------------------------------- /src/httpfs_client_wasm.cpp: -------------------------------------------------------------------------------- 1 | #include "httpfs_client.hpp" 2 | #include "http_state.hpp" 3 | 4 | namespace duckdb { 5 | 6 | unique_ptr<HTTPClient> HTTPFSUtil::InitializeClient(HTTPParams &http_params, const string &proto_host_port) { 7 | throw InternalException("HTTPFSUtil::InitializeClient is not expected to be called"); 8 | } 9 | 10 | unordered_map<string, string> HTTPFSUtil::ParseGetParameters(const string &text) { 11 | unordered_map<string, string> result; 12 | // TODO: HTTPFSUtil::ParseGetParameters is currently not implemented 13 | return result; 14 | } 15 | 16 | } // namespace duckdb 17 | -------------------------------------------------------------------------------- /test/sql/copy/csv/parallel/test_parallel_csv.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/csv/parallel/test_parallel_csv.test 2 | # description: Test parallel read CSV function on GitHub bugs 3 | # group: [parallel] 4 | 5 | # TODO: figure out where that bucket went 6 | mode skip 7 | 8 | require httpfs 9 | 10 | query II 11 | select * from read_csv_auto("https://duckdb-public-gzip-test.s3.us-east-2.amazonaws.com/test.csv", header = 0); 12 | ---- 13 | foo bar 14 | foo bar 15 | 16 | 17 | query II 18 | from read_csv_auto("https://duckdb-public-gzip-test.s3.us-east-2.amazonaws.com/test.csv.gz", header = 0); 19 | ---- 20 | foo bar 21 | foo bar 22 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(HTTPFS_SOURCES 2 | hffs.cpp 3 | s3fs.cpp 4 | httpfs.cpp 5 | http_state.cpp 6 | crypto.cpp 7 | hash_functions.cpp 8 | create_secret_functions.cpp 9 | httpfs_extension.cpp) 10 | if(NOT EMSCRIPTEN) 11 | set(HTTPFS_SOURCES ${HTTPFS_SOURCES} crypto.cpp httpfs_httplib_client.cpp 12 | httpfs_curl_client.cpp) 13 | else() 14 |
set(HTTPFS_SOURCES ${HTTPFS_SOURCES} httpfs_client_wasm.cpp) 15 | endif() 16 | 17 | add_library(httpfs_library OBJECT ${HTTPFS_SOURCES}) 18 | set(ALL_OBJECT_FILES 19 | ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:httpfs_library> 20 | PARENT_SCOPE) 21 | -------------------------------------------------------------------------------- /test/sql/metadata_stats.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/metadata_stats.test 2 | # description: Test getting metadata stats 3 | # group: [sql] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require json 10 | 11 | # Test Force download with server that doesn't want to give us the head 12 | statement ok 13 | FROM read_json('https://api.spring.io/projects/spring-boot/generations') 14 | 15 | statement ok 16 | SET force_download=false; 17 | 18 | query II 19 | explain analyze SELECT id, first_name, last_name, email FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') 20 | ---- 21 | analyzed_plan :.*GET: 2.* 22 | -------------------------------------------------------------------------------- /src/httpfs_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # list all include directories 4 | include_directories = [ 5 | os.path.sep.join(x.split('/')) for x in ['src/include', 'third_party/httplib', 'extension/parquet/include'] 6 | ] 7 | # source files 8 | source_files = [ 9 | os.path.sep.join(x.split('/')) 10 | for x in [ 11 | 'src/' + s 12 | for s in [ 13 | 'create_secret_functions.cpp', 14 | 'crypto.cpp', 15 | 'hffs.cpp', 16 | 'http_state.cpp', 17 | 'httpfs.cpp', 18 | 'httpfs_extension.cpp', 19 | 'httpfs_client.cpp', 20 | 's3fs.cpp', 21 | ] 22 | ] 23 | ] 24 | -------------------------------------------------------------------------------- /test/sql/copy/csv/test_12314.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/csv/test_12314.test_slow 2 | # description: Test CSV reading for issue 12314 3 | # group: [csv] 4 | 5 | require httpfs 6 | 7 | statement ok 8 | PRAGMA enable_verification 9 | 10 | statement error 11 | from read_csv('https://github.com/duckdb/duckdb-data/releases/download/v1.0/sample_data_12314.csv.gz',HEADER = 1, PARALLEL=false); 12 | ---- 13 | Change the maximum length size, e.g., max_line_size=2097408 14 | 15 | query I 16 | select count(*) from read_csv('https://github.com/duckdb/duckdb-data/releases/download/v1.0/sample_data_12314.csv.gz',HEADER = 1, PARALLEL=false , max_line_size=2097408); 17 | ---- 18 | 26238 19 | -------------------------------------------------------------------------------- /test/sql/copy/csv/test_csv_remote.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/csv/test_csv_remote.test_slow 2 | # description: Test reading csv files over http, slow queries 3 | # group: [csv] 4 | 5 | statement ok 6 | pragma enable_verification; 7 | 8 | require httpfs 9 | 10 | # Read a compressed file (~44MB compressed, ~700MB uncompressed) over HTTP 11 | query IIIIII 12 | select count(*), min(strain), max(strain), min(strlen(sequence)), max(strlen(sequence)), avg(strlen(sequence)) 13 | from read_csv_auto('https://raw.githubusercontent.com/duckdb/duckdb/main/data/csv/sequences.csv.gz', delim=','); 14 | ---- 15 | 100000 ARG/Cordoba-1006-155/2020 tiger/NY/040420/2020 17340 30643 29821.264410 16 |
-------------------------------------------------------------------------------- /test/sql/copy/parquet/delta_byte_array_multiple_pages.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/parquet/delta_byte_array_multiple_pages.test 2 | # description: Test delta byte array parquet file with multiple pages 3 | # group: [parquet] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | statement ok 10 | CREATE TABLE delta_byte_array AS SELECT * FROM parquet_scan('https://github.com/duckdb/duckdb-data/releases/download/v1.0/delta_byte_array_multiple_pages.parquet') 11 | 12 | query I 13 | SELECT COUNT(*) FROM delta_byte_array 14 | ---- 15 | 100000 16 | 17 | query II 18 | SELECT min(strlen(json_column)), max(strlen(json_column)) FROM delta_byte_array 19 | ---- 20 | 54 54 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /test/sql/secrets/secret_types_function.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/secret_types_function.test 2 | # description: Test duckdb_secret_types function 3 | # group: [secrets] 4 | 5 | mode skip 6 | 7 | query III 8 | FROM duckdb_secret_types() WHERE type IN ['s3', 'r2', 'gcs', 'http'] ORDER BY type 9 | ---- 10 | http config (empty) 11 | 12 | require httpfs 13 | 14 | require no_extension_autoloading "EXPECTED: The duckdb_secret_types() function does not trigger autoloading httpfs" 15 | 16 | query III 17 | FROM duckdb_secret_types() WHERE type IN ['s3', 'r2', 'gcs', 'http'] ORDER BY type 18 | ---- 19 | gcs config httpfs 20 | http config (empty) 21 | r2 config httpfs 22 | s3 config httpfs 23 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_invalid_map.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_invalid_map.test 2 | # description: Test throwing input errors on multi map input. 
3 | # group: [secrets] 4 | 5 | require httpfs 6 | 7 | statement ok 8 | PRAGMA enable_verification; 9 | 10 | statement error 11 | CREATE PERSISTENT SECRET http_multimap ( 12 | TYPE HTTP, 13 | EXTRA_HTTP_HEADERS MAP{123: 'quack1', 123 : 'quack2'} 14 | ); 15 | ---- 16 | :Invalid Input Error.*Map keys must be unique.* 17 | 18 | statement error 19 | CREATE PERSISTENT SECRET http_multimap ( 20 | TYPE HTTP, 21 | EXTRA_HTTP_HEADERS MAP{NULL: 'quack1', 123 : 'quack2'} 22 | ); 23 | ---- 24 | :Invalid Input Error.*Map keys can not be NULL.* -------------------------------------------------------------------------------- /test/sql/copy/parquet/parquet_boolean_page.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/parquet/parquet_boolean_page.test_slow 2 | # description: Test that boolean values that cross column pages are correctly read 3 | # group: [parquet] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | statement ok 10 | PRAGMA enable_verification 11 | 12 | query IIIII 13 | SELECT 14 | SUM(CASE WHEN is_successful THEN 1 ELSE 0 END), 15 | SUM(CASE WHEN advanced_on_error_flag THEN 1 ELSE 0 END), 16 | SUM(CASE WHEN safe_on_error_flag THEN 1 ELSE 0 END), 17 | SUM(CASE WHEN rbi_flag THEN 1 ELSE 0 END), 18 | SUM(CASE WHEN team_unearned_flag THEN 1 ELSE 0 END) 19 | FROM read_parquet('https://github.com/duckdb/duckdb-data/releases/download/v1.0/event_baserunning_advance_attempt.parquet'); 20 | ---- 21 | 9252616 111041 7120 1609612 1860 22 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_hffs.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_hffs.test 2 | # description: Test huggingface secrets 3 | # group: [secrets] 4 | 5 | statement ok 6 | PRAGMA enable_verification; 7 | 8 | require httpfs 9 | 10 | statement ok 11 | set allow_persistent_secrets=false; 12 | 13 | # Manually setting token is simplest 14 | statement ok 15 | CREATE SECRET hf1 ( 16 | TYPE HUGGINGFACE, 17 | TOKEN 'bla' 18 | ) 19 | 20 | # Cache provider will automatically try to fetch the token from the cache 21 | statement ok 22 | CREATE SECRET hf2 ( 23 | TYPE HUGGINGFACE, 24 | PROVIDER 'credential_chain' 25 | ) 26 | 27 | query IIII 28 | SELECT name, type, provider, scope FROM duckdb_secrets() order by name; 29 | ---- 30 | hf1 huggingface config ['hf://'] 31 | hf2 huggingface credential_chain ['hf://'] 32 | -------------------------------------------------------------------------------- /test/sql/secrets/persistent_key_value_secret.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/persistent_key_value_secret.test 2 | # group: [secrets] 3 | 4 | load __TEST_DIR__/persistent_extra_headers 5 | 6 | require httpfs 7 | 8 | require json 9 | 10 | statement ok 11 | CREATE PERSISTENT SECRET http ( 12 | TYPE HTTP, 13 | EXTRA_HTTP_HEADERS MAP { 14 | 'Authorization': 'Bearer sk_test_not_valid_key' 15 | } 16 | ); 17 | 18 | restart 19 | 20 | # Because this is an https host, the 'EXTRA_HTTP_HEADERS' will be used, as long as this doesn't crash anything 21 | # we are happy with this test throwing an IO error. 
22 | statement error 23 | select 24 | unnest(data) as customers 25 | from 26 | read_json('https://non.existant/endpoint'); 27 | ---- 28 | IO Error: Could not establish connection error for HTTP HEAD to 'https://non.existant/endpoint' 29 | -------------------------------------------------------------------------------- /test/extension/duckdb_extension_settings.test: -------------------------------------------------------------------------------- 1 | # name: test/extension/duckdb_extension_settings.test 2 | # description: settings for extensions 3 | # group: [extension] 4 | 5 | # TODO: move back to duckdb/duckdb 6 | mode skip 7 | 8 | require httpfs 9 | 10 | statement ok 11 | SET autoinstall_known_extensions = true; 12 | 13 | statement ok 14 | SET autoload_known_extensions = true; 15 | 16 | statement ok 17 | SET extension_directory = '__TEST_DIR__/custom_extension_directory'; 18 | 19 | statement ok 20 | SET custom_extension_repository = '__TEST_DIR__/not_existing_folder' 21 | 22 | statement error 23 | FROM read_csv('https://some.org/file.csv'); 24 | ---- 25 | not_existing_folder 26 | 27 | statement ok 28 | SET autoinstall_extension_repository = '__TEST_DIR__/other_folder'; 29 | 30 | statement error 31 | FROM read_csv('https://some.org/file.csv'); 32 | ---- 33 | other_folder 34 | -------------------------------------------------------------------------------- /test/sql/settings/test_disabled_file_system_httpfs.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/settings/test_disabled_file_system_httpfs.test 2 | # description: Test disabled file systems with HTTPFS 3 | # group: [settings] 4 | 5 | require skip_reload 6 | 7 | require no_extension_autoloading "EXPECTED: Test disable loading from local file system" 8 | 9 | statement ok 10 | PRAGMA enable_verification 11 | 12 | require httpfs 13 | 14 | statement ok 15 | SET disabled_filesystems='LocalFileSystem'; 16 | 17 | # httpfs works 18 | statement ok 19 | from read_csv_auto('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv'); 20 | 21 | statement ok 22 | SET disabled_filesystems='LocalFileSystem,HTTPFileSystem'; 23 | 24 | # not if we disable it 25 | statement error 26 | from read_csv_auto('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv'); 27 | ---- 28 | File system HTTPFileSystem has been disabled by configuration 29 | -------------------------------------------------------------------------------- /test/sql/storage/external_file_cache/external_file_cache_read_blob.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/storage/external_file_cache/external_file_cache_read_blob.test_slow 2 | # description: Test the external file cache for read_blob HTTPFS reads 3 | # group: [external_file_cache] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | # first read_blob should do 1 GET 10 | query II 11 | explain analyze from read_blob('https://blobs.duckdb.org/data/shakespeare.parquet'); 12 | ---- 13 | analyzed_plan :.*GET: 1.* 14 | 15 | # second one should do 0 16 | query II 17 | explain analyze from read_blob('https://blobs.duckdb.org/data/shakespeare.parquet'); 18 | ---- 19 | analyzed_plan :.*GET: 0.* 20 | 21 | # although the read was cached using read_blob, the parquet reader can read from cache 22 | query II 23 | explain analyze from 'https://blobs.duckdb.org/data/shakespeare.parquet'; 24 | ---- 25 | analyzed_plan :.*GET: 0.* 26 | 
-------------------------------------------------------------------------------- /scripts/generate_presigned_url.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #Note: DONT run as root 3 | 4 | DUCKDB_PATH=duckdb 5 | if command -v duckdb; then 6 | DUCKDB_PATH=duckdb 7 | elif test -f build/release/duckdb; then 8 | DUCKDB_PATH=build/release/duckdb 9 | elif test -f build/reldebug/duckdb; then 10 | DUCKDB_PATH=build/reldebug/duckdb 11 | elif test -f build/debug/duckdb; then 12 | DUCKDB_PATH=build/debug/duckdb 13 | fi 14 | 15 | rm -rf test/test_data 16 | mkdir -p test/test_data 17 | 18 | generate_large_parquet_query=$(cat <> 4) & 0xF]; 24 | pout[1] = hex[*pin & 0xF]; 25 | } 26 | } 27 | 28 | } // namespace duckdb 29 | -------------------------------------------------------------------------------- /test/sql/attach/attach_httpfs.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/attach/attach_httpfs.test 2 | # description: Test attach using httpfs 3 | # group: [attach] 4 | 5 | require httpfs 6 | 7 | require-env S3_TEST_SERVER_AVAILABLE 1 8 | 9 | require-env AWS_DEFAULT_REGION 10 | 11 | require-env AWS_ACCESS_KEY_ID 12 | 13 | require-env AWS_SECRET_ACCESS_KEY 14 | 15 | require-env DUCKDB_S3_ENDPOINT 16 | 17 | require-env DUCKDB_S3_USE_SSL 18 | 19 | require-env S3_ATTACH_DB_PRESIGNED_URL 20 | 21 | # ATTACH a DuckDB database over HTTPFS 22 | statement ok 23 | ATTACH '${S3_ATTACH_DB_PRESIGNED_URL}' AS db (READONLY 1); 24 | 25 | query IIIII 26 | SELECT * FROM db.integral_values 27 | ---- 28 | 1 2 3 4 5 29 | NULL NULL NULL NULL NULL 30 | 31 | statement error 32 | CREATE TABLE db.integers(i INTEGER); 33 | ---- 34 | read-only 35 | 36 | statement ok 37 | SELECT * FROM db.all_types 38 | 39 | statement error 40 | SELECT * FROM db.all_typez 41 | ---- 42 | all_types 43 | 44 | statement ok 45 | DETACH db 46 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_gcs.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_gcs.test_slow 2 | # description: Test secret creation using the default gcs secret provider 3 | # group: [secrets] 4 | 5 | statement ok 6 | PRAGMA enable_verification; 7 | 8 | require httpfs 9 | 10 | # Ensure any currently stored secrets don't interfere with the test 11 | statement ok 12 | set allow_persistent_secrets=false; 13 | 14 | statement ok 15 | reset s3_use_ssl; 16 | 17 | # GCS Secrets automatically default to the correct endpoint for Google Cloud Storage 18 | statement ok 19 | CREATE SECRET ( 20 | TYPE GCS, 21 | KEY_ID 'my_key', 22 | SECRET 'my_secret' 23 | ) 24 | 25 | # The secret will be created for the default scope 26 | query IIII 27 | SELECT name, type, provider, scope FROM duckdb_secrets(); 28 | ---- 29 | __default_gcs gcs config ['gcs://', 'gs://'] 30 | 31 | statement error 32 | FROM 'gcs://test-bucket/test.csv' 33 | ---- 34 | https://storage.googleapis.com/test-bucket/test.csv 35 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: LLVM 3 | SortIncludes: false 4 | TabWidth: 4 5 | IndentWidth: 4 6 | ColumnLimit: 120 7 | AllowShortFunctionsOnASingleLine: false 8 | --- 9 | UseTab: ForIndentation 10 | DerivePointerAlignment: false 11 | PointerAlignment: Right 12 | 
AlignConsecutiveMacros: true 13 | AlignTrailingComments: true 14 | AllowAllArgumentsOnNextLine: true 15 | AllowAllConstructorInitializersOnNextLine: true 16 | AllowAllParametersOfDeclarationOnNextLine: true 17 | AlignAfterOpenBracket: Align 18 | SpaceBeforeCpp11BracedList: true 19 | SpaceBeforeCtorInitializerColon: true 20 | SpaceBeforeInheritanceColon: true 21 | SpacesInAngles: false 22 | SpacesInCStyleCastParentheses: false 23 | SpacesInConditionalStatement: false 24 | AllowShortLambdasOnASingleLine: Inline 25 | AllowShortLoopsOnASingleLine: false 26 | AlwaysBreakTemplateDeclarations: Yes 27 | IncludeBlocks: Regroup 28 | Language: Cpp 29 | AccessModifierOffset: -4 30 | --- 31 | Language: Java 32 | SpaceAfterCStyleCast: true 33 | --- 34 | -------------------------------------------------------------------------------- /test/sql/storage/external_file_cache/external_file_cache_httpfs.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/storage/external_file_cache/external_file_cache_httpfs.test 2 | # description: Test the external file cache for HTTPFS reads 3 | # group: [external_file_cache] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | # first query caches the data 10 | statement ok 11 | from 'https://blobs.duckdb.org/data/shakespeare.parquet'; 12 | 13 | # second query should only have a head request, no gets 14 | query II 15 | explain analyze from 'https://blobs.duckdb.org/data/shakespeare.parquet'; 16 | ---- 17 | analyzed_plan :.*GET: 0.* 18 | 19 | statement ok 20 | SET enable_http_metadata_cache = true; 21 | 22 | # first query saves the metadata (and data, but that was already there) 23 | statement ok 24 | from 'https://blobs.duckdb.org/data/shakespeare.parquet'; 25 | 26 | # second query should do no HEAD and no GET 27 | query II 28 | explain analyze from 'https://blobs.duckdb.org/data/shakespeare.parquet'; 29 | ---- 30 | analyzed_plan :.*HEAD: 0.* 31 | -------------------------------------------------------------------------------- /test/sql/copy/s3/s3_presigned_read.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/s3_presigned_read.test 2 | # description: Read small csv/parquet files from S3 Presigned URL. 
3 | # group: [s3] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require-env S3_TEST_SERVER_AVAILABLE 1 10 | 11 | # Require that these environment variables are also set 12 | 13 | require-env AWS_DEFAULT_REGION 14 | 15 | require-env AWS_ACCESS_KEY_ID 16 | 17 | require-env AWS_SECRET_ACCESS_KEY 18 | 19 | require-env DUCKDB_S3_ENDPOINT 20 | 21 | require-env DUCKDB_S3_USE_SSL 22 | 23 | require-env S3_SMALL_CSV_PRESIGNED_URL 24 | 25 | require-env S3_SMALL_PARQUET_PRESIGNED_URL 26 | 27 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 28 | set ignore_error_messages 29 | 30 | query I 31 | SELECT phone FROM read_csv_auto('${S3_SMALL_CSV_PRESIGNED_URL}'); 32 | ---- 33 | +318855443322 34 | +552244331122 35 | +12233445567 36 | 37 | query I 38 | SELECT i FROM '${S3_SMALL_PARQUET_PRESIGNED_URL}'; 39 | ---- 40 | 1 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018-2025 Stichting DuckDB Foundation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | -------------------------------------------------------------------------------- /test/sql/copy/s3/http_log.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/http_log.test 2 | # description: Test http logger 3 | # group: [s3] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require-env S3_TEST_SERVER_AVAILABLE 1 10 | 11 | require-env AWS_DEFAULT_REGION 12 | 13 | require-env AWS_ACCESS_KEY_ID 14 | 15 | require-env AWS_SECRET_ACCESS_KEY 16 | 17 | require-env DUCKDB_S3_ENDPOINT 18 | 19 | require-env DUCKDB_S3_USE_SSL 20 | 21 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 22 | set ignore_error_messages 23 | 24 | # Create some test data 25 | statement ok 26 | COPY (SELECT 'value-1' as value) TO 's3://test-bucket/http_log/test.parquet'; 27 | 28 | statement ok 29 | CALL enable_logging('HTTP') 30 | 31 | statement ok 32 | set logging_level='debug' 33 | 34 | query I 35 | FROM 's3://test-bucket/http_log/test.parquet' 36 | ---- 37 | value-1 38 | 39 | query II rowsort 40 | SELECT request.type, parse_filename(request.url) FROM duckdb_logs_parsed('HTTP'); 41 | ---- 42 | GET test.parquet 43 | HEAD test.parquet 44 | -------------------------------------------------------------------------------- /test/sql/copy/s3/http_secret.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/http_secret.test 2 | # description: Test http secret 3 | # group: [s3] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require-env S3_TEST_SERVER_AVAILABLE 1 10 | 11 | require-env AWS_DEFAULT_REGION 12 | 13 | require-env AWS_ACCESS_KEY_ID 14 | 15 | require-env AWS_SECRET_ACCESS_KEY 16 | 17 | require-env DUCKDB_S3_ENDPOINT 18 | 19 | require-env DUCKDB_S3_USE_SSL 20 | 21 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 22 | set ignore_error_messages 23 | 24 | # Create some test data 25 | statement ok 26 | COPY (SELECT 'value-1' as value) TO 's3://test-bucket/http-secret-test/test.parquet'; 27 | 28 | statement ok 29 | PRAGMA enable_verification 30 | 31 | # Create some wonky headers 32 | statement ok 33 | CREATE SECRET http3 ( 34 | TYPE HTTP, 35 | EXTRA_HTTP_HEADERS MAP{ 36 | 'Authorization': 'Im very important', 37 | 'CustomHeader': 'fliepflap' 38 | } 39 | ); 40 | 41 | query I 42 | FROM 's3://test-bucket/http-secret-test/test.parquet' 43 | ---- 44 | value-1 45 | -------------------------------------------------------------------------------- /test/sql/full_file_download_fallback.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/full_file_download_fallback.test 2 | # group: [sql] 3 | 4 | require parquet 5 | 6 | require httpfs 7 | 8 | require tpch 9 | 10 | require-env PYTHON_HTTP_SERVER_URL 11 | 12 | require-env PYTHON_HTTP_SERVER_DIR 13 | 14 | statement ok 15 | CALL enable_logging(); 16 | 17 | statement ok 18 | call dbgen(sf=1); 19 | 20 | statement ok 21 | copy lineitem to '${PYTHON_HTTP_SERVER_DIR}/lineitem.csv' 22 | 23 | statement ok 24 | drop table lineitem; 25 | 26 | statement ok 27 | CREATE view lineitem AS FROM '${PYTHON_HTTP_SERVER_URL}/lineitem.csv'; 28 | 29 | query I 30 | pragma tpch(6); 31 | ---- 32 | 123141078.22829981 33 | 34 | query I 35 | select count(*) from duckdb_logs where log_level='WARN' and message like '%Falling back to full%' 36 | ---- 37 | 2 38 | 39 | statement ok 40 | 
set auto_fallback_to_full_download=false 41 | 42 | statement error 43 | pragma tpch(6); 44 | ---- 45 | HTTP Error: Content-Length from server mismatches requested range, server may not support range requests. You can try to resolve this by enabling `SET force_download=true` 46 | 47 | -------------------------------------------------------------------------------- /test/sql/secret/secret_s3_requester_pays.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secret/secret_s3_requester_pays.test 2 | # description: Tests secret refreshing with AWS requester pays mode 3 | # group: [secret] 4 | 5 | require-env S3_TEST_SERVER_AVAILABLE 1 6 | 7 | require-env AWS_DEFAULT_REGION 8 | 9 | require-env AWS_ACCESS_KEY_ID 10 | 11 | require-env AWS_SECRET_ACCESS_KEY 12 | 13 | require-env DUCKDB_S3_ENDPOINT 14 | 15 | require-env DUCKDB_S3_USE_SSL 16 | 17 | require httpfs 18 | 19 | require parquet 20 | 21 | statement ok 22 | SET enable_logging=true 23 | 24 | statement ok 25 | set s3_use_ssl='${DUCKDB_S3_USE_SSL}' 26 | 27 | statement ok 28 | set s3_endpoint='${DUCKDB_S3_ENDPOINT}' 29 | 30 | statement ok 31 | set s3_region='${AWS_DEFAULT_REGION}' 32 | 33 | # Create some test data 34 | statement ok 35 | CREATE SECRET s1 ( 36 | TYPE S3, 37 | KEY_ID '${AWS_ACCESS_KEY_ID}', 38 | SECRET '${AWS_SECRET_ACCESS_KEY}', 39 | REQUESTER_PAYS true 40 | ) 41 | 42 | statement ok 43 | copy (select 1 as a) to 's3://test-bucket/test-file.parquet' 44 | 45 | query I 46 | FROM "s3://test-bucket/test-file.parquet" 47 | ---- 48 | 1 -------------------------------------------------------------------------------- /test/sql/copy/csv/test_csv_httpfs.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/csv/test_csv_httpfs.test_slow 2 | # description: This test triggers the http prefetch mechanism. 3 | # group: [csv] 4 | 5 | statement ok 6 | pragma enable_verification; 7 | 8 | require httpfs 9 | 10 | require parquet 11 | 12 | #FIXME: remote changed? 13 | mode skip 14 | 15 | # Add test for 3731 16 | query I 17 | SELECT count(*) FROM read_csv_auto('https://datasets.imdbws.com/name.basics.tsv.gz', delim='\t', quote='') 18 | ---- 19 | 12783090 20 | 21 | query I 22 | copy ( 23 | SELECT * 24 | REPLACE ( 25 | str_split(primaryProfession,',') as primaryProfession, 26 | str_split(knownForTitles,',') as knownForTitles, 27 | case WHEN regexp_matches(deathYear,'[0-9]+') THEN CAST(deathYear as integer) END as deathYear, 28 | case WHEN regexp_matches(birthYear,'[0-9]+') THEN CAST(birthYear as integer) END as birthYear 29 | ) 30 | FROM read_csv_auto('https://datasets.imdbws.com/name.basics.tsv.gz', delim='\t', quote='') 31 | ) to '__TEST_DIR__/name_basics.parquet' (FORMAT 'parquet', CODEC 'ZSTD') 32 | ---- 33 | 12783090 34 | -------------------------------------------------------------------------------- /test/sql/test_headers_parsed.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/test_headers_parsed.test 2 | # description: Test that HTTP response headers and status lines are parsed correctly.
3 | # group: [sql] 4 | 5 | require httpfs 6 | 7 | require parquet 8 | 9 | statement ok 10 | SET httpfs_client_implementation='curl'; 11 | 12 | statement ok 13 | CALL enable_logging('HTTP'); 14 | 15 | query II 16 | select * from 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/job_role_type.parquet' order by all; 17 | ---- 18 | 1 actor 19 | 2 actress 20 | 3 producer 21 | 4 writer 22 | 5 cinematographer 23 | 6 composer 24 | 7 costume designer 25 | 8 director 26 | 9 editor 27 | 10 miscellaneous crew 28 | 11 production designer 29 | 12 guest 30 | 31 | query I 32 | select response.status from duckdb_logs_parsed('HTTP') order by all; 33 | ---- 34 | OK_200 35 | PartialContent_206 36 | 37 | # response status is either 38 | # HTTP/2 200 39 | # HTTP/2 206 40 | # OR 41 | # HTTP/1.1 200 OK 42 | # HTTP/1.1 206 Partial Content 43 | # depending on OS and CA (I think) 44 | query I 45 | select response.headers['__RESPONSE_STATUS__'] LIKE 'HTTP%20%' from duckdb_logs_parsed('HTTP') order by all; 46 | ---- 47 | true 48 | true 49 | -------------------------------------------------------------------------------- /test/sql/copy/parquet/parquet_http_prefetch.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/parquet/parquet_http_prefetch.test 2 | # description: This test triggers the http prefetch mechanism. 3 | # group: [parquet] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require-env S3_TEST_SERVER_AVAILABLE 1 10 | 11 | # Require that these environment variables are also set 12 | 13 | require-env AWS_DEFAULT_REGION 14 | 15 | require-env AWS_ACCESS_KEY_ID 16 | 17 | require-env AWS_SECRET_ACCESS_KEY 18 | 19 | require-env DUCKDB_S3_ENDPOINT 20 | 21 | require-env DUCKDB_S3_USE_SSL 22 | 23 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 24 | set ignore_error_messages 25 | 26 | statement ok 27 | CREATE TABLE test_fetch_delay (a INT, b INT); 28 | 29 | statement ok 30 | INSERT INTO test_fetch_delay (SELECT (i%2) * 2, (i%2) * 2 from range(0,2500000) as tbl(i)); 31 | 32 | statement ok 33 | COPY test_fetch_delay to 's3://test-bucket/skip_delay.parquet'; 34 | 35 | statement ok 36 | CREATE TABLE test as SELECT * from 's3://test-bucket/skip_delay.parquet' where a = 1; 37 | 38 | query I 39 | SELECT COUNT(*) FROM test; 40 | ---- 41 | 0 42 | -------------------------------------------------------------------------------- /src/include/httpfs_curl_client.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <curl/curl.h> 4 | 5 | #include "duckdb/common/http_util.hpp" 6 | 7 | namespace duckdb { 8 | class HTTPLogger; 9 | class FileOpener; 10 | struct FileOpenerInfo; 11 | class HTTPState; 12 | 13 | class CURLHandle { 14 | public: 15 | CURLHandle(const string &token, const string &cert_path); 16 | ~CURLHandle(); 17 | 18 | public: 19 | operator CURL *() { 20 | return curl; 21 | } 22 | CURLcode Execute() { 23 | return curl_easy_perform(curl); 24 | } 25 | 26 | private: 27 | CURL *curl = NULL; 28 | }; 29 | 30 | class CURLRequestHeaders { 31 | public: 32 | CURLRequestHeaders(vector<string> &input) { 33 | for (auto &header : input) { 34 | Add(header); 35 | } 36 | } 37 | CURLRequestHeaders() { 38 | } 39 | 40 | ~CURLRequestHeaders() { 41 | if (headers) { 42 | curl_slist_free_all(headers); 43 | } 44 | headers = NULL; 45 | } 46 | operator bool() const { 47 | return headers != NULL; 48 | } 49 | 50 | public: 51 | void Add(const string &header) {
52 | headers = curl_slist_append(headers, header.c_str()); 53 | } 54 | 55 | public: 56 | curl_slist *headers = NULL; 57 | }; 58 | 59 | } // namespace duckdb 60 | -------------------------------------------------------------------------------- /test/sql/attach/attach_s3.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/attach/attach_s3.test 2 | # description: Test attach using httpfs 3 | # group: [attach] 4 | 5 | require httpfs 6 | 7 | require-env S3_TEST_SERVER_AVAILABLE 1 8 | 9 | require-env AWS_DEFAULT_REGION 10 | 11 | require-env AWS_ACCESS_KEY_ID 12 | 13 | require-env AWS_SECRET_ACCESS_KEY 14 | 15 | require-env DUCKDB_S3_ENDPOINT 16 | 17 | require-env DUCKDB_S3_USE_SSL 18 | 19 | require-env S3_ATTACH_DB 20 | 21 | statement ok 22 | CREATE SECRET ( 23 | TYPE S3, 24 | PROVIDER config, 25 | KEY_ID '${AWS_ACCESS_KEY_ID}', 26 | SECRET '${AWS_SECRET_ACCESS_KEY}', 27 | REGION '${AWS_DEFAULT_REGION}', 28 | ENDPOINT '${DUCKDB_S3_ENDPOINT}', 29 | USE_SSL '${DUCKDB_S3_USE_SSL}' 30 | ) 31 | 32 | # ATTACH a DuckDB database over HTTPFS 33 | statement ok 34 | ATTACH '${S3_ATTACH_DB}' AS db (READONLY 1); 35 | 36 | query IIIII 37 | SELECT * FROM db.integral_values 38 | ---- 39 | 1 2 3 4 5 40 | NULL NULL NULL NULL NULL 41 | 42 | statement error 43 | CREATE TABLE db.integers(i INTEGER); 44 | ---- 45 | read-only 46 | 47 | statement ok 48 | SELECT * FROM db.all_types 49 | 50 | statement error 51 | SELECT * FROM db.all_typez 52 | ---- 53 | all_types 54 | 55 | statement ok 56 | DETACH db 57 | -------------------------------------------------------------------------------- /test/sql/secret/secret_aws.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secret/secret_aws.test 2 | # description: Tests secret refreshing 3 | # group: [secret] 4 | 5 | require-env S3_TEST_SERVER_AVAILABLE 1 6 | 7 | require-env AWS_DEFAULT_REGION 8 | 9 | require-env AWS_ACCESS_KEY_ID 10 | 11 | require-env AWS_SECRET_ACCESS_KEY 12 | 13 | require-env DUCKDB_S3_ENDPOINT 14 | 15 | require-env DUCKDB_S3_USE_SSL 16 | 17 | set ignore_error_messages 18 | 19 | require httpfs 20 | 21 | require parquet 22 | 23 | foreach httpfs_implementation curl httplib 24 | 25 | statement ok 26 | SET httpfs_client_implementation='${httpfs_implementation}'; 27 | 28 | statement ok 29 | SET enable_logging=true 30 | 31 | statement ok 32 | set s3_use_ssl='${DUCKDB_S3_USE_SSL}' 33 | 34 | statement ok 35 | set s3_endpoint='${DUCKDB_S3_ENDPOINT}' 36 | 37 | statement ok 38 | set s3_region='${AWS_DEFAULT_REGION}' 39 | 40 | # Create some test data 41 | statement ok 42 | CREATE or replace SECRET s1 ( 43 | TYPE AWS, 44 | KEY_ID '${AWS_ACCESS_KEY_ID}', 45 | SECRET '${AWS_SECRET_ACCESS_KEY}' 46 | ) 47 | 48 | statement ok 49 | copy (select 1 as a) to 's3://test-bucket/test-file.parquet' 50 | 51 | query I 52 | FROM "s3://test-bucket/test-file.parquet" 53 | ---- 54 | 1 55 | 56 | endloop -------------------------------------------------------------------------------- /test/sql/copy/s3/s3_presigned_read.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/s3_presigned_read.test_slow 2 | # description: Read large csv/parquet files from S3 Presigned URL. 
3 | # group: [s3] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require-env S3_TEST_SERVER_AVAILABLE 1 10 | 11 | # Require that these environment variables are also set 12 | 13 | require-env AWS_DEFAULT_REGION 14 | 15 | require-env AWS_ACCESS_KEY_ID 16 | 17 | require-env AWS_SECRET_ACCESS_KEY 18 | 19 | require-env DUCKDB_S3_ENDPOINT 20 | 21 | require-env DUCKDB_S3_USE_SSL 22 | 23 | 24 | require-env S3_LARGE_PARQUET_PRESIGNED_URL 25 | 26 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 27 | set ignore_error_messages 28 | 29 | statement ok 30 | set http_timeout=120000; 31 | 32 | # More retries (longest wait will be 25600ms) 33 | statement ok 34 | set http_retries=6; 35 | 36 | query I 37 | SELECT 38 | sum(l_extendedprice * l_discount) AS revenue 39 | FROM 40 | '${S3_LARGE_PARQUET_PRESIGNED_URL}' 41 | WHERE 42 | l_shipdate >= CAST('1994-01-01' AS date) 43 | AND l_shipdate < CAST('1995-01-01' AS date) 44 | AND l_discount BETWEEN 0.05 45 | AND 0.07 46 | AND l_quantity < 24; 47 | ---- 48 | 123141078.2283 49 | -------------------------------------------------------------------------------- /.github/workflows/MainDistributionPipeline.yml: -------------------------------------------------------------------------------- 1 | # 2 | # This workflow calls the main distribution pipeline from DuckDB to build, test and (optionally) release the extension 3 | # 4 | name: Main Extension Distribution Pipeline 5 | on: 6 | push: 7 | pull_request: 8 | workflow_dispatch: 9 | 10 | concurrency: 11 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} 12 | cancel-in-progress: true 13 | 14 | jobs: 15 | duckdb-stable-build: 16 | name: Build extension binaries 17 | uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main 18 | with: 19 | extension_name: httpfs 20 | duckdb_version: v1.4.2 21 | ci_tools_version: main 22 | 23 | 24 | duckdb-stable-deploy: 25 | name: Deploy extension binaries 26 | needs: duckdb-stable-build 27 | uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main 28 | secrets: inherit 29 | with: 30 | extension_name: httpfs 31 | duckdb_version: v1.4.2 32 | ci_tools_version: main 33 | deploy_latest: ${{ startsWith(github.ref, 'refs/heads/v') }} 34 | deploy_versioned: ${{ startsWith(github.ref, 'refs/heads/v') || github.ref == 'refs/heads/main' }} 35 | -------------------------------------------------------------------------------- /test/sql/delete/test_issue_1834.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/delete/test_issue_1834.test_slow 2 | # description: Deleting with DELETE USING causes a segmentation fault 3 | # group: [delete] 4 | 5 | require httpfs 6 | 7 | statement ok 8 | CREATE TABLE Person_likes_Comment (creationDate timestamp without time zone not null, id bigint not null, likes_Comment bigint not null); 9 | 10 | statement ok 11 | CREATE TABLE Person_Delete_candidates (deletionDate timestamp without time zone not null, id bigint); 12 | 13 | statement ok 14 | COPY Person_likes_Comment FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/Person_likes_Comment.csv' (DELIMITER '|', TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00'); 15 | 16 | statement ok 17 | COPY Person_Delete_candidates FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/Person_Delete_candidates.csv' (DELIMITER 
'|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00'); 18 | 19 | statement ok 20 | DELETE FROM Person_likes_Comment USING Person_Delete_candidates WHERE Person_Delete_candidates.id = Person_likes_Comment.id; 21 | 22 | # all tuples fulfilling this predicate should have been deleted 23 | query I 24 | SELECT COUNT(*) FROM Person_likes_Comment, Person_Delete_candidates WHERE Person_Delete_candidates.id = Person_likes_Comment.id; 25 | ---- 26 | 0 27 | -------------------------------------------------------------------------------- /test/sql/logging/http_logging.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/logging/http_logging.test 2 | # group: [logging] 3 | 4 | require parquet 5 | 6 | require httpfs 7 | 8 | statement ok 9 | CALL enable_logging('HTTP'); 10 | 11 | statement ok 12 | FROM 'https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv' 13 | 14 | query IIII 15 | SELECT 16 | request.type, 17 | request.url, 18 | response.status, 19 | response.reason, 20 | FROM duckdb_logs_parsed('HTTP') WHERE response.status != 'ServiceUnavailable_503' 21 | ---- 22 | HEAD https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv OK_200 OK 23 | GET https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv PartialContent_206 Partial Content 24 | 25 | query II 26 | SELECT request.headers['Range'], response.headers['Content-Range'] 27 | FROM duckdb_logs_parsed('HTTP') 28 | WHERE request.type='GET' 29 | ---- 30 | bytes=0-1275 bytes 0-1275/1276 31 | 32 | statement ok 33 | CALL truncate_duckdb_logs() 34 | 35 | # This old option still exists, however it now logs to the duckdb log instead of printing straight to stdout 36 | statement ok 37 | set enable_http_logging=false; 38 | 39 | statement ok 40 | FROM 'https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv' 41 | 42 | query I 43 | select count(*) FROM duckdb_logs_parsed('HTTP'); 44 | ---- 45 | 0 46 | -------------------------------------------------------------------------------- /test/sql/copy/s3/csv_s3_file_size_bytes.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/csv_s3_file_size_bytes.test 2 | # description: Test FILE_SIZE_BYTES parameter for csv copy over s3 3 | # group: [s3] 4 | 5 | require httpfs 6 | 7 | require-env S3_TEST_SERVER_AVAILABLE 1 8 | 9 | # Require that these environment variables are also set 10 | 11 | require-env AWS_DEFAULT_REGION 12 | 13 | require-env AWS_ACCESS_KEY_ID 14 | 15 | require-env AWS_SECRET_ACCESS_KEY 16 | 17 | require-env DUCKDB_S3_ENDPOINT 18 | 19 | require-env DUCKDB_S3_USE_SSL 20 | 21 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 22 | set ignore_error_messages 23 | 24 | # different vector sizes result in different number of files 25 | require no_vector_verification 26 | 27 | statement ok 28 | CREATE TABLE bigdata AS SELECT i AS col_a, i AS col_b FROM range(0,10000) tbl(i); 29 | 30 | statement ok 31 | set threads=1 32 | 33 | # parameter in bytes 34 | statement ok 35 | COPY (FROM bigdata) TO 's3://test-bucket/file_size_bytes_csv1' (FORMAT CSV, FILE_SIZE_BYTES 1000); 36 | 37 | query I 38 | SELECT COUNT(*) FROM read_csv_auto('s3://test-bucket/file_size_bytes_csv1/*.csv') 39 | ---- 40 | 10000 41 | 42 | # should lead to 3 files 43 | query I 44 | SELECT count(*) FROM glob('s3://test-bucket/file_size_bytes_csv1/*.csv') 45 | ---- 46 | 3 47 | 
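# --- Editor's note: illustrative sketch, not part of the original csv_s3_file_size_bytes.test above ---
# FILE_SIZE_BYTES should also accept a human-readable size string instead of a raw byte count.
# Assuming the same MinIO test bucket and the bigdata table created above, a hypothetical follow-up
# check could look like this (the target prefix is illustrative; only the total row count is asserted):
statement ok
COPY (FROM bigdata) TO 's3://test-bucket/file_size_bytes_csv_human' (FORMAT CSV, FILE_SIZE_BYTES '1kb');

query I
SELECT COUNT(*) FROM read_csv_auto('s3://test-bucket/file_size_bytes_csv_human/*.csv')
----
10000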
-------------------------------------------------------------------------------- /test/sql/secret/secret_refresh_attach.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secret/secret_refresh_attach.test 2 | # description: Tests secret refreshing 3 | # group: [secret] 4 | 5 | require-env S3_TEST_SERVER_AVAILABLE 1 6 | 7 | require-env AWS_DEFAULT_REGION 8 | 9 | require-env AWS_ACCESS_KEY_ID 10 | 11 | require-env AWS_SECRET_ACCESS_KEY 12 | 13 | require-env DUCKDB_S3_ENDPOINT 14 | 15 | require-env DUCKDB_S3_USE_SSL 16 | 17 | require-env S3_ATTACH_DB 18 | 19 | set ignore_error_messages 20 | 21 | require httpfs 22 | 23 | require parquet 24 | 25 | statement ok 26 | SET enable_logging=true 27 | 28 | statement ok 29 | set s3_use_ssl='${DUCKDB_S3_USE_SSL}' 30 | 31 | # Create secret with incorrect credentials to trigger secret refreshing 32 | statement ok 33 | CREATE SECRET uhuh_this_mah_sh ( 34 | TYPE S3, 35 | PROVIDER config, 36 | KEY_ID 'all the girls', 37 | SECRET 'stomp yo feet like dis', 38 | REGION '${AWS_DEFAULT_REGION}', 39 | ENDPOINT '${DUCKDB_S3_ENDPOINT}', 40 | USE_SSL '${DUCKDB_S3_USE_SSL}', 41 | REFRESH 'auto' 42 | ) 43 | 44 | statement error 45 | ATTACH 's3://test-bucket/presigned/attach.db' AS db (READONLY 1); 46 | ---- 47 | 48 | # Secret refresh has been triggered 49 | query II 50 | SELECT log_level, message FROM duckdb_logs WHERE message like '%Successfully refreshed secret%' 51 | ---- 52 | INFO Successfully refreshed secret: uhuh_this_mah_sh, new key_id: all the girls -------------------------------------------------------------------------------- /test/sql/copy/encryption/different_aes_engines.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/encryption/different_aes_engines.test 2 | # group: [encryption] 3 | 4 | foreach cipher GCM CTR 5 | 6 | statement ok 7 | ATTACH '__TEST_DIR__/enc_test_${cipher}.db' as enc (ENCRYPTION_KEY 'asdf', ENCRYPTION_CIPHER '${cipher}'); 8 | 9 | statement ok 10 | CREATE TABLE enc.test (a INTEGER, b INTEGER); 11 | 12 | statement ok 13 | INSERT INTO enc.test VALUES (11, 22), (13, 22), (12, 21) 14 | 15 | statement ok 16 | DETACH enc 17 | 18 | restart 19 | 20 | require httpfs 21 | 22 | statement ok 23 | ATTACH '__TEST_DIR__/enc_test_${cipher}.db' as enc (ENCRYPTION_KEY 'asdf'); 24 | 25 | 26 | query II 27 | FROM enc.test 28 | ---- 29 | 11 22 30 | 13 22 31 | 12 21 32 | 33 | 34 | restart 35 | 36 | endloop 37 | 38 | 39 | foreach cipher GCM CTR 40 | 41 | require httpfs 42 | 43 | statement ok 44 | ATTACH '__TEST_DIR__/enc_test_${cipher}.db' as enc (ENCRYPTION_KEY 'asdf', ENCRYPTION_CIPHER '${cipher}'); 45 | 46 | statement ok 47 | CREATE TABLE enc.test (a INTEGER, b INTEGER); 48 | 49 | statement ok 50 | INSERT INTO enc.test VALUES (11, 22), (13, 22), (12, 21) 51 | 52 | statement ok 53 | DETACH enc 54 | 55 | restart 56 | 57 | statement ok 58 | ATTACH '__TEST_DIR__/enc_test_${cipher}.db' as enc (ENCRYPTION_KEY 'asdf'); 59 | 60 | query II 61 | FROM enc.test 62 | ---- 63 | 11 22 64 | 13 22 65 | 12 21 66 | 67 | 68 | restart 69 | 70 | 71 | endloop 72 | -------------------------------------------------------------------------------- /test/sql/json/table/read_json_auto.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/json/table/read_json_auto.test_slow 2 | # description: Read json files - schema detection 3 | # group: [table] 4 | 5 | require json 6 | 7 | require httpfs 8 | 9 | # this is 
one big object - yyjson uses it as a benchmark 10 | query II 11 | select typeof("type"), typeof(features) from read_json_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/canada.json', maximum_depth=3); 12 | ---- 13 | VARCHAR STRUCT("type" JSON, properties JSON, geometry JSON)[] 14 | 15 | # let's crank up maximum_depth and see if we can fully unnest this big object 16 | query II 17 | select typeof("type"), typeof(features) from read_json_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/canada.json', maximum_depth=8); 18 | ---- 19 | VARCHAR STRUCT("type" VARCHAR, properties STRUCT("name" VARCHAR), geometry STRUCT("type" VARCHAR, coordinates DOUBLE[][][]))[] 20 | 21 | # ^ fully unnested, no more JSON type in there 22 | 23 | # the "coordinates" array in "features.geometry" is huge, let's just check the length - not all the values 24 | query IIIII 25 | select type, features[1].type, features[1].properties.name, features[1].geometry.type, length(features[1].geometry.coordinates) 26 | from read_json_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/canada.json', maximum_depth=8); 27 | ---- 28 | FeatureCollection Feature Canada Polygon 480 29 | -------------------------------------------------------------------------------- /test/sql/copy/parquet/parquet_encryption_mbedtls_openssl.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/parquet/parquet_encryption_mbedtls_openssl.test 2 | # description: Test Parquet encryption with OpenSSL 3 | # group: [parquet] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | # parquet keys are not persisted across restarts 10 | statement ok 11 | PRAGMA enable_verification 12 | 13 | # add keys of 3 different lengths 14 | statement ok 15 | PRAGMA add_parquet_key('key128', '0123456789112345') 16 | 17 | statement ok 18 | PRAGMA add_parquet_key('key192', '012345678911234501234567') 19 | 20 | statement ok 21 | PRAGMA add_parquet_key('key256', '01234567891123450123456789112345') 22 | 23 | # test all valid AES key lengths 24 | foreach key_len 128 192 256 25 | 26 | # write files with OpenSSL enabled 27 | statement error 28 | COPY (SELECT 42 i) to '__TEST_DIR__/encrypted${key_len}_openssl.parquet' (ENCRYPTION_CONFIG {footer_key: 'key${key_len}'}, DEBUG_USE_OPENSSL randomval) 29 | ---- 30 | BOOL 31 | 32 | # write files with OpenSSL enabled 33 | statement ok 34 | COPY (SELECT 42 i) to '__TEST_DIR__/encrypted${key_len}_openssl.parquet' (ENCRYPTION_CONFIG {footer_key: 'key${key_len}'}, DEBUG_USE_OPENSSL true) 35 | 36 | # read OpenSSL encrypted files by using mbedtls 37 | query I 38 | SELECT * FROM read_parquet('__TEST_DIR__/encrypted${key_len}_openssl.parquet', encryption_config={footer_key: 'key${key_len}'}, debug_use_openssl=false) 39 | ---- 40 | 42 41 | 42 | endloop 43 | -------------------------------------------------------------------------------- /test/sql/copy/no_head_on_write.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/no_head_on_write.test 2 | # description: Confirm that we don't send head requests for writes 3 | # group: [copy] 4 | 5 | require-env S3_TEST_SERVER_AVAILABLE 1 6 | 7 | require-env AWS_DEFAULT_REGION 8 | 9 | require-env AWS_ACCESS_KEY_ID 10 | 11 | require-env AWS_SECRET_ACCESS_KEY 12 | 13 | require-env DUCKDB_S3_ENDPOINT 14 | 15 | require-env DUCKDB_S3_USE_SSL 16 | 17 | require httpfs 18 | 19 | require parquet 20 | 21 | statement ok 22 | SET enable_logging=true 23 | 24 | 
statement ok 25 | set s3_use_ssl='${DUCKDB_S3_USE_SSL}' 26 | 27 | statement ok 28 | set s3_endpoint='${DUCKDB_S3_ENDPOINT}' 29 | 30 | statement ok 31 | set s3_region='${AWS_DEFAULT_REGION}' 32 | 33 | # Create some test data 34 | statement ok 35 | CREATE SECRET s1 ( 36 | TYPE S3, 37 | KEY_ID '${AWS_ACCESS_KEY_ID}', 38 | SECRET '${AWS_SECRET_ACCESS_KEY}', 39 | REQUESTER_PAYS true 40 | ) 41 | 42 | statement ok 43 | CALL enable_logging('HTTP'); 44 | 45 | statement ok 46 | copy (select 1 as a) to 's3://test-bucket/test-file.parquet' 47 | 48 | query I 49 | select request.type FROM duckdb_logs_parsed('HTTP') 50 | ---- 51 | PUT 52 | 53 | statement ok 54 | CALL truncate_duckdb_logs(); 55 | 56 | statement ok 57 | copy (select random() as a FROM range(8000000)) to 's3://test-bucket/test-file2.csv' 58 | 59 | query I 60 | select request.type FROM duckdb_logs_parsed('HTTP') 61 | ---- 62 | POST 63 | PUT 64 | PUT 65 | POST 66 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_persistence_error_handling.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_persistence_error_handling.test 2 | # description: Test secret persistence with buggy secrets 3 | # group: [secrets] 4 | 5 | statement ok 6 | PRAGMA enable_verification; 7 | 8 | load __TEST_DIR__/create_secret_persistence_error_handling.db 9 | 10 | require httpfs 11 | 12 | statement ok 13 | set secret_directory='__TEST_DIR__/create_secret_persistence_error_handling' 14 | 15 | # Hacky way to make duckdb create the create_secret_persistence_error_handling dir 16 | statement ok 17 | COPY (select 1 as a, 2 as b ) to '__TEST_DIR__/create_secret_persistence_error_handling/' (FORMAT csv, PARTITION_BY a) 18 | 19 | # Now write a corrupt secret file 20 | statement ok 21 | COPY (select 1 as a ) to '__TEST_DIR__/create_secret_persistence_error_handling/s1.duckdb_secret' (FORMAT csv) 22 | 23 | statement error 24 | FROM duckdb_secrets(); 25 | ---- 26 | 27 | restart 28 | 29 | statement ok 30 | set secret_directory='__TEST_DIR__/create_secret_persistence_error_handling2' 31 | 32 | statement ok 33 | CREATE PERSISTENT SECRET s1 (TYPE S3); 34 | 35 | restart no_extension_load 36 | 37 | statement ok 38 | set secret_directory='__TEST_DIR__/create_secret_persistence_error_handling2' 39 | 40 | # Disable autoloading 41 | statement ok 42 | SET autoload_known_extensions=false; 43 | 44 | # Force persistent deserialization; we can deserialize generic key/value secrets 45 | statement ok 46 | from duckdb_secrets(); 47 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_scope_matching.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_scope_matching.test 2 | # description: Test scope matching behaviour is correct 3 | # group: [secrets] 4 | 5 | load __TEST_DIR__/create_secret_scope_matching.db 6 | 7 | statement ok 8 | PRAGMA enable_verification; 9 | 10 | require httpfs 11 | 12 | statement ok 13 | set secret_directory='__TEST_DIR__/create_secret_scope_matching' 14 | 15 | # No match 16 | query I 17 | SELECT name FROM which_secret('s3://', 's3') 18 | ---- 19 | 20 | statement ok 21 | CREATE TEMPORARY SECRET t1 ( TYPE S3 ) 22 | 23 | statement ok 24 | CREATE TEMPORARY SECRET t2 ( TYPE S3 ) 25 | 26 | statement ok 27 | CREATE SECRET p1 IN LOCAL_FILE ( TYPE S3 ) 28 | 29 | # This ties within the same storage: the two 
temporary secrets t1 and t2 both score identically. We solve this by 30 | # tie-breaking on secret name alphabetical ordering 31 | query I 32 | SELECT name FROM which_secret('s3://', 's3') 33 | ---- 34 | t1 35 | 36 | query III 37 | FROM which_secret('s3://', 's3') 38 | ---- 39 | t1 TEMPORARY memory 40 | 41 | statement ok 42 | DROP SECRET t1 43 | 44 | # Temporary secrets take preference over persistent ones 45 | query I 46 | SELECT name FROM which_secret('s3://', 's3') 47 | ---- 48 | t2 49 | 50 | statement ok 51 | DROP SECRET t2 52 | 53 | query I 54 | SELECT name FROM which_secret('s3://', 's3') 55 | ---- 56 | p1 57 | 58 | statement maybe 59 | DROP SECRET p1 60 | ---- 61 | Invalid Input Error: Failed to remove non-existent secret 62 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_non_writable_persistent_dir.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_non_writable_persistent_dir.test 2 | # description: Test persistent secrets when the secret dir is non-writable 3 | # group: [secrets] 4 | 5 | statement ok 6 | PRAGMA enable_verification; 7 | 8 | load __TEST_DIR__/create_secret_non_writable_persistent_dir.db 9 | 10 | require httpfs 11 | 12 | # First we create an arbitrary file 13 | statement ok 14 | COPY (SELECT 1 as a) to '__TEST_DIR__/file_to_prevent_the_secret_dir_from_being_created.csv' 15 | 16 | # Then we set the secret dir to this. 17 | statement ok 18 | set secret_directory='__TEST_DIR__/file_to_prevent_the_secret_dir_from_being_created.csv' 19 | 20 | # Now on creation of a tmp secret, the secret manager is initialized, but the persistent secret directory creation is impossible 21 | statement ok 22 | CREATE SECRET my_tmp_secret ( 23 | TYPE S3, 24 | SCOPE 's3://bucket1' 25 | ) 26 | 27 | # This now fails with the message that we could not create the persistent secret directory 28 | statement error 29 | CREATE PERSISTENT SECRET my_tmp_secret ( 30 | TYPE S3, 31 | SCOPE 's3://bucket2' 32 | ) 33 | ---- 34 | 35 | restart 36 | 37 | # Try with a correct, deeply nested path: AOK?
38 | statement ok 39 | set secret_directory='__TEST_DIR__/create_secret_non_writable_persistent_dir/a/deeply/nested/folder/will/be/created' 40 | 41 | statement maybe 42 | CREATE PERSISTENT SECRET my_tmp_secret ( 43 | TYPE S3, 44 | SCOPE 's3://bucket2' 45 | ) 46 | ---- 47 | -------------------------------------------------------------------------------- /test/sql/attach/attach_s3_tpch.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/attach/attach_s3_tpch.test_slow 2 | # description: Test running TPC-H over a database attached over S3 3 | # group: [attach] 4 | 5 | require httpfs 6 | 7 | require tpch 8 | 9 | require-env S3_TEST_SERVER_AVAILABLE 1 10 | 11 | require-env AWS_DEFAULT_REGION 12 | 13 | require-env AWS_ACCESS_KEY_ID 14 | 15 | require-env AWS_SECRET_ACCESS_KEY 16 | 17 | require-env DUCKDB_S3_ENDPOINT 18 | 19 | require-env DUCKDB_S3_USE_SSL 20 | 21 | statement ok 22 | CREATE SECRET ( 23 | TYPE S3, 24 | PROVIDER config, 25 | KEY_ID '${AWS_ACCESS_KEY_ID}', 26 | SECRET '${AWS_SECRET_ACCESS_KEY}', 27 | REGION '${AWS_DEFAULT_REGION}', 28 | ENDPOINT '${DUCKDB_S3_ENDPOINT}', 29 | USE_SSL '${DUCKDB_S3_USE_SSL}' 30 | ) 31 | 32 | # ATTACH a DuckDB database over HTTPFS 33 | statement ok 34 | ATTACH 's3://test-bucket/presigned/lineitem_sf1.db' AS db (READONLY 1); 35 | 36 | statement ok 37 | USE db 38 | 39 | loop i 1 9 40 | 41 | query I 42 | PRAGMA tpch(${i}) 43 | ---- 44 | :duckdb/extension/tpch/dbgen/answers/sf1/q0${i}.csv 45 | 46 | endloop 47 | 48 | loop i 10 23 49 | 50 | query I 51 | PRAGMA tpch(${i}) 52 | ---- 53 | :duckdb/extension/tpch/dbgen/answers/sf1/q${i}.csv 54 | 55 | endloop 56 | 57 | statement ok 58 | USE memory 59 | 60 | statement ok 61 | DETACH db 62 | 63 | statement ok 64 | ATTACH 's3://test-bucket/presigned/lineitem_sf1.db' AS db (READONLY 1); 65 | 66 | statement ok 67 | USE db 68 | 69 | query IIIIIIIIIIIIIIII 70 | select count(distinct columns(*)) from lineitem; 71 | ---- 72 | 1500000 200000 10000 7 50 933900 11 9 3 2 2526 2466 2554 4 7 3610733 73 | -------------------------------------------------------------------------------- /test/sql/copy/parquet/parquet_2102.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/parquet/parquet_2102.test_slow 2 | # description: Missing Column Data After Adding Left Join To Query in DuckDB Version 0.2.8 3 | # group: [parquet] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | statement ok 10 | CREATE TABLE view_one AS SELECT * FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/issue2102_one.parquet'; 11 | 12 | statement ok 13 | CREATE TABLE view_two AS SELECT * FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/issue2102_two.parquet'; 14 | 15 | query I 16 | SELECT COUNT(*) FROM view_one WHERE date IS NULL 17 | ---- 18 | 6219 19 | 20 | statement ok 21 | CREATE TABLE tbl1 AS SELECT one.id id, one.date date 22 | FROM 23 | view_one AS one 24 | JOIN 25 | view_two two ON two.id = one.id AND two.line = 1; 26 | 27 | query I 28 | SELECT COUNT(*) FROM tbl1 29 | ---- 30 | 691951 31 | 32 | query I 33 | SELECT COUNT(*) FROM tbl1 WHERE date IS NULL 34 | ---- 35 | 4742 36 | 37 | statement ok 38 | CREATE TABLE tbl2 AS SELECT one.id id, one.date date 39 | FROM 40 | view_one AS one 41 | LEFT JOIN 42 | view_two two ON two.id = one.id AND two.line = 1; 43 | 44 | query I 45 | SELECT COUNT(*) FROM tbl2 46 | ---- 47 | 695434 48 | 49 | query I 50 | SELECT COUNT(*) FROM tbl2 WHERE date IS NULL 51 | ---- 
52 | 6219 53 | 54 | statement ok 55 | CREATE TABLE tbl3 AS SELECT one.id id, one.date date 56 | FROM 57 | view_one AS one 58 | LEFT JOIN 59 | view_two two ON two.id = one.id; 60 | 61 | query I 62 | SELECT COUNT(*) FROM tbl3 63 | ---- 64 | 768666 65 | 66 | query I 67 | SELECT COUNT(*) FROM tbl3 WHERE date IS NULL 68 | ---- 69 | 7124 70 | -------------------------------------------------------------------------------- /test/extension/autoloading_load_only.test: -------------------------------------------------------------------------------- 1 | # name: test/extension/autoloading_load_only.test 2 | # description: Tests for autoloading with no autoinstall 3 | # group: [extension] 4 | 5 | require httpfs 6 | 7 | # This test assumes icu and json to be available in the LOCAL_EXTENSION_REPO and NOT linked into duckdb statically 8 | # -> this should be the case for our autoloading tests where we have the local_extension_repo variable set 9 | require-env LOCAL_EXTENSION_REPO 10 | 11 | # Ensure we have a clean extension directory without any preinstalled extensions 12 | statement ok 13 | set extension_directory='__TEST_DIR__/autoloading_load_only' 14 | 15 | ### No autoloading nor installing: throw error with installation hint 16 | statement ok 17 | set autoload_known_extensions=false 18 | 19 | statement ok 20 | set autoinstall_known_extensions=false 21 | 22 | statement error 23 | SET s3_region='eu-west-1'; 24 | ---- 25 | :.*Catalog Error.*Setting with name "s3_region" is not in the catalog.* 26 | 27 | ### Autoloading but not autoinstall, while the extension is not installed: still not working 28 | statement ok 29 | set autoload_known_extensions=true 30 | 31 | statement ok 32 | set autoinstall_extension_repository='/tmp/non-existent-repo'; 33 | 34 | statement error 35 | SET s3_region='eu-west-1'; 36 | ---- 37 | :.*Extension Autoloading Error.*An error occurred while trying to automatically install the required extension 'httpfs'.* 38 | 39 | ### Manually install the extension from the local repo 40 | statement ok 41 | INSTALL httpfs FROM '${LOCAL_EXTENSION_REPO}' 42 | 43 | # now autoloading works! 44 | statement ok 45 | SET s3_region='eu-west-1'; 46 | -------------------------------------------------------------------------------- /test/extension/autoloading_current_setting.test: -------------------------------------------------------------------------------- 1 | # name: test/extension/autoloading_current_setting.test 2 | # description: More tests for extension autoloading. 
3 | # group: [extension] 4 | 5 | # This test assumes icu and json to be available in the LOCAL_EXTENSION_REPO and NOT linked into duckdb statically 6 | # -> this should be the case for our autoloading tests where we have the local_extension_repo variable set 7 | require-env LOCAL_EXTENSION_REPO 8 | 9 | require httpfs 10 | 11 | statement ok 12 | set extension_directory='__TEST_DIR__/autoloading_current_setting' 13 | 14 | ### No autoloading: throw error with installation hint 15 | statement ok 16 | set autoload_known_extensions=false 17 | 18 | statement ok 19 | set autoinstall_known_extensions=false 20 | 21 | statement error 22 | select current_setting('s3_region'); 23 | ---- 24 | :.*Catalog Error.*Setting with name "s3_region" is not in the catalog.* 25 | 26 | ### Autoloading, but not autoinstall 27 | statement ok 28 | set autoload_known_extensions=true 29 | 30 | statement ok 31 | set autoinstall_extension_repository='/tmp/non-existent-repo'; 32 | 33 | # Error should inform the user on what's happening 34 | statement error 35 | select current_setting('s3_region'); 36 | ---- 37 | :.*Extension Autoloading Error.*An error occurred while trying to automatically install the required extension 'httpfs'.* 38 | 39 | ### Autoloading with autoinstall and correct extension repo 40 | statement ok 41 | set autoinstall_extension_repository='${LOCAL_EXTENSION_REPO}'; 42 | 43 | statement ok 44 | set autoinstall_known_extensions=true 45 | 46 | statement ok 47 | select current_setting('s3_region'); 48 | -------------------------------------------------------------------------------- /test/extension/autoloading_reset_setting.test: -------------------------------------------------------------------------------- 1 | # name: test/extension/autoloading_reset_setting.test 2 | # description: Testing resetting a setting that lives in an extension that can be autoloaded 3 | # group: [extension] 4 | 5 | require httpfs 6 | 7 | # This test assumes httpfs and json to be available in the LOCAL_EXTENSION_REPO and NOT linked into duckdb statically 8 | # -> this should be the case for our autoloading tests where we have the local_extension_repo variable set 9 | require-env LOCAL_EXTENSION_REPO 10 | 11 | statement ok 12 | set extension_directory='__TEST_DIR__/autoloading_reset_setting' 13 | 14 | ### No autoloading: throw error with installation hint 15 | statement ok 16 | set autoload_known_extensions=false 17 | 18 | statement ok 19 | set autoinstall_known_extensions=false 20 | 21 | # Testing reset setting 22 | statement error 23 | RESET s3_region; 24 | ---- 25 | Catalog Error: Setting with name "s3_region" is not in the catalog, but it exists in the httpfs extension.
26 | 27 | ### Autoloading, but no auto install 28 | statement ok 29 | set autoload_known_extensions=true 30 | 31 | statement ok 32 | set autoinstall_extension_repository='/tmp/non-existent-repo'; 33 | 34 | # Error should inform the user on whats happening 35 | statement error 36 | RESET s3_region; 37 | ---- 38 | Extension Autoloading Error: An error occurred while trying to automatically install the required extension 'httpfs': 39 | Extension 40 | 41 | ### Autoloading with correct tmp repo and autoinstall 42 | statement ok 43 | set autoinstall_extension_repository='${LOCAL_EXTENSION_REPO}'; 44 | 45 | statement ok 46 | set autoinstall_known_extensions=true 47 | 48 | statement ok 49 | RESET s3_region; 50 | -------------------------------------------------------------------------------- /test/extension/autoloading_filesystems.test: -------------------------------------------------------------------------------- 1 | # name: test/extension/autoloading_filesystems.test 2 | # description: Tests for autoloading with filesystems 3 | # group: [extension] 4 | 5 | require httpfs 6 | 7 | # This test assumes icu and json to be available in the LOCAL_EXTENSION_REPO and NOT linked into duckdb statically 8 | # -> this should be the case for our autoloading tests where we have the local_extension_repo variable set 9 | require-env LOCAL_EXTENSION_REPO 10 | 11 | statement ok 12 | set allow_persistent_secrets=false; 13 | 14 | # Ensure we have a clean extension directory without any preinstalled extensions 15 | statement ok 16 | set extension_directory='__TEST_DIR__/autoloading_filesystems' 17 | 18 | ### No autoloading nor installing: throw error with installation hint 19 | statement ok 20 | set autoload_known_extensions=false 21 | 22 | statement ok 23 | set autoinstall_known_extensions=false 24 | 25 | statement error 26 | SELECT * FROM 's3://some-bucket/a-file.csv' 27 | ---- 28 | Missing Extension Error: File s3://some-bucket/a-file.csv requires the extension httpfs to be loaded 29 | 30 | ### With autoloading, install and correct repo 31 | statement ok 32 | set autoload_known_extensions=true 33 | 34 | statement ok 35 | set autoinstall_known_extensions=true 36 | 37 | statement ok 38 | set autoinstall_extension_repository='${LOCAL_EXTENSION_REPO}'; 39 | 40 | # Set an invalid endpoint to ensure we fail in the httpfs extension when trying to connect 41 | statement ok 42 | SET s3_endpoint='false_endpoint'; 43 | 44 | statement error 45 | SELECT * FROM 's3://some-bucket/a-file.csv' 46 | ---- 47 | Could not establish connection error for HTTP HEAD to 'https://some-bucket.false_endpoint/a-file.csv' 48 | -------------------------------------------------------------------------------- /scripts/run_s3_test_server.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #Note: DONT run as root 3 | 4 | if [ ! -f test/test_data/attach.db ]; then 5 | echo "File test/test_data/attach.db not found, run ./scripts/generate_presigned_url.sh to generate" 6 | else 7 | rm -rf /tmp/minio_test_data 8 | rm -rf /tmp/minio_root_data 9 | mkdir -p /tmp/minio_test_data 10 | mkdir -p /tmp/minio_root_data 11 | docker compose -f scripts/minio_s3.yml -p duckdb-minio up -d 12 | 13 | # for testing presigned url 14 | container_name=$(docker ps -a --format '{{.Names}}' | grep -m 1 "duckdb-minio") 15 | echo $container_name 16 | 17 | for i in $(seq 1 360); 18 | do 19 | docker_finish_logs=$(docker logs $container_name 2>/dev/null | grep -m 1 'FINISHED SETTING UP MINIO' || echo '') 20 | if [ ! 
-z "${docker_finish_logs}" ]; then 21 | break 22 | fi 23 | sleep 1 24 | done 25 | 26 | 27 | export S3_SMALL_CSV_PRESIGNED_URL=$(docker logs $container_name 2>/dev/null | grep -m 1 'Share:.*phonenumbers\.csv' | grep -o 'http[s]\?://[^ ]\+') 28 | echo $S3_SMALL_CSV_PRESIGNED_URL 29 | 30 | export S3_SMALL_PARQUET_PRESIGNED_URL=$(docker logs $container_name 2>/dev/null | grep -m 1 'Share:.*t1\.parquet' | grep -o 'http[s]\?://[^ ]\+') 31 | echo $S3_SMALL_PARQUET_PRESIGNED_URL 32 | 33 | export S3_LARGE_PARQUET_PRESIGNED_URL=$(docker logs $container_name 2>/dev/null | grep -m 1 'Share:.*lineitem_large\.parquet' | grep -o 'http[s]\?://[^ ]\+') 34 | echo $S3_LARGE_PARQUET_PRESIGNED_URL 35 | 36 | export S3_ATTACH_DB_PRESIGNED_URL=$(docker logs $container_name 2>/dev/null | grep -m 1 'Share:.*attach\.db' | grep -o 'http[s]\?://[^ ]\+') 37 | echo $S3_ATTACH_DB_PRESIGNED_URL 38 | 39 | export S3_ATTACH_DB="s3://test-bucket/presigned/attach.db" 40 | fi -------------------------------------------------------------------------------- /test/sql/httpfs/hffs.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/httpfs/hffs.test 2 | # description: Ensure the HuggingFace filesystem works as expected 3 | # group: [httpfs] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | statement error 10 | FROM parquet_scan('hf://') 11 | ---- 12 | IO Error: Failed to parse 'hf://'. Please format url like: 'hf://datasets/my-username/my-dataset/path/to/file.parquet' 13 | 14 | statement error 15 | FROM 'hf://file.parquet' 16 | ---- 17 | IO Error: Failed to parse 'hf://file.parquet'. Please format url like: 'hf://datasets/my-username/my-dataset/path/to/file.parquet' 18 | 19 | statement error 20 | FROM 'hf://yepthisdoesntwork/file.parquet' 21 | ---- 22 | IO Error: Failed to parse: 'hf://yepthisdoesntwork/file.parquet'. Currently DuckDB only supports querying datasets or spaces, so the url should start with 'hf://datasets' or 'hf://spaces' 23 | 24 | statement error 25 | FROM 'hf://stil/not/file.parquet' 26 | ---- 27 | IO Error: Failed to parse: 'hf://stil/not/file.parquet'. Currently DuckDB only supports querying datasets or spaces, so the url should start with 'hf://datasets' or 'hf://spaces' 28 | 29 | statement error 30 | FROM 'hf://datasets/file.parquet' 31 | ---- 32 | IO Error: Failed to parse 'hf://datasets/file.parquet'. Please format url like: 'hf://datasets/my-username/my-dataset/path/to/file.parquet' 33 | 34 | statement error 35 | FROM 'hf://datasets/myname/file.parquet' 36 | ---- 37 | IO Error: Failed to parse 'hf://datasets/myname/file.parquet'. Please format url like: 'hf://datasets/my-username/my-dataset/path/to/file.parquet' 38 | 39 | statement error 40 | FROM 'hf://datasets/**/file.parquet' 41 | ---- 42 | IO Error: Failed to parse 'hf://datasets/**/file.parquet'. 
Please format url like: 'hf://datasets/my-username/my-dataset/path/to/file.parquet' 43 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_overwriting.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_overwriting.test 2 | # description: Test secret overwriting and deleting 3 | # group: [secrets] 4 | 5 | statement ok 6 | PRAGMA enable_verification; 7 | 8 | require httpfs 9 | 10 | # Ensure any currently stored secrets don't interfere with the test 11 | statement ok 12 | set allow_persistent_secrets=false; 13 | 14 | # Create some s3 secret 15 | statement ok 16 | CREATE SECRET my_secret ( 17 | TYPE S3, 18 | SCOPE 's3://bucket1' 19 | ) 20 | 21 | query II 22 | SELECT name, scope FROM duckdb_secrets(); 23 | ---- 24 | my_secret ['s3://bucket1'] 25 | 26 | statement error 27 | CREATE SECRET my_secret ( 28 | TYPE S3, 29 | KEY_ID 'my_key', 30 | SECRET 'my_secret', 31 | SCOPE 's3://bucket1' 32 | ) 33 | ---- 34 | Invalid Input Error: Temporary secret with name 'my_secret' already exists! 35 | 36 | # We should be able to replace the secret though 37 | statement ok 38 | CREATE OR REPLACE SECRET my_secret ( 39 | TYPE S3, 40 | SCOPE 's3://bucket2' 41 | ) 42 | 43 | query II 44 | SELECT name, scope FROM duckdb_secrets(); 45 | ---- 46 | my_secret ['s3://bucket2'] 47 | 48 | # We can also ignore if we want to 49 | statement ok 50 | CREATE SECRET IF NOT EXISTS my_secret ( 51 | TYPE S3, 52 | SCOPE 's3://bucket5' 53 | ) 54 | 55 | query II 56 | SELECT name, scope FROM duckdb_secrets(); 57 | ---- 58 | my_secret ['s3://bucket2'] 59 | 60 | # Now try dropping a secret that does not exist 61 | statement error 62 | DROP SECRET my_secret_does_not_exist; 63 | ---- 64 | Failed to remove non-existent secret with name 'my_secret_does_not_exist' 65 | 66 | # Drop one that does exist 67 | statement ok 68 | DROP SECRET my_secret; 69 | 70 | # Secret be gone! 
71 | query II 72 | SELECT name, scope FROM duckdb_secrets(); 73 | ---- 74 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.12...3.29) 2 | 3 | project(HTTPFsExtension) 4 | 5 | add_extension_definitions() 6 | 7 | if (NOT EMSCRIPTEN) 8 | add_definitions(-DOVERRIDE_ENCRYPTION_UTILS=1) 9 | else() 10 | set(DUCKDB_EXTENSION_HTTPFS_LINKED_LIBS "../../third_party/mbedtls/libduckdb_mbedtls.a") 11 | endif() 12 | 13 | if(MINGW) 14 | set(OPENSSL_USE_STATIC_LIBS TRUE) 15 | endif() 16 | 17 | find_package(OpenSSL REQUIRED) 18 | find_package(CURL REQUIRED) 19 | include_directories(${OPENSSL_INCLUDE_DIR}) 20 | include_directories(${CURL_INCLUDE_DIRS}) 21 | 22 | include_directories(src/include 23 | ${DUCKDB_MODULE_BASE_DIR}/third_party/httplib) 24 | add_subdirectory(src) 25 | set(EXTENSION_SOURCES ${ALL_OBJECT_FILES}) 26 | 27 | build_static_extension(httpfs ${EXTENSION_SOURCES}) 28 | 29 | set(PARAMETERS "-warnings") 30 | build_loadable_extension(httpfs ${PARAMETERS} ${EXTENSION_SOURCES}) 31 | 32 | if(EMSCRIPTEN) 33 | target_link_libraries(httpfs_loadable_extension duckdb_mbedtls) 34 | else() 35 | target_link_libraries(httpfs_loadable_extension duckdb_mbedtls 36 | ${OPENSSL_LIBRARIES}) 37 | target_link_libraries(httpfs_extension duckdb_mbedtls ${OPENSSL_LIBRARIES}) 38 | 39 | # Link dependencies into extension 40 | target_link_libraries(httpfs_loadable_extension ${CURL_LIBRARIES}) 41 | target_link_libraries(httpfs_extension ${CURL_LIBRARIES}) 42 | 43 | 44 | if(MINGW) 45 | find_package(ZLIB) 46 | target_link_libraries(httpfs_loadable_extension ZLIB::ZLIB -lcrypt32) 47 | target_link_libraries(httpfs_extension ZLIB::ZLIB -lcrypt32) 48 | endif() 49 | endif() 50 | 51 | 52 | install( 53 | TARGETS httpfs_extension 54 | EXPORT "${DUCKDB_EXPORT_SET}" 55 | LIBRARY DESTINATION "${INSTALL_LIB_DIR}" 56 | ARCHIVE DESTINATION "${INSTALL_LIB_DIR}") 57 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_cascading.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_cascading.test_slow 2 | # description: Test the cascading mechanism of secret settings 3 | # group: [secrets] 4 | 5 | statement ok 6 | PRAGMA enable_verification; 7 | 8 | require httpfs 9 | 10 | # Ensure any currently stored secrets don't interfere with the test 11 | statement ok 12 | set allow_persistent_secrets=false; 13 | 14 | statement ok 15 | set s3_endpoint = 'invalid-on-purpose-setting' 16 | 17 | statement ok 18 | set s3_url_style = 'path' 19 | 20 | statement ok 21 | set s3_use_ssl = false 22 | 23 | # This secret overrides only the url style, not the endpoint 24 | statement ok 25 | CREATE SECRET s1 ( 26 | TYPE S3, 27 | REGION 'my_region', 28 | URL_STYLE 'vhost', 29 | SCOPE 's3://url-style-only' 30 | ) 31 | 32 | # This secret overrides both the url style and the endpoint 33 | statement ok 34 | CREATE SECRET s2 ( 35 | TYPE S3, 36 | REGION 'my_region', 37 | URL_STYLE 'vhost', 38 | ENDPOINT 'invalid-on-purpose-secret', 39 | SCOPE 's3://url-style-and-endpoint' 40 | ) 41 | 42 | # Only the url style from the secret is used 43 | statement error 44 | FROM 's3://url-style-only/test.csv' 45 | ---- 46 | Could not establish connection error for HTTP HEAD to 'http://url-style-only.invalid-on-purpose-setting/test.csv' 47 | 48 | # Both Url style and endpoint are 
used now 49 | statement error 50 | FROM 's3://url-style-and-endpoint/test.csv' 51 | ---- 52 | Could not establish connection error for HTTP HEAD to 'http://url-style-and-endpoint.invalid-on-purpose-secret/test.csv' 53 | 54 | # This request matches none of the secrets, we use the settings 55 | statement error 56 | FROM 's3://test-bucket/test.csv' 57 | ---- 58 | Could not establish connection error for HTTP HEAD to 'http://invalid-on-purpose-setting/test-bucket/test.csv' 59 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_defaults.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_defaults.test 2 | # description: Test default values during secret creation 3 | # group: [secrets] 4 | 5 | statement ok 6 | PRAGMA enable_verification; 7 | 8 | require httpfs 9 | 10 | # Ensure any currently stored secrets don't interfere with the test 11 | statement ok 12 | set allow_persistent_secrets=false; 13 | 14 | statement ok 15 | DROP SECRET IF EXISTS s1; 16 | 17 | # Without a name we use the __default_ name. The default provider for the S3 type is config 18 | statement ok 19 | CREATE SECRET ( 20 | TYPE S3, 21 | KEY_ID 'my_key', 22 | SECRET 'my_secret' 23 | ) 24 | 25 | query IIII 26 | SELECT name, provider, type, scope FROM duckdb_secrets(); 27 | ---- 28 | __default_s3 config s3 ['s3://', 's3n://', 's3a://'] 29 | 30 | # Without a name we use the __default_ name. The default provider for the R2 type is config 31 | statement ok 32 | CREATE SECRET ( 33 | TYPE R2, 34 | KEY_ID 'my_key', 35 | SECRET 'my_secret', 36 | ACCOUNT_ID 'my_account_id' 37 | ) 38 | 39 | query IIII 40 | SELECT name, provider, type, scope FROM duckdb_secrets() ORDER BY name; 41 | ---- 42 | __default_r2 config r2 ['r2://'] 43 | __default_s3 config s3 ['s3://', 's3n://', 's3a://'] 44 | 45 | 46 | # Without a name we use the __default_ name.
The default provider for the GCS type is config 47 | statement ok 48 | CREATE SECRET ( 49 | TYPE GCS, 50 | KEY_ID 'my_key', 51 | SECRET 'my_secret' 52 | ) 53 | 54 | # duckdb_secrets with all defaults looks like this now 55 | query IIIIII 56 | SELECT name, persistent, storage, provider, type, scope FROM duckdb_secrets() ORDER BY name; 57 | ---- 58 | __default_gcs 0 memory config gcs ['gcs://', 'gs://'] 59 | __default_r2 0 memory config r2 ['r2://'] 60 | __default_s3 0 memory config s3 ['s3://', 's3n://', 's3a://'] -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_settings.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_settings.test 2 | # description: Test setting secret settings 3 | # group: [secrets] 4 | 5 | statement ok 6 | PRAGMA enable_verification; 7 | 8 | load __TEST_DIR__/secrets_settings.db 9 | 10 | require httpfs 11 | 12 | statement ok 13 | set secret_directory='__TEST_DIR__/create_secret_settings1' 14 | 15 | statement ok 16 | set allow_persistent_secrets=true; 17 | 18 | # Create an S3 secret; normally the default is TEMPORARY 19 | statement ok 20 | CREATE PERSISTENT SECRET my_perm_secret ( 21 | TYPE S3, 22 | SCOPE 's3://bucket1' 23 | ) 24 | 25 | query II 26 | SELECT name, scope from duckdb_secrets(); 27 | ---- 28 | my_perm_secret ['s3://bucket1'] 29 | 30 | statement error 31 | set secret_directory='__TEST_DIR__/create_secret_settings2' 32 | ---- 33 | Invalid Input Error: Changing Secret Manager settings after the secret manager is used is not allowed! 34 | 35 | statement error 36 | set allow_persistent_secrets=false; 37 | ---- 38 | Invalid Input Error: Changing Secret Manager settings after the secret manager is used is not allowed! 39 | 40 | # This setting CAN be modified after init 41 | statement ok 42 | set default_secret_storage = 'local_file' 43 | 44 | statement ok 45 | reset default_secret_storage; 46 | 47 | restart 48 | 49 | # When disabling secrets, we won't read the one that we wrote earlier 50 | statement ok 51 | set allow_persistent_secrets=false 52 | 53 | query I 54 | select count(*) from duckdb_secrets(); 55 | ---- 56 | 0 57 | 58 | restart 59 | 60 | # Switch settings back and it works again 61 | statement ok 62 | set allow_persistent_secrets=true 63 | 64 | # setting the path correctly, it will work 65 | statement ok 66 | set secret_directory='__TEST_DIR__/create_secret_settings1' 67 | 68 | query II 69 | SELECT name, scope from duckdb_secrets(); 70 | ---- 71 | my_perm_secret ['s3://bucket1'] 72 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_r2.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_r2.test 2 | # description: Test secret creation using the default r2 secret provider 3 | # group: [secrets] 4 | 5 | statement ok 6 | PRAGMA enable_verification; 7 | 8 | require httpfs 9 | 10 | statement ok 11 | set secret_directory='__TEST_DIR__/create_secret_r2' 12 | 13 | # R2 secrets will, instead of requiring the endpoint of .r2.cloudflarestorage.com to be constructed manually, 14 | # use the account_id to configure it. The region is not required at all.
Also the scope defaults to r2:// 15 | statement ok 16 | CREATE SECRET ( 17 | TYPE R2, 18 | ACCOUNT_ID 'some_bogus_account', 19 | KEY_ID 'my_key', 20 | SECRET 'my_secret' 21 | ) 22 | 23 | # The secret will be created for the default scope 24 | query IIII 25 | SELECT name, type, provider, scope FROM duckdb_secrets(); 26 | ---- 27 | __default_r2 r2 config ['r2://'] 28 | 29 | # 30 | statement error 31 | FROM 's3://test-bucket/test.csv' 32 | ---- 33 | :.*HTTP Error.* 34 | 35 | # Account ID is only for R2, trying to set this for S3 will fail 36 | statement error 37 | CREATE SECRET ( 38 | TYPE S3, 39 | ACCOUNT_ID 'some_bogus_account', 40 | KEY_ID 'my_key', 41 | SECRET 'my_secret' 42 | ) 43 | ---- 44 | Binder Error: Unknown parameter 'account_id' for secret type 's3' with default provider 'config' 45 | 46 | # Account ID is only for R2, trying to set this for GCS will fail 47 | statement error 48 | CREATE SECRET ( 49 | TYPE GCS, 50 | PROVIDER config, 51 | ACCOUNT_ID 'some_bogus_account', 52 | KEY_ID 'my_key', 53 | SECRET 'my_secret' 54 | ) 55 | ---- 56 | Binder Error: Unknown parameter 'account_id' for secret type 'gcs' with provider 'config' 57 | 58 | # Ensure secret lookup works correctly; 59 | statement ok 60 | CREATE SECRET test( 61 | TYPE R2, 62 | ACCOUNT_ID 'some_bogus_account', 63 | KEY_ID 'my_key', 64 | SECRET 'my_secret' 65 | ) 66 | -------------------------------------------------------------------------------- /test/sql/storage/encryption/temp_files/encrypted_out_of_core.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/storage/encryption/temp_files/encrypted_out_of_core.test_slow 2 | # description: Encrypted large joins in persistent databases have a leftover temporary directory. 3 | # group: [temp_files] 4 | 5 | foreach cipher GCM CTR 6 | 7 | 8 | require httpfs 9 | 10 | require tpch 11 | 12 | load __TEST_DIR__/leftover_temp_files.db 13 | 14 | statement ok 15 | ATTACH '__TEST_DIR__/encrypted_temp_files_${cipher}.db' AS enc_${cipher} (ENCRYPTION_KEY 'asdf', ENCRYPTION_CIPHER '${cipher}'); 16 | 17 | statement ok 18 | SET temp_file_encryption=true; 19 | 20 | statement ok 21 | USE enc_${cipher}; 22 | 23 | statement ok 24 | SET threads=8 25 | 26 | statement ok 27 | SET memory_limit='1GB'; 28 | 29 | statement ok 30 | CALL dbgen(sf=1); 31 | 32 | statement ok 33 | ALTER TABLE lineitem RENAME TO lineitem1 34 | 35 | statement ok 36 | CREATE TABLE lineitem2 AS FROM lineitem1 37 | 38 | # creating and dropping a table with an ORDER BY 39 | statement ok 40 | CREATE OR REPLACE TEMPORARY TABLE ans as select l1.*, l1.* from lineitem1 l1 ORDER BY l_orderkey, l_returnflag 41 | 42 | statement ok 43 | DROP TABLE ans; 44 | 45 | # performing a small hash join 46 | statement ok 47 | CREATE OR REPLACE TEMPORARY TABLE ans as select l1.*, l2.* from lineitem1 l1 JOIN (FROM lineitem2 l2 WHERE l_orderkey<10000) AS l2 USING (l_orderkey, l_linenumber) 48 | 49 | statement ok 50 | DROP TABLE ans; 51 | 52 | # performing a large window function 53 | statement ok 54 | CREATE OR REPLACE TEMPORARY TABLE ans as select l1.*, row_number() OVER (PARTITION BY l_orderkey, l_linenumber ORDER BY l_orderkey) from lineitem1 l1 55 | 56 | statement ok 57 | DROP TABLE ans; 58 | 59 | # performing a large hash join 60 | statement ok 61 | CREATE OR REPLACE TEMPORARY TABLE ans as select l1.*, l2.* from lineitem1 l1 JOIN lineitem2 l2 USING (l_orderkey, l_linenumber) 62 | 63 | statement ok 64 | DROP TABLE ans; 65 | 66 | restart 67 | 68 | endloop 
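# --- Editor's note: minimal illustrative sketch, not part of the original encrypted_out_of_core.test_slow above ---
# The pattern the test exercises is: attach an encrypted database, opt temporary files into encryption,
# cap memory so operators may spill, and run the workload. Database/table names below are hypothetical.
statement ok
ATTACH '__TEST_DIR__/enc_sketch.db' AS enc_sketch (ENCRYPTION_KEY 'asdf', ENCRYPTION_CIPHER 'GCM');

statement ok
SET temp_file_encryption=true;

statement ok
SET memory_limit='1GB';

statement ok
CREATE TABLE enc_sketch.t AS SELECT range AS i FROM range(1000000);

statement ok
CREATE OR REPLACE TEMPORARY TABLE ans AS SELECT * FROM enc_sketch.t ORDER BY i;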
-------------------------------------------------------------------------------- /test/sql/secrets/secret_compatibility_httpfs.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/secret_compatibility_httpfs.test 2 | # description: Test secret compatibility across versions 3 | # group: [secrets] 4 | 5 | require httpfs 6 | 7 | require-env TEST_PERSISTENT_SECRETS_AVAILABLE 8 | 9 | # Ensure any currently stored secrets don't interfere with the test 10 | statement ok 11 | set secret_directory='./data/secrets/httpfs' 12 | 13 | query IIIIIII 14 | from duckdb_secrets() order by name; 15 | ---- 16 | s3_config_secret_v1_1_2 s3 config true local_file ['s3://', 's3n://', 's3a://'] name=s3_config_secret_v1_1_2;type=s3;provider=config;serializable=true;scope=s3://,s3n://,s3a://;region=us-east-2;use_ssl=false 17 | s3_config_secret_v1_1_3 s3 config true local_file ['s3://', 's3n://', 's3a://'] name=s3_config_secret_v1_1_3;type=s3;provider=config;serializable=true;scope=s3://,s3n://,s3a://;region=us-east-2;use_ssl=false 18 | s3_config_secret_v_1_0_0 s3 config true local_file ['s3://', 's3n://', 's3a://'] name=s3_config_secret_v_1_0_0;type=s3;provider=config;serializable=true;scope=s3://,s3n://,s3a://;endpoint=s3.amazonaws.com;key_id=;region=us-east-2;s3_url_compatibility_mode=0;secret=redacted;session_token=redacted;url_style=;use_ssl=0 19 | s3_secret_chain_v_1_0_0 s3 credential_chain true local_file ['s3://', 's3n://', 's3a://'] name=s3_secret_chain_v_1_0_0;type=s3;provider=credential_chain;serializable=true;scope=s3://,s3n://,s3a://;endpoint=s3.amazonaws.com;region=us-east-2;use_ssl=false 20 | s3_secret_chain_v_1_1_2 s3 credential_chain true local_file ['s3://', 's3n://', 's3a://'] name=s3_secret_chain_v_1_1_2;type=s3;provider=credential_chain;serializable=true;scope=s3://,s3n://,s3a://;endpoint=s3.amazonaws.com;region=us-east-2;use_ssl=false 21 | s3_secret_chain_v_1_1_3 s3 credential_chain true local_file ['s3://', 's3n://', 's3a://'] name=s3_secret_chain_v_1_1_3;type=s3;provider=credential_chain;serializable=true;scope=s3://,s3n://,s3a://;endpoint=s3.amazonaws.com;region=us-east-2;use_ssl=false 22 | -------------------------------------------------------------------------------- /test/sql/copy/s3/hive_partitioned_write_s3.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/hive_partitioned_write_s3.test_slow 2 | # description: slow test for the hive partitioned write to s3 3 | # group: [s3] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require tpch 10 | 11 | require-env S3_TEST_SERVER_AVAILABLE 1 12 | 13 | require-env AWS_DEFAULT_REGION 14 | 15 | require-env AWS_ACCESS_KEY_ID 16 | 17 | require-env AWS_SECRET_ACCESS_KEY 18 | 19 | require-env DUCKDB_S3_ENDPOINT 20 | 21 | require-env DUCKDB_S3_USE_SSL 22 | 23 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 24 | set ignore_error_messages 25 | 26 | statement ok 27 | pragma memory_limit='200MB' 28 | 29 | statement ok 30 | set http_timeout=120000; 31 | 32 | # More retries (longest wait will be 25600ms) 33 | statement ok 34 | set http_retries=6; 35 | 36 | # around 200MB worth of data, will require the PartitionedColumnData to spill to disk 37 | statement ok 38 | COPY (SELECT i%2::INT32 as part_col, i::INT32 FROM range(0,25000000) tbl(i)) TO 's3://test-bucket/partitioned_memory_spill' (FORMAT parquet, PARTITION_BY part_col, overwrite_or_ignore TRUE); 
39 | 40 | statement ok 41 | pragma memory_limit='-1' 42 | 43 | statement ok 44 | call dbgen(sf=1); 45 | 46 | # Partition by 2 columns 47 | statement ok 48 | COPY lineitem TO 's3://test-bucket/lineitem_sf1_partitioned' (FORMAT PARQUET, PARTITION_BY (l_returnflag, l_linestatus), overwrite_or_ignore TRUE); 49 | 50 | statement ok 51 | DROP TABLE lineitem; 52 | 53 | statement ok 54 | CREATE VIEW lineitem as SELECT * FROM parquet_scan('s3://test-bucket/lineitem_sf1_partitioned/*/*/*.parquet', HIVE_PARTITIONING=1); 55 | 56 | loop i 1 9 57 | 58 | query I 59 | PRAGMA tpch(${i}) 60 | ---- 61 | :duckdb/extension/tpch/dbgen/answers/sf1/q0${i}.csv 62 | 63 | endloop 64 | 65 | loop i 10 23 66 | 67 | query I 68 | PRAGMA tpch(${i}) 69 | ---- 70 | :duckdb/extension/tpch/dbgen/answers/sf1/q${i}.csv 71 | 72 | endloop 73 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_r2_serialization.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_r2_serialization.test 2 | # description: Demo of secret serialization 3 | # group: [secrets] 4 | 5 | # NOTE: this is a testing feature that will be removed / replaced with actual persistent secrets. 6 | 7 | require httpfs 8 | 9 | require parquet 10 | 11 | load __TEST_DIR__/test_serialize_secrets.db 12 | 13 | statement ok 14 | PRAGMA enable_verification; 15 | 16 | statement ok 17 | set secret_directory='__TEST_DIR__/create_secret_r2_serialization' 18 | 19 | statement ok 20 | CREATE OR REPLACE PERSISTENT SECRET s1 ( 21 | TYPE S3, 22 | PROVIDER config, 23 | SCOPE 's3://my_scope', 24 | KEY_ID 'mekey', 25 | SECRET 'mesecret', 26 | REGION 'meregion', 27 | SESSION_TOKEN 'mesesh', 28 | ENDPOINT 'meendpoint', 29 | URL_STYLE 'mahstyle', 30 | USE_SSL true, 31 | URL_COMPATIBILITY_MODE true 32 | ) 33 | 34 | query IIII 35 | select name, type, provider, scope FROM duckdb_secrets(); 36 | ---- 37 | s1 s3 config ['s3://my_scope'] 38 | 39 | query I nosort secret_to_string 40 | select * from duckdb_secrets(); 41 | ---- 42 | 43 | restart 44 | 45 | # Now setting the secret dir somewhere nonexistent will yield no persistent secrets 46 | statement ok 47 | set secret_directory='__TEST_DIR__/does_not_exist2' 48 | 49 | query I 50 | select count(*) FROM duckdb_secrets(); 51 | ---- 52 | 0 53 | 54 | restart 55 | 56 | # However, setting it to the dir that does exist, we can suddenly see our persisted secrets 57 | statement ok 58 | set secret_directory='__TEST_DIR__/create_secret_r2_serialization' 59 | 60 | # After restart secret is still there 61 | query IIII 62 | select name, type, provider, scope FROM duckdb_secrets(); 63 | ---- 64 | s1 s3 config ['s3://my_scope'] 65 | 66 | # Even more: it matches the exact string. Note that we don't disable redaction here to ensure we cover 67 | # redaction set serialization with this test 68 | query I nosort secret_to_string 69 | select * from duckdb_secrets(); 70 | ---- -------------------------------------------------------------------------------- /test/sql/copy/s3/upload_large_file.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/upload_large_file.test_slow 2 | # description: Copy large csv/parquet files from and to S3.
3 | # group: [s3] 4 | 5 | require tpch 6 | 7 | require parquet 8 | 9 | require httpfs 10 | 11 | require-env S3_TEST_SERVER_AVAILABLE 1 12 | 13 | # Require that these environment variables are also set 14 | 15 | require-env AWS_DEFAULT_REGION 16 | 17 | require-env AWS_ACCESS_KEY_ID 18 | 19 | require-env AWS_SECRET_ACCESS_KEY 20 | 21 | require-env DUCKDB_S3_ENDPOINT 22 | 23 | require-env DUCKDB_S3_USE_SSL 24 | 25 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 26 | set ignore_error_messages 27 | 28 | # confirm we use a reasonable amount of memory 29 | statement ok 30 | SET memory_limit='2.5GB'; 31 | 32 | statement ok 33 | set http_timeout=120000; 34 | 35 | # More retries (longest wait will be 25600ms) 36 | statement ok 37 | set http_retries=6; 38 | 39 | # disable tmp dir to force OOM if we exceed our set limit 40 | statement ok 41 | PRAGMA temp_directory='' 42 | 43 | statement ok 44 | SET s3_uploader_thread_limit = 5; 45 | 46 | statement ok 47 | CALL DBGEN(sf=1) 48 | 49 | query I 50 | SELECT 51 | sum(l_extendedprice * l_discount) AS revenue 52 | FROM 53 | lineitem 54 | WHERE 55 | l_shipdate >= CAST('1994-01-01' AS date) 56 | AND l_shipdate < CAST('1995-01-01' AS date) 57 | AND l_discount BETWEEN 0.05 58 | AND 0.07 59 | AND l_quantity < 24; 60 | ---- 61 | 123141078.2283 62 | 63 | # Parquet file ~300MB 64 | statement ok 65 | COPY lineitem TO 's3://test-bucket/multipart/export_large.parquet' (FORMAT 'parquet'); 66 | 67 | query I 68 | SELECT 69 | sum(l_extendedprice * l_discount) AS revenue 70 | FROM 71 | "s3://test-bucket/multipart/export_large.parquet" 72 | WHERE 73 | l_shipdate >= CAST('1994-01-01' AS date) 74 | AND l_shipdate < CAST('1995-01-01' AS date) 75 | AND l_discount BETWEEN 0.05 76 | AND 0.07 77 | AND l_quantity < 24; 78 | ---- 79 | 123141078.2283 80 | -------------------------------------------------------------------------------- /test/sql/copy/parquet/parquet_encryption_httpfs.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/parquet/parquet_encryption_httpfs.test 2 | # description: Test Parquet encryption with OpenSSL 3 | # group: [parquet] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | # parquet keys are not persisted across restarts 10 | statement ok 11 | PRAGMA enable_verification 12 | 13 | # add keys of 3 different lengths 14 | statement ok 15 | PRAGMA add_parquet_key('key128', '0123456789112345') 16 | 17 | statement ok 18 | PRAGMA add_parquet_key('key192', '012345678911234501234567') 19 | 20 | statement ok 21 | PRAGMA add_parquet_key('key256', '01234567891123450123456789112345') 22 | 23 | # test all valid AES key lengths 24 | foreach key_len 128 192 256 25 | 26 | statement ok 27 | COPY (SELECT 42 i) to '__TEST_DIR__/encrypted${key_len}_openssl.parquet' (ENCRYPTION_CONFIG {footer_key: 'key${key_len}'}) 28 | 29 | query I 30 | SELECT * FROM read_parquet('__TEST_DIR__/encrypted${key_len}_openssl.parquet', encryption_config={footer_key: 'key${key_len}'}) 31 | ---- 32 | 42 33 | 34 | statement ok 35 | CREATE OR REPLACE TABLE test (i INTEGER) 36 | 37 | statement ok 38 | COPY test FROM '__TEST_DIR__/encrypted${key_len}_openssl.parquet' (ENCRYPTION_CONFIG {footer_key: 'key${key_len}'}) 39 | 40 | query I 41 | SELECT * FROM test 42 | ---- 43 | 42 44 | 45 | endloop 46 | 47 | # what happens if we don't try to decrypt even if the file is encrypted? 
48 | statement error 49 | SELECT * FROM read_parquet('__TEST_DIR__/encrypted128_openssl.parquet') 50 | ---- 51 | Invalid Input Error 52 | 53 | # what if we try to decrypt with the wrong key? 54 | statement error 55 | SELECT * FROM read_parquet('__TEST_DIR__/encrypted128_openssl.parquet', encryption_config={footer_key: 'key192'}) 56 | ---- 57 | Invalid Input Error: Computed AES tag differs from read AES tag, are you using the right key? 58 | 59 | # what if we don't encrypt, but try to decrypt? 60 | statement ok 61 | COPY (SELECT 42 i) to '__TEST_DIR__/unencrypted.parquet' 62 | 63 | statement error 64 | SELECT * FROM read_parquet('__TEST_DIR__/unencrypted.parquet', encryption_config={footer_key: 'key256'}) 65 | ---- 66 | Invalid Input Error 67 | -------------------------------------------------------------------------------- /test/sql/copy/csv/glob/copy_csv_glob_s3.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/csv/glob/copy_csv_glob_s3.test 2 | # description: Test globbing CSVs on s3 3 | # group: [glob] 4 | 5 | statement ok 6 | PRAGMA enable_verification 7 | 8 | require httpfs 9 | 10 | require-env S3_TEST_SERVER_AVAILABLE 1 11 | 12 | # Require that these environment variables are also set 13 | 14 | require-env AWS_DEFAULT_REGION 15 | 16 | require-env AWS_ACCESS_KEY_ID 17 | 18 | require-env AWS_SECRET_ACCESS_KEY 19 | 20 | require-env DUCKDB_S3_ENDPOINT 21 | 22 | require-env DUCKDB_S3_USE_SSL 23 | 24 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 25 | set ignore_error_messages 26 | 27 | # copy files to S3 before beginning tests 28 | statement ok 29 | COPY (select * from 'duckdb/data/csv/glob/a1/a1.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/a1/a1.csv'; 30 | 31 | statement ok 32 | COPY (select * from 'duckdb/data/csv/glob/a2/a2.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/a2/a2.csv'; 33 | 34 | statement ok 35 | COPY (select * from 'duckdb/data/csv/glob/a3/b1.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/a3/b1.csv'; 36 | 37 | statement ok 38 | COPY (select null) to 's3://test-bucket/glob/copy/empty/empty.csv'; 39 | 40 | statement ok 41 | COPY (select * from 'duckdb/data/csv/glob/i1/integer.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/empty/integer.csv'; 42 | 43 | statement ok 44 | CREATE TABLE dates(d DATE); 45 | 46 | statement ok 47 | COPY dates FROM 's3://test-bucket/copy_csv_glob_s3/copy/a[123]/*.csv' (AUTO_DETECT 1); 48 | 49 | # simple globbing for both url styles 50 | foreach urlstyle path vhost 51 | 52 | statement ok 53 | SET s3_url_style='${urlstyle}' 54 | 55 | query I 56 | SELECT * FROM dates ORDER BY 1 57 | ---- 58 | 2019-06-05 59 | 2019-06-15 60 | 2019-06-25 61 | 2019-07-05 62 | 2019-07-15 63 | 2019-07-25 64 | 2019-08-05 65 | 2019-08-15 66 | 2019-08-25 67 | 68 | # nothing matches the glob 69 | statement error 70 | INSERT INTO dates FROM read_csv('s3://test-bucket/copy_csv_glob_s3/copy/*/a*a.csv', auto_detect=1) 71 | ---- 72 | No files found that match the pattern 73 | 74 | endloop 75 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_minio.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_minio.test 2 | # description: Test s3 secrets actually work using minio 3 | # group: [secrets] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require-env S3_TEST_SERVER_AVAILABLE 1 10 | 11 | # 
Require that these environment variables are also set 12 | 13 | require-env AWS_DEFAULT_REGION 14 | 15 | require-env AWS_ACCESS_KEY_ID 16 | 17 | require-env AWS_SECRET_ACCESS_KEY 18 | 19 | require-env DUCKDB_S3_ENDPOINT 20 | 21 | require-env DUCKDB_S3_USE_SSL 22 | 23 | set ignore_error_messages 24 | 25 | load __TEST_DIR__/persistent_secrets.db 26 | 27 | statement ok 28 | PRAGMA enable_verification; 29 | 30 | statement ok 31 | set secret_directory='__TEST_DIR__/create_secret_minio' 32 | 33 | # first need to unset the duckdb settings: currently the env variables are loaded automatically making all queries auth 34 | statement ok 35 | set s3_access_key_id=''; 36 | 37 | statement ok 38 | set s3_secret_access_key=''; 39 | 40 | statement error 41 | copy (select 1 as a) to 's3://test- /test-file.parquet' 42 | ---- 43 | 44 | # Now we create a scoped secret with correct credentials 45 | statement ok 46 | CREATE PERSISTENT SECRET ( 47 | TYPE S3, 48 | PROVIDER config, 49 | SCOPE 's3://test-bucket/only-this-file-gets-auth.parquet', 50 | KEY_ID '${AWS_ACCESS_KEY_ID}', 51 | SECRET '${AWS_SECRET_ACCESS_KEY}', 52 | REGION '${AWS_DEFAULT_REGION}', 53 | ENDPOINT '${DUCKDB_S3_ENDPOINT}', 54 | USE_SSL '${DUCKDB_S3_USE_SSL}' 55 | ) 56 | 57 | # scope doesn't match! query still fails 58 | statement error 59 | copy (select 1 as a) to 's3://test-bucket/test-file.parquet' 60 | ---- 61 | 62 | # scope matches, the secret is chosen and the query will succeed 63 | statement ok 64 | copy (select 1 as a) to 's3://test-bucket/only-this-file-gets-auth.parquet' 65 | 66 | restart 67 | 68 | statement ok 69 | set secret_directory='__TEST_DIR__/create_secret_minio' 70 | 71 | # persistent secrets survive restart 72 | statement ok 73 | copy (select 1 as a) to 's3://test-bucket/only-this-file-gets-auth.parquet' 74 | 75 | # It's still scoped 76 | statement error 77 | copy (select 1 as a) to 's3://test-bucket/no-auth-here.parquet' 78 | ---- -------------------------------------------------------------------------------- /src/include/httpfs_client.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/http_util.hpp" 4 | 5 | namespace duckdb { 6 | class HTTPLogger; 7 | class FileOpener; 8 | struct FileOpenerInfo; 9 | class HTTPState; 10 | 11 | struct HTTPFSParams : public HTTPParams { 12 | HTTPFSParams(HTTPUtil &http_util) : HTTPParams(http_util) { 13 | } 14 | 15 | static constexpr bool DEFAULT_ENABLE_SERVER_CERT_VERIFICATION = false; 16 | static constexpr uint64_t DEFAULT_HF_MAX_PER_PAGE = 0; 17 | static constexpr bool DEFAULT_FORCE_DOWNLOAD = false; 18 | static constexpr bool AUTO_FALLBACK_TO_FULL_DOWNLOAD = true; 19 | 20 | bool force_download = DEFAULT_FORCE_DOWNLOAD; 21 | bool auto_fallback_to_full_download = AUTO_FALLBACK_TO_FULL_DOWNLOAD; 22 | bool enable_server_cert_verification = DEFAULT_ENABLE_SERVER_CERT_VERIFICATION; 23 | bool enable_curl_server_cert_verification = true; 24 | idx_t hf_max_per_page = DEFAULT_HF_MAX_PER_PAGE; 25 | string ca_cert_file; 26 | string bearer_token; 27 | bool unsafe_disable_etag_checks {false}; 28 | shared_ptr state; 29 | string user_agent = {""}; 30 | // Additional fields need to be appended at the end and need to be propagated to duckdb-wasm 31 | // TODO: make this unnecessary 32 | }; 33 | 34 | class HTTPFSUtil : public HTTPUtil { 35 | public: 36 | unique_ptr InitializeParameters(optional_ptr opener, 37 | optional_ptr info) override; 38 | unique_ptr InitializeClient(HTTPParams &http_params, const string
&proto_host_port) override; 39 | 40 | static unordered_map ParseGetParameters(const string &text); 41 | static shared_ptr GetHTTPUtil(optional_ptr opener); 42 | 43 | string GetName() const override; 44 | }; 45 | 46 | class HTTPFSCurlUtil : public HTTPFSUtil { 47 | public: 48 | unique_ptr InitializeClient(HTTPParams &http_params, const string &proto_host_port) override; 49 | 50 | static unordered_map ParseGetParameters(const string &text); 51 | 52 | string GetName() const override; 53 | }; 54 | 55 | struct HeaderCollector { 56 | std::vector header_collection; 57 | }; 58 | 59 | } // namespace duckdb 60 | -------------------------------------------------------------------------------- /test/sql/copy/csv/parallel/csv_parallel_httpfs.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/csv/parallel/csv_parallel_httpfs.test 2 | # description: This test issue #7336 and #7337 3 | # group: [parallel] 4 | 5 | statement ok 6 | PRAGMA enable_verification 7 | 8 | require httpfs 9 | 10 | query IIII 11 | select column00, column01, column02, column03 from read_csv_auto('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv') 12 | ---- 13 | 1 AAAAAAAABAAAAAAA 980124 7135 14 | 2 AAAAAAAACAAAAAAA 819667 1461 15 | 3 AAAAAAAADAAAAAAA 1473522 6247 16 | 4 AAAAAAAAEAAAAAAA 1703214 3986 17 | 5 AAAAAAAAFAAAAAAA 953372 4470 18 | 6 AAAAAAAAGAAAAAAA 213219 6374 19 | 7 AAAAAAAAHAAAAAAA 68377 3219 20 | 8 AAAAAAAAIAAAAAAA 1215897 2471 21 | 9 AAAAAAAAJAAAAAAA 1168667 1404 22 | 10 AAAAAAAAKAAAAAAA 1207553 5143 23 | 24 | query IIIIIIIIIIIIIIIIII 25 | from read_csv_auto('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv'); 26 | ---- 27 | 1 AAAAAAAABAAAAAAA 980124 7135 32946 2452238 2452208 Mr. Javier Lewis Y 9 12 1936 CHILE NULL Javier.Lewis@VFAxlnZEvOx.org 2452508 28 | 2 AAAAAAAACAAAAAAA 819667 1461 31655 2452318 2452288 Dr. Amy Moses Y 9 4 1966 TOGO NULL Amy.Moses@Ovk9KjHH.com 2452318 29 | 3 AAAAAAAADAAAAAAA 1473522 6247 48572 2449130 2449100 Miss Latisha Hamilton Y 18 9 1979 NIUE NULL Latisha.Hamilton@V.com 2452313 30 | 4 AAAAAAAAEAAAAAAA 1703214 3986 39558 2450030 2450000 Dr. Michael White Y 7 6 1983 MEXICO NULL Michael.White@i.org 2452361 31 | 5 AAAAAAAAFAAAAAAA 953372 4470 36368 2449438 2449408 Sir Robert Moran N 8 5 1956 FIJI NULL Robert.Moran@Hh.edu 2452469 32 | 6 AAAAAAAAGAAAAAAA 213219 6374 27082 2451883 2451853 Ms. Brunilda Sharp Y 4 12 1925 SURINAME NULL Brunilda.Sharp@T3pylZEUQjm.org 2452430 33 | 7 AAAAAAAAHAAAAAAA 68377 3219 44814 2451438 2451408 Ms. Fonda Wiles N 24 4 1985 GAMBIA NULL Fonda.Wiles@S9KnyEtz9hv.org 2452360 34 | 8 AAAAAAAAIAAAAAAA 1215897 2471 16598 2449406 2449376 Sir Ollie Shipman N 26 12 1938 KOREA, REPUBLIC OF NULL Ollie.Shipman@be.org 2452334 35 | 9 AAAAAAAAJAAAAAAA 1168667 1404 49388 2452275 2452245 Sir Karl Gilbert N 26 10 1966 MONTSERRAT NULL Karl.Gilbert@Crg5KyP2IxX9C4d6.edu 2452454 36 | 10 AAAAAAAAKAAAAAAA 1207553 5143 19580 2451353 2451323 Ms. 
Albert Brunson N 15 10 1973 JORDAN NULL Albert.Brunson@62.com 2452641 37 | -------------------------------------------------------------------------------- /test/sql/copy/parquet/test_yellow_cab.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/parquet/test_yellow_cab.test_slow 2 | # description: Test yellow cab parquet file 3 | # group: [parquet] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | statement ok 10 | CREATE TABLE yellow_cab AS SELECT * FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/yellowcab.parquet' 11 | 12 | statement ok 13 | PRAGMA enable_verification 14 | 15 | query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII 16 | select min(VendorID::VARCHAR), max(VendorID::VARCHAR), min(tpep_pickup_datetime::VARCHAR), max(tpep_pickup_datetime::VARCHAR), min(tpep_dropoff_datetime::VARCHAR), max(tpep_dropoff_datetime::VARCHAR), min(passenger_count::VARCHAR), max(passenger_count::VARCHAR), min(trip_distance::VARCHAR), max(trip_distance::VARCHAR), min(pickup_longitude::VARCHAR), max(pickup_longitude::VARCHAR), min(pickup_latitude::VARCHAR), max(pickup_latitude::VARCHAR), min(RatecodeID::VARCHAR), max(RatecodeID::VARCHAR), min(store_and_fwd_flag::VARCHAR), max(store_and_fwd_flag::VARCHAR), min(dropoff_longitude::VARCHAR), max(dropoff_longitude::VARCHAR), min(dropoff_latitude::VARCHAR), max(dropoff_latitude::VARCHAR), min(payment_type::VARCHAR), max(payment_type::VARCHAR), min(fare_amount::VARCHAR), max(fare_amount::VARCHAR), min(extra::VARCHAR), max(extra::VARCHAR), min(mta_tax::VARCHAR), max(mta_tax::VARCHAR), min(tip_amount::VARCHAR), max(tip_amount::VARCHAR), min(tolls_amount::VARCHAR), max(tolls_amount::VARCHAR), min(improvement_surcharge::VARCHAR), max(improvement_surcharge::VARCHAR), min(total_amount::VARCHAR), max(total_amount::VARCHAR) from yellow_cab; 17 | ---- 18 | 1 2 2016-01-01 00:00:00 2016-01-29 12:08:57 2016-01-01 00:00:00 2016-01-30 12:05:11 0 8 .00 97.40 -0.13990700244903564 0 0 57.269275665283203 1 99 (empty) Y -73.210006713867188 0 0 41.317001342773437 1 4 -10 998 -0.5 2.0 -0.5 0.5 0 998.14 -10.5 9.75 -0.3 0.3 -10.8 998.3 19 | 20 | 21 | # writer round-trip 22 | statement ok 23 | COPY yellow_cab TO '__TEST_DIR__/yellowcab.parquet' (FORMAT PARQUET); 24 | 25 | query IIIIIIIIIIIIIIIIIII nosort yellowcab 26 | SELECT * FROM yellow_cab 27 | ---- 28 | 29 | query IIIIIIIIIIIIIIIIIII nosort yellowcab 30 | SELECT * FROM '__TEST_DIR__/yellowcab.parquet' 31 | ---- 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /test/sql/copy/s3/glob_s3_paging.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/glob_s3_paging.test_slow 2 | # description: Test globbing of a large number of parquet files to test the paging mechanism 3 | # group: [s3] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require-env S3_TEST_SERVER_AVAILABLE 1 10 | 11 | # Require that these environment variables are also set 12 | 13 | require-env AWS_DEFAULT_REGION 14 | 15 | require-env AWS_ACCESS_KEY_ID 16 | 17 | require-env AWS_SECRET_ACCESS_KEY 18 | 19 | require-env DUCKDB_S3_ENDPOINT 20 | 21 | require-env DUCKDB_S3_USE_SSL 22 | 23 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 24 | set ignore_error_messages 25 | 26 | statement ok 27 | set http_timeout=120000; 28 | 29 | # More retries (longest wait will be 25600ms) 30 | statement ok 31 | 
set http_retries=6; 32 | 33 | # Test should be a bit faster using the metadata cache 34 | statement ok 35 | SET enable_http_metadata_cache=true; 36 | 37 | foreach urlstyle path vhost 38 | 39 | statement ok 40 | SET s3_url_style='${urlstyle}' 41 | 42 | ## For both formats we generate 2000 files which we will glob to test the paging mechanism of aws ListObjectV2 call is handled properly 43 | foreach format parquet csv 44 | 45 | foreach i 0 1 46 | 47 | foreach j 0 1 2 3 4 5 6 7 8 9 48 | 49 | foreach k 0 1 2 3 4 5 6 7 8 9 50 | 51 | foreach l 0 1 2 3 4 5 6 7 8 9 52 | 53 | statement ok 54 | COPY (select (${i}${j}${k}${l})::INT as column0) to 's3://test-bucket/parquet_glob_s3_paging/paging/t${i}${j}${k}${l}-${urlstyle}-urls.${format}'; 55 | 56 | endloop 57 | 58 | endloop 59 | 60 | endloop 61 | 62 | endloop 63 | 64 | # Begin tests 65 | query I 66 | select sum(column0) from 's3://test-bucket/parquet_glob_s3_paging/paging/t*-${urlstyle}-urls.${format}' 67 | ---- 68 | 1999000 69 | 70 | endloop 71 | 72 | endloop 73 | 74 | # test with parquet_metadata_cache = true 75 | statement ok 76 | SET parquet_metadata_cache=true; 77 | 78 | foreach urlstyle path vhost 79 | 80 | foreach format parquet 81 | 82 | loop i 0 2 83 | 84 | # Begin tests 85 | query I 86 | select sum(column0) from 's3://test-bucket/parquet_glob_s3_paging/paging/t*-${urlstyle}-urls.${format}' 87 | ---- 88 | 1999000 89 | 90 | endloop 91 | 92 | endloop 93 | 94 | endloop 95 | -------------------------------------------------------------------------------- /src/include/crypto.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/encryption_state.hpp" 4 | #include "duckdb/common/helper.hpp" 5 | 6 | #include 7 | #include 8 | 9 | typedef struct evp_cipher_ctx_st EVP_CIPHER_CTX; 10 | typedef struct evp_cipher_st EVP_CIPHER; 11 | 12 | namespace duckdb { 13 | 14 | typedef unsigned char hash_bytes[32]; 15 | typedef unsigned char hash_str[64]; 16 | 17 | void sha256(const char *in, size_t in_len, hash_bytes &out); 18 | 19 | void hmac256(const std::string &message, const char *secret, size_t secret_len, hash_bytes &out); 20 | 21 | void hmac256(std::string message, hash_bytes secret, hash_bytes &out); 22 | 23 | void hex256(hash_bytes &in, hash_str &out); 24 | 25 | class DUCKDB_EXTENSION_API AESStateSSL : public EncryptionState { 26 | 27 | public: 28 | explicit AESStateSSL(EncryptionTypes::CipherType cipher_p, idx_t key_len_p); 29 | ~AESStateSSL() override; 30 | 31 | public: 32 | void InitializeEncryption(const_data_ptr_t iv, idx_t iv_len, const_data_ptr_t key, idx_t key_len, 33 | const_data_ptr_t aad, idx_t aad_len) override; 34 | void InitializeDecryption(const_data_ptr_t iv, idx_t iv_len, const_data_ptr_t key, idx_t key_len, 35 | const_data_ptr_t aad, idx_t aad_len) override; 36 | size_t Process(const_data_ptr_t in, idx_t in_len, data_ptr_t out, idx_t out_len) override; 37 | size_t Finalize(data_ptr_t out, idx_t out_len, data_ptr_t tag, idx_t tag_len) override; 38 | void GenerateRandomData(data_ptr_t data, idx_t len) override; 39 | 40 | const EVP_CIPHER *GetCipher(idx_t key_len); 41 | size_t FinalizeGCM(data_ptr_t out, idx_t out_len, data_ptr_t tag, idx_t tag_len); 42 | 43 | private: 44 | EVP_CIPHER_CTX *context; 45 | EncryptionTypes::Mode mode; 46 | }; 47 | 48 | } // namespace duckdb 49 | 50 | extern "C" { 51 | 52 | class DUCKDB_EXTENSION_API AESStateSSLFactory : public duckdb::EncryptionUtil { 53 | public: 54 | explicit AESStateSSLFactory() { 55 | } 56 | 57 | 
duckdb::shared_ptr CreateEncryptionState(duckdb::EncryptionTypes::CipherType cipher_p, 58 | duckdb::idx_t key_len_p) const override { 59 | return duckdb::make_shared_ptr(cipher_p, key_len_p); 60 | } 61 | 62 | ~AESStateSSLFactory() override { 63 | } 64 | }; 65 | } 66 | -------------------------------------------------------------------------------- /src/include/http_metadata_cache.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/atomic.hpp" 4 | #include "duckdb/common/chrono.hpp" 5 | #include "duckdb/common/list.hpp" 6 | #include "duckdb/common/mutex.hpp" 7 | #include "duckdb/common/string.hpp" 8 | #include "duckdb/common/types.hpp" 9 | #include "duckdb/common/unordered_map.hpp" 10 | #include "duckdb/main/client_context.hpp" 11 | #include "duckdb/main/client_context_state.hpp" 12 | 13 | #include 14 | #include 15 | 16 | namespace duckdb { 17 | 18 | struct HTTPMetadataCacheEntry { 19 | idx_t length; 20 | timestamp_t last_modified; 21 | string etag; 22 | }; 23 | 24 | // Simple cache with a max age for an entry to be valid 25 | class HTTPMetadataCache : public ClientContextState { 26 | public: 27 | explicit HTTPMetadataCache(bool flush_on_query_end_p, bool shared_p) 28 | : flush_on_query_end(flush_on_query_end_p), shared(shared_p) {}; 29 | 30 | void Insert(const string &path, HTTPMetadataCacheEntry val) { 31 | if (shared) { 32 | lock_guard parallel_lock(lock); 33 | map[path] = val; 34 | } else { 35 | map[path] = val; 36 | } 37 | }; 38 | 39 | void Erase(string path) { 40 | if (shared) { 41 | lock_guard parallel_lock(lock); 42 | map.erase(path); 43 | } else { 44 | map.erase(path); 45 | } 46 | }; 47 | 48 | bool Find(string path, HTTPMetadataCacheEntry &ret_val) { 49 | if (shared) { 50 | lock_guard parallel_lock(lock); 51 | auto lookup = map.find(path); 52 | if (lookup != map.end()) { 53 | ret_val = lookup->second; 54 | return true; 55 | } else { 56 | return false; 57 | } 58 | } else { 59 | auto lookup = map.find(path); 60 | if (lookup != map.end()) { 61 | ret_val = lookup->second; 62 | return true; 63 | } else { 64 | return false; 65 | } 66 | } 67 | }; 68 | 69 | void Clear() { 70 | if (shared) { 71 | lock_guard parallel_lock(lock); 72 | map.clear(); 73 | } else { 74 | map.clear(); 75 | } 76 | } 77 | 78 | //! 
Called by the ClientContext when the current query ends 79 | void QueryEnd(ClientContext &context) override { 80 | if (flush_on_query_end) { 81 | Clear(); 82 | } 83 | } 84 | 85 | protected: 86 | mutex lock; 87 | unordered_map map; 88 | bool flush_on_query_end; 89 | bool shared; 90 | }; 91 | 92 | } // namespace duckdb 93 | -------------------------------------------------------------------------------- /test/sql/logging/file_system_logging.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/logging/file_system_logging.test 2 | # group: [logging] 3 | 4 | require parquet 5 | 6 | require noforcestorage 7 | 8 | statement ok 9 | set enable_logging = true; 10 | 11 | statement ok 12 | set logging_level='trace'; 13 | 14 | statement ok 15 | COPY (SELECT 1 as a) TO '__TEST_DIR__/test.csv' 16 | 17 | statement ok 18 | FROM '__TEST_DIR__/test.csv' 19 | 20 | statement ok 21 | pragma threads=1 22 | 23 | # Note: regex for test stability 24 | query IIII 25 | SELECT scope, type, log_level, regexp_replace(message, '\"path\":.*test.csv"', '"test.csv"') 26 | FROM duckdb_logs 27 | WHERE type = 'FileSystem' 28 | ORDER BY timestamp 29 | ---- 30 | CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"OPEN"} 31 | CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"WRITE","bytes":"4","pos":"0"} 32 | CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"CLOSE"} 33 | CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"OPEN"} 34 | CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"READ","bytes":"4","pos":"0"} 35 | CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"READ","bytes":"0","pos":"4"} 36 | CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"CLOSE"} 37 | 38 | statement ok 39 | CALL truncate_duckdb_logs(); 40 | 41 | require httpfs 42 | 43 | statement ok 44 | FROM 'https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv' 45 | 46 | # FIXME: investigate why we call READ twice? 47 | query IIII 48 | SELECT scope, type, log_level, regexp_replace(message, '\"path\":.*test.csv"', '"test.csv"') 49 | FROM duckdb_logs 50 | WHERE type = 'FileSystem' AND message NOT LIKE '%duckdb_extension%' 51 | ORDER BY timestamp 52 | ---- 53 | CONNECTION FileSystem TRACE {"fs":"HTTPFileSystem","path":"https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv","op":"OPEN"} 54 | CONNECTION FileSystem TRACE {"fs":"HTTPFileSystem","path":"https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv","op":"READ","bytes":"1276","pos":"0"} 55 | CONNECTION FileSystem TRACE {"fs":"HTTPFileSystem","path":"https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv","op":"READ","bytes":"0","pos":"1276"} 56 | CONNECTION FileSystem TRACE {"fs":"HTTPFileSystem","path":"https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv","op":"CLOSE"} 57 | -------------------------------------------------------------------------------- /test/sql/copy/s3/upload_large_json_file.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/upload_large_json_file.test_slow 2 | # description: Copy large json files from and to S3. 
3 | # group: [s3] 4 | 5 | require tpch 6 | 7 | require json 8 | 9 | require parquet 10 | 11 | require httpfs 12 | 13 | require-env S3_TEST_SERVER_AVAILABLE 1 14 | 15 | # Require that these environment variables are also set 16 | 17 | require-env AWS_DEFAULT_REGION 18 | 19 | require-env AWS_ACCESS_KEY_ID 20 | 21 | require-env AWS_SECRET_ACCESS_KEY 22 | 23 | require-env DUCKDB_S3_ENDPOINT 24 | 25 | require-env DUCKDB_S3_USE_SSL 26 | 27 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 28 | set ignore_error_messages 29 | 30 | statement ok 31 | set http_timeout=120000; 32 | 33 | # More retries (longest wait will be 25600ms) 34 | statement ok 35 | set http_retries=6; 36 | 37 | statement ok 38 | CALL DBGEN(sf=0.1) 39 | 40 | query I 41 | SELECT 42 | sum(l_extendedprice * l_discount) AS revenue 43 | FROM 44 | lineitem 45 | WHERE 46 | l_shipdate >= CAST('1994-01-01' AS date) 47 | AND l_shipdate < CAST('1995-01-01' AS date) 48 | AND l_discount BETWEEN 0.05 49 | AND 0.07 50 | AND l_quantity < 24; 51 | ---- 52 | 11803420.2534 53 | 54 | statement ok 55 | COPY lineitem TO 's3://test-bucket/multipart/export_large.json' (FORMAT 'json'); 56 | 57 | query I 58 | SELECT 59 | sum(l_extendedprice * l_discount) AS revenue 60 | FROM 61 | "s3://test-bucket/multipart/export_large.json" 62 | WHERE 63 | l_shipdate >= CAST('1994-01-01' AS date) 64 | AND l_shipdate < CAST('1995-01-01' AS date) 65 | AND l_discount BETWEEN 0.05 66 | AND 0.07 67 | AND l_quantity < 24; 68 | ---- 69 | 11803420.2534 70 | 71 | # This query triggers an edge case where we apply an S3-specific optimization using multiple cached filehandles 72 | query I 73 | SELECT 74 | sum(l_extendedprice * l_discount)/3 AS revenue 75 | FROM 76 | read_json_auto([ 77 | 's3://test-bucket/multipart/export_large.json', 78 | 's3://test-bucket/multipart/export_large.json', 79 | 's3://test-bucket/multipart/export_large.json',]) 80 | WHERE 81 | l_shipdate >= CAST('1994-01-01' AS date) 82 | AND l_shipdate < CAST('1995-01-01' AS date) 83 | AND l_discount BETWEEN 0.05 84 | AND 0.07 85 | AND l_quantity < 24; 86 | ---- 87 | 11803420.2534 -------------------------------------------------------------------------------- /test/sql/secret/gcs_oauth.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secret/gcs_oauth.test 2 | # description: Test GCS OAuth2 bearer token support 3 | # group: [secret] 4 | 5 | require httpfs 6 | 7 | statement ok 8 | PRAGMA enable_verification 9 | 10 | # Test creating a GCS secret with OAuth2 bearer token 11 | statement ok 12 | CREATE SECRET gcs_oauth_test ( 13 | TYPE GCS, 14 | bearer_token 'test_oauth2_token_12345' 15 | ); 16 | 17 | # Verify the secret was created 18 | query I 19 | SELECT COUNT(*) FROM duckdb_secrets() WHERE name = 'gcs_oauth_test' AND type = 'gcs'; 20 | ---- 21 | 1 22 | 23 | # Verify bearer token is redacted 24 | query I 25 | SELECT COUNT(*) FROM duckdb_secrets() WHERE name = 'gcs_oauth_test' AND secret_string LIKE '%bearer_token=redacted%'; 26 | ---- 27 | 1 28 | 29 | # Test creating a GCS secret with HMAC keys (backward compatibility) 30 | statement ok 31 | CREATE SECRET gcs_hmac_test ( 32 | TYPE GCS, 33 | key_id 'test_key_id', 34 | secret 'test_secret' 35 | ); 36 | 37 | # Verify both secrets exist 38 | query II 39 | SELECT name, type FROM duckdb_secrets() WHERE name IN ('gcs_oauth_test', 'gcs_hmac_test') ORDER BY name; 40 | ---- 41 | gcs_hmac_test gcs 42 | gcs_oauth_test gcs 43 | 44 | # Test creating a GCS 
secret with both bearer token and HMAC (bearer token should take precedence) 45 | statement ok 46 | CREATE SECRET gcs_mixed_test ( 47 | TYPE GCS, 48 | bearer_token 'oauth_token', 49 | key_id 'hmac_key', 50 | secret 'hmac_secret' 51 | ); 52 | 53 | # Verify all three secrets exist 54 | query I 55 | SELECT COUNT(*) FROM duckdb_secrets() WHERE name LIKE 'gcs_%test'; 56 | ---- 57 | 3 58 | 59 | # Clean up 60 | statement ok 61 | DROP SECRET gcs_oauth_test; 62 | 63 | statement ok 64 | DROP SECRET gcs_hmac_test; 65 | 66 | statement ok 67 | DROP SECRET gcs_mixed_test; 68 | 69 | # Verify all secrets are removed 70 | query I 71 | SELECT COUNT(*) FROM duckdb_secrets() WHERE name LIKE 'gcs_%test'; 72 | ---- 73 | 0 74 | 75 | # Test that bearer_token parameter is not allowed for S3 secrets 76 | statement error Unknown named parameter 77 | CREATE SECRET s3_with_bearer ( 78 | TYPE S3, 79 | bearer_token 'should_not_work' 80 | ); 81 | ---- 82 | 83 | # Test that bearer_token parameter is not allowed for R2 secrets 84 | statement error Unknown named parameter 85 | CREATE SECRET r2_with_bearer ( 86 | TYPE R2, 87 | bearer_token 'should_not_work' 88 | ); 89 | ---- -------------------------------------------------------------------------------- /test/sql/copy/s3/parquet_s3_tpcds.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/parquet_s3_tpcds.test_slow 2 | # description: Test all tpcds queries on tpch sf0.01 over s3 3 | # group: [s3] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require tpcds 10 | 11 | require-env S3_TEST_SERVER_AVAILABLE 1 12 | 13 | # Require that these environment variables are also set 14 | 15 | require-env AWS_DEFAULT_REGION 16 | 17 | require-env AWS_ACCESS_KEY_ID 18 | 19 | require-env AWS_SECRET_ACCESS_KEY 20 | 21 | require-env DUCKDB_S3_ENDPOINT 22 | 23 | require-env DUCKDB_S3_USE_SSL 24 | 25 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 26 | set ignore_error_messages 27 | 28 | # answers are generated from postgres 29 | # hence check with NULLS LAST flag 30 | statement ok 31 | PRAGMA default_null_order='NULLS LAST' 32 | 33 | statement ok 34 | SET enable_http_metadata_cache=true; 35 | 36 | statement ok 37 | set http_timeout=120000; 38 | 39 | # More retries (longest wait will be 25600ms) 40 | statement ok 41 | set http_retries=6; 42 | 43 | statement ok 44 | CREATE SCHEMA tpcds; 45 | 46 | statement ok 47 | CALL dsdgen(sf=0.01, schema='tpcds'); 48 | 49 | foreach tbl call_center catalog_page catalog_returns catalog_sales customer customer_demographics customer_address date_dim household_demographics inventory income_band item promotion reason ship_mode store store_returns store_sales time_dim warehouse web_page web_returns web_sales web_site 50 | 51 | statement ok 52 | COPY tpcds.${tbl} TO 's3://test-bucket/tpcds-sf0_01/${tbl}.parquet' (FORMAT 'PARQUET', COMPRESSION 'ZSTD'); 53 | 54 | statement ok 55 | CREATE VIEW ${tbl} AS SELECT * FROM parquet_scan('s3://test-bucket/tpcds-sf0_01/${tbl}.parquet'); 56 | 57 | endloop 58 | 59 | # too slow queries: 60 | # 64, 85 61 | 62 | loop i 1 9 63 | 64 | query I 65 | PRAGMA tpcds(${i}) 66 | ---- 67 | :extension/tpcds/dsdgen/answers/sf0.01/0${i}.csv 68 | 69 | endloop 70 | 71 | loop i 10 64 72 | 73 | query I 74 | PRAGMA tpcds(${i}) 75 | ---- 76 | :extension/tpcds/dsdgen/answers/sf0.01/${i}.csv 77 | 78 | endloop 79 | 80 | loop i 65 85 81 | 82 | query I 83 | PRAGMA tpcds(${i}) 84 | ---- 85 | 
:extension/tpcds/dsdgen/answers/sf0.01/${i}.csv 86 | 87 | endloop 88 | 89 | loop i 86 99 90 | 91 | query I 92 | PRAGMA tpcds(${i}) 93 | ---- 94 | :extension/tpcds/dsdgen/answers/sf0.01/${i}.csv 95 | 96 | endloop 97 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret.test_slow 2 | # description: Test secret creation using the default s3 secret provider 3 | # group: [secrets] 4 | 5 | statement ok 6 | PRAGMA enable_verification; 7 | 8 | require httpfs 9 | 10 | # Ensure any currently stored secrets don't interfere with the test 11 | statement ok 12 | set allow_persistent_secrets=false; 13 | 14 | statement ok 15 | reset s3_use_ssl 16 | 17 | # Create an S3 secret using the default provider (for s3, this will be the "config" provider, requiring the user to set all) 18 | statement ok 19 | CREATE SECRET default_provider_secret ( 20 | TYPE S3, 21 | KEY_ID 'my_key', 22 | SECRET 'my_secret', 23 | REGION 'my_region', 24 | ENDPOINT 'invalid-on-purpose' 25 | ) 26 | 27 | # The secret will be created for the default scope for this type 28 | query III 29 | SELECT name, type, scope FROM duckdb_secrets() WHERE name='default_provider_secret'; 30 | ---- 31 | default_provider_secret s3 ['s3://', 's3n://', 's3a://'] 32 | 33 | # Note the endpoint is now using the one in the default_provider_secret 34 | statement error 35 | FROM 's3://test-bucket/test.csv' 36 | ---- 37 | HTTP HEAD to 'https://test-bucket.invalid-on-purpose/test.csv' 38 | 39 | # Now create an S3 secret using the default (config) provider by explicitly passing it 40 | statement ok 41 | CREATE SECRET secret_scope_1 ( 42 | TYPE S3, 43 | PROVIDER config, 44 | SCOPE 's3://b1', 45 | ENDPOINT 'invalid-on-purpose-2' 46 | ) 47 | 48 | query III 49 | SELECT name, type, scope FROM duckdb_secrets() WHERE name='secret_scope_1'; 50 | ---- 51 | secret_scope_1 s3 ['s3://b1'] 52 | 53 | # Longest match of credential scope takes the win, so this will grab the secret_scope_1 secret 54 | statement error 55 | FROM 's3://b1/test.csv' 56 | ---- 57 | Could not establish connection error for HTTP HEAD to 'https://b1.invalid-on-purpose-2/test.csv' 58 | 59 | # Now confirm we can also set multiple scopes 60 | statement ok 61 | CREATE SECRET secret_scope_2 ( 62 | TYPE S3, 63 | PROVIDER config, 64 | SCOPE ['s3://b2', 's3://b3'], 65 | ENDPOINT 'invalid-on-purpose-3' 66 | ) 67 | 68 | query III 69 | SELECT name, type, scope FROM duckdb_secrets() WHERE name='secret_scope_2'; 70 | ---- 71 | secret_scope_2 s3 ['s3://b2', 's3://b3'] 72 | 73 | statement error 74 | FROM 's3://b2/test.csv' 75 | ---- 76 | Could not establish connection error for HTTP HEAD to 'https://b2.invalid-on-purpose-3/test.csv' 77 | -------------------------------------------------------------------------------- /test/sql/copy/csv/test_csv_httpfs_prepared.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/csv/test_csv_httpfs_prepared.test 2 | # description: CSV Reading From HTTPFS in Prepared Statements 3 | # group: [csv] 4 | 5 | require httpfs 6 | 7 | statement ok 8 | PRAGMA enable_verification 9 | 10 | statement ok 11 | PREPARE boaz_bug AS from read_csv_auto('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv') order by 1 12 | 13 | query ITIIIIITTTTIIITTTI 14 | EXECUTE boaz_bug 15 | ---- 16 | 1 AAAAAAAABAAAAAAA 980124 7135 32946 2452238 2452208
Mr. Javier Lewis Y 9 12 1936 CHILE NULL Javier.Lewis@VFAxlnZEvOx.org 2452508 17 | 2 AAAAAAAACAAAAAAA 819667 1461 31655 2452318 2452288 Dr. Amy Moses Y 9 4 1966 TOGO NULL Amy.Moses@Ovk9KjHH.com 2452318 18 | 3 AAAAAAAADAAAAAAA 1473522 6247 48572 2449130 2449100 Miss Latisha Hamilton Y 18 9 1979 NIUE NULL Latisha.Hamilton@V.com 2452313 19 | 4 AAAAAAAAEAAAAAAA 1703214 3986 39558 2450030 2450000 Dr. Michael White Y 7 6 1983 MEXICO NULL Michael.White@i.org 2452361 20 | 5 AAAAAAAAFAAAAAAA 953372 4470 36368 2449438 2449408 Sir Robert Moran N 8 5 1956 FIJI NULL Robert.Moran@Hh.edu 2452469 21 | 6 AAAAAAAAGAAAAAAA 213219 6374 27082 2451883 2451853 Ms. Brunilda Sharp Y 4 12 1925 SURINAME NULL Brunilda.Sharp@T3pylZEUQjm.org 2452430 22 | 7 AAAAAAAAHAAAAAAA 68377 3219 44814 2451438 2451408 Ms. Fonda Wiles N 24 4 1985 GAMBIA NULL Fonda.Wiles@S9KnyEtz9hv.org 2452360 23 | 8 AAAAAAAAIAAAAAAA 1215897 2471 16598 2449406 2449376 Sir Ollie Shipman N 26 12 1938 KOREA, REPUBLIC OF NULL Ollie.Shipman@be.org 2452334 24 | 9 AAAAAAAAJAAAAAAA 1168667 1404 49388 2452275 2452245 Sir Karl Gilbert N 26 10 1966 MONTSERRAT NULL Karl.Gilbert@Crg5KyP2IxX9C4d6.edu 2452454 25 | 10 AAAAAAAAKAAAAAAA 1207553 5143 19580 2451353 2451323 Ms. Albert Brunson N 15 10 1973 JORDAN NULL Albert.Brunson@62.com 2452641 26 | 27 | statement ok 28 | DEALLOCATE boaz_bug 29 | 30 | statement error 31 | EXECUTE boaz_bug 32 | ---- 33 | Prepared statement "boaz_bug" does not exist 34 | 35 | # Recreate prepared statement with different file 36 | 37 | #FIXME: FILE changed? 38 | mode skip 39 | 40 | statement ok 41 | PREPARE boaz_bug AS SELECT * from read_csv_auto('https://www.data.gouv.fr/fr/datasets/r/6d186965-f41b-41f3-9b23-88241cc6890c') order by all limit 5; 42 | 43 | query ITTRRR 44 | EXECUTE boaz_bug 45 | ---- 46 | 2020 Allemagne Germany 26.1 53196.069 200601.2 47 | 2020 Autriche Austria 18.0 4723.5 26215.8 48 | 2020 Belgique Belgium 28.999999999999996 9436.1 32553.0 49 | 2020 Bulgarie Bulgaria 11.600000000000001 1124.1 9698.7 50 | 2020 Chypre Cyprus 0.0 0.0 1627.6 51 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | 2 | In order to test these locally, `minio` is used. This requires Docker to be installed. 3 | 4 | ### Installing Docker on MacOS 5 | 6 | Install `docker` using `homebrew`. 7 | 8 | 9 | ```bash 10 | brew install docker --cask 11 | ``` 12 | 13 | Then open `/Applications/Docker`. Note that the first time you open the application you need to go to the `Applications` folder, right-click `Docker` and select `open`. 14 | 15 | ### Setting Up Docker 16 | 17 | In order to finish setting up Docker, you need to open the Docker application, and login to your Docker account. Create a Docker account if you do not have one and finish setting up. 18 | 19 | ### Running Minio 20 | 21 | Run the `install_s3_test_server` script. This requires root. This makes a few changes to your system, specifically to `/etc/hosts` to set up a few redirect interfaces to localhost. This only needs to be run once. 22 | 23 | ```bash 24 | sudo ./scripts/install_s3_test_server.sh 25 | ``` 26 | 27 | Then, if this has not been done yet, we need to generate some data: 28 | 29 | ``` 30 | ./scripts/generate_presigned_url.sh 31 | ``` 32 | 33 | Then run the test server in the back-ground using Docker. Note that Docker must be opened for this to work. On MacOS you can open the docker gui (`/Applications/Docker`) and leave it open to accomplish this. 
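Before starting the test server it can help to confirm that the Docker daemon is actually reachable. This is just a generic sanity check using the standard Docker CLI, not a script that ships with this repository:

```bash
# Exits non-zero when the Docker daemon is not reachable
if docker info > /dev/null 2>&1; then
    echo "Docker daemon is running"
else
    echo "Docker daemon is not reachable - open/start Docker first"
fi
```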
34 | 35 | 36 | ```bash 37 | source ./scripts/run_s3_test_server.sh 38 | ``` 39 | 40 | Now set up the following environment variables to enable running of the tests. 41 | 42 | This can be done either manually: 43 | ```bash 44 | export S3_TEST_SERVER_AVAILABLE=1 45 | export AWS_DEFAULT_REGION=eu-west-1 46 | export AWS_ACCESS_KEY_ID=minio_duckdb_user 47 | export AWS_SECRET_ACCESS_KEY=minio_duckdb_user_password 48 | export DUCKDB_S3_ENDPOINT=duckdb-minio.com:9000 49 | export DUCKDB_S3_USE_SSL=false 50 | ``` 51 | 52 | Or using the `set_s3_test_server_variables.sh` script 53 | 54 | ```bash 55 | # use source so it sets the environment variables in your current environment 56 | source scripts/set_s3_test_server_variables.sh 57 | ``` 58 | 59 | Now you should be able to run the S3 tests using minio, e.g.: 60 | 61 | ```bash 62 | build/debug/test/unittest test/sql/copy/s3/s3_hive_partition.test 63 | ``` 64 | 65 | > minio uses port 9000. Clickhouse also uses port 9000. If the tests are not working and you have a running Clickhouse service - try killing it first, e.g. using `killall -9 clickhouse` 66 | 67 | #### Test Data 68 | 69 | The configuration for minio is stored in `scripts/minio_s3.yml`. Data is stored in `/tmp/minio_test_data`. -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_name_conflicts.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_name_conflicts.test 2 | # description: Test name conflict behaviour for secrets 3 | # group: [secrets] 4 | 5 | statement ok 6 | PRAGMA enable_verification; 7 | 8 | load __TEST_DIR__/persistent_secrets.db 9 | 10 | require httpfs 11 | 12 | statement ok 13 | set secret_directory='__TEST_DIR__/create_secret_name_conflicts' 14 | 15 | statement ok 16 | CREATE TEMPORARY SECRET s1 ( TYPE S3 ) 17 | 18 | statement error 19 | CREATE TEMPORARY SECRET s1 ( TYPE S3 ) 20 | ---- 21 | Invalid Input Error: Temporary secret with name 's1' already exists! 22 | 23 | statement ok 24 | CREATE PERSISTENT SECRET s1 ( TYPE S3 ) 25 | 26 | statement error 27 | CREATE PERSISTENT SECRET s1 ( TYPE S3 ) 28 | ---- 29 | Persistent secret with name 's1' already exists in secret storage 'local_file'! 
30 | 31 | statement error 32 | DROP SECRET s1; 33 | ---- 34 | Invalid Input Error: Ambiguity found for secret name 's1', secret occurs in multiple storages 35 | 36 | statement error 37 | DROP SECRET s1 FROM bogus; 38 | ---- 39 | Invalid Input Error: Unknown storage type found for drop secret: 'bogus' 40 | 41 | statement ok 42 | DROP TEMPORARY SECRET s1; 43 | 44 | # Re-dropping the temp s1 is now erroneous 45 | statement error 46 | DROP TEMPORARY SECRET s1; 47 | ---- 48 | Invalid Input Error: Failed to remove non-existent secret with name 's1' 49 | 50 | query II 51 | SELECT name, storage FROM duckdb_secrets() 52 | ---- 53 | s1 local_file 54 | 55 | # Now we will do it again but while the permanent secret is still lazily loaded 56 | restart 57 | 58 | statement ok 59 | set secret_directory='__TEST_DIR__/create_secret_name_conflicts' 60 | 61 | statement ok 62 | CREATE TEMPORARY SECRET s1 ( TYPE S3 ) 63 | 64 | # Now the drop should be ambiguous again: but the persistent secret will be lazily loaded now 65 | statement error 66 | DROP SECRET s1; 67 | ---- 68 | Invalid Input Error: Ambiguity found for secret name 's1', secret occurs in multiple storages 69 | 70 | # Fully specified drop statement this time 71 | statement ok 72 | DROP PERSISTENT SECRET s1 FROM LOCAL_FILE; 73 | 74 | # Now a semi-weird case: this will create if not exists only within its own storage: therefore this does actually create 75 | # the secret 76 | statement ok 77 | CREATE PERSISTENT SECRET IF NOT EXISTS s1 ( TYPE S3 ) 78 | 79 | query II 80 | SELECT name, storage FROM duckdb_secrets() ORDER BY storage 81 | ---- 82 | s1 local_file 83 | s1 memory 84 | 85 | statement ok 86 | DROP PERSISTENT SECRET s1; 87 | 88 | statement ok 89 | DROP SECRET s1; -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_binding.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_binding.test 2 | # description: Test secret binding & types 3 | # group: [secrets] 4 | 5 | statement ok 6 | PRAGMA enable_verification; 7 | 8 | require httpfs 9 | 10 | # Ensure any currently stored secrets don't interfere with the test 11 | statement ok 12 | set allow_persistent_secrets=false; 13 | 14 | # Binder autocasts options, also both with single quotes and without is allowed 15 | statement ok 16 | CREATE SECRET s1 ( 17 | TYPE R2, 18 | PROVIDER config, 19 | SCOPE ['s3://my_r2_scope', 's3://my_r2_scope2'], 20 | ACCOUNT_ID 'some_bogus_account', 21 | KEY_ID '123', 22 | USE_SSL 1, 23 | URL_COMPATIBILITY_MODE false 24 | ) 25 | 26 | query I nosort s1 27 | FROM duckdb_secrets(); 28 | ---- 29 | 30 | statement ok 31 | DROP SECRET s1 32 | 33 | # Create the secret again but in a different way to demonstrate casting and case insensitivity of param names 34 | statement ok 35 | CREATE SECRET s1 ( 36 | TYPE R2, 37 | PROVIDER config, 38 | SCOPE ['s3://my_r2_scope', 's3://my_r2_scope2'], 39 | account_id 'some_bogus_account', 40 | key_id 123, 41 | USE_SSL 'true', 42 | URL_COMPATIBILITY_MODE '0' 43 | ) 44 | 45 | query I nosort s1 46 | FROM duckdb_secrets(); 47 | ---- 48 | 49 | ### Now let's try some incorrect inputs 50 | 51 | # Incorrect type 52 | statement error 53 | CREATE SECRET incorrect_type ( 54 | TYPE R2, 55 | PROVIDER config, 56 | USE_SSL 'fliepflap' 57 | ) 58 | ---- 59 | Binder Error: Failed to cast option 'use_ssl' to type 'BOOLEAN': 'Could not convert string 'fliepflap' to BOOL' 60 | 61 | # Incorrect param altogether 62 | statement error 63 | 
CREATE SECRET incorrect_type ( 64 | TYPE R2, 65 | PROVIDER config, 66 | FLIEPFLAP true 67 | ) 68 | ---- 69 | Binder Error: Unknown parameter 'fliepflap' for secret type 'r2' with provider 'config' 70 | 71 | # Incorrect param for this type, but correct for other 72 | statement error 73 | CREATE SECRET incorrect_type ( 74 | TYPE S3, 75 | PROVIDER config, 76 | ACCOUNT_ID 'my_acount' 77 | ) 78 | ---- 79 | Binder Error: Unknown parameter 'account_id' for secret type 's3' with provider 'config' 80 | 81 | # Params can only occur once 82 | statement error 83 | CREATE SECRET duplicate_param ( 84 | TYPE R2, 85 | PROVIDER config, 86 | account_id 'some_bogus_account', 87 | key_id 123, 88 | KEY_ID 12098, 89 | account_id blablabla 90 | ) 91 | ---- 92 | Binder Error: Duplicate query param found while parsing create secret: 'key_id' 93 | -------------------------------------------------------------------------------- /test/sql/copy/csv/test_csv_remote.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/csv/test_csv_remote.test 2 | # description: Test reading csv files over http 3 | # group: [csv] 4 | 5 | require httpfs 6 | 7 | statement ok 8 | PRAGMA enable_verification 9 | 10 | # Test load from url with query string 11 | query IIIIIIIIIIII 12 | FROM sniff_csv('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv?v=1') 13 | ---- 14 | , " (empty) \n (empty) 0 0 [{'name': column00, 'type': BIGINT}, {'name': column01, 'type': VARCHAR}, {'name': column02, 'type': BIGINT}, {'name': column03, 'type': BIGINT}, {'name': column04, 'type': BIGINT}, {'name': column05, 'type': BIGINT}, {'name': column06, 'type': BIGINT}, {'name': column07, 'type': VARCHAR}, {'name': column08, 'type': VARCHAR}, {'name': column09, 'type': VARCHAR}, {'name': column10, 'type': VARCHAR}, {'name': column11, 'type': BIGINT}, {'name': column12, 'type': BIGINT}, {'name': column13, 'type': BIGINT}, {'name': column14, 'type': VARCHAR}, {'name': column15, 'type': VARCHAR}, {'name': column16, 'type': VARCHAR}, {'name': column17, 'type': BIGINT}] NULL NULL NULL FROM read_csv('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv?v=1', auto_detect=false, delim=',', quote='"', escape='', new_line='\n', skip=0, comment='', header=false, columns={'column00': 'BIGINT', 'column01': 'VARCHAR', 'column02': 'BIGINT', 'column03': 'BIGINT', 'column04': 'BIGINT', 'column05': 'BIGINT', 'column06': 'BIGINT', 'column07': 'VARCHAR', 'column08': 'VARCHAR', 'column09': 'VARCHAR', 'column10': 'VARCHAR', 'column11': 'BIGINT', 'column12': 'BIGINT', 'column13': 'BIGINT', 'column14': 'VARCHAR', 'column15': 'VARCHAR', 'column16': 'VARCHAR', 'column17': 'BIGINT'}); 15 | 16 | 17 | # This test abuses the LOCAL_EXTENSION_REPO env to make sure tests are only run when running extension tests 18 | # in duckdb/duckdb. 
Otherwise you need to pass a data dir when exex 19 | 20 | require-env LOCAL_EXTENSION_REPO 21 | 22 | # regular csv file 23 | query ITTTIITITTIIII nosort webpagecsv 24 | SELECT * FROM read_csv_auto('data/csv/real/web_page.csv') ORDER BY 1; 25 | ---- 26 | 27 | # file with gzip 28 | query IIIIIIIIIIIIIII nosort lineitemcsv 29 | SELECT * FROM read_csv_auto('data/csv/lineitem1k.tbl.gz') ORDER BY ALL; 30 | ---- 31 | 32 | query ITTTIITITTIIII nosort webpagecsv 33 | SELECT * FROM read_csv_auto('https://raw.githubusercontent.com/duckdb/duckdb/main/data/csv/real/web_page.csv') ORDER BY 1; 34 | ---- 35 | 36 | query IIIIIIIIIIIIIII nosort lineitemcsv 37 | select * from read_csv_auto('https://raw.githubusercontent.com/duckdb/duckdb/main/data/csv/lineitem1k.tbl.gz') ORDER BY ALL; 38 | ---- 39 | -------------------------------------------------------------------------------- /src/include/create_secret_functions.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | namespace duckdb { 6 | struct CreateSecretInput; 7 | struct S3AuthParams; 8 | class CreateSecretFunction; 9 | class BaseSecret; 10 | struct SecretEntry; 11 | class ExtensionLoader; 12 | 13 | struct CreateS3SecretFunctions { 14 | public: 15 | //! Register all CreateSecretFunctions 16 | static void Register(ExtensionLoader &loader); 17 | 18 | //! Secret refreshing mechanisms 19 | static CreateSecretInput GenerateRefreshSecretInfo(const SecretEntry &secret_entry, Value &refresh_info); 20 | static bool TryRefreshS3Secret(ClientContext &context, const SecretEntry &secret_to_refresh); 21 | 22 | protected: 23 | //! Internal function to create BaseSecret from S3AuthParams 24 | static unique_ptr CreateSecretFunctionInternal(ClientContext &context, CreateSecretInput &input); 25 | 26 | //! Function for the "settings" provider: creates secret from current duckdb settings 27 | static unique_ptr CreateS3SecretFromSettings(ClientContext &context, CreateSecretInput &input); 28 | //! Function for the "config" provider: creates secret from parameters passed by user 29 | static unique_ptr CreateS3SecretFromConfig(ClientContext &context, CreateSecretInput &input); 30 | 31 | //! Helper function to set named params of secret function 32 | static void SetBaseNamedParams(CreateSecretFunction &function, string &type); 33 | //! Helper function to create secret types s3/r2/gcs 34 | static void RegisterCreateSecretFunction(ExtensionLoader &loader, string type); 35 | }; 36 | 37 | struct CreateBearerTokenFunctions { 38 | public: 39 | static constexpr const char *HUGGINGFACE_TYPE = "huggingface"; 40 | 41 | //! Register all CreateSecretFunctions 42 | static void Register(ExtensionLoader &loader); 43 | 44 | protected: 45 | //! Internal function to create bearer token 46 | static unique_ptr CreateSecretFunctionInternal(ClientContext &context, CreateSecretInput &input, 47 | const string &token); 48 | //! Function for the "config" provider: creates secret from parameters passed by user 49 | static unique_ptr CreateBearerSecretFromConfig(ClientContext &context, CreateSecretInput &input); 50 | //! 
Function for the "config" provider: creates secret from parameters passed by user 51 | static unique_ptr CreateHuggingFaceSecretFromCredentialChain(ClientContext &context, 52 | CreateSecretInput &input); 53 | }; 54 | 55 | } // namespace duckdb 56 | -------------------------------------------------------------------------------- /src/include/hffs.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "httpfs.hpp" 4 | 5 | namespace duckdb { 6 | 7 | struct ParsedHFUrl { 8 | //! Path within the 9 | string path; 10 | //! Name of the repo (i presume) 11 | string repository; 12 | 13 | //! Endpoint, defaults to HF 14 | string endpoint = "https://huggingface.co"; 15 | //! Which revision/branch/tag to use 16 | string revision = "main"; 17 | //! For DuckDB this may be a sensible default? 18 | string repo_type = "datasets"; 19 | }; 20 | 21 | class HuggingFaceFileSystem : public HTTPFileSystem { 22 | public: 23 | ~HuggingFaceFileSystem() override; 24 | 25 | vector Glob(const string &path, FileOpener *opener = nullptr) override; 26 | 27 | duckdb::unique_ptr HeadRequest(FileHandle &handle, string hf_url, HTTPHeaders header_map) override; 28 | duckdb::unique_ptr GetRequest(FileHandle &handle, string hf_url, HTTPHeaders header_map) override; 29 | duckdb::unique_ptr GetRangeRequest(FileHandle &handle, string hf_url, HTTPHeaders header_map, 30 | idx_t file_offset, char *buffer_out, 31 | idx_t buffer_out_len) override; 32 | 33 | bool CanHandleFile(const string &fpath) override { 34 | return fpath.rfind("hf://", 0) == 0; 35 | }; 36 | 37 | string GetName() const override { 38 | return "HuggingFaceFileSystem"; 39 | } 40 | static ParsedHFUrl HFUrlParse(const string &url); 41 | string GetHFUrl(const ParsedHFUrl &url); 42 | string GetTreeUrl(const ParsedHFUrl &url, idx_t limit); 43 | string GetFileUrl(const ParsedHFUrl &url); 44 | 45 | static void SetParams(HTTPFSParams ¶ms, const string &path, optional_ptr opener); 46 | 47 | protected: 48 | duckdb::unique_ptr CreateHandle(const OpenFileInfo &file, FileOpenFlags flags, 49 | optional_ptr opener) override; 50 | 51 | string ListHFRequest(ParsedHFUrl &url, HTTPFSParams &http_params, string &next_page_url, 52 | optional_ptr state); 53 | }; 54 | 55 | class HFFileHandle : public HTTPFileHandle { 56 | friend class HuggingFaceFileSystem; 57 | 58 | public: 59 | HFFileHandle(FileSystem &fs, ParsedHFUrl hf_url, const OpenFileInfo &file, FileOpenFlags flags, 60 | unique_ptr http_params) 61 | : HTTPFileHandle(fs, file, flags, std::move(http_params)), parsed_url(std::move(hf_url)) { 62 | } 63 | ~HFFileHandle() override; 64 | 65 | unique_ptr CreateClient() override; 66 | 67 | protected: 68 | ParsedHFUrl parsed_url; 69 | }; 70 | 71 | } // namespace duckdb 72 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_s3_serialization.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_s3_serialization.test 2 | # description: Test serialization of the S3/GCS/r2 secrets 3 | # group: [secrets] 4 | 5 | require httpfs 6 | 7 | require parquet 8 | 9 | load __TEST_DIR__/test_serialize_secrets.db 10 | 11 | statement ok 12 | PRAGMA enable_verification; 13 | 14 | statement ok 15 | set secret_directory='__TEST_DIR__/create_secret_s3_serialization' 16 | 17 | statement ok 18 | CREATE OR REPLACE PERSISTENT SECRET s1 ( 19 | TYPE S3, 20 | PROVIDER config, 21 | SCOPE 's3://my_s3_scope', 22 | KEY_ID 
'mekey', 23 | SECRET 'mesecret', 24 | REGION 'meregion', 25 | SESSION_TOKEN 'mesesh', 26 | ENDPOINT 'meendpoint', 27 | URL_STYLE 'mahstyle', 28 | USE_SSL true, 29 | URL_COMPATIBILITY_MODE true 30 | ) 31 | 32 | statement ok 33 | CREATE OR REPLACE PERSISTENT SECRET s2 ( 34 | TYPE R2, 35 | PROVIDER config, 36 | SCOPE 's3://my_r2_scope', 37 | ACCOUNT_ID 'some_bogus_account', 38 | KEY_ID 'mekey', 39 | SECRET 'mesecret', 40 | SESSION_TOKEN 'mesesh', 41 | URL_STYLE 'mahstyle', 42 | USE_SSL 1, 43 | URL_COMPATIBILITY_MODE 1 44 | ) 45 | 46 | statement ok 47 | CREATE OR REPLACE PERSISTENT SECRET s3 ( 48 | TYPE GCS, 49 | PROVIDER config, 50 | SCOPE 's3://my_gcs_scope', 51 | KEY_ID 'mekey', 52 | SECRET 'mesecret', 53 | SESSION_TOKEN 'mesesh', 54 | URL_STYLE 'mahstyle', 55 | USE_SSL true, 56 | URL_COMPATIBILITY_MODE true 57 | ) 58 | 59 | query IIII 60 | select name, type, provider, scope FROM duckdb_secrets() order by name; 61 | ---- 62 | s1 s3 config ['s3://my_s3_scope'] 63 | s2 r2 config ['s3://my_r2_scope'] 64 | s3 gcs config ['s3://my_gcs_scope'] 65 | 66 | # Note: this query prints the tokens as an unredacted string 67 | query I nosort secret_to_string 68 | select secret_string from duckdb_secrets(redact=false) order by type; 69 | ---- 70 | 71 | restart 72 | 73 | # Now setting the secret dir somewhere nonexistent will yield no persistent secrets 74 | statement ok 75 | set secret_directory='__TEST_DIR__/does_not_exist1' 76 | 77 | query I 78 | select count(*) FROM duckdb_secrets(redact=false); 79 | ---- 80 | 0 81 | 82 | restart 83 | 84 | # However, setting it to the dir that does exist, we can suddenly see our persisted secrets 85 | statement ok 86 | set secret_directory='__TEST_DIR__/create_secret_s3_serialization' 87 | 88 | # After restart secrets are still there 89 | query IIII 90 | select name, type, provider, scope FROM duckdb_secrets() order by name; 91 | ---- 92 | s1 s3 config ['s3://my_s3_scope'] 93 | s2 r2 config ['s3://my_r2_scope'] 94 | s3 gcs config ['s3://my_gcs_scope'] 95 | 96 | # Note: this query prints the tokens as an unredacted string 97 | query I nosort secret_to_string 98 | select secret_string from duckdb_secrets(redact=false) order by type; 99 | ---- -------------------------------------------------------------------------------- /test/sql/copy/s3/parquet_s3_tpch.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/parquet_s3_tpch.test_slow 2 | # description: Test all tpch queries on tpch sf0.01 over s3 3 | # group: [s3] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require tpch 10 | 11 | require-env S3_TEST_SERVER_AVAILABLE 1 12 | 13 | # Require that these environment variables are also set 14 | 15 | require-env AWS_DEFAULT_REGION 16 | 17 | require-env AWS_ACCESS_KEY_ID 18 | 19 | require-env AWS_SECRET_ACCESS_KEY 20 | 21 | require-env DUCKDB_S3_ENDPOINT 22 | 23 | require-env DUCKDB_S3_USE_SSL 24 | 25 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 26 | set ignore_error_messages 27 | 28 | statement ok 29 | SET enable_http_metadata_cache=true; 30 | 31 | statement ok 32 | set http_timeout=120000; 33 | 34 | # More retries (longest wait will be 25600ms) 35 | statement ok 36 | set http_retries=6; 37 | 38 | # Copy files to S3 before beginning tests 39 | statement ok 40 | CALL DBGEN(sf=0.01); 41 | 42 | # copy tpch files to S3 43 | statement ok 44 | COPY lineitem to 's3://test-bucket/tpch-sf0_01/lineitem.parquet'; 45 | COPY nation to
's3://test-bucket/tpch-sf0_01/nation.parquet'; 46 | COPY region to 's3://test-bucket/tpch-sf0_01/region.parquet'; 47 | COPY part to 's3://test-bucket/tpch-sf0_01/part.parquet'; 48 | COPY supplier to 's3://test-bucket/tpch-sf0_01/supplier.parquet'; 49 | COPY partsupp to 's3://test-bucket/tpch-sf0_01/partsupp.parquet'; 50 | COPY customer to 's3://test-bucket/tpch-sf0_01/customer.parquet'; 51 | COPY orders to 's3://test-bucket/tpch-sf0_01/orders.parquet'; 52 | 53 | # clears tables 54 | statement ok 55 | DROP TABLE lineitem; 56 | DROP TABLE nation; 57 | DROP TABLE region; 58 | DROP TABLE part; 59 | DROP TABLE supplier; 60 | DROP TABLE partsupp; 61 | DROP TABLE customer; 62 | DROP TABLE orders; 63 | 64 | statement ok 65 | CREATE VIEW lineitem as SELECT * FROM 's3://test-bucket/tpch-sf0_01/lineitem.parquet'; 66 | CREATE VIEW nation as SELECT * FROM 's3://test-bucket/tpch-sf0_01/nation.parquet'; 67 | CREATE VIEW region as SELECT * FROM 's3://test-bucket/tpch-sf0_01/region.parquet'; 68 | CREATE VIEW part as SELECT * FROM 's3://test-bucket/tpch-sf0_01/part.parquet'; 69 | CREATE VIEW supplier as SELECT * FROM 's3://test-bucket/tpch-sf0_01/supplier.parquet'; 70 | CREATE VIEW partsupp as SELECT * FROM 's3://test-bucket/tpch-sf0_01/partsupp.parquet'; 71 | CREATE VIEW customer as SELECT * FROM 's3://test-bucket/tpch-sf0_01/customer.parquet'; 72 | CREATE VIEW orders as SELECT * FROM 's3://test-bucket/tpch-sf0_01/orders.parquet'; 73 | 74 | 75 | # Run TPCH SF0.01 76 | loop i 1 9 77 | 78 | query I 79 | PRAGMA tpch(${i}) 80 | ---- 81 | :duckdb/extension/tpch/dbgen/answers/sf0.01/q0${i}.csv 82 | 83 | endloop 84 | 85 | loop i 10 23 86 | 87 | query I 88 | PRAGMA tpch(${i}) 89 | ---- 90 | :duckdb/extension/tpch/dbgen/answers/sf0.01/q${i}.csv 91 | 92 | endloop 93 | -------------------------------------------------------------------------------- /scripts/run_squid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | help() { 4 | echo "Usage: ${0} [port] [auth]" 5 | echo " port Port number for squid to listen on (by default 3128)" 6 | echo " auth Optional string ('auth') to force user basic authentication (otherwise no authentication is required)" 7 | exit 0 8 | } 9 | 10 | port='3128' 11 | auth='false' 12 | log_dir="squid_logs" 13 | conf_file="squid.conf" 14 | pid_file='${service_name}.pid' 15 | 16 | while [[ $# -gt 0 ]]; do 17 | case "${1}" in 18 | -h|--help) 19 | help 20 | ;; 21 | -p|--port) 22 | port="${2}" 23 | shift # past argument 24 | shift # past value 25 | ;; 26 | --auth) 27 | auth='true' 28 | conf_file="squid_auth.conf" 29 | pid_file='${service_name}_auth.pid' 30 | shift # past argument 31 | ;; 32 | --log_dir) 33 | log_dir="${2}" 34 | shift # past argument 35 | shift # past value 36 | ;; 37 | *) 38 | echo "Unknown option ${1}" 39 | exit 1 40 | ;; 41 | esac 42 | done 43 | 44 | mkdir "${log_dir}" 45 | touch "${log_dir}/daemon.log" 46 | chmod -R 777 "${log_dir}" 47 | 48 | echo "http_port 127.0.0.1:${port}" >"${conf_file}" 49 | echo "pid_filename ${pid_file}" >>"${conf_file}" 50 | 51 | echo 'logfile_rotate 0' >>"${conf_file}" 52 | echo "logfile_daemon ${log_dir}/daemon.log" >>"${conf_file}" 53 | echo "access_log ${log_dir}/access.log" >>"${conf_file}" 54 | echo "cache_log ${log_dir}/cache.log" >>"${conf_file}" 55 | echo "cache_store_log ${log_dir}/cache_store.log" >>"${conf_file}" 56 | 57 | 58 | if [[ "${auth}" == "true" ]]; then 59 | # User 'john' with password 'doe' 60 | echo 'john:$apr1$dalj9e7s$AhqY28Hvl3EcNblNJMiXa0' >squid_users 61 | 62 |
squid_version="$(squid -v | head -n1 | grep -o 'Version [^ ]*' | cut -d ' ' -f 2)" 63 | if [[ "$(uname)" == "Darwin" ]]; then 64 | auth_basic_program="/opt/homebrew/Cellar/squid/${squid_version}/libexec/basic_ncsa_auth" 65 | else 66 | if [[ -e '/usr/lib64/squid/basic_ncsa_auth' ]]; then 67 | auth_basic_program="/usr/lib64/squid/basic_ncsa_auth" 68 | else 69 | auth_basic_program="/usr/lib/squid/basic_ncsa_auth" 70 | fi 71 | fi 72 | 73 | echo '# Add authentification options' >>"${conf_file}" 74 | echo "auth_param basic program ${auth_basic_program} squid_users" >>"${conf_file}" 75 | echo 'auth_param basic children 3' >>"${conf_file}" 76 | echo 'auth_param basic realm Squid BA' >>"${conf_file}" 77 | echo 'acl auth_users proxy_auth REQUIRED' >>"${conf_file}" 78 | echo 'http_access allow auth_users' >>"${conf_file}" 79 | echo 'http_access deny all' >>"${conf_file}" 80 | else 81 | echo 'http_access allow localhost' >>"${conf_file}" 82 | fi 83 | 84 | exec squid -N -f "${conf_file}" 85 | -------------------------------------------------------------------------------- /test/sql/copy/parquet/test_parquet_remote.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/parquet/test_parquet_remote.test 2 | # description: Parquet read from S3/HTTPS 3 | # group: [parquet] 4 | 5 | require httpfs 6 | 7 | require parquet 8 | 9 | # non existent host 10 | statement error 11 | SELECT * FROM PARQUET_SCAN('https://this-host-does-not-exist-for-sure/test.parquet'); 12 | ---- 13 | 14 | # non existent file 15 | statement error 16 | SELECT * FROM PARQUET_SCAN('https://duckdb.org/test.parquet'); 17 | ---- 18 | 19 | # missing path 20 | statement error 21 | SELECT * FROM PARQUET_SCAN('https://duckdb.org'); 22 | ---- 23 | 24 | # empty path 25 | statement error 26 | SELECT * FROM PARQUET_SCAN('https://duckdb.org/'); 27 | ---- 28 | 29 | # straightforward 30 | query IIII 31 | SELECT id, first_name, last_name, email FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') LIMIT 10; 32 | ---- 33 | 1 Amanda Jordan ajordan0@com.com 34 | 2 Albert Freeman afreeman1@is.gd 35 | 3 Evelyn Morgan emorgan2@altervista.org 36 | 4 Denise Riley driley3@gmpg.org 37 | 5 Carlos Burns cburns4@miitbeian.gov.cn 38 | 6 Kathryn White kwhite5@google.com 39 | 7 Samuel Holmes sholmes6@foxnews.com 40 | 8 Harry Howell hhowell7@eepurl.com 41 | 9 Jose Foster jfoster8@yelp.com 42 | 10 Emily Stewart estewart9@opensource.org 43 | 44 | 45 | # with redirects 46 | query IIII 47 | SELECT id, first_name, last_name, email FROM PARQUET_SCAN('https://github.com/duckdb/duckdb/blob/main/data/parquet-testing/userdata1.parquet?raw=true') LIMIT 10; 48 | ---- 49 | 1 Amanda Jordan ajordan0@com.com 50 | 2 Albert Freeman afreeman1@is.gd 51 | 3 Evelyn Morgan emorgan2@altervista.org 52 | 4 Denise Riley driley3@gmpg.org 53 | 5 Carlos Burns cburns4@miitbeian.gov.cn 54 | 6 Kathryn White kwhite5@google.com 55 | 7 Samuel Holmes sholmes6@foxnews.com 56 | 8 Harry Howell hhowell7@eepurl.com 57 | 9 Jose Foster jfoster8@yelp.com 58 | 10 Emily Stewart estewart9@opensource.org 59 | 60 | # with explicit port nr 61 | query IIII 62 | SELECT id, first_name, last_name, email FROM PARQUET_SCAN('https://github.com:443/duckdb/duckdb/blob/main/data/parquet-testing/userdata1.parquet?raw=true') LIMIT 10; 63 | ---- 64 | 1 Amanda Jordan ajordan0@com.com 65 | 2 Albert Freeman afreeman1@is.gd 66 | 3 Evelyn Morgan emorgan2@altervista.org 67 | 4 Denise Riley driley3@gmpg.org 68 | 5 Carlos Burns 
cburns4@miitbeian.gov.cn 69 | 6 Kathryn White kwhite5@google.com 70 | 7 Samuel Holmes sholmes6@foxnews.com 71 | 8 Harry Howell hhowell7@eepurl.com 72 | 9 Jose Foster jfoster8@yelp.com 73 | 10 Emily Stewart estewart9@opensource.org 74 | 75 | query IIII 76 | SELECT id, first_name, last_name, email FROM PARQUET_SCAN('https://github.com/duckdb/duckdb-data/releases/download/v1.0/us+er+da+ta.parquet') LIMIT 1; 77 | ---- 78 | 1 Amanda Jordan ajordan0@com.com 79 | 80 | query IIII 81 | SELECT id, first_name, last_name, email FROM PARQUET_SCAN('https://github.com/duckdb/duckdb-data/releases/download/v1.0/us%2Ber%2Bda%2Bta.parquet') LIMIT 1; 82 | ---- 83 | 1 Amanda Jordan ajordan0@com.com 84 | -------------------------------------------------------------------------------- /.github/workflows/IntegrationTests.yml: -------------------------------------------------------------------------------- 1 | name: Integration Tests 2 | on: [push, pull_request,repository_dispatch] 3 | concurrency: 4 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} 5 | cancel-in-progress: true 6 | defaults: 7 | run: 8 | shell: bash 9 | 10 | jobs: 11 | linux-tests-httpfs: 12 | name: MinIO Tests 13 | runs-on: ubuntu-latest 14 | env: 15 | S3_TEST_SERVER_AVAILABLE: 1 16 | AWS_DEFAULT_REGION: eu-west-1 17 | AWS_ACCESS_KEY_ID: minio_duckdb_user 18 | AWS_SECRET_ACCESS_KEY: minio_duckdb_user_password 19 | DUCKDB_S3_ENDPOINT: duckdb-minio.com:9000 20 | DUCKDB_S3_USE_SSL: false 21 | HTTP_PROXY_PUBLIC: localhost:3128 22 | TEST_PERSISTENT_SECRETS_AVAILABLE: true 23 | CORE_EXTENSIONS: "parquet;json;tpch" 24 | GEN: ninja 25 | VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake 26 | VCPKG_TARGET_TRIPLET: x64-linux 27 | PYTHON_HTTP_SERVER_URL: http://localhost:8008 28 | PYTHON_HTTP_SERVER_DIR: /tmp/python_test_server 29 | 30 | steps: 31 | - uses: actions/checkout@v4 32 | with: 33 | fetch-depth: 0 34 | submodules: 'true' 35 | 36 | - uses: actions/setup-python@v4 37 | with: 38 | python-version: '3.10' 39 | 40 | - name: Install Ninja 41 | shell: bash 42 | run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build 43 | 44 | - name: Setup Ccache 45 | uses: hendrikmuhs/ccache-action@main 46 | with: 47 | key: ${{ github.job }} 48 | save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb-httpfs' }} 49 | 50 | - name: Setup vcpkg 51 | uses: lukka/run-vcpkg@v11.1 52 | with: 53 | vcpkgGitCommitId: 5e5d0e1cd7785623065e77eff011afdeec1a3574 54 | 55 | - name: Fix permissions of test secrets 56 | shell: bash 57 | run: chmod -R 700 data/secrets 58 | 59 | # TODO: fix the authenticated proxy here 60 | - name: Install and run http proxy squid 61 | shell: bash 62 | run: | 63 | sudo apt-get install squid 64 | ./scripts/run_squid.sh --port 3128 --log_dir squid_logs & 65 | 66 | - name: Run & Populate Python test server 67 | shell: bash 68 | run: | 69 | mkdir -p $PYTHON_HTTP_SERVER_DIR 70 | cd $PYTHON_HTTP_SERVER_DIR 71 | python3 -m http.server 8008 & 72 | 73 | - name: Build 74 | shell: bash 75 | run: make 76 | 77 | - name: Install test server 78 | shell: bash 79 | run: | 80 | sudo ./scripts/install_s3_test_server.sh 81 | ./scripts/generate_presigned_url.sh 82 | 83 | - name: Start test server & run tests 84 | shell: bash 85 | run: | 86 | source ./scripts/run_s3_test_server.sh 87 | source ./scripts/set_s3_test_server_variables.sh 88 | ./build/release/test/unittest "*" --skip-error-messages 
"[]" 89 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_transactional.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_transactional.test 2 | # description: Test secret transactional safety 3 | # group: [secrets] 4 | 5 | statement ok 6 | PRAGMA enable_verification; 7 | 8 | require httpfs 9 | 10 | load __TEST_DIR__/create_secret_transactional.db 11 | 12 | statement ok 13 | set secret_directory='__TEST_DIR__/create_secret_transactional' 14 | 15 | statement ok 16 | PRAGMA threads=1 17 | 18 | foreach secret_type TEMPORARY PERSISTENT 19 | 20 | statement ok con1 21 | BEGIN TRANSACTION 22 | 23 | statement ok con1 24 | CREATE ${secret_type} SECRET s1 (TYPE S3) 25 | 26 | statement ok con2 27 | BEGIN TRANSACTION 28 | 29 | statement ok con2 30 | CREATE ${secret_type} SECRET s2 (TYPE S3) 31 | 32 | query I con1 33 | SELECT name FROM duckdb_secrets(); 34 | ---- 35 | s1 36 | 37 | query I con2 38 | SELECT name FROM duckdb_secrets(); 39 | ---- 40 | s2 41 | 42 | statement ok con1 43 | COMMIT 44 | 45 | # Transaction 2 still only sees own secret: it has not commited yet 46 | query I con2 47 | SELECT name FROM duckdb_secrets(); 48 | ---- 49 | s2 50 | 51 | # New transaction will see only committed secret 52 | query I con3 53 | SELECT name FROM duckdb_secrets(); 54 | ---- 55 | s1 56 | 57 | statement ok con2 58 | COMMIT 59 | 60 | # Now both are visible 61 | query I con3 62 | SELECT name FROM duckdb_secrets() ORDER BY name; 63 | ---- 64 | s1 65 | s2 66 | 67 | statement ok con1 68 | BEGIN TRANSACTION 69 | 70 | statement ok con1 71 | DROP SECRET s1; 72 | 73 | # Drop not yet commited: con3 will not see it yet 74 | query I con3 75 | SELECT name FROM duckdb_secrets() ORDER BY name; 76 | ---- 77 | s1 78 | s2 79 | 80 | # Commit the drop 81 | statement ok con1 82 | COMMIT 83 | 84 | # Drop now visible to con3 85 | query I con3 86 | SELECT name FROM duckdb_secrets(); 87 | ---- 88 | s2 89 | 90 | # Clean up for loop end 91 | statement ok 92 | DROP SECRET s2 93 | 94 | endloop 95 | 96 | # Now lets test transactional safety of lazily loaded persistent secrets 97 | 98 | statement ok 99 | CREATE PERSISTENT SECRET perm_s1 (TYPE S3) 100 | 101 | restart 102 | 103 | statement ok 104 | set secret_directory='__TEST_DIR__/create_secret_transactional' 105 | 106 | # After restart, we create 2 connections that each add their own tmp secret; the perm secret is now lazily loaded! 
107 | statement ok con1 108 | BEGIN TRANSACTION 109 | 110 | statement ok con1 111 | CREATE SECRET tmp_s1 (TYPE S3) 112 | 113 | statement ok con2 114 | BEGIN TRANSACTION 115 | 116 | statement ok con2 117 | CREATE SECRET tmp_s2 (TYPE S3) 118 | 119 | # Now con1 drops the lazily loaded perm secret 120 | statement ok con1 121 | DROP SECRET perm_s1; 122 | 123 | query I con1 124 | SELECT name FROM duckdb_secrets(); 125 | ---- 126 | tmp_s1 127 | 128 | # con2 still has both secrets 129 | query I con2 130 | SELECT name FROM duckdb_secrets() ORDER BY name; 131 | ---- 132 | perm_s1 133 | tmp_s2 134 | 135 | statement ok con1 136 | COMMIT 137 | 138 | statement ok con2 139 | COMMIT 140 | 141 | # Now the deletion is visible to con2 142 | query I con2 143 | SELECT name FROM duckdb_secrets() ORDER BY name; 144 | ---- 145 | tmp_s1 146 | tmp_s2 -------------------------------------------------------------------------------- /test/sql/copy/s3/upload_file_parallel.test_slow: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/upload_file_parallel.test_slow 2 | # description: Copy large parquet files from and to S3 in parallel. 3 | # group: [s3] 4 | 5 | require tpch 6 | 7 | require parquet 8 | 9 | require httpfs 10 | 11 | require-env S3_TEST_SERVER_AVAILABLE 1 12 | 13 | # Require that these environment variables are also set 14 | 15 | require-env AWS_DEFAULT_REGION 16 | 17 | require-env AWS_ACCESS_KEY_ID 18 | 19 | require-env AWS_SECRET_ACCESS_KEY 20 | 21 | require-env DUCKDB_S3_ENDPOINT 22 | 23 | require-env DUCKDB_S3_USE_SSL 24 | 25 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 26 | set ignore_error_messages 27 | 28 | statement ok 29 | CALL DBGEN(sf=1) 30 | 31 | statement ok 32 | set http_timeout=120000; 33 | 34 | # More retries (longest wait will be 25600ms) 35 | statement ok 36 | set http_retries=6; 37 | 38 | query I 39 | SELECT 40 | sum(l_extendedprice * l_discount) AS revenue 41 | FROM 42 | lineitem 43 | WHERE 44 | l_shipdate >= CAST('1994-01-01' AS date) 45 | AND l_shipdate < CAST('1995-01-01' AS date) 46 | AND l_discount BETWEEN 0.05 47 | AND 0.07 48 | AND l_quantity < 24; 49 | ---- 50 | 123141078.2283 51 | 52 | # We do this in parallel to also test synchronization of s3fs between 2 connections 53 | concurrentloop threadid 0 2 54 | 55 | statement ok 56 | SET s3_endpoint='${DUCKDB_S3_ENDPOINT}';SET s3_use_ssl=${DUCKDB_S3_USE_SSL}; 57 | 58 | # Parquet file 59 | statement ok 60 | COPY lineitem TO 's3://test-bucket/multipart/export_large_${threadid}.parquet' (FORMAT 'parquet'); 61 | 62 | query I 63 | SELECT 64 | sum(l_extendedprice * l_discount) AS revenue 65 | FROM 66 | "s3://test-bucket/multipart/export_large_${threadid}.parquet" 67 | WHERE 68 | l_shipdate >= CAST('1994-01-01' AS date) 69 | AND l_shipdate < CAST('1995-01-01' AS date) 70 | AND l_discount BETWEEN 0.05 71 | AND 0.07 72 | AND l_quantity < 24; 73 | ---- 74 | 123141078.2283 75 | 76 | endloop 77 | 78 | statement ok 79 | CALL dbgen(sf=0.01, suffix='_small'); 80 | 81 | query I 82 | SELECT 83 | sum(l_extendedprice * l_discount) AS revenue 84 | FROM 85 | lineitem_small 86 | WHERE 87 | l_shipdate >= CAST('1994-01-01' AS date) 88 | AND l_shipdate < CAST('1995-01-01' AS date) 89 | AND l_discount BETWEEN 0.05 90 | AND 0.07 91 | AND l_quantity < 24; 92 | ---- 93 | 1193053.2253 94 | 95 | # Upload and query 100 tiny files in parallel 96 | concurrentloop threadid 0 100 97 | 98 | statement ok 99 | SET 
s3_secret_access_key='${AWS_SECRET_ACCESS_KEY}';SET s3_access_key_id='${AWS_ACCESS_KEY_ID}';SET s3_region='${AWS_DEFAULT_REGION}'; SET s3_endpoint='${DUCKDB_S3_ENDPOINT}';SET s3_use_ssl=${DUCKDB_S3_USE_SSL}; 100 | 101 | statement ok 102 | SET s3_uploader_thread_limit=1 103 | 104 | # Parquet file 105 | statement ok 106 | COPY lineitem_small TO 's3://test-bucket/multipart/export_small_${threadid}.parquet' (FORMAT 'parquet'); 107 | 108 | query I 109 | SELECT 110 | sum(l_extendedprice * l_discount) AS revenue 111 | FROM 112 | "s3://test-bucket/multipart/export_small_${threadid}.parquet" 113 | WHERE 114 | l_shipdate >= CAST('1994-01-01' AS date) 115 | AND l_shipdate < CAST('1995-01-01' AS date) 116 | AND l_discount BETWEEN 0.05 117 | AND 0.07 118 | AND l_quantity < 24; 119 | ---- 120 | 1193053.2253 121 | 122 | endloop 123 | -------------------------------------------------------------------------------- /scripts/minio_s3.yml: -------------------------------------------------------------------------------- 1 | services: 2 | minio: 3 | image: minio/minio:RELEASE.2021-11-03T03-36-36Z 4 | hostname: duckdb-minio.com 5 | ports: 6 | - "9000:9000" 7 | - "9001:9001" 8 | volumes: 9 | - /tmp/minio_test_data:/data 10 | - /tmp/minio_root_data:/root/.minio 11 | environment: 12 | - MINIO_ROOT_USER=duckdb_minio_admin 13 | - MINIO_ROOT_PASSWORD=duckdb_minio_admin_password 14 | - MINIO_REGION_NAME=eu-west-1 15 | - MINIO_DOMAIN=duckdb-minio.com 16 | - MINIO_ACCESS_KEY=duckdb_minio_admin 17 | - MINIO_SECRET_KEY=duckdb_minio_admin_password 18 | command: server /data --console-address ":9001" 19 | 20 | minio_setup: 21 | image: minio/mc:RELEASE.2021-11-05T10-05-06Z 22 | depends_on: 23 | - minio 24 | links: 25 | - minio 26 | volumes: 27 | - ${PWD}/duckdb/data:/duckdb/data 28 | - ${PWD}/test/test_data:/duckdb/test_data 29 | 30 | entrypoint: > 31 | /bin/sh -c " 32 | until ( 33 | /usr/bin/mc config host add myminio http://duckdb-minio.com:9000 duckdb_minio_admin duckdb_minio_admin_password 34 | ) do 35 | echo '...waiting...' && sleep 1; 36 | done; 37 | 38 | /usr/bin/mc admin user add myminio minio_duckdb_user minio_duckdb_user_password 39 | /usr/bin/mc admin user list myminio 40 | /usr/bin/mc admin user info myminio minio_duckdb_user 41 | /usr/bin/mc admin policy set myminio readwrite user=minio_duckdb_user 42 | 43 | /usr/bin/mc admin user add myminio minio_duckdb_user_2 minio_duckdb_user_2_password 44 | /usr/bin/mc admin user list myminio 45 | /usr/bin/mc admin user info myminio minio_duckdb_user_2 46 | /usr/bin/mc admin policy set myminio readwrite user=minio_duckdb_user_2 47 | 48 | /usr/bin/mc rb --force myminio/test-bucket 49 | /usr/bin/mc mb myminio/test-bucket 50 | /usr/bin/mc policy get myminio/test-bucket 51 | 52 | /usr/bin/mc rb --force myminio/test-bucket-2 53 | /usr/bin/mc mb myminio/test-bucket-2 54 | /usr/bin/mc policy get myminio/test-bucket-2 55 | 56 | /usr/bin/mc rb --force myminio/test-bucket-public 57 | /usr/bin/mc mb myminio/test-bucket-public 58 | /usr/bin/mc policy set download myminio/test-bucket-public 59 | /usr/bin/mc policy get myminio/test-bucket-public 60 | 61 | # This is for the test of presigned URLs 62 | # !!! When missing, be sure that you have ran 'scripts/generate_presigned_url.sh' !!! 
63 | 64 | # small file upload 65 | /usr/bin/mc cp /duckdb/data/csv/phonenumbers.csv myminio/test-bucket/presigned/phonenumbers.csv 66 | /usr/bin/mc cp /duckdb/data/parquet-testing/glob/t1.parquet myminio/test-bucket/presigned/t1.parquet 67 | 68 | # large file upload 69 | /usr/bin/mc cp /duckdb/test_data/presigned-url-lineitem.parquet myminio/test-bucket/presigned/lineitem_large.parquet 70 | 71 | # Upload the db for the attach 72 | /usr/bin/mc cp /duckdb/test_data/attach.db myminio/test-bucket/presigned/attach.db 73 | /usr/bin/mc cp /duckdb/test_data/lineitem_sf1.db myminio/test-bucket/presigned/lineitem_sf1.db 74 | 75 | /usr/bin/mc share download myminio/test-bucket/presigned/phonenumbers.csv 76 | /usr/bin/mc share download myminio/test-bucket/presigned/t1.parquet 77 | /usr/bin/mc share download myminio/test-bucket/presigned/lineitem_large.parquet 78 | /usr/bin/mc share download myminio/test-bucket/presigned/attach.db 79 | 80 | echo 'FINISHED SETTING UP MINIO' 81 | exit 0; 82 | " -------------------------------------------------------------------------------- /test/sql/copy/s3/metadata_cache.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/metadata_cache.test 2 | # description: Test metadata cache that caches responses from the initial HEAD requests to open a file. 3 | # group: [s3] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require-env S3_TEST_SERVER_AVAILABLE 1 10 | 11 | # Require that these environment variables are also set 12 | 13 | require-env AWS_DEFAULT_REGION 14 | 15 | require-env AWS_ACCESS_KEY_ID 16 | 17 | require-env AWS_SECRET_ACCESS_KEY 18 | 19 | require-env DUCKDB_S3_ENDPOINT 20 | 21 | require-env DUCKDB_S3_USE_SSL 22 | 23 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 24 | set ignore_error_messages 25 | 26 | # this test was written before we implemented the external file cache 27 | # when it is enabled, the request counts are different 28 | # we disable it so this test still makes sense 29 | statement ok 30 | set enable_external_file_cache=false; 31 | 32 | statement ok 33 | CREATE TABLE test as SELECT * FROM range(0,10) tbl(i); 34 | 35 | statement ok 36 | CREATE TABLE test1 as SELECT * FROM range(10,20) tbl(i); 37 | 38 | query II 39 | EXPLAIN ANALYZE COPY test TO 's3://test-bucket-public/root-dir/metadata_cache/test.parquet'; 40 | ---- 41 | analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 0.*PUT\: 1.*\#POST\: 0.* 42 | 43 | query II 44 | EXPLAIN ANALYZE COPY test TO 's3://test-bucket-public/root-dir/metadata_cache/test1.parquet'; 45 | ---- 46 | analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 0.*PUT\: 1.*\#POST\: 0.* 47 | 48 | # Now we query the file metadata without the global metadata cache: There should be 1 HEAD request for the file size, 49 | # then a GET for the pointer to the parquet metadata, then a GET for the metadata. 
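# The request counters below come from the "HTTPFS HTTP Stats" block that HTTPState::WriteProfilingInformation (src/http_state.cpp) adds to the EXPLAIN ANALYZE output; the regexes match against that block.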
50 | query II 51 | EXPLAIN ANALYZE SELECT COUNT(*) FROM 's3://test-bucket-public/root-dir/metadata_cache/test.parquet'; 52 | ---- 53 | analyzed_plan :.*HTTP Stats.*\#HEAD\: 1.*GET\: 1.*PUT\: 0.*\#POST\: 0.* 54 | 55 | # Redoing query should still result in same request count 56 | query II 57 | EXPLAIN ANALYZE SELECT COUNT(*) FROM 's3://test-bucket-public/root-dir/metadata_cache/test.parquet'; 58 | ---- 59 | analyzed_plan :.*HTTP Stats.*\#HEAD\: 1.*GET\: 1.*PUT\: 0.*\#POST\: 0.* 60 | 61 | # Now enable the global metadata cache to store the results of the head requests, saving 1 HEAD per file 62 | statement ok 63 | SET enable_http_metadata_cache=true; 64 | 65 | query II 66 | EXPLAIN ANALYZE SELECT COUNT(*) FROM 's3://test-bucket-public/root-dir/metadata_cache/test1.parquet'; 67 | ---- 68 | analyzed_plan :.*HTTP Stats.*\#HEAD\: 1.*GET\: 1.*PUT\: 0.*\#POST\: 0.* 69 | 70 | # Now with the global metadata cache, we don't need to do the HEAD request again. Nice. 71 | query II 72 | EXPLAIN ANALYZE SELECT COUNT(*) FROM 's3://test-bucket-public/root-dir/metadata_cache/test1.parquet'; 73 | ---- 74 | analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 1.*PUT\: 0.*\#POST\: 0.* 75 | 76 | # Now when we write a file to a cached URL, this would break, so the cache entry should be invalidated 77 | statement ok 78 | COPY (SELECT * from range(0,100) tbl(i)) TO 's3://test-bucket-public/root-dir/metadata_cache/test1.parquet'; 79 | 80 | # We need to do a new head request here 81 | query II 82 | EXPLAIN ANALYZE SELECT COUNT(*) FROM 's3://test-bucket-public/root-dir/metadata_cache/test1.parquet'; 83 | ---- 84 | analyzed_plan :.*HTTP Stats.*\#HEAD\: 1.*GET\: 1.*PUT\: 0.*\#POST\: 0.* 85 | 86 | # but now it's cached again 87 | query II 88 | EXPLAIN ANALYZE SELECT COUNT(*) FROM 's3://test-bucket-public/root-dir/metadata_cache/test1.parquet'; 89 | ---- 90 | analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 1.*PUT\: 0.*\#POST\: 0.* 91 | -------------------------------------------------------------------------------- /src/include/http_state.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/file_opener.hpp" 4 | #include "duckdb/main/client_context.hpp" 5 | #include "duckdb/main/client_data.hpp" 6 | #include "duckdb/common/atomic.hpp" 7 | #include "duckdb/common/optional_ptr.hpp" 8 | #include "duckdb/main/client_context_state.hpp" 9 | 10 | namespace duckdb { 11 | 12 | class CachedFileHandle; 13 | 14 | //! Represents a file that is intended to be fully downloaded, then used in parallel by multiple threads 15 | class CachedFile : public enable_shared_from_this<CachedFile> { 16 | friend class CachedFileHandle; 17 | 18 | public: 19 | unique_ptr<CachedFileHandle> GetHandle() { 20 | auto this_ptr = shared_from_this(); 21 | return make_uniq<CachedFileHandle>(this_ptr); 22 | } 23 | 24 | private: 25 | //! Cached Data 26 | shared_ptr<char> data; 27 | //! Data capacity 28 | uint64_t capacity = 0; 29 | //! Size of file 30 | idx_t size; 31 | //! Lock for initializing the file 32 | mutex lock; 33 | //! When initialized is set to true, the file is safe for parallel reading without holding the lock 34 | atomic<bool> initialized = {false}; 35 | }; 36 | 37 | //! Handle to a CachedFile 38 | class CachedFileHandle { 39 | public: 40 | explicit CachedFileHandle(shared_ptr<CachedFile> &file_p); 41 | 42 | //! allocate a buffer for the file 43 | void AllocateBuffer(idx_t size); 44 | //! Indicate the file is fully downloaded and safe for parallel reading without lock 45 | void SetInitialized(idx_t total_size); 46 | //! 
Grow buffer to new size, copying over `bytes_to_copy` to the new buffer 47 | void GrowBuffer(idx_t new_capacity, idx_t bytes_to_copy); 48 | //! Write to the buffer 49 | void Write(const char *buffer, idx_t length, idx_t offset = 0); 50 | 51 | bool Initialized() { 52 | return file->initialized; 53 | } 54 | const char *GetData() { 55 | return file->data.get(); 56 | } 57 | uint64_t GetCapacity() { 58 | return file->capacity; 59 | } 60 | //! Return the size of the initialized file 61 | idx_t GetSize() { 62 | D_ASSERT(file->initialized); 63 | return file->size; 64 | } 65 | 66 | private: 67 | unique_ptr<lock_guard<mutex>> lock; 68 | shared_ptr<CachedFile> file; 69 | }; 70 | 71 | class HTTPState : public ClientContextState { 72 | public: 73 | //! Reset all counters and cached files 74 | void Reset(); 75 | //! Get cache entry, create if not exists 76 | shared_ptr<CachedFile> &GetCachedFile(const string &path); 77 | //! Helper functions to get the HTTP state 78 | static shared_ptr<HTTPState> TryGetState(ClientContext &context); 79 | static shared_ptr<HTTPState> TryGetState(optional_ptr<FileOpener> opener); 80 | 81 | bool IsEmpty() { 82 | return head_count == 0 && get_count == 0 && put_count == 0 && post_count == 0 && delete_count == 0 && 83 | total_bytes_received == 0 && total_bytes_sent == 0; 84 | } 85 | 86 | atomic<idx_t> head_count {0}; 87 | atomic<idx_t> get_count {0}; 88 | atomic<idx_t> put_count {0}; 89 | atomic<idx_t> post_count {0}; 90 | atomic<idx_t> delete_count {0}; 91 | atomic<idx_t> total_bytes_received {0}; 92 | atomic<idx_t> total_bytes_sent {0}; 93 | 94 | //! Called by the ClientContext when the current query ends 95 | void QueryEnd(ClientContext &context) override { 96 | Reset(); 97 | } 98 | void WriteProfilingInformation(std::ostream &ss) override; 99 | 100 | private: 101 | //! Mutex to lock when getting the cached file (Parallel Only) 102 | mutex cached_files_mutex; 103 | //! 
In case of fully downloading the file, the cached files of this query 104 | unordered_map> cached_files; 105 | }; 106 | 107 | } // namespace duckdb 108 | -------------------------------------------------------------------------------- /test/sql/copy/s3/download_config.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/download_config.test 2 | # description: Test S3 configuration 3 | # group: [s3] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require-env S3_TEST_SERVER_AVAILABLE 1 10 | 11 | ## Require that these environment variables are also set 12 | require-env AWS_DEFAULT_REGION 13 | 14 | require-env AWS_ACCESS_KEY_ID 15 | 16 | require-env AWS_SECRET_ACCESS_KEY 17 | 18 | require-env DUCKDB_S3_ENDPOINT 19 | 20 | require-env DUCKDB_S3_USE_SSL 21 | 22 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 23 | set ignore_error_messages 24 | 25 | statement ok 26 | CREATE TABLE test as SELECT * FROM range(0,10) tbl(i); 27 | 28 | foreach url_style path vhost 29 | # Have to set these because they get altered during the loop 30 | statement ok 31 | SET s3_secret_access_key='${AWS_SECRET_ACCESS_KEY}'; 32 | 33 | statement ok 34 | SET s3_access_key_id='${AWS_ACCESS_KEY_ID}'; 35 | 36 | statement ok 37 | SET s3_endpoint='${DUCKDB_S3_ENDPOINT}'; 38 | 39 | statement ok 40 | SET http_retries=2; 41 | 42 | statement ok 43 | SET http_retry_wait_ms=10; 44 | 45 | statement ok 46 | SET http_retry_backoff=1; 47 | 48 | statement ok 49 | SET http_timeout=50000; 50 | 51 | statement ok 52 | SET http_keep_alive=false; 53 | 54 | # Test the vhost style urls (this is the default) 55 | statement ok 56 | SET s3_url_style='${url_style}'; 57 | 58 | statement ok 59 | COPY test TO 's3://test-bucket-public/root-dir/test_${url_style}_url_style.parquet'; 60 | 61 | # vhost style access 62 | query I 63 | SELECT i FROM "http://test-bucket-public.${DUCKDB_S3_ENDPOINT}/root-dir/test_${url_style}_url_style.parquet" LIMIT 3 64 | ---- 65 | 0 66 | 1 67 | 2 68 | 69 | # path style access 70 | query I 71 | SELECT i FROM "http://${DUCKDB_S3_ENDPOINT}/test-bucket-public/root-dir/test_${url_style}_url_style.parquet" LIMIT 3 72 | ---- 73 | 0 74 | 1 75 | 2 76 | 77 | # Test public access through s3 url 78 | statement ok 79 | SET s3_secret_access_key='';SET s3_access_key_id=''; 80 | 81 | query I 82 | SELECT i FROM "s3://test-bucket-public/root-dir/test_${url_style}_url_style.parquet" LIMIT 3 83 | ---- 84 | 0 85 | 1 86 | 2 87 | 88 | endloop 89 | 90 | # empty url style is also allowed to select the default 91 | statement ok 92 | SET s3_secret_access_key='${AWS_SECRET_ACCESS_KEY}';SET s3_access_key_id='${AWS_ACCESS_KEY_ID}';SET s3_region='${AWS_DEFAULT_REGION}'; SET s3_endpoint='${DUCKDB_S3_ENDPOINT}'; SET s3_use_ssl=${DUCKDB_S3_USE_SSL}; 93 | 94 | statement ok 95 | COPY test TO 's3://test-bucket-public/root-dir/test_default_url_style.parquet'; 96 | 97 | query I 98 | SELECT i FROM "http://test-bucket-public.${DUCKDB_S3_ENDPOINT}/root-dir/test_default_url_style.parquet" LIMIT 3 99 | ---- 100 | 0 101 | 1 102 | 2 103 | 104 | # Incorrect path style throws error 105 | statement ok 106 | SET s3_url_style='handwritten'; 107 | 108 | statement error 109 | COPY test TO 's3://test-bucket-public/root-dir/test2.parquet'; 110 | ---- 111 | 112 | # 404 113 | statement error 114 | SELECT i FROM "http://test-bucket-public.${DUCKDB_S3_ENDPOINT}/root-dir/non-existent-file-ljaslkjdas.parquet" LIMIT 3 115 | ---- 116 | Unable to 
connect to URL "http://test-bucket-public. 117 | 118 | # Connection error 119 | statement error 120 | SELECT i FROM "http://test-bucket-public.duckdb-minio-non-existent-host.com:9000/root-dir/non-existent-file-ljaslkjdas.parquet" LIMIT 3 121 | ---- 122 | Could not establish connection error for HTTP HEAD to 'http://test-bucket-public. 123 | 124 | # S3 errors should throw on 125 | statement error 126 | SELECT * FROM parquet_scan('s3://this-aint-no-bucket/no-path/no-file'); 127 | ---- 128 | Unable to connect to URL "http:// 129 | -------------------------------------------------------------------------------- /test/sql/copy/s3/http_proxy.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/http_proxy.test 2 | # description: Test http proxy 3 | # group: [s3] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require-env S3_TEST_SERVER_AVAILABLE 1 10 | 11 | require-env HTTP_PROXY_PUBLIC 12 | 13 | require-env AWS_DEFAULT_REGION 14 | 15 | require-env AWS_ACCESS_KEY_ID 16 | 17 | require-env AWS_SECRET_ACCESS_KEY 18 | 19 | require-env DUCKDB_S3_ENDPOINT 20 | 21 | require-env DUCKDB_S3_USE_SSL 22 | 23 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 24 | set ignore_error_messages 25 | 26 | statement ok 27 | PRAGMA enable_verification 28 | 29 | statement ok 30 | COPY (SELECT 'value-1' as value) TO 's3://test-bucket/proxy-test/test.parquet'; 31 | 32 | query I 33 | FROM 's3://test-bucket/proxy-test/test.parquet' 34 | ---- 35 | value-1 36 | 37 | # Lets try a faulty proxy first 38 | statement ok 39 | set http_proxy='blabla:1337' 40 | 41 | statement ok 42 | set http_proxy_username='xxx' 43 | 44 | statement ok 45 | set http_proxy_password='yyy' 46 | 47 | statement error 48 | FROM 's3://test-bucket/proxy-test/test.parquet' 49 | ---- 50 | Could not establish connection 51 | 52 | # Now a working one 53 | statement ok 54 | set http_proxy='${HTTP_PROXY_PUBLIC}' 55 | 56 | statement ok 57 | RESET http_proxy_username 58 | 59 | statement ok 60 | RESET http_proxy_password 61 | 62 | query I 63 | FROM 's3://test-bucket/proxy-test/test.parquet' 64 | ---- 65 | value-1 66 | 67 | # And try the working one with an 'http://' prefix. 
68 | statement ok 69 | set http_proxy='http://${HTTP_PROXY_PUBLIC}' 70 | 71 | query I 72 | FROM 's3://test-bucket/proxy-test/test.parquet' 73 | ---- 74 | value-1 75 | 76 | # Now we revert to the failing one 77 | statement ok 78 | set http_proxy='blabla:1337' 79 | 80 | # But we create a HTTP secret with the proxy 81 | statement ok 82 | CREATE SECRET http1 ( 83 | TYPE HTTP, 84 | http_proxy '${HTTP_PROXY_PUBLIC}' 85 | ); 86 | 87 | # This works now, because it uses the secret 88 | query I 89 | FROM 's3://test-bucket/proxy-test/test.parquet' 90 | ---- 91 | value-1 92 | 93 | statement ok 94 | DROP SECRET http1 95 | 96 | require-env HTTP_PROXY 97 | 98 | statement error 99 | FROM 's3://test-bucket/proxy-test/test.parquet' 100 | ---- 101 | Could not establish connection 102 | 103 | statement ok 104 | CREATE SECRET http1 ( 105 | TYPE HTTP, 106 | PROVIDER env 107 | ); 108 | 109 | # This works now, because it uses the secret 110 | query I 111 | FROM 's3://test-bucket/proxy-test/test.parquet' 112 | ---- 113 | value-1 114 | 115 | statement ok 116 | DROP SECRET http1 117 | 118 | require-env HTTP_PROXY_PRIVATE 119 | 120 | require-env HTTP_PROXY_PRIVATE_USERNAME 121 | 122 | require-env HTTP_PROXY_PRIVATE_PASSWORD 123 | 124 | # Let's try the private proxy 125 | statement ok 126 | CREATE SECRET http2 ( 127 | TYPE HTTP, 128 | http_proxy '${HTTP_PROXY_PRIVATE}', 129 | http_proxy_username '${HTTP_PROXY_PRIVATE_USERNAME}', 130 | http_proxy_password '${HTTP_PROXY_PRIVATE_PASSWORD}' 131 | ); 132 | 133 | # Correct auth means it works! 134 | query I 135 | FROM 's3://test-bucket/proxy-test/test.parquet' 136 | ---- 137 | value-1 138 | 139 | statement ok 140 | DROP SECRET http2 141 | 142 | # Now lets try incorrect auth 143 | statement ok 144 | CREATE SECRET http3 ( 145 | TYPE HTTP, 146 | http_proxy '${HTTP_PROXY_PRIVATE}', 147 | http_proxy_username 'malicious', 148 | http_proxy_password 'intruder' 149 | ); 150 | 151 | # We get a tasty HTTP 407 152 | statement error 153 | FROM 's3://test-bucket/proxy-test/test.parquet' 154 | ---- 155 | HTTP GET error on 'http://test-bucket.duckdb-minio.com:9000/proxy-test/test.parquet' (HTTP 407) 156 | -------------------------------------------------------------------------------- /test/sql/copy/parquet/test_parquet_remote_foreign_files.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/parquet/test_parquet_remote_foreign_files.test 2 | # description: Test queries on tricky parquet files over http. 
Note: on GH connection issues, these tests fail silently 3 | # group: [parquet] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | # /data/parquet-testing/bug1554.parquet 10 | query I 11 | SELECT COUNT(backlink_count) FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1554.parquet') WHERE http_status_code=200 12 | ---- 13 | 0 14 | 15 | query II 16 | SELECT http_status_code, COUNT(backlink_count) FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1554.parquet') GROUP BY http_status_code ORDER BY http_status_code 17 | ---- 18 | 200 0 19 | 301 0 20 | 21 | # /data/parquet-testing/bug1588.parquet 22 | 23 | query I 24 | SELECT has_image_link FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1588.parquet') where has_image_link = 1 25 | ---- 26 | 1 27 | 1 28 | 1 29 | 30 | # /data/parquet-testing/bug1589.parquet 31 | query I 32 | SELECT backlink_count FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1589.parquet') LIMIT 1 33 | ---- 34 | NULL 35 | 36 | statement ok 37 | SELECT * FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1589.parquet') 38 | 39 | 40 | query I 41 | SELECT "inner"['str_field'] FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1618_struct_strings.parquet') 42 | ---- 43 | hello 44 | NULL 45 | 46 | query I 47 | SELECT "inner"['f64_field'] FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1618_struct_strings.parquet') 48 | ---- 49 | NULL 50 | 1.23 51 | 52 | query I 53 | SELECT "inner" FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1618_struct_strings.parquet') 54 | ---- 55 | {'str_field': hello, 'f64_field': NULL} 56 | {'str_field': NULL, 'f64_field': 1.23} 57 | 58 | # /data/parquet-testing/struct.parquet 59 | query I 60 | select "inner"['f64_field'] from parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/struct.parquet'); 61 | ---- 62 | NULL 63 | 1.23 64 | 65 | # /data/parquet-testing/bug2267.parquet 66 | query I 67 | SELECT * FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug2267.parquet') 68 | ---- 69 | [{'disabledPlans': [bea4c11e-220a-4e6d-8eb8-8ea15d019f90], 'skuId': c7df2760-2c81-4ef7-b578-5b5392b571df}, {'disabledPlans': [8a256a2b-b617-496d-b51b-e76466e88db0, 41781fb2-bc02-4b7c-bd55-b576c07bb09d, eec0eb4f-6444-4f95-aba0-50c24d67f998], 'skuId': 84a661c4-e949-4bd2-a560-ed7766fcaf2b}, {'disabledPlans': [], 'skuId': b05e124f-c7cc-45a0-a6aa-8cf78c946968}, {'disabledPlans': [], 'skuId': f30db892-07e9-47e9-837c-80727f46fd3d}] 70 | 71 | query I 72 | SELECT assignedLicenses[1] FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug2267.parquet') 73 | ---- 74 | {'disabledPlans': [bea4c11e-220a-4e6d-8eb8-8ea15d019f90], 'skuId': c7df2760-2c81-4ef7-b578-5b5392b571df} 75 | 76 | # multiple files 77 | query II 78 | select * from parquet_scan(['https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/glob/t1.parquet', 'https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/glob/t2.parquet']) 79 | ---- 80 | 1 a 81 | 2 b 82 | 83 | # Malformed parquet to test fallback from prefetch 84 | query IIII 85 | select * from 
parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/arrow/nation.dict-malformed.parquet') limit 2; 86 | ---- 87 | 0 ALGERIA 0 haggle. carefully final deposits detect slyly agai 88 | 1 ARGENTINA 1 al foxes promise slyly according to the regular accounts. bold requests alon 89 | -------------------------------------------------------------------------------- /test/extension/autoloading_base.test: -------------------------------------------------------------------------------- 1 | # name: test/extension/autoloading_base.test 2 | # description: Base tests for the autoloading mechanism for extensions 3 | # group: [extension] 4 | 5 | require httpfs 6 | 7 | # This test assumes icu and json to be available in the LOCAL_EXTENSION_REPO and NOT linked into duckdb statically 8 | # -> this should be the case for our autoloading tests where we have the local_extension_repo variable set 9 | require-env LOCAL_EXTENSION_REPO 10 | 11 | # Ensure we have a clean extension directory without any preinstalled extensions 12 | statement ok 13 | set extension_directory='__TEST_DIR__/autoloading_base' 14 | 15 | query I 16 | SELECT (count(*) > 0) FROM duckdb_extensions() WHERE install_path ILIKE '%duckdb_extension' 17 | ---- 18 | false 19 | 20 | # All extensions reported by duckdb are either statically linked or not installed 21 | query I 22 | SELECT count(*) FROM duckdb_extensions() WHERE install_mode != 'NOT_INSTALLED' AND install_mode != 'STATICALLY_LINKED' 23 | ---- 24 | 0 25 | 26 | ### No autoloading nor installing: throw error with installation hint 27 | statement ok 28 | set autoload_known_extensions=false 29 | 30 | statement ok 31 | set autoinstall_known_extensions=false 32 | 33 | statement error 34 | SET s3_region='eu-west-1'; 35 | ---- 36 | :.*Catalog Error.*Setting with name "s3_region" is not in the catalog.* 37 | 38 | statement error 39 | select * from read_json_auto('data/json/example_n.ndjson'); 40 | ---- 41 | :.*Catalog Error.*Table Function with name "read_json_auto" is not in the catalog.* 42 | 43 | statement error 44 | select * from thistablefunctionwillnotexistfosho(); 45 | ---- 46 | :.*Catalog Error.*Table Function with name thistablefunctionwillnotexistfosho does not exist.* 47 | 48 | ### Autoloading and installing, but the autoloading repository is set to non-existent location 49 | statement ok 50 | set autoload_known_extensions=true 51 | 52 | statement ok 53 | set autoinstall_known_extensions=true 54 | 55 | # Override the default repo with a non-existent local repo 56 | statement ok 57 | set autoinstall_extension_repository='/tmp/non-existent-repo'; 58 | 59 | # Error should inform the user on whats happening 60 | statement error 61 | SET s3_region='eu-west-1'; 62 | ---- 63 | :Extension Autoloading Error.*An error occurred while trying to automatically install the required extension 'httpfs'.* 64 | 65 | statement error 66 | select * from read_json_auto('data/json/example_n.ndjson'); 67 | ---- 68 | :Extension Autoloading Error.*An error occurred while trying to automatically install the required extension 'json'.* 69 | 70 | # Now override with non-existent remote repo 71 | statement ok 72 | set autoinstall_extension_repository='http://duckdb.org/what/are/the/odds/we/actually/make/this/path/and/break/this/tests'; 73 | 74 | # Error should inform the user on whats happening 75 | statement error 76 | SET s3_region='eu-west-1'; 77 | ---- 78 | :Extension Autoloading Error.*An error occurred while trying to automatically install the required extension 'httpfs'.* 79 | 
80 | statement error 81 | select * from read_json_auto('data/json/example_n.ndjson'); 82 | ---- 83 | :Extension Autoloading Error.*An error occurred while trying to automatically install the required extension 'json'.* 84 | 85 | statement error 86 | select * from thistablefunctionwillnotexistfosho(); 87 | ---- 88 | :Catalog Error.*Table Function with name thistablefunctionwillnotexistfosho does not exist.* 89 | 90 | ### Autoloading with correct tmp repo 91 | statement ok 92 | set autoinstall_extension_repository='${LOCAL_EXTENSION_REPO}'; 93 | 94 | statement ok 95 | SET s3_region='eu-west-1'; 96 | 97 | statement ok 98 | select * from read_json_auto('data/json/example_n.ndjson'); 99 | 100 | query I 101 | SELECT (count(*) > 0) FROM duckdb_extensions() WHERE install_path ILIKE '%duckdb_extension'; 102 | ---- 103 | true 104 | -------------------------------------------------------------------------------- /test/sql/secrets/create_secret_storage_backends.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secrets/create_secret_storage_backends.test 2 | # description: Test different storage backends 3 | # group: [secrets] 4 | 5 | load __TEST_DIR__/create_secret_storage_backends.db 6 | 7 | statement ok 8 | PRAGMA enable_verification; 9 | 10 | require httpfs 11 | 12 | # Ensure any currently stored secrets don't interfere with the test 13 | statement ok 14 | set allow_persistent_secrets=false; 15 | 16 | statement error 17 | CREATE TEMPORARY SECRET s1 IN LOCAL_FILE ( TYPE S3 ) 18 | ---- 19 | Invalid Input Error: Persistent secrets are disabled. Restart DuckDB and enable persistent secrets through 'SET allow_persistent_secrets=true' 20 | 21 | statement error 22 | CREATE PERSISTENT SECRET s1 IN NON_EXISTENT_SECRET_STORAGE ( TYPE S3 ) 23 | ---- 24 | Invalid Input Error: Persistent secrets are disabled. Restart DuckDB and enable persistent secrets through 'SET allow_persistent_secrets=true' 25 | 26 | # We have disabled the permanent secrets, so this should fail 27 | statement error 28 | CREATE PERSISTENT SECRET perm_s1 ( TYPE S3 ) 29 | ---- 30 | Invalid Input Error: Persistent secrets are disabled. Restart DuckDB and enable persistent secrets through 'SET allow_persistent_secrets=true' 31 | 32 | restart 33 | 34 | # Enable persistent secrets so we can set a 'secret_directory' 35 | statement ok 36 | set allow_persistent_secrets=true; 37 | 38 | statement ok 39 | set secret_directory='__TEST_DIR__/create_secret_storages' 40 | 41 | # Default for persistent secret is currently LOCAL_FILE (only native persistent storage method currently) 42 | statement ok 43 | CREATE PERSISTENT SECRET perm_s1 ( TYPE S3 ) 44 | 45 | # Specifying IN ... 
implies persistent, hence this is okay 46 | statement ok 47 | CREATE SECRET perm_s2 IN LOCAL_FILE ( TYPE S3 ) 48 | 49 | # Explicitly stating temporary is cool 50 | statement ok 51 | CREATE TEMPORARY SECRET temp_s1 ( TYPE s3 ); 52 | 53 | # Not specifying it will use the system default (which is temp) 54 | statement ok 55 | CREATE SECRET temp_s2 ( TYPE s3 ); 56 | 57 | query IIIIII 58 | SELECT * EXCLUDE (secret_string) FROM duckdb_secrets() ORDER BY name 59 | ---- 60 | perm_s1 s3 config true local_file ['s3://', 's3n://', 's3a://'] 61 | perm_s2 s3 config true local_file ['s3://', 's3n://', 's3a://'] 62 | temp_s1 s3 config false memory ['s3://', 's3n://', 's3a://'] 63 | temp_s2 s3 config false memory ['s3://', 's3n://', 's3a://'] 64 | 65 | restart 66 | 67 | # Since extensions can add secret storage backends, we allow switching the default backend 68 | statement ok 69 | set default_secret_storage='currently-non-existent' 70 | 71 | statement ok 72 | set secret_directory='__TEST_DIR__/create_secret_storages' 73 | 74 | statement error 75 | CREATE PERSISTENT SECRET s1 ( TYPE S3 ) 76 | ---- 77 | Secret storage 'currently-non-existent' not found! 78 | 79 | # We can still work around this broken default by specifying the storage explicitly 80 | statement ok 81 | CREATE PERSISTENT SECRET s1 IN LOCAL_FILE ( TYPE S3 ) 82 | 83 | restart 84 | 85 | statement ok 86 | set secret_directory='__TEST_DIR__/create_secret_storages' 87 | 88 | # Let's restore and now things work again 89 | statement ok 90 | reset default_secret_storage 91 | 92 | statement ok 93 | CREATE PERSISTENT SECRET s2 ( TYPE S3 ) 94 | 95 | query IIIIII 96 | SELECT * EXCLUDE (secret_string) FROM duckdb_secrets() ORDER BY name 97 | ---- 98 | perm_s1 s3 config true local_file ['s3://', 's3n://', 's3a://'] 99 | perm_s2 s3 config true local_file ['s3://', 's3n://', 's3a://'] 100 | s1 s3 config true local_file ['s3://', 's3n://', 's3a://'] 101 | s2 s3 config true local_file ['s3://', 's3n://', 's3a://'] 102 | 103 | statement maybe 104 | DROP SECRET perm_s1; 105 | ---- 106 | Invalid Input Error: Failed to remove non-existent secret 107 | 108 | statement maybe 109 | DROP SECRET perm_s2; 110 | ---- 111 | Invalid Input Error: Failed to remove non-existent secret 112 | -------------------------------------------------------------------------------- /test/sql/secret/secret_refresh.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/secret/secret_refresh.test 2 | # description: Tests secret refreshing 3 | # group: [secret] 4 | 5 | require-env S3_TEST_SERVER_AVAILABLE 1 6 | 7 | require-env AWS_DEFAULT_REGION 8 | 9 | require-env AWS_ACCESS_KEY_ID 10 | 11 | require-env AWS_SECRET_ACCESS_KEY 12 | 13 | require-env DUCKDB_S3_ENDPOINT 14 | 15 | require-env DUCKDB_S3_USE_SSL 16 | 17 | set ignore_error_messages 18 | 19 | require httpfs 20 | 21 | require parquet 22 | 23 | statement ok 24 | SET enable_logging=true 25 | 26 | statement ok 27 | set s3_use_ssl='${DUCKDB_S3_USE_SSL}' 28 | 29 | statement ok 30 | set s3_endpoint='${DUCKDB_S3_ENDPOINT}' 31 | 32 | statement ok 33 | set s3_region='${AWS_DEFAULT_REGION}' 34 | 35 | # Create some test data 36 | statement ok 37 | CREATE SECRET s1 ( 38 | TYPE S3, 39 | KEY_ID '${AWS_ACCESS_KEY_ID}', 40 | SECRET '${AWS_SECRET_ACCESS_KEY}' 41 | ) 42 | 43 | statement ok 44 | copy (select 1 as a) to 's3://test-bucket/test-file.parquet' 45 | 46 | statement ok 47 | DROP SECRET s1; 48 | 49 | # Firstly: a secret that is initially wrong, but correct after refresh 50 | statement ok 51 | 
CREATE SECRET s1 ( 52 | TYPE S3, 53 | KEY_ID 'BOGUS', 54 | SECRET 'ALSO BOGUS', 55 | REFRESH_INFO MAP { 56 | 'KEY_ID': '${AWS_ACCESS_KEY_ID}', 57 | 'SECRET': '${AWS_SECRET_ACCESS_KEY}' 58 | } 59 | ) 60 | 61 | # Make the request: the initial request will fail, but refresh will get triggered and the request succeeds on the second attempt 62 | statement ok 63 | FROM "s3://test-bucket/test-file.parquet" 64 | 65 | query I 66 | SELECT message[0:46] FROM duckdb_logs WHERE message like '%Successfully refreshed secret%' 67 | ---- 68 | Successfully refreshed secret: s1, new key_id: 69 | 70 | # Cleanup: drop secret and logs 71 | statement ok 72 | DROP SECRET s1;set enable_logging=false;set logging_storage='stdout';set logging_storage='memory';set enable_logging=true; 73 | 74 | # Secondly: a secret that is initially wrong, and still incorrect afterwards (REFRESH will just use the original secret input to refresh) 75 | statement ok 76 | CREATE SECRET s1 ( 77 | TYPE S3, 78 | KEY_ID 'BOGUS', 79 | SECRET 'ALSO BOGUS', 80 | REFRESH 1 81 | ) 82 | 83 | # TODO: add FORBIDDEN back in once enum util for http status codes is merged into httpfs 84 | statement error 85 | FROM "s3://test-bucket/test-file.parquet" 86 | ---- 87 | HTTP Error: HTTP GET error on 'http://test-bucket.duckdb-minio.com:9000/test-file.parquet' (HTTP 403) 88 | 89 | query I 90 | SELECT message[0:46] FROM duckdb_logs WHERE message like '%Successfully refreshed secret%' 91 | ---- 92 | Successfully refreshed secret: s1, new key_id: 93 | 94 | # Cleanup: drop secret and logs 95 | statement ok 96 | DROP SECRET s1;set enable_logging=false;set logging_storage='stdout';set logging_storage='memory';set enable_logging=true; 97 | 98 | # Thirdly: a secret that is initially wrong, and contains incorrect REFRESH_INFO 99 | statement ok 100 | CREATE SECRET s1 ( 101 | TYPE S3, 102 | KEY_ID 'BOGUS', 103 | SECRET 'ALSO BOGUS', 104 | REFRESH_INFO MAP { 105 | 'THIS_KEY_DOES_NOT_EXIST': '${BOGUS}' 106 | } 107 | ) 108 | 109 | # For now, we throw the actual error that gets thrown during refresh. 
Since refresh is opt-in for now, this ensures the user can understand what's happening 110 | statement error 111 | FROM "s3://test-bucket/test-file.parquet" 112 | ---- 113 | Exception thrown while trying to refresh secret s1 114 | 115 | # Cleanup: drop secret 116 | statement ok 117 | DROP SECRET s1; 118 | 119 | # Set incorrect key id to force query to fail without secret 120 | statement ok 121 | set s3_access_key_id='bogus' 122 | 123 | # Without secret this query will fail, but since there are no suitable secrets, no refresh attempt will be made 124 | # TODO: add FORBIDDEN in once enum util for http status codes is merged into httpfs 125 | statement error 126 | FROM "s3://test-bucket/test-file.parquet" 127 | ---- 128 | HTTP Error: HTTP GET error on 'http://test-bucket.duckdb-minio.com:9000/test-file.parquet' (HTTP 403) 129 | 130 | # -> log empty 131 | query II 132 | SELECT log_level, message FROM duckdb_logs WHERE message like '%Successfully refreshed secret%' 133 | ---- 134 | -------------------------------------------------------------------------------- /test/sql/copy/s3/s3_hive_partition.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/copy/s3/s3_hive_partition.test 2 | # description: Test the automatic parsing of the hive partitioning scheme 3 | # group: [s3] 4 | 5 | require parquet 6 | 7 | require httpfs 8 | 9 | require-env S3_TEST_SERVER_AVAILABLE 1 10 | 11 | ## Require that these environment variables are also set 12 | require-env AWS_DEFAULT_REGION 13 | 14 | require-env AWS_ACCESS_KEY_ID 15 | 16 | require-env AWS_SECRET_ACCESS_KEY 17 | 18 | require-env DUCKDB_S3_ENDPOINT 19 | 20 | require-env DUCKDB_S3_USE_SSL 21 | 22 | # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues 23 | set ignore_error_messages 24 | 25 | # Parquet filename conflict 26 | statement ok 27 | CREATE TABLE test AS SELECT 1 as id, 'value1' as value; 28 | CREATE TABLE test2 AS SELECT 2 as id, 'value2' as value; 29 | 30 | statement ok 31 | COPY test TO 's3://test-bucket/hive-partitioning/simple/key_!-_.*()=zisiswurking1/test.parquet'; 32 | COPY test2 TO 's3://test-bucket/hive-partitioning/simple/key_!-_.*()=zisiswurking2/test.parquet'; 33 | 34 | # test parsing hive partitioning scheme, with some common special characters 35 | query III 36 | select id, value, "key_!-_.*()" from parquet_scan('s3://test-bucket/hive-partitioning/simple/*/test.parquet', HIVE_PARTITIONING=1) 37 | ---- 38 | 1 value1 zisiswurking1 39 | 2 value2 zisiswurking2 40 | 41 | # Test some medium sized files 42 | statement ok 43 | CREATE TABLE test3 as SELECT id FROM range(0,10000) tbl(id); 44 | CREATE TABLE test4 as SELECT id FROM range(10000,20000) tbl(id); 45 | 46 | statement ok 47 | COPY test3 TO 's3://test-bucket/hive-partitioning/medium/part=1/part2=1/test.parquet'; 48 | COPY test4 TO 's3://test-bucket/hive-partitioning/medium/part=1/part2=2/test.parquet'; 49 | COPY test3 TO 's3://test-bucket/hive-partitioning/medium/part=1/part2=1/test.csv'; 50 | COPY test4 TO 's3://test-bucket/hive-partitioning/medium/part=1/part2=2/test.csv'; 51 | 52 | query II 53 | select min(id), max(id) from parquet_scan('s3://test-bucket/hive-partitioning/medium/*/*/test.parquet', HIVE_PARTITIONING=1) where part2=2 54 | ---- 55 | 10000 19999 56 | 57 | query II 58 | select min(id), max(id) from parquet_scan('s3://test-bucket/hive-partitioning/medium/*/*/test.parquet', HIVE_PARTITIONING=1) where part2=1 59 | ---- 60 | 0 9999 61 | 62 | query II 63 | 
select min(id), max(id) from read_csv_auto('s3://test-bucket/hive-partitioning/medium/*/*/test.csv', HIVE_PARTITIONING=1) where part2=2 64 | ---- 65 | 10000 19999 66 | 67 | query II 68 | select min(id), max(id) from read_csv_auto('s3://test-bucket/hive-partitioning/medium/*/*/test.csv', HIVE_PARTITIONING=1) where part2=1 69 | ---- 70 | 0 9999 71 | 72 | # check cases where there are file filters AND table filters 73 | statement ok 74 | Create table t1 (a int, b int, c int); 75 | 76 | foreach i 0 1 2 3 4 5 6 7 8 9 77 | 78 | statement ok 79 | insert into t1 (select range, ${i}*10, ${i}*100 from range(0,10)); 80 | 81 | endloop 82 | 83 | statement ok 84 | COPY (SELECT * FROM t1) TO 's3://test-bucket/hive-partitioning/filter-test-parquet' (FORMAT PARQUET, PARTITION_BY c, OVERWRITE_OR_IGNORE); 85 | 86 | statement ok 87 | COPY (SELECT * FROM t1) TO 's3://test-bucket/hive-partitioning/filter-test-csv' (FORMAT CSV, PARTITION_BY c, OVERWRITE_OR_IGNORE); 88 | 89 | # There should be Table Filters (id < 50) and file filters (c = 500) 90 | query II 91 | EXPLAIN select a from parquet_scan('s3://test-bucket/hive-partitioning/filter-test-parquet/*/*.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where c=500 and a < 4; 92 | ---- 93 | physical_plan :.*PARQUET_SCAN.*Filters:.*a<4.*File Filters:.* \(CAST\(c AS.*INTEGER\) = 500\).* 94 | 95 | # There should be Table Filters (id < 50) and file filters (c = 500) 96 | query II 97 | EXPLAIN select a from read_csv_auto('s3://test-bucket/hive-partitioning/filter-test-csv/*/*.csv', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where c=500 and a < 4; 98 | ---- 99 | physical_plan :.*FILTER.*(a < 4).*READ_CSV_AUTO.*File Filters:.* \(CAST\(c AS.*INTEGER\) = 500\).* 100 | 101 | statement error 102 | COPY (SELECT * FROM t1) TO 's3://test-bucket/hive-partitioning/filter-test-parquet' (FORMAT PARQUET, PARTITION_BY c, OVERWRITE); 103 | ---- 104 | OVERWRITE is not supported for remote file systems 105 | -------------------------------------------------------------------------------- /src/http_state.cpp: -------------------------------------------------------------------------------- 1 | #include "http_state.hpp" 2 | #include "duckdb/main/query_profiler.hpp" 3 | 4 | namespace duckdb { 5 | 6 | CachedFileHandle::CachedFileHandle(shared_ptr &file_p) { 7 | // If the file was not yet initialized, we need to grab a lock. 
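// The lock member is a lock_guard on CachedFile::lock and stays held for the lifetime of this
// handle, until either SetInitialized() releases it once the download is complete or the handle
// is destroyed. Handles created after initialization skip the lock entirely and rely on the
// atomic `initialized` flag, so a fully downloaded file can be read in parallel.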
8 | if (!file_p->initialized) { 9 | lock = make_uniq<lock_guard<mutex>>(file_p->lock); 10 | } 11 | file = file_p; 12 | } 13 | 14 | void CachedFileHandle::SetInitialized(idx_t total_size) { 15 | if (file->initialized) { 16 | throw InternalException("Cannot set initialized on cached file that was already initialized"); 17 | } 18 | if (!lock) { 19 | throw InternalException("Cannot set initialized on cached file without lock"); 20 | } 21 | file->size = total_size; 22 | file->initialized = true; 23 | lock = nullptr; 24 | } 25 | 26 | void CachedFileHandle::AllocateBuffer(idx_t size) { 27 | if (file->initialized) { 28 | throw InternalException("Cannot allocate a buffer for a cached file that was already initialized"); 29 | } 30 | file->data = shared_ptr<char>(new char[size], std::default_delete<char[]>()); 31 | file->capacity = size; 32 | } 33 | 34 | void CachedFileHandle::GrowBuffer(idx_t new_capacity, idx_t bytes_to_copy) { 35 | // copy shared ptr to old data 36 | auto old_data = file->data; 37 | // allocate new buffer that can hold the new capacity 38 | AllocateBuffer(new_capacity); 39 | // copy the old data 40 | Write(old_data.get(), bytes_to_copy); 41 | } 42 | 43 | void CachedFileHandle::Write(const char *buffer, idx_t length, idx_t offset) { 44 | //! Only write to non-initialized files with a lock; 45 | D_ASSERT(!file->initialized && lock); 46 | memcpy(file->data.get() + offset, buffer, length); 47 | } 48 | 49 | void HTTPState::Reset() { 50 | // Reset Counters 51 | head_count = 0; 52 | get_count = 0; 53 | put_count = 0; 54 | post_count = 0; 55 | delete_count = 0; 56 | total_bytes_received = 0; 57 | total_bytes_sent = 0; 58 | 59 | // Reset cached files 60 | cached_files.clear(); 61 | } 62 | 63 | shared_ptr<HTTPState> HTTPState::TryGetState(ClientContext &context) { 64 | return context.registered_state->GetOrCreate<HTTPState>("http_state"); 65 | } 66 | 67 | shared_ptr<HTTPState> HTTPState::TryGetState(optional_ptr<FileOpener> opener) { 68 | auto client_context = FileOpener::TryGetClientContext(opener); 69 | if (client_context) { 70 | return TryGetState(*client_context); 71 | } 72 | return nullptr; 73 | } 74 | 75 | void HTTPState::WriteProfilingInformation(std::ostream &ss) { 76 | string read = "in: " + StringUtil::BytesToHumanReadableString(total_bytes_received); 77 | string written = "out: " + StringUtil::BytesToHumanReadableString(total_bytes_sent); 78 | string head = "#HEAD: " + to_string(head_count); 79 | string get = "#GET: " + to_string(get_count); 80 | string put = "#PUT: " + to_string(put_count); 81 | string post = "#POST: " + to_string(post_count); 82 | string del = "#DELETE: " + to_string(delete_count); 83 | 84 | constexpr idx_t TOTAL_BOX_WIDTH = 39; 85 | ss << "┌─────────────────────────────────────┐\n"; 86 | ss << "│┌───────────────────────────────────┐│\n"; 87 | ss << "││" + QueryProfiler::DrawPadded("HTTPFS HTTP Stats", TOTAL_BOX_WIDTH - 4) + "││\n"; 88 | ss << "││ ││\n"; 89 | ss << "││" + QueryProfiler::DrawPadded(read, TOTAL_BOX_WIDTH - 4) + "││\n"; 90 | ss << "││" + QueryProfiler::DrawPadded(written, TOTAL_BOX_WIDTH - 4) + "││\n"; 91 | ss << "││" + QueryProfiler::DrawPadded(head, TOTAL_BOX_WIDTH - 4) + "││\n"; 92 | ss << "││" + QueryProfiler::DrawPadded(get, TOTAL_BOX_WIDTH - 4) + "││\n"; 93 | ss << "││" + QueryProfiler::DrawPadded(put, TOTAL_BOX_WIDTH - 4) + "││\n"; 94 | ss << "││" + QueryProfiler::DrawPadded(post, TOTAL_BOX_WIDTH - 4) + "││\n"; 95 | ss << "││" + QueryProfiler::DrawPadded(del, TOTAL_BOX_WIDTH - 4) + "││\n"; 96 | ss << "│└───────────────────────────────────┘│\n"; 97 | ss << "└─────────────────────────────────────┘\n"; 98 | } 99 | 100 
//! Get cache entry, create if not exists
shared_ptr<CachedFile> &HTTPState::GetCachedFile(const string &path) {
	lock_guard<mutex> lock(cached_files_mutex);
	auto &cache_entry_ref = cached_files[path];
	if (!cache_entry_ref) {
		cache_entry_ref = make_shared_ptr<CachedFile>();
	}
	return cache_entry_ref;
}

} // namespace duckdb
--------------------------------------------------------------------------------
/test/sql/copy/s3/upload_small_file.test:
--------------------------------------------------------------------------------
# name: test/sql/copy/s3/upload_small_file.test
# description: Copy small csv/parquet files from and to S3.
# group: [s3]

require parquet

require httpfs

require-env S3_TEST_SERVER_AVAILABLE 1

# Require that these environment variables are also set

require-env AWS_DEFAULT_REGION

require-env AWS_ACCESS_KEY_ID

require-env AWS_SECRET_ACCESS_KEY

require-env DUCKDB_S3_ENDPOINT

require-env DUCKDB_S3_USE_SSL

# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues
set ignore_error_messages

statement ok
CREATE TABLE web_page as (SELECT * FROM "duckdb/data/csv/real/web_page.csv");

query IIIIIIIIIIIIII
SELECT * FROM web_page LIMIT 10;
----
1	AAAAAAAABAAAAAAA	1997-09-03	NULL	2450810	2452620	Y	98539	http://www.foo.com	welcome	2531	8	3	4
2	AAAAAAAACAAAAAAA	1997-09-03	2000-09-02	2450814	2452580	N	NULL	http://www.foo.com	protected	1564	4	3	1
3	AAAAAAAACAAAAAAA	2000-09-03	NULL	2450814	2452611	N	NULL	http://www.foo.com	feedback	1564	4	3	4
4	AAAAAAAAEAAAAAAA	1997-09-03	1999-09-03	2450812	2452579	N	NULL	http://www.foo.com	general	3732	18	7	1
5	AAAAAAAAEAAAAAAA	1999-09-04	2001-09-02	2450812	2452597	N	NULL	http://www.foo.com	welcome	3732	18	3	1
6	AAAAAAAAEAAAAAAA	2001-09-03	NULL	2450814	2452597	N	NULL	http://www.foo.com	ad	3732	18	7	4
7	AAAAAAAAHAAAAAAA	1997-09-03	NULL	2450815	2452574	N	NULL	http://www.foo.com	feedback	3034	18	7	4
8	AAAAAAAAIAAAAAAA	1997-09-03	2000-09-02	2450815	2452646	Y	1898	http://www.foo.com	protected	3128	12	2	4
9	AAAAAAAAIAAAAAAA	2000-09-03	NULL	2450807	2452579	Y	84146	http://www.foo.com	welcome	3128	13	5	3
10	AAAAAAAAKAAAAAAA	1997-09-03	1999-09-03	NULL	2452623	N	NULL	http://www.foo.com	NULL	NULL	NULL	NULL	NULL

# Parquet file
statement ok
COPY web_page TO 's3://test-bucket/multipart/web_page.parquet' (FORMAT 'parquet');

query IIIIIIIIIIIIII
SELECT * FROM "s3://test-bucket/multipart/web_page.parquet" LIMIT 10;
----
1	AAAAAAAABAAAAAAA	1997-09-03	NULL	2450810	2452620	Y	98539	http://www.foo.com	welcome	2531	8	3	4
2	AAAAAAAACAAAAAAA	1997-09-03	2000-09-02	2450814	2452580	N	NULL	http://www.foo.com	protected	1564	4	3	1
3	AAAAAAAACAAAAAAA	2000-09-03	NULL	2450814	2452611	N	NULL	http://www.foo.com	feedback	1564	4	3	4
4	AAAAAAAAEAAAAAAA	1997-09-03	1999-09-03	2450812	2452579	N	NULL	http://www.foo.com	general	3732	18	7	1
5	AAAAAAAAEAAAAAAA	1999-09-04	2001-09-02	2450812	2452597	N	NULL	http://www.foo.com	welcome	3732	18	3	1
6	AAAAAAAAEAAAAAAA	2001-09-03	NULL	2450814	2452597	N	NULL	http://www.foo.com	ad	3732	18	7	4
7	AAAAAAAAHAAAAAAA	1997-09-03	NULL	2450815	2452574	N	NULL	http://www.foo.com	feedback	3034	18	7	4
8	AAAAAAAAIAAAAAAA	1997-09-03	2000-09-02	2450815	2452646	Y	1898	http://www.foo.com	protected	3128	12	2	4
9	AAAAAAAAIAAAAAAA	2000-09-03	NULL	2450807	2452579	Y	84146	http://www.foo.com	welcome	3128	13	5	3
10	AAAAAAAAKAAAAAAA	1997-09-03	1999-09-03	NULL	2452623	N	NULL	http://www.foo.com	NULL	NULL	NULL	NULL	NULL

# CSV file
statement ok
COPY web_page TO 's3://test-bucket/multipart/web_page.csv';

query IIIIIIIIIIIIII
SELECT * FROM "s3://test-bucket/multipart/web_page.csv" LIMIT 10;
----
1	AAAAAAAABAAAAAAA	1997-09-03	NULL	2450810	2452620	Y	98539	http://www.foo.com	welcome	2531	8	3	4
2	AAAAAAAACAAAAAAA	1997-09-03	2000-09-02	2450814	2452580	N	NULL	http://www.foo.com	protected	1564	4	3	1
3	AAAAAAAACAAAAAAA	2000-09-03	NULL	2450814	2452611	N	NULL	http://www.foo.com	feedback	1564	4	3	4
4	AAAAAAAAEAAAAAAA	1997-09-03	1999-09-03	2450812	2452579	N	NULL	http://www.foo.com	general	3732	18	7	1
5	AAAAAAAAEAAAAAAA	1999-09-04	2001-09-02	2450812	2452597	N	NULL	http://www.foo.com	welcome	3732	18	3	1
6	AAAAAAAAEAAAAAAA	2001-09-03	NULL	2450814	2452597	N	NULL	http://www.foo.com	ad	3732	18	7	4
7	AAAAAAAAHAAAAAAA	1997-09-03	NULL	2450815	2452574	N	NULL	http://www.foo.com	feedback	3034	18	7	4
8	AAAAAAAAIAAAAAAA	1997-09-03	2000-09-02	2450815	2452646	Y	1898	http://www.foo.com	protected	3128	12	2	4
9	AAAAAAAAIAAAAAAA	2000-09-03	NULL	2450807	2452579	Y	84146	http://www.foo.com	welcome	3128	13	5	3
10	AAAAAAAAKAAAAAAA	1997-09-03	1999-09-03	NULL	2452623	N	NULL	http://www.foo.com	NULL	NULL	NULL	NULL	NULL

--------------------------------------------------------------------------------
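The test above picks up its S3 credentials and endpoint from the AWS_* and DUCKDB_S3_* environment variables exported by the MinIO test-server scripts under scripts/. As a rough sketch of the equivalent manual setup (the secret name and every literal value below are placeholders, not the test server's actual configuration), the same connection could be configured explicitly with a DuckDB S3 secret before running the COPY statements:

CREATE OR REPLACE SECRET s3_test_secret (
    TYPE S3,
    KEY_ID 'minioadmin',       -- placeholder: use the value of AWS_ACCESS_KEY_ID
    SECRET 'minioadmin',       -- placeholder: use the value of AWS_SECRET_ACCESS_KEY
    REGION 'eu-west-1',        -- placeholder: use the value of AWS_DEFAULT_REGION
    ENDPOINT 'localhost:9000', -- placeholder: use the value of DUCKDB_S3_ENDPOINT
    URL_STYLE 'path',
    USE_SSL false              -- placeholder: match DUCKDB_S3_USE_SSL
);

COPY web_page TO 's3://test-bucket/multipart/web_page.parquet' (FORMAT 'parquet');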