├── .DS_Store ├── .editorconfig ├── .github └── workflows │ ├── python-publish.yml │ └── python-test.yml ├── .gitignore ├── .python-version ├── .vscode ├── launch.json └── settings.json ├── LICENSE ├── README.md ├── assets └── LakeAPI.drawio.png ├── azure.duckdb_extension.gz ├── bmsdna └── lakeapi │ ├── __init__.py │ ├── api │ ├── __init__.py │ └── api.py │ ├── context │ ├── __init__.py │ ├── df_base.py │ ├── df_duckdb.py │ ├── df_odbc.py │ ├── df_polars.py │ └── source_uri.py │ ├── core │ ├── __init__.py │ ├── config.py │ ├── datasource.py │ ├── env.py │ ├── log.py │ ├── model.py │ ├── partition_utils.py │ ├── response.py │ ├── route.py │ ├── schema_cache.py │ ├── types.py │ ├── uservalidation.py │ └── yaml.py │ ├── endpoint │ ├── __init__.py │ ├── detail_endpoint.py │ ├── endpoint.py │ ├── endpoint_nearby.py │ ├── endpoint_search.py │ └── sql_endpoint.py │ ├── standalone │ └── __init__.py │ ├── tools │ ├── useradd.py │ └── validateschema.py │ └── utils │ ├── async_utils.py │ └── fast_api_utils.py ├── chinook.db ├── config_schema.json ├── config_test.yml ├── duckdb_tests.py ├── profile_start.bat ├── pyproject.toml ├── repo.py ├── start_test_instance.py ├── startup_perf.py ├── test_requests └── Test Requests │ ├── Jsonify Complex.bru │ ├── Test Nearby.bru │ ├── bruno.json │ ├── environments │ └── Local.bru │ └── filter fruits.bru ├── test_server ├── __init__.py └── sql_docker.env ├── tester.py ├── tests ├── __init__.py ├── chinook.db ├── conftest.py ├── create_test_data.py ├── data │ ├── chinook.db │ └── delta │ │ └── table_w_col_map │ │ ├── 97 │ │ └── part-00038-bb2ca269-9b06-433d-8733-e714448dda93.c000.snappy.parquet │ │ ├── 0g │ │ └── part-00006-510491fd-b429-477a-bc01-ba90158ece60.c000.snappy.parquet │ │ ├── 4M │ │ └── part-00004-816874eb-5a74-436a-8967-7f6d617e41f2.c000.snappy.parquet │ │ ├── 4P │ │ └── part-00047-b1e87770-7221-43b8-a74c-44ae08076a09.c000.snappy.parquet │ │ ├── 4Z │ │ └── part-00005-ea0c83d4-7e32-46a0-beca-f49e79046b80.c000.snappy.parquet │ │ 
├── 6Q │ │ └── part-00054-31997fb2-91ff-43b2-824e-103cc0c7d756.c000.snappy.parquet │ │ ├── 6c │ │ └── part-00012-6259bde6-bdf7-4453-a230-0f2520a6240c.c000.snappy.parquet │ │ ├── 7w │ │ └── part-00000-be76109f-fbcd-49b1-8849-5fdf6c78f9bd.c000.snappy.parquet │ │ ├── 8r │ │ └── part-00068-6aa18a58-b31a-4343-9a94-5132835119af.c000.snappy.parquet │ │ ├── BI │ │ └── part-00044-4b8b65e2-6637-4999-a8dd-6327a3a34805.c000.snappy.parquet │ │ ├── BP │ │ └── part-00065-8e29739a-c689-4913-923f-b409ab10a6f3.c000.snappy.parquet │ │ ├── Dj │ │ └── part-00001-b9e46f6e-c6ce-4b84-a756-016350dd00a3.c000.snappy.parquet │ │ ├── Fk │ │ └── part-00057-2bea3d40-0236-46bc-b1d4-85bb1d7112a9.c000.snappy.parquet │ │ ├── Ft │ │ └── part-00046-583c311c-45d7-4590-96ff-157e8366390c.c000.snappy.parquet │ │ ├── GS │ │ └── part-00048-3ec7ca53-eef9-4e67-888f-bec41669d79c.c000.snappy.parquet │ │ ├── HN │ │ └── part-00058-cc1e9a26-6c3d-4288-b378-2cf2ba7745af.c000.snappy.parquet │ │ ├── HZ │ │ └── part-00042-9926dc4b-c61d-49c7-933a-9a9535a5d177.c000.snappy.parquet │ │ ├── IG │ │ └── part-00002-8edfa65e-7cb2-4f82-8771-579acedd6f7d.c000.snappy.parquet │ │ ├── II │ │ └── part-00059-422f439d-c57f-47e2-bef8-1e199e0fe46c.c000.snappy.parquet │ │ ├── IV │ │ └── part-00053-318b80d6-8bef-4057-8890-79bd5db60cac.c000.snappy.parquet │ │ ├── JH │ │ └── part-00043-d1d1b590-7042-4e9c-872b-0509dc58eed7.c000.snappy.parquet │ │ ├── Kc │ │ └── part-00051-9428b0ee-ad2e-4156-a046-4547a5d3b3b1.c000.snappy.parquet │ │ ├── Kt │ │ └── part-00036-2cbd4b24-74a9-4676-ad88-1ee06fb29bb8.c000.snappy.parquet │ │ ├── Lv │ │ └── part-00032-41bbaf1a-6279-47b7-8688-b235d1706aad.c000.snappy.parquet │ │ ├── M2 │ │ └── part-00072-07af31ba-a41c-4c04-87c5-87d870fdf475.c000.snappy.parquet │ │ ├── Mf │ │ └── part-00060-a8539436-96ba-402a-8a7b-dec2135b7b52.c000.snappy.parquet │ │ ├── NJ │ │ └── part-00055-4edebb18-a674-4a72-ad10-145050b27272.c000.snappy.parquet │ │ ├── No │ │ └── part-00076-3eca7b45-d1e3-4051-b035-4bcdd714abbc.c000.snappy.parquet │ 
│ ├── OX │ │ └── part-00031-cdfe144e-da91-461c-9894-574d7e64952f.c000.snappy.parquet │ │ ├── Oi │ │ └── part-00011-d8be12d7-d464-43fd-9700-a5b1ae9e5d0e.c000.snappy.parquet │ │ ├── P6 │ │ └── part-00078-6608497c-fad9-4191-bde7-a2350ce65125.c000.snappy.parquet │ │ ├── Pn │ │ └── part-00023-5e05b8f8-f80f-43f6-9333-2d263bfbcc14.c000.snappy.parquet │ │ ├── Pw │ │ └── part-00050-8f5580a5-dd4c-4a0e-b892-5dc2bd0695b5.c000.snappy.parquet │ │ ├── RC │ │ └── part-00071-4b8ccfe1-6aec-4bec-b70a-0754ae2b3bf8.c000.snappy.parquet │ │ ├── Rk │ │ └── part-00007-e6b43552-ef44-4af4-921d-d506eb2dfbbe.c000.snappy.parquet │ │ ├── SJ │ │ └── part-00061-ba3a9c66-779c-4665-8003-9c9ab799e5f6.c000.snappy.parquet │ │ ├── To │ │ └── part-00033-482da537-942c-4d93-b747-2ec4b4ece4e7.c000.snappy.parquet │ │ ├── Tp │ │ └── part-00077-eb1e0859-e136-4ed5-b4b5-8371acba0303.c000.snappy.parquet │ │ ├── Tw │ │ └── part-00035-cf597183-4ced-4d3e-bc02-8cae53bb4c25.c000.snappy.parquet │ │ ├── U2 │ │ └── part-00010-92ebf069-473c-4d07-a300-d72896a9c651.c000.snappy.parquet │ │ ├── U4 │ │ └── part-00073-2aaa75d8-484f-4033-a351-28f3a8c1540e.c000.snappy.parquet │ │ ├── U6 │ │ └── part-00032-f0c3ab56-cdb6-4d79-8aa2-908bc7cabb9f.c000.snappy.parquet │ │ ├── Wa │ │ └── part-00030-211e7855-f56c-4a9d-b632-19e2eadc1f58.c000.snappy.parquet │ │ ├── YY │ │ └── part-00045-1fae6e27-b6ea-4496-9751-eb8ce068051d.c000.snappy.parquet │ │ ├── YZ │ │ └── part-00019-0b4e7ee9-8c7b-4191-9dec-6c833e5550b4.c000.snappy.parquet │ │ ├── Yr │ │ └── part-00075-a362be3f-f74e-4522-87d6-f3e811d5e12f.c000.snappy.parquet │ │ ├── Zz │ │ └── part-00022-394e79b3-5bee-4835-8af4-1b69739c5a20.c000.snappy.parquet │ │ ├── _delta_log │ │ ├── 00000000000000000000.crc │ │ ├── 00000000000000000000.json │ │ ├── 00000000000000000001.crc │ │ ├── 00000000000000000001.json │ │ ├── 00000000000000000002.crc │ │ ├── 00000000000000000002.json │ │ ├── 00000000000000000003.crc │ │ ├── 00000000000000000003.json │ │ ├── 00000000000000000004.crc │ │ ├── 
00000000000000000004.json │ │ ├── 00000000000000000005.crc │ │ └── 00000000000000000005.json │ │ ├── aL │ │ └── part-00018-2fe5e43f-121b-4179-b664-834c5465b967.c000.snappy.parquet │ │ ├── aR │ │ └── part-00008-d4bec0ab-eccc-4c99-9025-6ca6503ddd74.c000.snappy.parquet │ │ ├── b7 │ │ └── part-00067-a7f025fc-51bc-423b-af1b-d9d9b72ebc6c.c000.snappy.parquet │ │ ├── bT │ │ └── part-00021-39e0723e-0993-403b-8859-caa5f37a2fbb.c000.snappy.parquet │ │ ├── cF │ │ └── part-00049-e0050fd7-80fb-4265-a387-565207e45e4d.c000.snappy.parquet │ │ ├── company=ab │ │ └── part-00056-f6da9139-a113-467e-8ba5-c7758370da3a.c000.snappy.parquet │ │ ├── company=ac │ │ └── part-00033-84b83034-4286-45e6-9127-cc65f389a327.c000.snappy.parquet │ │ ├── company=ad │ │ └── part-00049-4b3f4ff8-05ed-47e6-be6d-f95fdef49711.c000.snappy.parquet │ │ ├── company=al │ │ └── part-00028-17ca4dc6-857d-4981-852a-17ab4e89ffd9.c000.snappy.parquet │ │ ├── company=an │ │ └── part-00069-7f716a4b-e3f4-4843-b255-316f894cd667.c000.snappy.parquet │ │ ├── company=at │ │ └── part-00034-d7f4034d-59b3-4d12-9416-80507cf32c57.c000.snappy.parquet │ │ ├── company=ay │ │ └── part-00030-c59a6d41-2735-4b97-be85-dd4c65655a00.c000.snappy.parquet │ │ ├── company=ba │ │ └── part-00027-b718de64-3731-4ad0-90ad-4db26b9bcc1d.c000.snappy.parquet │ │ ├── company=be │ │ └── part-00041-0efe3b2d-61dd-494a-aded-639ddc29a70a.c000.snappy.parquet │ │ ├── company=bo │ │ └── part-00035-49a6a289-1cbb-4771-b6b3-d87ddc6f3dd8.c000.snappy.parquet │ │ ├── company=br │ │ └── part-00008-566f0054-3608-451e-974b-264b75ce231a.c000.snappy.parquet │ │ ├── company=bu │ │ └── part-00072-3ec64246-453a-4d51-91c7-848304d4096f.c000.snappy.parquet │ │ ├── company=by │ │ └── part-00042-c40258e5-2723-4340-ba91-a641b231515a.c000.snappy.parquet │ │ ├── company=ca │ │ └── part-00005-42e7bc3f-aa89-4da9-b658-38a2ca8e4fd2.c000.snappy.parquet │ │ ├── company=ce │ │ └── part-00051-b0112e8d-c692-4b2b-b911-1bb92bd9382e.c000.snappy.parquet │ │ ├── company=ch │ │ └── 
part-00036-45350603-7694-4a2f-b65f-8ac04c196465.c000.snappy.parquet │ │ ├── company=cl │ │ └── part-00043-622f89c5-01f1-4abb-97ab-d7e11fad5624.c000.snappy.parquet │ │ ├── company=co │ │ └── part-00010-138ed8b4-c6e3-404c-b53a-ce33a2cf312b.c000.snappy.parquet │ │ ├── company=cr │ │ └── part-00055-149c9a22-5f1a-4d4e-9071-ecbe4a8ff971.c000.snappy.parquet │ │ ├── company=cu │ │ └── part-00062-b512dbb9-8644-4725-b6f8-66bd101d7d09.c000.snappy.parquet │ │ ├── company=da │ │ └── part-00032-c13bdcc7-bb30-48c1-8848-ec16a339e952.c000.snappy.parquet │ │ ├── company=de │ │ └── part-00023-6d446912-e456-4ae5-91ce-a077d768b645.c000.snappy.parquet │ │ ├── company=di │ │ └── part-00007-7dd774b2-1cb0-4c83-9813-ed75b2d389b6.c000.snappy.parquet │ │ ├── company=do │ │ └── part-00050-f7de5cd9-ea84-480c-a960-50de699bbece.c000.snappy.parquet │ │ ├── company=dr │ │ └── part-00047-03af00f0-8b3d-42f9-9cd9-94c8df3efc0f.c000.snappy.parquet │ │ ├── company=el │ │ └── part-00076-2015b5b5-d50c-4074-aaa4-ff0c6f19b2db.c000.snappy.parquet │ │ ├── company=es │ │ └── part-00048-6acda97e-206c-4c78-b13d-d67e95b96bb7.c000.snappy.parquet │ │ ├── company=fi │ │ └── part-00064-43e6ec96-6b6c-4211-8565-63ee05b8f0ff.c000.snappy.parquet │ │ ├── company=fl │ │ └── part-00079-71fc11e6-8efe-46d0-a500-19ec217bdac5.c000.snappy.parquet │ │ ├── company=fo │ │ └── part-00006-4151a5b0-c860-4a31-92b3-2b82fd2d99bd.c000.snappy.parquet │ │ ├── company=fr │ │ └── part-00063-d2b90342-e419-46c5-83e9-0a6d8a32aea4.c000.snappy.parquet │ │ ├── company=ga │ │ └── part-00061-7a78fb8a-bca8-4f17-9c5d-c9817da92b74.c000.snappy.parquet │ │ ├── company=gi │ │ └── part-00019-31f75af4-2504-4eb8-9d3d-955bbd86c1e0.c000.snappy.parquet │ │ ├── company=go │ │ └── part-00066-53270a3c-c02b-4608-9c9f-f60cae9ecb2f.c000.snappy.parquet │ │ ├── company=gr │ │ └── part-00020-d77466c4-7b96-4762-9476-1579b462d0b0.c000.snappy.parquet │ │ ├── company=ha │ │ └── part-00016-fad3113c-dfc1-4280-901d-aabdfe8a6f4b.c000.snappy.parquet │ │ ├── company=he │ │ └── 
part-00003-c6154e56-f3f8-4064-b712-aed57f913a5b.c000.snappy.parquet │ │ ├── company=ho │ │ └── part-00014-c339a09a-9f4b-4be0-986d-5707bd0cadaf.c000.snappy.parquet │ │ ├── company=ja │ │ └── part-00009-7732907a-bcc6-4ecb-b064-680fdeebcc3e.c000.snappy.parquet │ │ ├── company=ji │ │ └── part-00071-06f90521-9846-4ffc-9f47-acbf1871077c.c000.snappy.parquet │ │ ├── company=jo │ │ └── part-00004-8a0c1603-fa55-4a84-879b-f7e1c0d13c0a.c000.snappy.parquet │ │ ├── company=ke │ │ └── part-00031-aeda7b45-378b-424a-aad0-924701a9c6f9.c000.snappy.parquet │ │ ├── company=ko │ │ └── part-00070-5efdd77a-ecea-47b8-b532-de111e665f6a.c000.snappy.parquet │ │ ├── company=la │ │ └── part-00044-505b2259-c565-42ae-aa6e-b56629cffdf3.c000.snappy.parquet │ │ ├── company=le │ │ └── part-00018-8c97ce70-0ce3-4dc3-8ba9-7a2c51897e2c.c000.snappy.parquet │ │ ├── company=li │ │ └── part-00059-6716a935-b1e4-41f5-a1a7-fd0e707a9b90.c000.snappy.parquet │ │ ├── company=lo │ │ └── part-00000-522f840d-fb48-4f0b-a185-dedb749b0da0.c000.snappy.parquet │ │ ├── company=ma │ │ └── part-00001-086bf948-a7b4-4600-891d-01c4680fcab4.c000.snappy.parquet │ │ ├── company=mc │ │ └── part-00015-ea17f88c-c7c5-4ed5-aa9c-55907f85c0f8.c000.snappy.parquet │ │ ├── company=me │ │ └── part-00039-5e5ebd26-eb1f-4312-bd6f-73c8a4b2dfbe.c000.snappy.parquet │ │ ├── company=mi │ │ └── part-00017-64e30664-4bcf-4462-a617-bf05b3772a5d.c000.snappy.parquet │ │ ├── company=mo │ │ └── part-00012-67ab812e-793f-4100-b383-511e3206c975.c000.snappy.parquet │ │ ├── company=mu │ │ └── part-00060-e274f652-1f8d-4738-bc0f-c6e8a0d6bff1.c000.snappy.parquet │ │ ├── company=my │ │ └── part-00068-7451e7e9-9655-4f5f-81c0-8bc55cb50960.c000.snappy.parquet │ │ ├── company=ne │ │ └── part-00029-a579be5b-1744-4a5a-858f-fe5008ce81fb.c000.snappy.parquet │ │ ├── company=no │ │ └── part-00047-97b20913-8df7-4570-8c29-b0d364b5fe76.c000.snappy.parquet │ │ ├── company=ob │ │ └── part-00075-db82c46c-76c8-48a8-98e3-b1b4b8816a65.c000.snappy.parquet │ │ ├── company=on │ │ └── 
part-00074-25f1fa15-e2b1-4819-9920-57443ec7dd7b.c000.snappy.parquet │ │ ├── company=pa │ │ └── part-00021-080dbd1f-b083-4e46-bd6f-1ac16b1db973.c000.snappy.parquet │ │ ├── company=pe │ │ └── part-00038-63524ae8-1a5c-4b5c-8e7c-29b8a01601a2.c000.snappy.parquet │ │ ├── company=ph │ │ └── part-00013-d6cbca57-b07e-4958-a2e5-541fe27fefa8.c000.snappy.parquet │ │ ├── company=pr │ │ └── part-00073-5073dd1d-d9b3-4352-ba64-fb33743e7576.c000.snappy.parquet │ │ ├── company=ra │ │ └── part-00045-16e8d690-d897-44d0-8023-c464247974bf.c000.snappy.parquet │ │ ├── company=re │ │ └── part-00067-61dc56ee-b090-4330-894f-aa964229f4aa.c000.snappy.parquet │ │ ├── company=ri │ │ └── part-00037-e7b8827a-767b-47c0-98cd-be3749f77bb8.c000.snappy.parquet │ │ ├── company=ro │ │ └── part-00002-03d8285e-c751-4f6a-822a-d47c83ee8ceb.c000.snappy.parquet │ │ ├── company=ru │ │ └── part-00057-73304f98-6375-4906-b781-979acc7367ae.c000.snappy.parquet │ │ ├── company=sa │ │ └── part-00053-d402d6d7-61e7-4023-9d6b-d1e9ed152f49.c000.snappy.parquet │ │ ├── company=sc │ │ └── part-00022-0ce5b7db-639e-42a1-9d4b-90c5a46601e7.c000.snappy.parquet │ │ ├── company=sh │ │ └── part-00040-600b89ab-bb40-4cb6-9ae4-1ab9faa0f40e.c000.snappy.parquet │ │ ├── company=si │ │ └── part-00033-99e6cac7-a189-4772-a579-b9781b149235.c000.snappy.parquet │ │ ├── company=sm │ │ └── part-00025-cff7e4e8-d331-4e52-b464-8c5370bbcfee.c000.snappy.parquet │ │ ├── company=so │ │ └── part-00052-6678746c-5e3b-49c7-87e8-f2cab9010c92.c000.snappy.parquet │ │ ├── company=st │ │ └── part-00026-00eaed9e-c245-4134-9d52-482892b2c153.c000.snappy.parquet │ │ ├── company=sw │ │ └── part-00054-eaf85bb5-b702-4d0c-a6b7-67901f7cf7eb.c000.snappy.parquet │ │ ├── company=ta │ │ └── part-00046-26a01da5-5d3c-4597-89fe-7f6f9579e6f1.c000.snappy.parquet │ │ ├── company=th │ │ └── part-00007-574080d4-da2d-40ca-928b-fa7ca5d86d80.c000.snappy.parquet │ │ ├── company=to │ │ └── part-00065-dbe02d1a-c524-4aab-9ad6-10ead7a5d481.c000.snappy.parquet │ │ ├── company=tu │ │ └── 
part-00077-7c4ba0d6-9460-462d-ae66-8d4bd21b190b.c000.snappy.parquet │ │ ├── company=vi │ │ └── part-00029-63ae8fd1-c094-4c02-8b6b-c31713c3a0d3.c000.snappy.parquet │ │ ├── company=wa │ │ └── part-00024-e3781098-4ac4-4bf8-9ed1-81b9651d3769.c000.snappy.parquet │ │ ├── company=we │ │ └── part-00000-f42ba576-5cd6-415b-bad7-73a0b8e033df.c000.snappy.parquet │ │ ├── company=wi │ │ └── part-00011-d9fd8935-3809-4171-a9cd-ec472cd54d8c.c000.snappy.parquet │ │ ├── company=wo │ │ └── part-00058-c9201b73-9ea0-4ecb-a42e-744b07cce743.c000.snappy.parquet │ │ ├── company=ya │ │ └── part-00078-1e0fa90e-282e-420e-9ef0-616b4405badc.c000.snappy.parquet │ │ ├── d2 │ │ └── part-00009-6e0044a4-66de-445d-a3b6-a9677c7cf09f.c000.snappy.parquet │ │ ├── dD │ │ └── part-00000-1363075f-bb86-4349-bbe0-9e25d1e01873.c000.snappy.parquet │ │ ├── dR │ │ └── part-00056-6ee8afe2-9a97-4148-aaa3-63c29a154a2e.c000.snappy.parquet │ │ ├── eD │ │ └── part-00029-e49d438a-bcc4-401b-8b87-10bf8e421dd1.c000.snappy.parquet │ │ ├── ev │ │ └── part-00039-0d497b2d-8c3d-4539-8222-e9dbda28b768.c000.snappy.parquet │ │ ├── fh │ │ └── part-00014-f5160b94-c123-4eed-9cde-b5fea29637be.c000.snappy.parquet │ │ ├── gM │ │ └── part-00024-38a93f96-421f-4e16-8905-4fc4fb358d8a.c000.snappy.parquet │ │ ├── ia │ │ └── part-00031-9feb0dce-6f44-4da6-a2ea-e5116d09cc2c.c000.snappy.parquet │ │ ├── ip │ │ └── part-00016-c24c4267-fbc0-4ae0-ae0a-b3652e1ac0ee.c000.snappy.parquet │ │ ├── j7 │ │ └── part-00034-c507b451-6d28-4e80-8c84-431ac4114e83.c000.snappy.parquet │ │ ├── j9 │ │ └── part-00063-2dad4913-6407-430d-b1d3-2c3c111830b8.c000.snappy.parquet │ │ ├── jD │ │ └── part-00025-aba3487c-62c0-4743-b78e-ba7be5494d21.c000.snappy.parquet │ │ ├── jZ │ │ └── part-00028-57126dbd-869f-4ac8-b28e-0b2e2599fb63.c000.snappy.parquet │ │ ├── lP │ │ └── part-00020-143f9668-7f1a-4af2-97b9-8f5c078b48ee.c000.snappy.parquet │ │ ├── mX │ │ └── part-00037-f60b8a34-3c43-4e9f-a82e-2daf9ce070ac.c000.snappy.parquet │ │ ├── ma │ │ └── 
part-00079-a1e2177c-0968-4657-b448-6017b465e701.c000.snappy.parquet │ │ ├── nc │ │ └── part-00027-2856a17a-8c1b-4e01-985e-9f019d4af815.c000.snappy.parquet │ │ ├── oG │ │ └── part-00066-7a20116d-a4a9-4d34-bedd-a179ee40d373.c000.snappy.parquet │ │ ├── pw │ │ └── part-00024-17021728-8d4a-40c1-a19e-0a3ec0dde9fb.c000.snappy.parquet │ │ ├── q7 │ │ └── part-00041-15231660-8cc5-447f-87f8-936efb95e64c.c000.snappy.parquet │ │ ├── rc │ │ └── part-00064-ed14899a-d10c-45db-8adf-3d8adf26452c.c000.snappy.parquet │ │ ├── rv │ │ └── part-00003-3d6f5d79-b435-4616-8f43-7348f7065068.c000.snappy.parquet │ │ ├── sF │ │ └── part-00017-97d493cf-2d64-4293-9908-61d0262066b5.c000.snappy.parquet │ │ ├── sQ │ │ └── part-00015-7b76522c-479c-496e-8e2b-468d60f303d5.c000.snappy.parquet │ │ ├── tS │ │ └── part-00052-e83c55be-be73-42f4-b409-2a9de710864b.c000.snappy.parquet │ │ ├── u3 │ │ └── part-00062-75139a11-643c-4ac4-adff-9c6792fc5ac4.c000.snappy.parquet │ │ ├── uX │ │ └── part-00026-cb38386a-7866-455f-b3b5-26b57174591f.c000.snappy.parquet │ │ ├── vb │ │ └── part-00069-057cf04d-95e3-4a5a-9b06-c80719131585.c000.snappy.parquet │ │ ├── wV │ │ └── part-00013-a578ae3e-a6fb-4fc9-bc3d-83ae5e796d64.c000.snappy.parquet │ │ ├── xB │ │ └── part-00004-85f8ab87-e2f8-48e4-8ad6-a98825b6213d.c000.snappy.parquet │ │ ├── xL │ │ └── part-00074-2d34ec5a-3f4e-481b-8119-19f13e571cb1.c000.snappy.parquet │ │ ├── xq │ │ └── part-00070-6c760e79-76c0-44ad-9e8f-70b2b34e88ec.c000.snappy.parquet │ │ └── zX │ │ └── part-00040-c1df044d-b6db-41ac-bfd2-1c6e5717d0c6.c000.snappy.parquet ├── deltalake_tests.py ├── duckdb_tests.py ├── test_app.py ├── test_benchmark.py ├── test_blobb.py ├── test_colmap.py ├── test_complex.py ├── test_datamove.py ├── test_duckdb.py ├── test_filters.py ├── test_mssql.py ├── test_nearby.py ├── test_openid.py ├── test_output_formats.py ├── test_partition.py ├── test_performance.py ├── test_schema.py ├── test_search.py ├── test_source_uri.py ├── test_sql.py ├── test_sqlite.py ├── test_utils.py └── 
utils.py └── uv.lock /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/.DS_Store -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | 4 | [*.{ts,js,css,scss,sass,cs,py,sql,json,html}] 5 | indent_style = space 6 | indent_size = 4 7 | max_line_length = 119 -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 
8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | jobs: 16 | deploy: 17 | runs-on: ubuntu-latest 18 | environment: 19 | name: pypi 20 | url: https://pypi.org/p/bmsdna-lakeapi 21 | permissions: 22 | id-token: write 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: "3.11" 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | curl -LsSf https://astral.sh/uv/install.sh | sh 33 | uv sync --all-extras 34 | - name: Build package 35 | run: uv build 36 | - name: Publish package to PyPI 37 | uses: pypa/gh-action-pypi-publish@release/v1 38 | -------------------------------------------------------------------------------- /.github/workflows/python-test.yml: -------------------------------------------------------------------------------- 1 | name: Python Test 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | paths-ignore: ["README.md", "docs", ".github"] 7 | pull_request: 8 | branches: ["main"] 9 | paths-ignore: ["README.md", "docs", ".github"] 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: ["3.11"] 18 | 19 | steps: 20 | - uses: actions/checkout@v3 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v3 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - uses: Yarden-zamir/install-mssql-odbc@main 26 | with: 27 | ODBC_VERSION: 18 28 | - name: Install uv 29 | run: | 30 | python -m pip install --upgrade pip 31 | curl -LsSf https://astral.sh/uv/install.sh | sh 32 | 33 | - name: Install dependencies 34 | run: uv sync --all-extras --group dev --group test 35 | - name: Pyright 36 | run: | 37 | uv run pyright . 
38 | - name: Create Test data 39 | run: uv run -m tests.create_test_data 40 | - name: Test with pytest (no benchmarks) 41 | run: uv run -m pytest --maxfail 3 --cov=bmsdna tests --benchmark-skip 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | tests/data 163 | !tests/data/chinook.db 164 | !tests/data/delta/table_w_col_map 165 | out 166 | 167 | .pymon 168 | 169 | profile.txt 170 | 171 | _temp 172 | 173 | __azurite* 174 | __blobstorage__ 175 | __tablestorage__ 176 | __queuestorage__ 177 | test_requests 178 | azure.duckdb_extension.gz 179 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11 -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python: Debug with test config", 9 | "type": "python", 10 | "request": "launch", 11 | "module": "uvicorn", 12 | "args": ["start_test_instance:app", "--host", "0.0.0.0", "--port", "8080", "--reload"], 13 | "jinja": false, 14 | "justMyCode": false, 15 | "env": { 16 | "KEEP_SQL_SERVER": "1", 17 | "MY_SQL_PWD": "MyPass@word4tests", 18 | "NO_SQL_SERVER": "1" 19 | } 20 | }, 21 | { 22 | "name": "Python: Current File", 23 | "type": "python", 24 | "request": "launch", 25 | "program": "${file}", 26 | "console": "integratedTerminal", 27 | "justMyCode": true 28 | } 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": ["tests"], 3 | "python.testing.unittestEnabled": false, 4 | "python.testing.pytestEnabled": true, 5 | "editor.formatOnSave": true, 6 | "[python]": { 7 | "editor.defaultFormatter": "charliermarsh.ruff" 8 | }, 9 | "python.formatting.provider": "none", 10 | "cSpell.words": ["deltalake", "duckdb", "lakeapi", "odbc", "pyarrow", "sqlglot"] 11 | } 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Building Material Suisse 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following 
conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /assets/LakeAPI.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/assets/LakeAPI.drawio.png -------------------------------------------------------------------------------- /azure.duckdb_extension.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/azure.duckdb_extension.gz -------------------------------------------------------------------------------- /bmsdna/lakeapi/__init__.py: -------------------------------------------------------------------------------- 1 | from bmsdna.lakeapi.core.config import Configs 2 | from bmsdna.lakeapi.api.api import init_lakeapi 3 | from bmsdna.lakeapi.core.config import get_default_config, BasicConfig 4 | from bmsdna.lakeapi.core.types import ( 5 | MetadataDetailResult, 6 | MetadataSchemaField, 7 | MetadataSchemaFieldType, 8 | ) 9 | from bmsdna.lakeapi.core.uservalidation import add_user_middlware 10 | -------------------------------------------------------------------------------- /bmsdna/lakeapi/api/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/bmsdna/lakeapi/api/__init__.py -------------------------------------------------------------------------------- /bmsdna/lakeapi/api/api.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from fastapi import FastAPI 3 | from bmsdna.lakeapi.core.config import BasicConfig, Configs, get_default_config 4 | from bmsdna.lakeapi.core.route import init_routes 5 | import os 6 | 7 | 8 | @dataclass(frozen=True) 9 | class LakeApiStartInfo: 10 | start_config: BasicConfig 11 | config: Configs 12 | 13 | 14 | async def init_lakeapi( 15 | app: FastAPI, 16 | use_basic_auth: bool, 17 | start_config: BasicConfig | None = None, 18 | config: Configs | str | None = None, 19 | ) -> LakeApiStartInfo: 20 | start_config = start_config or get_default_config() 21 | real_config: Configs 22 | if config is None: 23 | real_config = Configs.from_yamls( 24 | start_config, os.getenv("CONFIG_PATH", "config.yml") 25 | ) 26 | elif isinstance(config, str): 27 | real_config = Configs.from_yamls(start_config, config) 28 | else: 29 | real_config = config 30 | router = await init_routes(real_config, start_config) 31 | if use_basic_auth: 32 | from bmsdna.lakeapi.core.uservalidation import add_user_middlware 33 | 34 | add_user_middlware(app, start_config, real_config.users) 35 | 36 | app.include_router(router) 37 | return LakeApiStartInfo(start_config, real_config) 38 | -------------------------------------------------------------------------------- /bmsdna/lakeapi/context/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | from bmsdna.lakeapi.context.df_base import ExecutionContext 3 | from bmsdna.lakeapi.core.types import Engines 4 | 5 | 6 | def _duckdb(chunk_size: int): 7 | from 
bmsdna.lakeapi.context.df_duckdb import DuckDbExecutionContext 8 | 9 | return DuckDbExecutionContext(chunk_size=chunk_size) 10 | 11 | 12 | def _polars(chunk_size: int): 13 | from bmsdna.lakeapi.context.df_polars import PolarsExecutionContext 14 | 15 | return PolarsExecutionContext(chunk_size=chunk_size) 16 | 17 | 18 | def _odbc(chunk_size: int): 19 | from bmsdna.lakeapi.context.df_odbc import ODBCExecutionContext 20 | 21 | return ODBCExecutionContext(chunk_size=chunk_size) 22 | 23 | 24 | engine_registry = {"duckdb": _duckdb, "polars": _polars, "odbc": _odbc} 25 | 26 | 27 | def register_engine(engine: Engines, factory: Callable[[int], ExecutionContext]): 28 | engine_registry[engine] = factory 29 | 30 | 31 | def get_context_by_engine( 32 | engine: Engines, 33 | chunk_size: int, 34 | ) -> ExecutionContext: 35 | return engine_registry[engine](chunk_size) 36 | 37 | 38 | class ExecutionContextManager: 39 | default_engine: Engines 40 | 41 | def __init__( 42 | self, 43 | default_engine: Engines, 44 | default_chunk_size: int, 45 | ): 46 | self.default_engine = default_engine 47 | self.contexts: dict[str, ExecutionContext] = dict() 48 | self.default_chunk_size = default_chunk_size 49 | 50 | def get_context( 51 | self, 52 | engine: Engines | None, 53 | chunk_size: int | None = None, 54 | ): 55 | real_engine: Engines = engine or self.default_engine 56 | if real_engine not in self.contexts: 57 | self.contexts[real_engine] = get_context_by_engine( 58 | real_engine, chunk_size or self.default_chunk_size 59 | ) 60 | return self.contexts[real_engine] 61 | 62 | def __enter__(self, *args, **kwargs): 63 | return self 64 | 65 | def __exit__(self, *args, **kwargs): 66 | for _, ctx in self.contexts.items(): 67 | ctx.__exit__(*args, **kwargs) 68 | self.contexts = dict() 69 | -------------------------------------------------------------------------------- /bmsdna/lakeapi/context/df_odbc.py: -------------------------------------------------------------------------------- 1 | from datetime 
import datetime 2 | 3 | from fastapi.concurrency import run_in_threadpool 4 | import pyarrow as pa 5 | from typing import List, Optional, Tuple, Any, Union, cast 6 | from bmsdna.lakeapi.core.types import FileTypes, OperatorType 7 | from bmsdna.lakeapi.context.df_base import ( 8 | FLAVORS, 9 | ExecutionContext, 10 | ResultData, 11 | get_sql, 12 | ) 13 | import arrow_odbc 14 | import pyarrow.dataset 15 | import sqlglot.expressions as ex 16 | import os 17 | from bmsdna.lakeapi.core.config import SearchConfig 18 | from sqlglot import from_, parse_one 19 | from uuid import uuid4 20 | from .source_uri import SourceUri 21 | 22 | ENABLE_COPY_TO = os.environ.get("ENABLE_COPY_TO", "0") == "1" 23 | 24 | 25 | def _get_temp_table_name(): 26 | return "temp_" + str(uuid4()).replace("-", "") 27 | 28 | 29 | arrow_odbc.enable_odbc_connection_pooling() 30 | 31 | 32 | class BatchReaderWrap: 33 | def __init__(self, rdr: arrow_odbc.BatchReader): 34 | self.rdr = rdr 35 | 36 | def __enter__(self, *args, **kwargs): 37 | return self 38 | 39 | @property 40 | def schema(self): 41 | return self.rdr.schema 42 | 43 | def __iter__(self): 44 | return self.rdr.__iter__() 45 | 46 | def __exit__(self, *args, **kwargs): 47 | pass 48 | 49 | 50 | class ODBCResultData(ResultData): 51 | def __init__( 52 | self, 53 | original_sql: Union[ex.Query, str], 54 | connection_string: str, 55 | chunk_size: int, 56 | ) -> None: 57 | super().__init__(chunk_size=chunk_size) 58 | self.original_sql = original_sql 59 | self.connection_string = connection_string 60 | self._arrow_schema = None 61 | self._df = None 62 | self.flavor: FLAVORS = ( 63 | "tsql" if " for SQL Server".lower() in connection_string.lower() else "ansi" 64 | ) 65 | self.dialect = ( 66 | "tsql" 67 | if " for SQL Server".lower() in connection_string.lower() 68 | else "duckdb" 69 | ) 70 | 71 | def query_builder(self) -> ex.Select: 72 | if not isinstance(self.original_sql, str): 73 | return from_(self.original_sql.subquery().as_("t")) 74 | else: 75 | 
async def arrow_schema(self) -> pa.Schema:
    """Return the Arrow schema of the query result, fetching it at most once.

    The schema is obtained by running the query with ``limit=0`` and reading
    the schema of the resulting batch reader. The ODBC call is blocking, so
    it is dispatched to the thread pool (like the other readers of this
    class) instead of stalling the event loop.
    """
    if self._arrow_schema is not None:
        return self._arrow_schema
    query = get_sql(self.original_sql, limit=0, dialect=self.dialect)
    batches = await run_in_threadpool(
        arrow_odbc.read_arrow_batches_from_odbc,
        query,
        connection_string=self.connection_string,
        batch_size=self.chunk_size,
    )
    assert batches is not None
    self._arrow_schema = batches.schema
    return self._arrow_schema
@property
def dialect(self):
    """SQL dialect derived from the first registered connection string.

    Returns ``"tsql"`` when no datasource is registered yet or when the
    connection string names a "... for SQL Server" driver (compared
    case-insensitively), otherwise ``"postgres"``.
    """
    if not self.datasources:
        return "tsql"
    # Look at the first registered connection string; the dict is keyed by
    # target name, so it must be indexed by value, not by a list of keys.
    first_connection_string = next(iter(self.datasources.values()))
    if " for sql server" in first_connection_string.lower():
        return "tsql"
    return "postgres"
172 | 173 | def init_search( 174 | self, 175 | source_view: str, 176 | search_configs: list[SearchConfig], 177 | ): 178 | raise NotImplementedError("Not supported") 179 | 180 | def register_datasource( 181 | self, 182 | target_name: str, 183 | source_table_name: Optional[str], 184 | uri: SourceUri, 185 | file_type: FileTypes, 186 | partitions: List[Tuple[str, OperatorType, Any]] | None, 187 | ): 188 | assert file_type == "odbc" 189 | assert uri.account is None 190 | self.datasources[target_name] = uri.uri 191 | 192 | async def list_tables(self) -> ResultData: 193 | return await self.execute_sql( 194 | "SELECT table_schema, table_name as name, table_type from information_schema.tables" 195 | ) 196 | 197 | def get_modified_date( 198 | self, 199 | uri: SourceUri, 200 | file_type: FileTypes, 201 | ) -> datetime | None: 202 | return None 203 | 204 | def __enter__(self): 205 | return self 206 | 207 | def __exit__(self, *args, **kwargs): 208 | pass 209 | -------------------------------------------------------------------------------- /bmsdna/lakeapi/context/source_uri.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Literal, TYPE_CHECKING 2 | import fsspec 3 | import adlfs 4 | import os 5 | import urllib.parse 6 | 7 | if TYPE_CHECKING: 8 | from azure.core.credentials import TokenCredential 9 | 10 | 11 | def _convert_options( 12 | uri: str, 13 | options: dict | None, 14 | flavor: Literal["fsspec", "object_store", "original"], 15 | token_retrieval_func: "Callable[[str], TokenCredential] | None" = None, 16 | ): 17 | if options is None: 18 | return uri, None 19 | if flavor == "fsspec": 20 | from deltalake2db.azure_helper import get_storage_options_fsspec 21 | 22 | return uri, get_storage_options_fsspec(options) 23 | elif flavor == "original": 24 | return uri, options 25 | else: 26 | from deltalake2db.azure_helper import get_storage_options_object_store 27 | 28 | nr, no = get_storage_options_object_store(uri, 
def get_fs_spec(self) -> tuple[fsspec.AbstractFileSystem, str]:
    """Return an fsspec filesystem plus the path to use with it.

    Without an account the local ``file`` filesystem and ``real_uri`` are
    returned. Otherwise the account's options are converted to their fsspec
    form and either an ``adlfs.AzureBlobFileSystem`` (Azure URIs) or the
    filesystem registered for the URI's scheme is created.
    """
    if self.account is None:
        return fsspec.filesystem("file"), self.real_uri
    real_uri, opts = _convert_options(
        self.real_uri,
        self.accounts.get(self.account, {}),
        "fsspec",
        token_retrieval_func=self.retrieve_token,
    )
    assert opts is not None
    if self.is_azure():
        return adlfs.AzureBlobFileSystem(**opts), real_uri  # type: ignore
    # fsspec.filesystem expects the protocol name (e.g. "s3"), not the
    # parsed URL object.
    protocol = urllib.parse.urlparse(self.uri).scheme
    return fsspec.filesystem(protocol, **opts), real_uri
def copy_to_local(self, local_path: str):
    """Download the remote Delta table to ``local_path`` and return a local SourceUri.

    The Delta table version copied last is remembered per source URI in the
    module-level ``local_versions`` cache. The download is skipped only when
    that version is unchanged *and* the target directory still exists, so a
    cleaned-up temp folder is repopulated instead of returning a dangling
    path.

    Raises:
        ValueError: if this URI has no account, i.e. is already local.
    """
    if self.account is None:
        raise ValueError("Cannot copy local files")

    from deltalake import DeltaTable

    df_uri, df_opts = self.get_uri_options(flavor="object_store")
    vnr = DeltaTable(df_uri, storage_options=df_opts).version()

    is_up_to_date = local_versions.get(self.uri) == vnr and os.path.isdir(local_path)
    if not is_up_to_date:
        os.makedirs(local_path, exist_ok=True)
        fs, fs_path = self.get_fs_spec()
        fs.get(fs_path + "/", local_path, recursive=True)
        local_versions[self.uri] = vnr

    return SourceUri(
        uri=local_path,
        data_path=None,
        account=None,
        accounts=self.accounts,
        token_retrieval_func=self.token_retrieval_func,
    )
class CustomFormatter(logging.Formatter):
    """Formatter that wraps each record in an ANSI colour chosen by its level."""

    grey = "\x1b[38;20m"
    yellow = "\x1b[33;20m"
    green = "\x1b[1;32m"
    red = "\x1b[31;20m"
    bold_red = "\x1b[31;1m"
    reset = "\x1b[0m"
    format_ = "%(levelname)s:\t%(asctime)s - %(name)s %(message)s"

    FORMATS = {
        logging.DEBUG: green + format_ + reset,
        logging.INFO: grey + format_ + reset,
        logging.WARNING: yellow + format_ + reset,
        logging.ERROR: red + format_ + reset,
        logging.CRITICAL: bold_red + format_ + reset,
    }

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Build the per-level formatters once instead of once per record.
        self._level_formatters = {
            level: logging.Formatter(fmt) for level, fmt in self.FORMATS.items()
        }
        # Levels without a colour entry use logging's default format.
        self._fallback_formatter = logging.Formatter()

    def format(self, record):
        """Format ``record`` with the colourised format of its level."""
        formatter = self._level_formatters.get(
            record.levelno, self._fallback_formatter
        )
        return formatter.format(record)


def get_logger(name):
    """Return the logger ``name`` set to DEBUG with one coloured console handler.

    The console handler (level INFO, ``CustomFormatter``) is attached only
    once per logger, so calling this repeatedly for the same name does not
    duplicate every log line.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    has_console_handler = any(
        isinstance(handler.formatter, CustomFormatter) for handler in logger.handlers
    )
    if not has_console_handler:
        # create console handler with a higher log level
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(CustomFormatter())
        logger.addHandler(ch)
    return logger
async def init_routes(configs: Configs, basic_config: BasicConfig):
    """Create the API router with one set of endpoints per configured datasource.

    For every entry in ``configs`` the datasource schema is resolved (via the
    schema cache), a response model is derived from it, and the detail and
    data endpoints are registered. Failures while resolving a schema are
    logged and the endpoints are still registered without a schema. A
    ``/metadata`` route lists all successfully inspected datasources, and the
    SQL endpoint is added when enabled in ``basic_config``.

    Returns:
        The populated ``APIRouter``.
    """
    from bmsdna.lakeapi.endpoint.endpoint import (
        get_response_model,
        create_config_endpoint,
    )
    from bmsdna.lakeapi.endpoint.detail_endpoint import create_detailed_meta_endpoint
    from bmsdna.lakeapi.endpoint.sql_endpoint import create_sql_endpoint
    from bmsdna.lakeapi.core.schema_cache import get_schema_cached
    from bmsdna.lakeapi.core.datasource import Datasource

    all_lake_api_routers.append((basic_config, configs))
    router = APIRouter()
    metadata = []
    with ExecutionContextManager(
        basic_config.default_engine,
        basic_config.default_chunk_size,
    ) as mgr:
        for config in configs:
            methods = (
                cast(list[Literal["get", "post"]], [config.api_method])
                if isinstance(config.api_method, str)
                else config.api_method
            )
            try:
                assert config.datasource is not None
                realdataframe = Datasource(
                    config.version_str,
                    config.tag,
                    config.name,
                    config=config.datasource,
                    sql_context=mgr.get_context(config.engine),
                    basic_config=basic_config,
                    accounts=configs.accounts,
                )
                schema = await get_schema_cached(
                    basic_config, realdataframe, config.datasource.get_unique_hash()
                )
                if schema is None:
                    logger.warning(
                        f"Could not get response type for {config.route}. Path does not exist: {realdataframe}"
                    )
                metadata.append(
                    {
                        "name": config.name,
                        "tag": config.tag,
                        "route": config.route,
                        "methods": methods,
                        "file_type": config.datasource.file_type,
                        "uri": config.datasource.uri,
                        "version": config.version,
                        "schema": {n: str(schema.field(n).type) for n in schema.names}
                        if schema
                        else None,
                    }
                )

            except Exception as err:
                # Deliberately broad: one broken datasource must not prevent
                # the remaining routes from being registered. The traceback
                # goes through the logger instead of a bare print.
                logger.warning(
                    f"Could not get response type for {config.route}. Error: {err}",
                    exc_info=True,
                )
                schema = None

            response_model = (
                get_response_model(
                    config=config,
                    schema=schema,
                    basic_config=basic_config,
                )
                if schema is not None
                else None
            )
            create_detailed_meta_endpoint(
                schema=schema,
                config=config,
                configs=configs,
                router=router,
                basic_config=basic_config,
            )
            for am in methods:
                create_config_endpoint(
                    apimethod=am,
                    config=config,
                    router=router,
                    response_model=response_model,
                    schema=schema,
                    basic_config=basic_config,
                    configs=configs,
                )

    @router.get(
        "/metadata",
        name="metadata",
    )
    async def get_metadata():
        return metadata

    if basic_config.enable_sql_endpoint:
        create_sql_endpoint(
            router=router,
            basic_config=basic_config,
            configs=configs,
        )

    return router
# Maps type names used in configuration files to Python types.
CONFIG_DTYPE_MAP: dict[str, Type] = {
    "struct": dict,
    "string": str,
    "integer": int,
    "float": float,
    "array": list,
    "double": Decimal,
    "timesamp": datetime,  # historic misspelling, kept for existing configs
    "timestamp": datetime,
    "date": date,
    "time": time,
    "duration": timedelta,
    "str": str,
    "int": int,
    "bool": bool,
    "boolean": bool,
}


# Reduced variant of the mapping above (no duration/bool/short aliases).
DTYPE_MAP = {
    "struct": dict,
    "string": str,
    "integer": int,
    "float": float,
    "array": list,
    "double": Decimal,
    "timesamp": datetime,  # historic misspelling, kept for existing configs
    "timestamp": datetime,
    "date": date,
    "time": time,
}
@lru_cache
def is_correct(
    hash: str,
    pwd_str: str,
):
    """Return whether ``pwd_str`` matches the argon2 ``hash``.

    ``PasswordHasher.verify`` signals a wrong password by raising
    ``VerifyMismatchError`` rather than returning ``False``; that case is
    translated to ``False`` here so callers can answer with 401 instead of
    failing with an unhandled exception. Other argon2 errors (e.g. a
    malformed stored hash) still propagate.

    NOTE: results are memoised by ``lru_cache`` to avoid re-hashing on every
    request, which means recently used passwords are kept in process memory.
    """
    import argon2
    from argon2.exceptions import VerifyMismatchError

    ph = argon2.PasswordHasher()
    try:
        return ph.verify(
            hash.encode("utf-8"),
            pwd_str.encode("utf-8"),
        )
    except VerifyMismatchError:
        return False
def parse_lat_lon(vl: str):
    """Parse a ``"<lat>,<lon>"`` string into a ``(lat, lon)`` tuple of floats.

    Raises:
        ValueError: if ``vl`` does not consist of exactly two comma-separated
            parts or a part is not a valid float.
    """
    lat_text, lon_text = vl.split(",")
    latitude = float(lat_text)
    longitude = float(lon_text)
    return (latitude, longitude)
def handle_nearby_request(
    context: ExecutionContext,
    config: Config,
    params: BaseModel,
    basic_config: BasicConfig,
    *,
    source_view: str,
    query: ex.Query,
):
    """Extend ``query`` with distance columns, filters and ordering for nearby params.

    When the endpoint has no nearby configuration, or the request supplies
    no nearby parameter, ``query`` is returned unchanged. Otherwise each
    requested nearby parameter adds a distance column (named after the
    nearby config) to ``query``; the result is wrapped in a ``nearbys`` CTE
    that is filtered to rows within the requested distance and ordered by
    the distance columns.
    """
    nearby_configs = config.nearby
    if nearby_configs is None:
        return query

    requested = get_nearby_filter(nearby_configs, params, basic_config)
    if requested is None:
        return query

    context.init_spatial()
    order_columns = []
    distance_filters = []
    for geo_value, nearby_cfg in requested:
        distance_expr = context.distance_m_function(
            ex.column(nearby_cfg.lat_col, quoted=True),
            ex.column(nearby_cfg.lon_col, quoted=True),
            ex.convert(geo_value.lat),
            ex.convert(geo_value.lon),
        )
        query = query.select(distance_expr.as_(nearby_cfg.name))
        order_columns.append(ex.column(nearby_cfg.name))
        distance_filters.append(ex.column(nearby_cfg.name) <= geo_value.distance_m)

    if not order_columns and not distance_filters:
        return query

    # Filter and sort on the computed distance via a CTE over the query.
    sel = select("*").from_(ex.to_identifier("nearbys"))
    sel.with_("nearbys", query, copy=False)
    for condition in distance_filters:
        sel.where(condition, append=True, copy=False)
    sel.order_by(*order_columns, copy=False)
    return sel
def get_searches(
    search_config: list[SearchConfig],
    params: BaseModel,
    basic_config: BasicConfig,
) -> SearchesType:
    """Pair each usable search parameter value with its search configuration.

    Parameter names are matched case-insensitively against the names in
    ``search_config``. Unset or ``None`` values and values shorter than
    ``basic_config.min_search_length`` are ignored.

    Returns:
        A list of ``(value, search_config)`` tuples, or ``None`` when no
        search parameter qualifies.
    """
    configs_by_name = {cfg.name.lower(): cfg for cfg in search_config}

    matches = []
    for param_name, value in params.model_dump(exclude_unset=True).items():
        key = param_name.lower()
        if key not in configs_by_name:
            continue
        if value is None:
            continue
        if len(value) < basic_config.min_search_length:
            continue
        matches.append((value, configs_by_name[key]))

    return matches or None
def _get_sql_context(
    engine: Engines,
    basic_config: BasicConfig,
    configs: Configs,
):
    """Return the shared SQL-endpoint context for ``engine``, creating it on first use.

    A new context gets every configured datasource registered and is passed
    to ``basic_config.prepare_sql_db_hook`` when one is set. It is added to
    the module-level ``sql_contexts`` registry only after that setup
    succeeded, so a failed initialisation is retried on the next request
    instead of leaving a half-initialised context cached.

    Raises:
        ValueError: for engines the SQL endpoint cannot serve.
    """
    # Explicit check instead of ``assert``: assertions vanish under ``-O``.
    if engine in ("odbc", "sqlite"):
        raise ValueError(f"Engine {engine!r} is not supported by the SQL endpoint")

    context = sql_contexts.get(engine)
    if context is None:
        context = get_context_by_engine(
            engine,
            basic_config.default_chunk_size,
        )
        try:
            init_duck_con(
                context,
                basic_config,
                configs,
            )
            if basic_config.prepare_sql_db_hook is not None:
                basic_config.prepare_sql_db_hook(
                    context,
                )
        except BaseException:
            # Do not leak the freshly created context when setup fails.
            context.__exit__(None, None, None)
            raise
        sql_contexts[engine] = context

    return context
def create_sql_endpoint( 74 | router: APIRouter, 75 | basic_config: BasicConfig, 76 | configs: Configs, 77 | ): 78 | @router.on_event("shutdown") 79 | async def shutdown_event(): 80 | global sql_contexts 81 | for item in sql_contexts.values(): 82 | item.__exit__() 83 | sql_contexts = {} 84 | 85 | @router.get("/api/sql/tables", tags=["sql"], operation_id="get_sql_tables") 86 | async def get_sql_tables( 87 | request: Request, 88 | background_tasks: BackgroundTasks, 89 | Accept: Union[str, None] = Header(default=None), 90 | format: Optional[OutputFileType] = "json", 91 | engine: Engines = Query( 92 | title="$engine", alias="$engine", default="duckdb", include_in_schema=False 93 | ), 94 | ): 95 | con = _get_sql_context(engine, basic_config, configs) 96 | return (await (await con.list_tables()).to_arrow_table()).to_pylist() 97 | 98 | @router.post( 99 | "/api/sql", 100 | tags=["sql"], 101 | operation_id="post_sql_endpoint", 102 | ) 103 | async def get_sql_post( 104 | request: Request, 105 | background_tasks: BackgroundTasks, 106 | Accept: Union[str, None] = Header(default=None), 107 | format: Optional[OutputFileType] = "json", 108 | engine: Engines = Query( 109 | title="$engine", 110 | alias="$engine", 111 | default="duckdb", 112 | include_in_schema=False, 113 | ), 114 | ): 115 | body = await request.body() 116 | 117 | con = _get_sql_context(engine, basic_config, configs) 118 | 119 | sql = body.decode("utf-8") 120 | 121 | return await create_response( 122 | request.url, 123 | request.query_params, 124 | format or request.headers["Accept"], 125 | con, 126 | sql, 127 | basic_config=basic_config, 128 | close_context=False, 129 | ) 130 | 131 | @router.get( 132 | "/api/sql", 133 | tags=["sql"], 134 | operation_id="get_sql_endpoint", 135 | ) 136 | async def get_sql_get( 137 | request: Request, 138 | background_tasks: BackgroundTasks, 139 | sql: str, 140 | Accept: Union[str, None] = Header(default=None), 141 | format: Optional[OutputFileType] = "json", 142 | engine: Engines = 
def generate_strong_password(pwd_length: int = 12) -> str:
    """Return a random password with at least one symbol and two digits.

    Characters are drawn with ``secrets.choice`` from ASCII letters, digits
    and punctuation. Candidates are regenerated until one contains at least
    one punctuation character and at least two digits.

    Args:
        pwd_length: Number of characters in the password (default 12).

    Raises:
        ValueError: If ``pwd_length`` is below 3, because one symbol plus
            two digits cannot fit and the retry loop would never terminate.
    """
    if pwd_length < 3:
        raise ValueError("pwd_length must be at least 3")

    digits = string.digits
    special_chars = string.punctuation
    alphabet = string.ascii_letters + digits + special_chars

    while True:
        pwd = "".join(secrets.choice(alphabet) for _ in range(pwd_length))
        has_special = any(char in special_chars for char in pwd)
        digit_count = sum(char in digits for char in pwd)
        if has_special and digit_count >= 2:
            return pwd
def _repeat_every(
    *,
    seconds: float,
    wait_first: bool = False,
    logger: logging.Logger | None = None,
    raise_exceptions: bool = False,
    max_repetitions: int | None = None,
) -> NoArgsNoReturnDecorator:
    """
    This function returns a decorator that modifies a function so it is periodically re-executed after its first call.

    The function it decorates should accept no arguments and return nothing. If necessary, this can be accomplished
    by using `functools.partial` or otherwise wrapping the target function prior to decoration.

    Parameters
    ----------
    seconds: float
        The number of seconds to wait between repeated calls
    wait_first: bool (default False)
        If True, the function will wait for a single period before the first call
    logger: Optional[logging.Logger] (default None)
        The logger to use to log any exceptions raised by calls to the decorated function.
        If not provided, exceptions will not be logged by this function (though they may be handled by the event loop).
    raise_exceptions: bool (default False)
        If True, errors raised by the decorated function will be raised to the event loop's exception handler.
        Note that if an error is raised, the repeated execution will stop.
        Otherwise, exceptions are just logged and the execution continues to repeat.
        See https://docs.python.org/3/library/asyncio-eventloop.html#asyncio.loop.set_exception_handler for more info.
    max_repetitions: Optional[int] (default None)
        The maximum number of times to call the repeated function. If `None`, the function is repeated forever.
        Only calls that complete without raising are counted.
    """

    def decorator(
        func: NoArgsNoReturnAsyncFuncT | NoArgsNoReturnFuncT,
    ) -> NoArgsNoReturnAsyncFuncT:
        """
        Converts the decorated function into a repeated, periodically-called version of itself.
        """
        is_coroutine = asyncio.iscoroutinefunction(func)
        # The event loop only keeps weak references to tasks, so hold a strong
        # reference here until each background loop finishes; otherwise the
        # task may be garbage-collected while it is still running.
        running_loops: set[asyncio.Future] = set()

        @wraps(func)
        async def wrapped() -> None:
            repetitions = 0

            async def loop() -> None:
                nonlocal repetitions
                if wait_first:
                    await asyncio.sleep(seconds)
                while max_repetitions is None or repetitions < max_repetitions:
                    try:
                        if is_coroutine:
                            await func()  # type: ignore
                        else:
                            # Sync callables run in a worker thread so they do not block the loop.
                            await run_in_threadpool(func)
                        repetitions += 1
                    except Exception as exc:
                        if logger is not None:
                            formatted_exception = "".join(
                                format_exception(type(exc), exc, exc.__traceback__)
                            )
                            logger.error(formatted_exception)
                        if raise_exceptions:
                            # Bare raise keeps the original traceback intact.
                            raise
                    await asyncio.sleep(seconds)

            task = ensure_future(loop())
            running_loops.add(task)
            task.add_done_callback(running_loops.discard)

        return wrapped

    return decorator
SECRET secret2 ( 19 | TYPE AZURE, 20 | CONNECTION_STRING '{emulator_con_str}' 21 | );""" 22 | ) 23 | with con.cursor() as cur: 24 | cur.execute( 25 | "SELECT count(*) FROM read_parquet('azure://testlake/td/faker.parquet')" 26 | ) 27 | print(cur.fetchall()) 28 | -------------------------------------------------------------------------------- /profile_start.bat: -------------------------------------------------------------------------------- 1 | python -m cProfile .\startup_perf.py > profile.txt -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "bmsdna-lakeapi" 3 | version = "0.23.0" 4 | description = "" 5 | authors = [{ name = "DWH Team", email = "you@example.com" }] 6 | dependencies = [ 7 | "pyyaml ~=6.0", 8 | "duckdb >=1.1.0,<2", 9 | "polars >=1.12.0,<2", 10 | "sqlglot >=24.0.0", 11 | "fastexcel >=0.10.4", 12 | "argon2-cffi >=23.1.0,<24", 13 | "xlsxwriter >=3.1.0,<4", 14 | "pyjwt >=2.6.0,<3", 15 | "ruamel.yaml >=0.18.5", 16 | "deltalake >=0.16.1", 17 | "fastapi >=0.110.0", 18 | "arrow-odbc >=5.0.0", 19 | "expandvars >=0.12.0", 20 | "pandas >=2.1.0,<3", 21 | "fsspec >=2024.2.0,<2025", 22 | "adlfs >=2024.2.0,<2025", 23 | "deltalake2db >=0.7.0", 24 | "numpy >=1.26.0,<2", 25 | ] 26 | requires-python = "~=3.10" 27 | 28 | [project.scripts] 29 | validate_lakeapi_schema = "bmsdna.lakeapi.tools.validateschema:validate_schema_cli" 30 | add_lakeapi_user = "bmsdna.lakeapi.tools.useradd:useradd_cli" 31 | 32 | [project.optional-dependencies] 33 | polars = ["fastexcel"] 34 | auth = ["argon2-cffi", "pyjwt"] 35 | useradd = ["ruamel.yaml"] 36 | odbc = ["arrow-odbc"] 37 | 38 | [build-system] 39 | requires = ["hatchling"] 40 | build-backend = "hatchling.build" 41 | 42 | [tool.pyright] 43 | venv = ".venv" 44 | venvPath = "." 
# Development entry point: builds a FastAPI app with the LakeAPI routes wired
# against the test data and, unless disabled, a dockerised MSSQL server.
import dataclasses
import bmsdna.lakeapi
import fastapi
import test_server
import os

# NO_SQL_SERVER=1 skips starting the MSSQL docker container.
no_sql = os.getenv("NO_SQL_SERVER", "0") == "1"
sql_server = test_server.start_mssql_server() if not no_sql else None
print("after start mssql")
app = fastapi.FastAPI()


def_cfg = bmsdna.lakeapi.get_default_config()  # Get default startup config
cfg = dataclasses.replace(
    def_cfg, enable_sql_endpoint=True, data_path="tests/data"
)  # Use dataclasses.replace to set the properties you want
sti = bmsdna.lakeapi.init_lakeapi(
    app, True, cfg, "config_test.yml"
)  # Positional arguments as passed here: the FastAPI instance, a boolean flag, the basic config, and the YAML file describing the tables.
# NOTE(review): the second argument is presumably `use_basic_auth` (the standalone module passes it by keyword) - confirm.
# NOTE(review): the standalone module awaits `init_lakeapi`; if this is the same coroutine function, `sti` is an
# un-awaited coroutine here and the routes are never registered - confirm and run it on an event loop if so.


@app.on_event("shutdown")
def shutdown_event():
    # Intentionally a no-op: the SQL container is left running between dev sessions.
    # Former behaviour, kept for reference:
    # if os.getenv("KEEP_SQL_SERVER", "0") == "0":
    #    sql_server.stop()
    pass
http://localhost:8080/api/v1/complexer/complex_fruits?limit=10&format=json&jsonify_complex=True&$engine=polars 9 | body: none 10 | auth: basic 11 | } 12 | 13 | query { 14 | limit: 10 15 | format: json 16 | jsonify_complex: True 17 | $engine: polars 18 | } 19 | 20 | auth:basic { 21 | username: test 22 | password: B~C:BB*_9-1u 23 | } 24 | -------------------------------------------------------------------------------- /test_requests/Test Requests/Test Nearby.bru: -------------------------------------------------------------------------------- 1 | meta { 2 | name: Test Nearby 3 | type: http 4 | seq: 3 5 | } 6 | 7 | post { 8 | url: http://localhost:8080/api/v1/test/fake_delta?limit=50&format=ndjson&$engine=polars 9 | body: json 10 | auth: basic 11 | } 12 | 13 | query { 14 | limit: 50 15 | format: ndjson 16 | $engine: polars 17 | } 18 | 19 | auth:basic { 20 | username: test 21 | password: B~C:BB*_9-1u 22 | } 23 | 24 | body:json { 25 | {"nearby": {"lat": 46.7, "lon": 8.6, "distance_m": 10000}} 26 | } 27 | -------------------------------------------------------------------------------- /test_requests/Test Requests/bruno.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1", 3 | "name": "Test Requests", 4 | "type": "collection" 5 | } -------------------------------------------------------------------------------- /test_requests/Test Requests/environments/Local.bru: -------------------------------------------------------------------------------- 1 | vars { 2 | } 3 | -------------------------------------------------------------------------------- /test_requests/Test Requests/filter fruits.bru: -------------------------------------------------------------------------------- 1 | meta { 2 | name: filter fruits 3 | type: http 4 | seq: 1 5 | } 6 | 7 | post { 8 | url: http://localhost:8080/api/v1/complexer/complex_fruits?limit=5&format=json&$engine=polars 9 | body: json 10 | auth: basic 11 | } 12 | 13 | query { 14 | limit: 
def _getenvs(env_file: str = "test_server/sql_docker.env") -> dict[str, str]:
    """Parse a ``KEY=VALUE`` env file into a dict.

    Blank lines and comment lines (first non-blank character ``#``) are
    ignored. Each remaining line is split on the first ``=`` only, so values
    may themselves contain ``=``. Lines without any ``=`` are skipped. Keys
    and values are stripped of surrounding whitespace.

    Args:
        env_file: Path of the env file to read; defaults to the SQL Server
            docker env file used by the test server.

    Returns:
        Mapping of variable name to value.
    """
    envs: dict[str, str] = {}
    with open(env_file, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            key, separator, value = line.partition("=")
            if not separator:
                # Not a KEY=VALUE assignment; nothing to record.
                continue
            envs[key.strip()] = value.strip()
    return envs
def get_test_blobstorage():
    """Return a client for the ``testlake`` blob container, creating it if needed.

    The connection string is read from ``TEST_BLOB_CONSTR``; when the
    variable is unset, a connection string pointing at a blob endpoint on
    127.0.0.1:10000 is used.
    """
    from azure.storage.blob import ContainerClient

    fallback_constr = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
    connection_string = os.getenv("TEST_BLOB_CONSTR", fallback_constr)

    container_client = ContainerClient.from_connection_string(
        connection_string, "testlake"
    )
    container_missing = not container_client.exists()
    if container_missing:
        container_client.create_container()
    return container_client
def start_azurite() -> Container:
    """Ensure the ``test4azurite`` container is running and seeded with test data.

    If a container with that name is already running, the test files are
    uploaded and it is returned immediately. Otherwise an existing stopped
    container is reused, or a new one is created from the Azurite image,
    then started; after a fixed 20 second wait the test files are uploaded.
    """
    container_name = "test4azurite"
    docker_client = docker.from_env()  # code taken from https://github.com/fsspec/adlfs/blob/main/adlfs/tests/conftest.py#L72
    container: Container | None = None
    try:
        found = cast(Container, docker_client.containers.get(container_name))
        if found.status == "running":
            upload_to_azurite()
            return found
        # Exists but is not running: reuse it instead of creating a new one.
        container = found
    except docker.errors.NotFound:
        pass

    if container is None:
        port_map = {"10000/tcp": 10000, "10001/tcp": 10001, "10002/tcp": 10002}
        container = docker_client.containers.run(
            "mcr.microsoft.com/azure-storage/azurite:latest",
            detach=True,
            name=container_name,
            ports=port_map,
        )  # type: ignore
    assert container is not None
    container.start()
    print(container.status)
    # Fixed wait before the upload below talks to the freshly started container.
    sleep(20)
    upload_to_azurite()
    print("Successfully created azurite container...")
    return container
@pytest.fixture(scope="session", autouse=True)
def spawn_sql():
    """Session fixture that provides the MSSQL test container.

    Yields ``None`` when ``NO_SQL_SERVER`` is ``"1"``. Otherwise it starts
    the container via ``test_server.start_mssql_server()``, yields it, and
    stops it afterwards unless ``KEEP_SQL_SERVER`` is set to something other
    than ``"0"``.
    """
    import test_server

    if os.getenv("NO_SQL_SERVER", "0") == "1":
        yield None
        return

    container = test_server.start_mssql_server()
    yield container

    # Keeping the container alive can be handy during development.
    keep_running = os.getenv("KEEP_SQL_SERVER", "0") != "0"
    if not keep_running:
        container.stop()
| -------------------------------------------------------------------------------- /tests/data/chinook.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/chinook.db -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/0g/part-00006-510491fd-b429-477a-bc01-ba90158ece60.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/0g/part-00006-510491fd-b429-477a-bc01-ba90158ece60.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/4M/part-00004-816874eb-5a74-436a-8967-7f6d617e41f2.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/4M/part-00004-816874eb-5a74-436a-8967-7f6d617e41f2.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/4P/part-00047-b1e87770-7221-43b8-a74c-44ae08076a09.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/4P/part-00047-b1e87770-7221-43b8-a74c-44ae08076a09.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/4Z/part-00005-ea0c83d4-7e32-46a0-beca-f49e79046b80.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/4Z/part-00005-ea0c83d4-7e32-46a0-beca-f49e79046b80.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/6Q/part-00054-31997fb2-91ff-43b2-824e-103cc0c7d756.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/6Q/part-00054-31997fb2-91ff-43b2-824e-103cc0c7d756.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/6c/part-00012-6259bde6-bdf7-4453-a230-0f2520a6240c.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/6c/part-00012-6259bde6-bdf7-4453-a230-0f2520a6240c.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/7w/part-00000-be76109f-fbcd-49b1-8849-5fdf6c78f9bd.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/7w/part-00000-be76109f-fbcd-49b1-8849-5fdf6c78f9bd.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/8r/part-00068-6aa18a58-b31a-4343-9a94-5132835119af.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/8r/part-00068-6aa18a58-b31a-4343-9a94-5132835119af.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/97/part-00038-bb2ca269-9b06-433d-8733-e714448dda93.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/97/part-00038-bb2ca269-9b06-433d-8733-e714448dda93.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/BI/part-00044-4b8b65e2-6637-4999-a8dd-6327a3a34805.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/BI/part-00044-4b8b65e2-6637-4999-a8dd-6327a3a34805.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/BP/part-00065-8e29739a-c689-4913-923f-b409ab10a6f3.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/BP/part-00065-8e29739a-c689-4913-923f-b409ab10a6f3.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Dj/part-00001-b9e46f6e-c6ce-4b84-a756-016350dd00a3.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Dj/part-00001-b9e46f6e-c6ce-4b84-a756-016350dd00a3.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Fk/part-00057-2bea3d40-0236-46bc-b1d4-85bb1d7112a9.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Fk/part-00057-2bea3d40-0236-46bc-b1d4-85bb1d7112a9.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Ft/part-00046-583c311c-45d7-4590-96ff-157e8366390c.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Ft/part-00046-583c311c-45d7-4590-96ff-157e8366390c.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/GS/part-00048-3ec7ca53-eef9-4e67-888f-bec41669d79c.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/GS/part-00048-3ec7ca53-eef9-4e67-888f-bec41669d79c.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/HN/part-00058-cc1e9a26-6c3d-4288-b378-2cf2ba7745af.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/HN/part-00058-cc1e9a26-6c3d-4288-b378-2cf2ba7745af.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/HZ/part-00042-9926dc4b-c61d-49c7-933a-9a9535a5d177.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/HZ/part-00042-9926dc4b-c61d-49c7-933a-9a9535a5d177.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/IG/part-00002-8edfa65e-7cb2-4f82-8771-579acedd6f7d.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/IG/part-00002-8edfa65e-7cb2-4f82-8771-579acedd6f7d.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/II/part-00059-422f439d-c57f-47e2-bef8-1e199e0fe46c.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/II/part-00059-422f439d-c57f-47e2-bef8-1e199e0fe46c.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/IV/part-00053-318b80d6-8bef-4057-8890-79bd5db60cac.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/IV/part-00053-318b80d6-8bef-4057-8890-79bd5db60cac.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/JH/part-00043-d1d1b590-7042-4e9c-872b-0509dc58eed7.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/JH/part-00043-d1d1b590-7042-4e9c-872b-0509dc58eed7.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Kc/part-00051-9428b0ee-ad2e-4156-a046-4547a5d3b3b1.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Kc/part-00051-9428b0ee-ad2e-4156-a046-4547a5d3b3b1.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Kt/part-00036-2cbd4b24-74a9-4676-ad88-1ee06fb29bb8.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Kt/part-00036-2cbd4b24-74a9-4676-ad88-1ee06fb29bb8.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Lv/part-00032-41bbaf1a-6279-47b7-8688-b235d1706aad.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Lv/part-00032-41bbaf1a-6279-47b7-8688-b235d1706aad.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/M2/part-00072-07af31ba-a41c-4c04-87c5-87d870fdf475.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/M2/part-00072-07af31ba-a41c-4c04-87c5-87d870fdf475.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Mf/part-00060-a8539436-96ba-402a-8a7b-dec2135b7b52.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Mf/part-00060-a8539436-96ba-402a-8a7b-dec2135b7b52.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/NJ/part-00055-4edebb18-a674-4a72-ad10-145050b27272.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/NJ/part-00055-4edebb18-a674-4a72-ad10-145050b27272.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/No/part-00076-3eca7b45-d1e3-4051-b035-4bcdd714abbc.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/No/part-00076-3eca7b45-d1e3-4051-b035-4bcdd714abbc.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/OX/part-00031-cdfe144e-da91-461c-9894-574d7e64952f.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/OX/part-00031-cdfe144e-da91-461c-9894-574d7e64952f.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Oi/part-00011-d8be12d7-d464-43fd-9700-a5b1ae9e5d0e.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Oi/part-00011-d8be12d7-d464-43fd-9700-a5b1ae9e5d0e.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/P6/part-00078-6608497c-fad9-4191-bde7-a2350ce65125.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/P6/part-00078-6608497c-fad9-4191-bde7-a2350ce65125.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Pn/part-00023-5e05b8f8-f80f-43f6-9333-2d263bfbcc14.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Pn/part-00023-5e05b8f8-f80f-43f6-9333-2d263bfbcc14.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Pw/part-00050-8f5580a5-dd4c-4a0e-b892-5dc2bd0695b5.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Pw/part-00050-8f5580a5-dd4c-4a0e-b892-5dc2bd0695b5.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/RC/part-00071-4b8ccfe1-6aec-4bec-b70a-0754ae2b3bf8.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/RC/part-00071-4b8ccfe1-6aec-4bec-b70a-0754ae2b3bf8.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Rk/part-00007-e6b43552-ef44-4af4-921d-d506eb2dfbbe.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Rk/part-00007-e6b43552-ef44-4af4-921d-d506eb2dfbbe.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/SJ/part-00061-ba3a9c66-779c-4665-8003-9c9ab799e5f6.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/SJ/part-00061-ba3a9c66-779c-4665-8003-9c9ab799e5f6.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/To/part-00033-482da537-942c-4d93-b747-2ec4b4ece4e7.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/To/part-00033-482da537-942c-4d93-b747-2ec4b4ece4e7.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Tp/part-00077-eb1e0859-e136-4ed5-b4b5-8371acba0303.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Tp/part-00077-eb1e0859-e136-4ed5-b4b5-8371acba0303.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Tw/part-00035-cf597183-4ced-4d3e-bc02-8cae53bb4c25.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Tw/part-00035-cf597183-4ced-4d3e-bc02-8cae53bb4c25.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/U2/part-00010-92ebf069-473c-4d07-a300-d72896a9c651.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/U2/part-00010-92ebf069-473c-4d07-a300-d72896a9c651.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/U4/part-00073-2aaa75d8-484f-4033-a351-28f3a8c1540e.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/U4/part-00073-2aaa75d8-484f-4033-a351-28f3a8c1540e.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/U6/part-00032-f0c3ab56-cdb6-4d79-8aa2-908bc7cabb9f.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/U6/part-00032-f0c3ab56-cdb6-4d79-8aa2-908bc7cabb9f.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Wa/part-00030-211e7855-f56c-4a9d-b632-19e2eadc1f58.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Wa/part-00030-211e7855-f56c-4a9d-b632-19e2eadc1f58.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/YY/part-00045-1fae6e27-b6ea-4496-9751-eb8ce068051d.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/YY/part-00045-1fae6e27-b6ea-4496-9751-eb8ce068051d.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/YZ/part-00019-0b4e7ee9-8c7b-4191-9dec-6c833e5550b4.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/YZ/part-00019-0b4e7ee9-8c7b-4191-9dec-6c833e5550b4.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Yr/part-00075-a362be3f-f74e-4522-87d6-f3e811d5e12f.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Yr/part-00075-a362be3f-f74e-4522-87d6-f3e811d5e12f.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/Zz/part-00022-394e79b3-5bee-4835-8af4-1b69739c5a20.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/Zz/part-00022-394e79b3-5bee-4835-8af4-1b69739c5a20.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/_delta_log/00000000000000000000.crc: -------------------------------------------------------------------------------- 1 | 
{"txnId":"d224a603-53f1-4f3d-8552-eb49fd87d709","tableSizeBytes":148639,"numFiles":85,"numMetadata":1,"numProtocol":1,"metadata":{"id":"73b35691-bf7a-4728-9bff-5f7703ac3c6f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"company\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"phone_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["company"],"configuration":{"delta.autoOptimize.optimizeWrite":"true","delta.targetFileSize":"33554432","delta.tuneFileSizesForRewrites":"true"},"createdTime":1690885391467},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[148639,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}} 2 | -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/_delta_log/00000000000000000001.crc: -------------------------------------------------------------------------------- 1 | {"txnId":"31be9a97-5440-4403-ba36-2da092261e43","tableSizeBytes":148639,"numFiles":85,"numMetadata":1,"numProtocol":1,"metadata":{"id":"73b35691-bf7a-4728-9bff-5f7703ac3c6f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"address\"}},{\"name\":\"age\",\"type\":\"double\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"age\"}},{\"name\":\"company\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":3,\"delta.columnMapping.physicalName\":\"company\"}},{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":4,\"delta.columnMapping.physicalName\":\"id\"}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":5,\"delta.columnMapping.physicalName\":\"name\"}},{\"name\":\"phone_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":6,\"delta.columnMapping.physicalName\":\"phone_number\"}}]}","partitionColumns":["company"],"configuration":{"delta.columnMapping.mode":"name","delta.autoOptimize.optimizeWrite":"true","delta.columnMapping.maxColumnId":"6","delta.targetFileSize":"33554432","delta.tuneFileSizesForRe
writes":"true"},"createdTime":1690885391467},"protocol":{"minReaderVersion":2,"minWriterVersion":5},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[148639,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}} 2 | -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/_delta_log/00000000000000000001.json: -------------------------------------------------------------------------------- 1 | {"commitInfo":{"timestamp":1690885397656,"userId":"2797914831036774","userName":"adrian.ehrsam@bmsuisse.ch","operation":"SET 
TBLPROPERTIES","operationParameters":{"properties":"{\"delta.columnMapping.mode\":\"name\",\"delta.minReaderVersion\":\"2\",\"delta.minWriterVersion\":\"5\"}"},"notebook":{"notebookId":"3271485675102593"},"clusterId":"0428-070410-lm8e9giw","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Databricks-Runtime/12.2.x-photon-scala2.12","txnId":"31be9a97-5440-4403-ba36-2da092261e43"}} 2 | {"protocol":{"minReaderVersion":2,"minWriterVersion":5}} 3 | {"metaData":{"id":"73b35691-bf7a-4728-9bff-5f7703ac3c6f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"address\"}},{\"name\":\"age\",\"type\":\"double\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"age\"}},{\"name\":\"company\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":3,\"delta.columnMapping.physicalName\":\"company\"}},{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":4,\"delta.columnMapping.physicalName\":\"id\"}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":5,\"delta.columnMapping.physicalName\":\"name\"}},{\"name\":\"phone_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":6,\"delta.columnMapping.physicalName\":\"phone_number\"}}]}","partitionColumns":["company"],"configuration":{"delta.columnMapping.mode":"name","delta.autoOptimize.optimizeWrite":"true","delta.columnMapping.maxColumnId":"6","delta.targetFileSize":"33554432","delta.tuneFileSizesForRewrites":"true"},"createdTime":1690885391467}} 4 | -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/_delta_log/00000000000000000002.crc: 
-------------------------------------------------------------------------------- 1 | {"txnId":"b53fa2af-6e65-4d4a-b625-a495fdbfd3d8","tableSizeBytes":148639,"numFiles":85,"numMetadata":1,"numProtocol":1,"metadata":{"id":"73b35691-bf7a-4728-9bff-5f7703ac3c6f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"address\"}},{\"name\":\"age\",\"type\":\"double\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"age\"}},{\"name\":\"company\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":3,\"delta.columnMapping.physicalName\":\"company\"}},{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":4,\"delta.columnMapping.physicalName\":\"id\"}},{\"name\":\"Super Name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":5,\"delta.columnMapping.physicalName\":\"name\"}},{\"name\":\"phone_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":6,\"delta.columnMapping.physicalName\":\"phone_number\"}}]}","partitionColumns":["company"],"configuration":{"delta.columnMapping.mode":"name","delta.autoOptimize.optimizeWrite":"true","delta.columnMapping.maxColumnId":"6","delta.targetFileSize":"33554432","delta.tuneFileSizesForRewrites":"true"},"createdTime":1690885391467},"protocol":{"minReaderVersion":2,"minWriterVersion":5},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435
456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[148639,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}} 2 | -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/_delta_log/00000000000000000002.json: -------------------------------------------------------------------------------- 1 | {"commitInfo":{"timestamp":1690885398332,"userId":"2797914831036774","userName":"adrian.ehrsam@bmsuisse.ch","operation":"RENAME COLUMN","operationParameters":{"oldColumnPath":"name","newColumnPath":"Super Name"},"notebook":{"notebookId":"3271485675102593"},"clusterId":"0428-070410-lm8e9giw","readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Databricks-Runtime/12.2.x-photon-scala2.12","txnId":"b53fa2af-6e65-4d4a-b625-a495fdbfd3d8"}} 2 | 
{"metaData":{"id":"73b35691-bf7a-4728-9bff-5f7703ac3c6f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"address\"}},{\"name\":\"age\",\"type\":\"double\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"age\"}},{\"name\":\"company\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":3,\"delta.columnMapping.physicalName\":\"company\"}},{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":4,\"delta.columnMapping.physicalName\":\"id\"}},{\"name\":\"Super Name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":5,\"delta.columnMapping.physicalName\":\"name\"}},{\"name\":\"phone_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":6,\"delta.columnMapping.physicalName\":\"phone_number\"}}]}","partitionColumns":["company"],"configuration":{"delta.columnMapping.mode":"name","delta.autoOptimize.optimizeWrite":"true","delta.columnMapping.maxColumnId":"6","delta.targetFileSize":"33554432","delta.tuneFileSizesForRewrites":"true"},"createdTime":1690885391467}} 3 | -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/_delta_log/00000000000000000003.crc: -------------------------------------------------------------------------------- 1 | 
{"txnId":"550bb6b9-337e-47cd-959e-1cb275585aa1","tableSizeBytes":148639,"numFiles":85,"numMetadata":1,"numProtocol":1,"metadata":{"id":"73b35691-bf7a-4728-9bff-5f7703ac3c6f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"address\"}},{\"name\":\"age\",\"type\":\"double\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"age\"}},{\"name\":\"company\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":3,\"delta.columnMapping.physicalName\":\"company\"}},{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":4,\"delta.columnMapping.physicalName\":\"id\"}},{\"name\":\"Super Name_\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":5,\"delta.columnMapping.physicalName\":\"name\"}},{\"name\":\"phone_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":6,\"delta.columnMapping.physicalName\":\"phone_number\"}}]}","partitionColumns":["company"],"configuration":{"delta.columnMapping.mode":"name","delta.autoOptimize.optimizeWrite":"true","delta.columnMapping.maxColumnId":"6","delta.targetFileSize":"33554432","delta.tuneFileSizesForRewrites":"true"},"createdTime":1690885391467},"protocol":{"minReaderVersion":2,"minWriterVersion":5},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,
419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[148639,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}} 2 | -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/_delta_log/00000000000000000003.json: -------------------------------------------------------------------------------- 1 | {"commitInfo":{"timestamp":1690885399396,"userId":"2797914831036774","userName":"adrian.ehrsam@bmsuisse.ch","operation":"RENAME COLUMN","operationParameters":{"oldColumnPath":"Super Name","newColumnPath":"Super Name_"},"notebook":{"notebookId":"3271485675102593"},"clusterId":"0428-070410-lm8e9giw","readVersion":2,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Databricks-Runtime/12.2.x-photon-scala2.12","txnId":"550bb6b9-337e-47cd-959e-1cb275585aa1"}} 2 | 
{"metaData":{"id":"73b35691-bf7a-4728-9bff-5f7703ac3c6f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"address\"}},{\"name\":\"age\",\"type\":\"double\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"age\"}},{\"name\":\"company\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":3,\"delta.columnMapping.physicalName\":\"company\"}},{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":4,\"delta.columnMapping.physicalName\":\"id\"}},{\"name\":\"Super Name_\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":5,\"delta.columnMapping.physicalName\":\"name\"}},{\"name\":\"phone_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":6,\"delta.columnMapping.physicalName\":\"phone_number\"}}]}","partitionColumns":["company"],"configuration":{"delta.columnMapping.mode":"name","delta.autoOptimize.optimizeWrite":"true","delta.columnMapping.maxColumnId":"6","delta.targetFileSize":"33554432","delta.tuneFileSizesForRewrites":"true"},"createdTime":1690885391467}} 3 | -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/_delta_log/00000000000000000004.crc: -------------------------------------------------------------------------------- 1 | 
{"txnId":"02e21747-7962-4bd8-bc97-04066eecfdb7","tableSizeBytes":148639,"numFiles":85,"numMetadata":1,"numProtocol":1,"metadata":{"id":"73b35691-bf7a-4728-9bff-5f7703ac3c6f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"address\"}},{\"name\":\"age\",\"type\":\"double\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"age\"}},{\"name\":\"company\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":3,\"delta.columnMapping.physicalName\":\"company\"}},{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":4,\"delta.columnMapping.physicalName\":\"id\"}},{\"name\":\"Super Name_\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":5,\"delta.columnMapping.physicalName\":\"name\"}},{\"name\":\"phone_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":6,\"delta.columnMapping.physicalName\":\"phone_number\"}},{\"name\":\"new_Column\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":7,\"delta.columnMapping.physicalName\":\"col-eb647ef5-c40c-4421-b9a7-942c027ccaee\"}}]}","partitionColumns":["company"],"configuration":{"delta.columnMapping.mode":"name","delta.autoOptimize.optimizeWrite":"true","delta.columnMapping.maxColumnId":"7","delta.targetFileSize":"33554432","delta.tuneFileSizesForRewrites":"true"},"createdTime":1690885391467},"protocol":{"minReaderVersion":2,"minWriterVersion":5},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,13
8412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[148639,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}} 2 | -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/_delta_log/00000000000000000004.json: -------------------------------------------------------------------------------- 1 | {"commitInfo":{"timestamp":1690885400195,"userId":"2797914831036774","userName":"adrian.ehrsam@bmsuisse.ch","operation":"ADD COLUMNS","operationParameters":{"columns":"[{\"column\":{\"name\":\"new_Column\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}}]"},"notebook":{"notebookId":"3271485675102593"},"clusterId":"0428-070410-lm8e9giw","readVersion":3,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Databricks-Runtime/12.2.x-photon-scala2.12","txnId":"02e21747-7962-4bd8-bc97-04066eecfdb7"}} 2 | 
{"metaData":{"id":"73b35691-bf7a-4728-9bff-5f7703ac3c6f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"address\"}},{\"name\":\"age\",\"type\":\"double\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"age\"}},{\"name\":\"company\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":3,\"delta.columnMapping.physicalName\":\"company\"}},{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":4,\"delta.columnMapping.physicalName\":\"id\"}},{\"name\":\"Super Name_\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":5,\"delta.columnMapping.physicalName\":\"name\"}},{\"name\":\"phone_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":6,\"delta.columnMapping.physicalName\":\"phone_number\"}},{\"name\":\"new_Column\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":7,\"delta.columnMapping.physicalName\":\"col-eb647ef5-c40c-4421-b9a7-942c027ccaee\"}}]}","partitionColumns":["company"],"configuration":{"delta.columnMapping.mode":"name","delta.autoOptimize.optimizeWrite":"true","delta.columnMapping.maxColumnId":"7","delta.targetFileSize":"33554432","delta.tuneFileSizesForRewrites":"true"},"createdTime":1690885391467}} 3 | -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/_delta_log/00000000000000000005.crc: -------------------------------------------------------------------------------- 1 | 
{"txnId":"36cb3982-55d8-4ba3-904f-804fa3ed8d52","tableSizeBytes":199912,"numFiles":85,"numMetadata":1,"numProtocol":1,"metadata":{"id":"73b35691-bf7a-4728-9bff-5f7703ac3c6f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"address\"}},{\"name\":\"age\",\"type\":\"double\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"age\"}},{\"name\":\"company\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":3,\"delta.columnMapping.physicalName\":\"company\"}},{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":4,\"delta.columnMapping.physicalName\":\"id\"}},{\"name\":\"Super Name_\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":5,\"delta.columnMapping.physicalName\":\"name\"}},{\"name\":\"phone_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":6,\"delta.columnMapping.physicalName\":\"phone_number\"}},{\"name\":\"new_Column\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":7,\"delta.columnMapping.physicalName\":\"col-eb647ef5-c40c-4421-b9a7-942c027ccaee\"}}]}","partitionColumns":["company"],"configuration":{"delta.columnMapping.mode":"name","delta.autoOptimize.optimizeWrite":"true","delta.columnMapping.maxColumnId":"7","delta.targetFileSize":"33554432","delta.tuneFileSizesForRewrites":"true"},"createdTime":1690885391467},"protocol":{"minReaderVersion":2,"minWriterVersion":5},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,13
8412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[199912,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}} 2 | -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/aL/part-00018-2fe5e43f-121b-4179-b664-834c5465b967.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/aL/part-00018-2fe5e43f-121b-4179-b664-834c5465b967.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/aR/part-00008-d4bec0ab-eccc-4c99-9025-6ca6503ddd74.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/aR/part-00008-d4bec0ab-eccc-4c99-9025-6ca6503ddd74.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/b7/part-00067-a7f025fc-51bc-423b-af1b-d9d9b72ebc6c.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/b7/part-00067-a7f025fc-51bc-423b-af1b-d9d9b72ebc6c.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/bT/part-00021-39e0723e-0993-403b-8859-caa5f37a2fbb.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/bT/part-00021-39e0723e-0993-403b-8859-caa5f37a2fbb.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/cF/part-00049-e0050fd7-80fb-4265-a387-565207e45e4d.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/cF/part-00049-e0050fd7-80fb-4265-a387-565207e45e4d.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ab/part-00056-f6da9139-a113-467e-8ba5-c7758370da3a.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ab/part-00056-f6da9139-a113-467e-8ba5-c7758370da3a.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ac/part-00033-84b83034-4286-45e6-9127-cc65f389a327.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ac/part-00033-84b83034-4286-45e6-9127-cc65f389a327.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ad/part-00049-4b3f4ff8-05ed-47e6-be6d-f95fdef49711.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ad/part-00049-4b3f4ff8-05ed-47e6-be6d-f95fdef49711.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=al/part-00028-17ca4dc6-857d-4981-852a-17ab4e89ffd9.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=al/part-00028-17ca4dc6-857d-4981-852a-17ab4e89ffd9.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=an/part-00069-7f716a4b-e3f4-4843-b255-316f894cd667.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=an/part-00069-7f716a4b-e3f4-4843-b255-316f894cd667.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=at/part-00034-d7f4034d-59b3-4d12-9416-80507cf32c57.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=at/part-00034-d7f4034d-59b3-4d12-9416-80507cf32c57.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ay/part-00030-c59a6d41-2735-4b97-be85-dd4c65655a00.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ay/part-00030-c59a6d41-2735-4b97-be85-dd4c65655a00.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ba/part-00027-b718de64-3731-4ad0-90ad-4db26b9bcc1d.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ba/part-00027-b718de64-3731-4ad0-90ad-4db26b9bcc1d.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=be/part-00041-0efe3b2d-61dd-494a-aded-639ddc29a70a.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=be/part-00041-0efe3b2d-61dd-494a-aded-639ddc29a70a.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=bo/part-00035-49a6a289-1cbb-4771-b6b3-d87ddc6f3dd8.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=bo/part-00035-49a6a289-1cbb-4771-b6b3-d87ddc6f3dd8.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=br/part-00008-566f0054-3608-451e-974b-264b75ce231a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=br/part-00008-566f0054-3608-451e-974b-264b75ce231a.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=bu/part-00072-3ec64246-453a-4d51-91c7-848304d4096f.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=bu/part-00072-3ec64246-453a-4d51-91c7-848304d4096f.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=by/part-00042-c40258e5-2723-4340-ba91-a641b231515a.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=by/part-00042-c40258e5-2723-4340-ba91-a641b231515a.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ca/part-00005-42e7bc3f-aa89-4da9-b658-38a2ca8e4fd2.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ca/part-00005-42e7bc3f-aa89-4da9-b658-38a2ca8e4fd2.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ce/part-00051-b0112e8d-c692-4b2b-b911-1bb92bd9382e.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ce/part-00051-b0112e8d-c692-4b2b-b911-1bb92bd9382e.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ch/part-00036-45350603-7694-4a2f-b65f-8ac04c196465.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ch/part-00036-45350603-7694-4a2f-b65f-8ac04c196465.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=cl/part-00043-622f89c5-01f1-4abb-97ab-d7e11fad5624.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=cl/part-00043-622f89c5-01f1-4abb-97ab-d7e11fad5624.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=co/part-00010-138ed8b4-c6e3-404c-b53a-ce33a2cf312b.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=co/part-00010-138ed8b4-c6e3-404c-b53a-ce33a2cf312b.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=cr/part-00055-149c9a22-5f1a-4d4e-9071-ecbe4a8ff971.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=cr/part-00055-149c9a22-5f1a-4d4e-9071-ecbe4a8ff971.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=cu/part-00062-b512dbb9-8644-4725-b6f8-66bd101d7d09.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=cu/part-00062-b512dbb9-8644-4725-b6f8-66bd101d7d09.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=da/part-00032-c13bdcc7-bb30-48c1-8848-ec16a339e952.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=da/part-00032-c13bdcc7-bb30-48c1-8848-ec16a339e952.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=de/part-00023-6d446912-e456-4ae5-91ce-a077d768b645.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=de/part-00023-6d446912-e456-4ae5-91ce-a077d768b645.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=di/part-00007-7dd774b2-1cb0-4c83-9813-ed75b2d389b6.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=di/part-00007-7dd774b2-1cb0-4c83-9813-ed75b2d389b6.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=do/part-00050-f7de5cd9-ea84-480c-a960-50de699bbece.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=do/part-00050-f7de5cd9-ea84-480c-a960-50de699bbece.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=dr/part-00047-03af00f0-8b3d-42f9-9cd9-94c8df3efc0f.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=dr/part-00047-03af00f0-8b3d-42f9-9cd9-94c8df3efc0f.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=el/part-00076-2015b5b5-d50c-4074-aaa4-ff0c6f19b2db.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=el/part-00076-2015b5b5-d50c-4074-aaa4-ff0c6f19b2db.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=es/part-00048-6acda97e-206c-4c78-b13d-d67e95b96bb7.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=es/part-00048-6acda97e-206c-4c78-b13d-d67e95b96bb7.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=fi/part-00064-43e6ec96-6b6c-4211-8565-63ee05b8f0ff.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=fi/part-00064-43e6ec96-6b6c-4211-8565-63ee05b8f0ff.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=fl/part-00079-71fc11e6-8efe-46d0-a500-19ec217bdac5.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=fl/part-00079-71fc11e6-8efe-46d0-a500-19ec217bdac5.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=fo/part-00006-4151a5b0-c860-4a31-92b3-2b82fd2d99bd.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=fo/part-00006-4151a5b0-c860-4a31-92b3-2b82fd2d99bd.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=fr/part-00063-d2b90342-e419-46c5-83e9-0a6d8a32aea4.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=fr/part-00063-d2b90342-e419-46c5-83e9-0a6d8a32aea4.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ga/part-00061-7a78fb8a-bca8-4f17-9c5d-c9817da92b74.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ga/part-00061-7a78fb8a-bca8-4f17-9c5d-c9817da92b74.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=gi/part-00019-31f75af4-2504-4eb8-9d3d-955bbd86c1e0.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=gi/part-00019-31f75af4-2504-4eb8-9d3d-955bbd86c1e0.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=go/part-00066-53270a3c-c02b-4608-9c9f-f60cae9ecb2f.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=go/part-00066-53270a3c-c02b-4608-9c9f-f60cae9ecb2f.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=gr/part-00020-d77466c4-7b96-4762-9476-1579b462d0b0.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=gr/part-00020-d77466c4-7b96-4762-9476-1579b462d0b0.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ha/part-00016-fad3113c-dfc1-4280-901d-aabdfe8a6f4b.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ha/part-00016-fad3113c-dfc1-4280-901d-aabdfe8a6f4b.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=he/part-00003-c6154e56-f3f8-4064-b712-aed57f913a5b.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=he/part-00003-c6154e56-f3f8-4064-b712-aed57f913a5b.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ho/part-00014-c339a09a-9f4b-4be0-986d-5707bd0cadaf.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ho/part-00014-c339a09a-9f4b-4be0-986d-5707bd0cadaf.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ja/part-00009-7732907a-bcc6-4ecb-b064-680fdeebcc3e.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ja/part-00009-7732907a-bcc6-4ecb-b064-680fdeebcc3e.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ji/part-00071-06f90521-9846-4ffc-9f47-acbf1871077c.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ji/part-00071-06f90521-9846-4ffc-9f47-acbf1871077c.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=jo/part-00004-8a0c1603-fa55-4a84-879b-f7e1c0d13c0a.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=jo/part-00004-8a0c1603-fa55-4a84-879b-f7e1c0d13c0a.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ke/part-00031-aeda7b45-378b-424a-aad0-924701a9c6f9.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ke/part-00031-aeda7b45-378b-424a-aad0-924701a9c6f9.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ko/part-00070-5efdd77a-ecea-47b8-b532-de111e665f6a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ko/part-00070-5efdd77a-ecea-47b8-b532-de111e665f6a.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=la/part-00044-505b2259-c565-42ae-aa6e-b56629cffdf3.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=la/part-00044-505b2259-c565-42ae-aa6e-b56629cffdf3.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=le/part-00018-8c97ce70-0ce3-4dc3-8ba9-7a2c51897e2c.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=le/part-00018-8c97ce70-0ce3-4dc3-8ba9-7a2c51897e2c.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=li/part-00059-6716a935-b1e4-41f5-a1a7-fd0e707a9b90.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=li/part-00059-6716a935-b1e4-41f5-a1a7-fd0e707a9b90.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=lo/part-00000-522f840d-fb48-4f0b-a185-dedb749b0da0.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=lo/part-00000-522f840d-fb48-4f0b-a185-dedb749b0da0.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ma/part-00001-086bf948-a7b4-4600-891d-01c4680fcab4.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ma/part-00001-086bf948-a7b4-4600-891d-01c4680fcab4.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=mc/part-00015-ea17f88c-c7c5-4ed5-aa9c-55907f85c0f8.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=mc/part-00015-ea17f88c-c7c5-4ed5-aa9c-55907f85c0f8.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=me/part-00039-5e5ebd26-eb1f-4312-bd6f-73c8a4b2dfbe.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=me/part-00039-5e5ebd26-eb1f-4312-bd6f-73c8a4b2dfbe.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=mi/part-00017-64e30664-4bcf-4462-a617-bf05b3772a5d.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=mi/part-00017-64e30664-4bcf-4462-a617-bf05b3772a5d.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=mo/part-00012-67ab812e-793f-4100-b383-511e3206c975.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=mo/part-00012-67ab812e-793f-4100-b383-511e3206c975.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=mu/part-00060-e274f652-1f8d-4738-bc0f-c6e8a0d6bff1.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=mu/part-00060-e274f652-1f8d-4738-bc0f-c6e8a0d6bff1.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=my/part-00068-7451e7e9-9655-4f5f-81c0-8bc55cb50960.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=my/part-00068-7451e7e9-9655-4f5f-81c0-8bc55cb50960.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ne/part-00029-a579be5b-1744-4a5a-858f-fe5008ce81fb.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ne/part-00029-a579be5b-1744-4a5a-858f-fe5008ce81fb.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=no/part-00047-97b20913-8df7-4570-8c29-b0d364b5fe76.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=no/part-00047-97b20913-8df7-4570-8c29-b0d364b5fe76.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ob/part-00075-db82c46c-76c8-48a8-98e3-b1b4b8816a65.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ob/part-00075-db82c46c-76c8-48a8-98e3-b1b4b8816a65.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=on/part-00074-25f1fa15-e2b1-4819-9920-57443ec7dd7b.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=on/part-00074-25f1fa15-e2b1-4819-9920-57443ec7dd7b.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=pa/part-00021-080dbd1f-b083-4e46-bd6f-1ac16b1db973.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=pa/part-00021-080dbd1f-b083-4e46-bd6f-1ac16b1db973.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=pe/part-00038-63524ae8-1a5c-4b5c-8e7c-29b8a01601a2.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=pe/part-00038-63524ae8-1a5c-4b5c-8e7c-29b8a01601a2.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ph/part-00013-d6cbca57-b07e-4958-a2e5-541fe27fefa8.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ph/part-00013-d6cbca57-b07e-4958-a2e5-541fe27fefa8.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=pr/part-00073-5073dd1d-d9b3-4352-ba64-fb33743e7576.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=pr/part-00073-5073dd1d-d9b3-4352-ba64-fb33743e7576.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ra/part-00045-16e8d690-d897-44d0-8023-c464247974bf.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ra/part-00045-16e8d690-d897-44d0-8023-c464247974bf.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=re/part-00067-61dc56ee-b090-4330-894f-aa964229f4aa.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=re/part-00067-61dc56ee-b090-4330-894f-aa964229f4aa.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ri/part-00037-e7b8827a-767b-47c0-98cd-be3749f77bb8.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ri/part-00037-e7b8827a-767b-47c0-98cd-be3749f77bb8.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ro/part-00002-03d8285e-c751-4f6a-822a-d47c83ee8ceb.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ro/part-00002-03d8285e-c751-4f6a-822a-d47c83ee8ceb.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ru/part-00057-73304f98-6375-4906-b781-979acc7367ae.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ru/part-00057-73304f98-6375-4906-b781-979acc7367ae.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=sa/part-00053-d402d6d7-61e7-4023-9d6b-d1e9ed152f49.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=sa/part-00053-d402d6d7-61e7-4023-9d6b-d1e9ed152f49.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=sc/part-00022-0ce5b7db-639e-42a1-9d4b-90c5a46601e7.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=sc/part-00022-0ce5b7db-639e-42a1-9d4b-90c5a46601e7.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=sh/part-00040-600b89ab-bb40-4cb6-9ae4-1ab9faa0f40e.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=sh/part-00040-600b89ab-bb40-4cb6-9ae4-1ab9faa0f40e.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=si/part-00033-99e6cac7-a189-4772-a579-b9781b149235.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=si/part-00033-99e6cac7-a189-4772-a579-b9781b149235.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=sm/part-00025-cff7e4e8-d331-4e52-b464-8c5370bbcfee.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=sm/part-00025-cff7e4e8-d331-4e52-b464-8c5370bbcfee.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=so/part-00052-6678746c-5e3b-49c7-87e8-f2cab9010c92.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=so/part-00052-6678746c-5e3b-49c7-87e8-f2cab9010c92.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=st/part-00026-00eaed9e-c245-4134-9d52-482892b2c153.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=st/part-00026-00eaed9e-c245-4134-9d52-482892b2c153.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=sw/part-00054-eaf85bb5-b702-4d0c-a6b7-67901f7cf7eb.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=sw/part-00054-eaf85bb5-b702-4d0c-a6b7-67901f7cf7eb.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ta/part-00046-26a01da5-5d3c-4597-89fe-7f6f9579e6f1.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ta/part-00046-26a01da5-5d3c-4597-89fe-7f6f9579e6f1.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=th/part-00007-574080d4-da2d-40ca-928b-fa7ca5d86d80.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=th/part-00007-574080d4-da2d-40ca-928b-fa7ca5d86d80.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=to/part-00065-dbe02d1a-c524-4aab-9ad6-10ead7a5d481.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=to/part-00065-dbe02d1a-c524-4aab-9ad6-10ead7a5d481.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=tu/part-00077-7c4ba0d6-9460-462d-ae66-8d4bd21b190b.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=tu/part-00077-7c4ba0d6-9460-462d-ae66-8d4bd21b190b.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=vi/part-00029-63ae8fd1-c094-4c02-8b6b-c31713c3a0d3.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=vi/part-00029-63ae8fd1-c094-4c02-8b6b-c31713c3a0d3.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=wa/part-00024-e3781098-4ac4-4bf8-9ed1-81b9651d3769.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=wa/part-00024-e3781098-4ac4-4bf8-9ed1-81b9651d3769.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=we/part-00000-f42ba576-5cd6-415b-bad7-73a0b8e033df.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=we/part-00000-f42ba576-5cd6-415b-bad7-73a0b8e033df.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=wi/part-00011-d9fd8935-3809-4171-a9cd-ec472cd54d8c.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=wi/part-00011-d9fd8935-3809-4171-a9cd-ec472cd54d8c.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=wo/part-00058-c9201b73-9ea0-4ecb-a42e-744b07cce743.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=wo/part-00058-c9201b73-9ea0-4ecb-a42e-744b07cce743.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/company=ya/part-00078-1e0fa90e-282e-420e-9ef0-616b4405badc.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/company=ya/part-00078-1e0fa90e-282e-420e-9ef0-616b4405badc.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/d2/part-00009-6e0044a4-66de-445d-a3b6-a9677c7cf09f.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/d2/part-00009-6e0044a4-66de-445d-a3b6-a9677c7cf09f.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/dD/part-00000-1363075f-bb86-4349-bbe0-9e25d1e01873.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/dD/part-00000-1363075f-bb86-4349-bbe0-9e25d1e01873.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/dR/part-00056-6ee8afe2-9a97-4148-aaa3-63c29a154a2e.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/dR/part-00056-6ee8afe2-9a97-4148-aaa3-63c29a154a2e.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/eD/part-00029-e49d438a-bcc4-401b-8b87-10bf8e421dd1.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/eD/part-00029-e49d438a-bcc4-401b-8b87-10bf8e421dd1.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/ev/part-00039-0d497b2d-8c3d-4539-8222-e9dbda28b768.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/ev/part-00039-0d497b2d-8c3d-4539-8222-e9dbda28b768.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/fh/part-00014-f5160b94-c123-4eed-9cde-b5fea29637be.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/fh/part-00014-f5160b94-c123-4eed-9cde-b5fea29637be.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/gM/part-00024-38a93f96-421f-4e16-8905-4fc4fb358d8a.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/gM/part-00024-38a93f96-421f-4e16-8905-4fc4fb358d8a.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/ia/part-00031-9feb0dce-6f44-4da6-a2ea-e5116d09cc2c.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/ia/part-00031-9feb0dce-6f44-4da6-a2ea-e5116d09cc2c.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/ip/part-00016-c24c4267-fbc0-4ae0-ae0a-b3652e1ac0ee.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/ip/part-00016-c24c4267-fbc0-4ae0-ae0a-b3652e1ac0ee.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/j7/part-00034-c507b451-6d28-4e80-8c84-431ac4114e83.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/j7/part-00034-c507b451-6d28-4e80-8c84-431ac4114e83.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/j9/part-00063-2dad4913-6407-430d-b1d3-2c3c111830b8.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/j9/part-00063-2dad4913-6407-430d-b1d3-2c3c111830b8.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/jD/part-00025-aba3487c-62c0-4743-b78e-ba7be5494d21.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/jD/part-00025-aba3487c-62c0-4743-b78e-ba7be5494d21.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/jZ/part-00028-57126dbd-869f-4ac8-b28e-0b2e2599fb63.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/jZ/part-00028-57126dbd-869f-4ac8-b28e-0b2e2599fb63.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/lP/part-00020-143f9668-7f1a-4af2-97b9-8f5c078b48ee.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/lP/part-00020-143f9668-7f1a-4af2-97b9-8f5c078b48ee.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/mX/part-00037-f60b8a34-3c43-4e9f-a82e-2daf9ce070ac.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/mX/part-00037-f60b8a34-3c43-4e9f-a82e-2daf9ce070ac.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/ma/part-00079-a1e2177c-0968-4657-b448-6017b465e701.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/ma/part-00079-a1e2177c-0968-4657-b448-6017b465e701.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/nc/part-00027-2856a17a-8c1b-4e01-985e-9f019d4af815.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/nc/part-00027-2856a17a-8c1b-4e01-985e-9f019d4af815.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/oG/part-00066-7a20116d-a4a9-4d34-bedd-a179ee40d373.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/oG/part-00066-7a20116d-a4a9-4d34-bedd-a179ee40d373.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/pw/part-00024-17021728-8d4a-40c1-a19e-0a3ec0dde9fb.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/pw/part-00024-17021728-8d4a-40c1-a19e-0a3ec0dde9fb.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/q7/part-00041-15231660-8cc5-447f-87f8-936efb95e64c.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/q7/part-00041-15231660-8cc5-447f-87f8-936efb95e64c.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/rc/part-00064-ed14899a-d10c-45db-8adf-3d8adf26452c.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/rc/part-00064-ed14899a-d10c-45db-8adf-3d8adf26452c.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/rv/part-00003-3d6f5d79-b435-4616-8f43-7348f7065068.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/rv/part-00003-3d6f5d79-b435-4616-8f43-7348f7065068.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/sF/part-00017-97d493cf-2d64-4293-9908-61d0262066b5.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/sF/part-00017-97d493cf-2d64-4293-9908-61d0262066b5.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/sQ/part-00015-7b76522c-479c-496e-8e2b-468d60f303d5.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/sQ/part-00015-7b76522c-479c-496e-8e2b-468d60f303d5.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/tS/part-00052-e83c55be-be73-42f4-b409-2a9de710864b.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/tS/part-00052-e83c55be-be73-42f4-b409-2a9de710864b.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/u3/part-00062-75139a11-643c-4ac4-adff-9c6792fc5ac4.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/u3/part-00062-75139a11-643c-4ac4-adff-9c6792fc5ac4.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/uX/part-00026-cb38386a-7866-455f-b3b5-26b57174591f.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/uX/part-00026-cb38386a-7866-455f-b3b5-26b57174591f.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/vb/part-00069-057cf04d-95e3-4a5a-9b06-c80719131585.c000.snappy.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/vb/part-00069-057cf04d-95e3-4a5a-9b06-c80719131585.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/wV/part-00013-a578ae3e-a6fb-4fc9-bc3d-83ae5e796d64.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/wV/part-00013-a578ae3e-a6fb-4fc9-bc3d-83ae5e796d64.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/xB/part-00004-85f8ab87-e2f8-48e4-8ad6-a98825b6213d.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/xB/part-00004-85f8ab87-e2f8-48e4-8ad6-a98825b6213d.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/xL/part-00074-2d34ec5a-3f4e-481b-8119-19f13e571cb1.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmsuisse/lakeapi/2b42dde15369a87fcf21ec9723347232d46708fc/tests/data/delta/table_w_col_map/xL/part-00074-2d34ec5a-3f4e-481b-8119-19f13e571cb1.c000.snappy.parquet -------------------------------------------------------------------------------- /tests/data/delta/table_w_col_map/xq/part-00070-6c760e79-76c0-44ad-9e8f-70b2b34e88ec.c000.snappy.parquet: -------------------------------------------------------------------------------- 
def naive_json_1(res: ds.Dataset, fn: str):
    """Write every row of *res* to the file *fn* as one JSON array.

    The dataset is consumed in record batches of 1000 rows; each row is
    serialized with ``json.dumps`` (values json cannot encode natively fall
    back to ``str``) and rows are separated by ``", "``.

    Original author's note: this works, no matter how often executed.
    """
    import json

    # Empty before the first row, ", " before every following one.
    separator = ""
    with open(fn, "w") as out:
        out.write("[")
        for record_batch in res.to_batches(batch_size=1000):
            for row in record_batch.to_pylist():
                out.write(separator)
                out.write(json.dumps(row, default=str))
                separator = ", "
        out.write("]")
# Start every run from an empty "out" directory. The existence check keeps a
# first run (no "out" directory yet) from failing with FileNotFoundError.
if os.path.exists("out"):
    shutil.rmtree("out")
os.makedirs("out", exist_ok=True)

# Repeat the same export 100 times and print thread count / resident memory
# after each export step so growth between iterations becomes visible.
for i in range(100):
    print(f"{i} run")
    with duckdb.connect() as con:
        dt = deltalake.DeltaTable(
            "tests/data/delta/fake",
        )
        ds = dt.to_pyarrow_dataset()

        # Expose the delta table to DuckDB under the name "fake".
        con.register("fake", ds)

        # Step 1: export through Python (record batches -> json.dumps).
        naive_json_1(con.execute("SELECT * FROM fake"), f"out/{str(uuid4())}.json")
        count = active_count()
        rss = process.memory_info().rss
        print(f"After Naive impl: NR Threads: {count}. RAM: {rss/1000/1000} MB")

        # Step 2: export through DuckDB's own COPY ... TO (FORMAT JSON).
        con.execute(
            f"CREATE TEMP VIEW t1 AS SELECT * FROM fake order by 1; COPY (SELECT * FROM t1) TO 'out/{str(uuid4())}.json' (FORMAT JSON, Array True);drop VIEW t1"
        )  # Works only once
        count = active_count()
        rss = process.memory_info().rss
        print(f"NR Threads: {count}. RAM: {rss/1000/1000} MB")
def test_col_map_filter():
    """Filter the column-mapped delta table by name for every engine.

    The query parameter is spelled ``Super_Name_`` while the key in the
    returned rows is ``Super Name_``; exactly one matching row is expected.
    """
    for engine_name in engines:
        url = f"/api/v1/deltatest/table_w_col_map?limit=50&format=json&Super_Name_=John Duncan&%24engine={engine_name}"
        response = client.get(url, auth=auth)
        assert response.status_code == 200

        rows = response.json()
        assert len(rows) == 1
        assert all(row["Super Name_"] == "John Duncan" for row in rows)
def test_returns_metadatadeta():
    """Check ``metadata_detail`` of ``complex_fruits`` without jsonification.

    The ``vitamines`` column must be reported with a ``list<`` type string and
    ``person`` with a ``struct<`` type string. Neither may show up in
    ``max_string_lengths``, whereas ``fruits`` must.
    """
    response = client.get(
        "/api/v1/complexer/complex_fruits/metadata_detail",
        auth=auth,
    )
    assert response.status_code == 200

    jsd = response.json()

    def find_column(name):
        # First schema entry with that name, or None. Using next() instead of
        # ``[...][0]`` lets a missing column fail the assertion below rather
        # than raising an IndexError first.
        return next((p for p in jsd["data_schema"] if p["name"] == name), None)

    vit = find_column("vitamines")
    assert vit is not None, "column 'vitamines' missing from data_schema"
    assert vit["type"]["type_str"].lower().startswith("list<")

    per = find_column("person")
    assert per is not None, "column 'person' missing from data_schema"
    assert per["type"]["type_str"].lower().startswith("struct<")

    assert "person" not in jsd["max_string_lengths"]
    assert "vitamines" not in jsd["max_string_lengths"]
    assert "fruits" in jsd["max_string_lengths"]
def test_returns_csv():
    """Request ``complex_fruits`` as CSV for every engine.

    In the first data row the ``vitamines`` and ``person`` cells must be
    strings that parse as a JSON list and a JSON object respectively.
    """
    import csv
    import json

    for engine_name in engines:
        response = client.post(
            f"/api/v1/complexer/complex_fruits?limit=10&format=csv&%24engine={engine_name}",
            auth=auth,
        )
        assert response.status_code == 200

        first_row = next(csv.DictReader(response.text.splitlines()))
        vitamines_cell = first_row["vitamines"]
        person_cell = first_row["person"]

        assert isinstance(vitamines_cell, str)
        assert isinstance(person_cell, str)
        assert isinstance(json.loads(vitamines_cell), list)
        assert isinstance(json.loads(person_cell), dict)
def test_duckdb_fruits_fruit_param():
    """Filter ``fruits_duck`` by ``fruits=banana`` and compare the first two rows.

    The identical request is sent twice and both responses must match
    (presumably to cover a repeated read of the same source; confirm).
    """
    url = "/api/v1/test/fruits_duck?limit=2&format=json&fruits=banana"
    expected_rows = [
        {"A": 1, "fruits": "banana", "B": 5, "cars": "beetle"},
        {"A": 2, "fruits": "banana", "B": 4, "cars": "audi"},
    ]
    for _attempt in range(2):
        response = client.get(url, auth=auth)
        assert response.status_code == 200
        assert response.json() == expected_rows
def test_no_nearby():
    """POST an empty body to ``fake_delta`` and expect plain rows.

    For every engine the ndjson response must contain exactly 50 rows and the
    first row must not carry a ``nearby`` key.
    """
    for engine_name in engines:
        response = client.post(
            f"/api/v1/test/fake_delta?limit=50&format=ndjson&%24engine={engine_name}",
            auth=auth,
            json={},
        )
        assert response.status_code == 200

        # ndjson: one JSON document per non-empty line.
        rows = [json.loads(line) for line in response.text.split("\n") if line]
        assert len(rows) == 50
        assert "nearby" not in rows[0]
def test_openid():
    """Inspect ``/openapi.json`` of an app built with each default engine.

    Verifies that selected component schemas exist, that
    ``/api/v1/startest/fruits`` exposes only ``get`` and that
    ``/api/v1/startest/fruits_partition`` exposes only ``post``.
    """
    for engine in engines:
        client = TestClient(get_app(default_engine=engine))
        response = client.get("/openapi.json", auth=auth)
        assert response.status_code == 200

        document = response.json()
        assert isinstance(document, dict)
        paths = document["paths"]
        schema = document["components"]["schemas"]

        ## TODO : Add more tests for different endpoints
        assert "test_fake_delta_partition" in schema
        assert "test_fake_polars_postParameter" in schema
        assert "abc" in schema["test_fake_polars_postParameter"]["properties"]

        assert "/api/v1/startest/fruits" in paths
        fruits_ops = paths["/api/v1/startest/fruits"]
        assert isinstance(fruits_ops, dict)
        assert len(fruits_ops) == 1
        assert "get" in fruits_ops

        partition_ops = paths["/api/v1/startest/fruits_partition"]
        assert isinstance(partition_ops, dict)
        assert len(partition_ops) == 1
        assert "post" in partition_ops
@pytest.mark.parametrize("engine", engines)
def test_data_csv_custom(engine):
    """Request CSV with a custom ``$encoding`` and ``$csv_separator``.

    Two variants are checked in order: utf-16-be with ``|`` and cp850 with a
    tab (sent in the URL as the two characters backslash + ``t``). In both
    cases the header and the first data line, stripped of double quotes, must
    describe the audi/banana row.
    """
    expected_row = {"A": "2", "fruits": "banana", "B": "4", "cars": "audi"}
    # (encoding, separator as written into the URL, separator found in the body)
    variants = (
        ("utf-16-be", "|", "|"),
        ("cp850", "\\t", "\t"),
    )

    for encoding, url_separator, separator in variants:
        response = client.get(
            f"/api/v1/test/fruits?limit=1&format=csv&cars=audi&%24engine={engine}&$encoding={encoding}&$csv_separator={url_separator}",
            auth=auth,
        )
        assert response.status_code == 200

        text = response.content.decode(encoding)
        assert separator in text

        def split_fields(raw_line):
            # Drop the quoting, then cut at the separator of this variant.
            return raw_line.replace('"', "").split(separator)

        lines = text.replace("\r\n", "\n").split("\n")
        header = split_fields(lines[0])
        values = split_fields(lines[1])
        assert dict(zip(header, values)) == expected_row
def test_data_arrow_stream():
    """Request ``fruits`` as an Arrow IPC stream for every engine.

    The response body is opened as an IPC stream and converted to pandas; the
    first value of column ``A`` must be 2.
    """
    for e in engines:
        response = client.get(
            f"/api/v1/test/fruits?limit=1&format=arrow-stream&cars=audi&%24engine={e}",
            auth=auth,
        )
        assert response.status_code == 200

        # Read the stream straight from the response bytes. The previous
        # version went through tempfile.mktemp(), which is deprecated and
        # race-prone, and it never removed the file it created.
        with pa.ipc.open_stream(pa.BufferReader(response.content)) as reader:
            df = reader.read_pandas()
        assert df["A"][0] == 2
"my_empty_col": None, 30 | } 31 | ] 32 | response = client.get( 33 | f"/api/v1/test/fruits_partition?limit=1&format=json&fruits=ananas&%24engine={engine}", 34 | auth=auth, 35 | ) 36 | assert response.status_code == 200 37 | assert response.json() == [ 38 | { 39 | "A": 9, 40 | "fruits": "ananas", 41 | "B": 9, 42 | "cars": "fiat", 43 | "my_empty_col": None, 44 | } 45 | ] 46 | 47 | 48 | @pytest.mark.parametrize("engine", engines) 49 | def test_data_partition_mod(engine): 50 | for _ in range(2): 51 | response = client.get( 52 | f"/api/v1/test/fruits_partition_mod?limit=1&format=json&cars=audi&%24engine={engine}", 53 | auth=auth, # works because of implicit parameters 54 | ) 55 | assert response.status_code == 200 56 | assert response.json() == [ 57 | { 58 | "A": 2, 59 | "fruits": "banana", 60 | "B": 4, 61 | "cars": "audi", 62 | } 63 | ] 64 | response = client.post( 65 | f"/api/v1/test/fruits_partition_mod?limit=1&format=json&%24engine={engine}", 66 | auth=auth, 67 | json={"cars_in": ["audi"]}, 68 | ) 69 | assert response.status_code == 200 70 | assert response.json() == [ 71 | { 72 | "A": 2, 73 | "fruits": "banana", 74 | "B": 4, 75 | "cars": "audi", 76 | } 77 | ] 78 | 79 | 80 | @pytest.mark.parametrize("engine", engines) 81 | def test_data_partition_int(engine): 82 | for _ in range(2): 83 | response = client.get( 84 | f"/api/v1/test/fruits_partition_int?limit=1&format=json&A=2&%24engine={engine}", 85 | auth=auth, # works because of implicit parameters 86 | ) 87 | assert response.status_code == 200 88 | assert response.json() == [ 89 | { 90 | "A": 2, 91 | "fruits": "banana", 92 | "B": 4, 93 | "cars": "audi", 94 | } 95 | ] 96 | -------------------------------------------------------------------------------- /tests/test_performance.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | from .utils import get_app, get_auth 3 | import time 4 | import sys 5 | import asyncio 6 | 7 | 
def test_async_execution():
    """Fire many requests as asyncio tasks; each must finish in under 1 second.

    Original intent: simulate a lot of async requests and guarantee that the
    execution is under 1 second.

    For 100 rounds, every engine and every format ("json", "csv", "arrow",
    "parquet"), four endpoints are requested. Note that the coroutine below
    contains no ``await``: ``client.get`` is a plain synchronous call.
    """
    # Same four endpoints, in the same order, as the original four coroutines.
    url_templates = (
        "/api/v1/test/fake_delta?limit=10000&format=json&%24engine={engine}&format={format}",
        "/api/v1/test/fruits_partition?limit=10000&format=json&cars=audi&%24engine={engine}&format={format}",
        "/api/v1/test/fruits_sortby_desc?limit=10000&format=json&cars=audi&%24engine={engine}&format={format}",
        "/api/v1/test/fruits_sortby_asc?limit=10000&format=json&cars=audi&%24engine={engine}&format={format}",
    )

    async def timed_get(url_template, engine, format):
        # Time a single request and enforce the one-second budget.
        started = time.time()
        response = client.get(
            url_template.format(engine=engine, format=format),
            auth=auth,
        )
        finished = time.time()
        assert response.status_code == 200

        duration = finished - started
        print(f"Engine {engine} took {duration} seconds with format {format}")
        assert duration < 1.0

    async def main():
        tasks = [
            asyncio.create_task(timed_get(template, engine, fmt))
            for _ in range(100)
            for engine in engines
            for fmt in ("json", "csv", "arrow", "parquet")
            for template in url_templates
        ]
        print(f"Running {len(tasks)} tasks")
        await asyncio.gather(*tasks)

    asyncio.run(main())
def fake_token(**kwargs):
    """Build a deterministic placeholder token string for tests.

    Args:
        **kwargs: Arbitrary keyword arguments to encode into the token.

    Returns:
        ``"fake_token"`` when no keyword arguments are given; otherwise
        ``"fake_token_"`` followed by comma-separated ``key=value`` pairs,
        in the order the arguments were passed.
    """
    # Guard clause: nothing to encode, return the bare token.
    if not kwargs:
        return "fake_token"
    return "fake_token_" + ",".join(f"{k}={v}" for k, v in kwargs.items())
def test_account_key():
    """Account-key options must come back unchanged for both flavors.

    The same options dict is converted once with ``object_store`` and once
    with ``fsspec``; each result is compared to the expected mapping.
    """
    account_options = {"account_name": "blubb", "account_key": "nix"}
    url = "https://blubb.blob.core.windows.net/xyz/abc"

    for flavor in ("object_store", "fsspec"):
        _, converted = _convert_options(
            url,
            account_options,
            flavor=flavor,
            token_retrieval_func=FakeCredential,
        )
        assert converted == {"account_name": "blubb", "account_key": "nix"}
def test_filter_country():
    """Filtering the sqlite customers endpoint by ``Country=Germany`` yields 4 rows."""
    url = "/api/v1/sqlite/sqlite_customers?format=json&limit=100&Country=Germany"
    resp = client.get(url, auth=auth)
    assert resp.status_code == 200

    rows = resp.json()
    assert len(rows) == 4
def get_app(default_engine="duckdb"):
    """Build a FastAPI app with LakeAPI initialised from ``config_test.yml``.

    Args:
        default_engine: Stored as ``default_engine`` on the LakeAPI config.
            Defaults to ``"duckdb"``.

    Returns:
        The ``FastAPI`` instance, with LakeAPI initialised and with a
        request-validation error handler and a ``/`` route registered.
    """
    import bmsdna.lakeapi

    # Set before LakeAPI is initialised.
    # NOTE(review): presumably referenced from config_test.yml — confirm.
    os.environ["MY_SQL_PWD"] = "MyPass@word4tests"
    app = FastAPI()
    def_cfg = bmsdna.lakeapi.get_default_config()
    cfg = dataclasses.replace(
        def_cfg,
        enable_sql_endpoint=True,
        data_path="tests/data",
        default_engine=default_engine,
    )

    async def _init():
        await bmsdna.lakeapi.init_lakeapi(app, True, cfg, "config_test.yml")

    # _init() returns nothing, so the result of asyncio.run is not kept.
    asyncio.run(_init())

    @app.exception_handler(RequestValidationError)
    async def validation_exception_handler(
        request: Request, exc: RequestValidationError
    ):
        # Flatten the validation error onto one line for the log and payload.
        # NOTE(review): as written the second replace swaps a single space for
        # a single space, i.e. it does nothing — confirm whether a multi-space
        # pattern was intended.
        exc_str = f"{exc}".replace("\n", " ").replace(" ", " ")
        logging.error(f"{request}: {exc_str}")
        content = {"status_code": 10422, "message": exc_str, "data": None}
        return JSONResponse(
            content=content, status_code=status.HTTP_422_UNPROCESSABLE_ENTITY
        )

    @app.get("/")
    async def root(req: Request):
        return {"User": req.user["username"]}

    return app
def create_rows_faker(num=1):
    """Generate ``num`` fake rows as a list of dicts.

    Text fields come from ``Faker``; ``randomdata`` and ``abc`` come from
    ``random``. The coordinates are not random: they are stepped linearly
    from ``(lat1, lon1)`` by the row index.

    Args:
        num: Number of rows to generate. ``0`` yields an empty list.

    Returns:
        A list of ``num`` dicts with the keys ``name``, ``address``,
        ``email``, ``bs``, ``city``, ``state``, ``date_time``, ``paragraph``,
        ``Conrad``, ``randomdata``, ``abc``, ``geo_lat`` and ``geo_lon``.
    """
    fake = Faker()
    output = [
        {
            # The literal used to list "name" twice; the first value was
            # always overwritten, so only one entry is kept.
            "name": fake.name(),
            "address": fake.address(),
            "email": fake.email(),
            "bs": fake.bs(),
            "city": fake.city(),
            "state": fake.state(),
            "date_time": fake.date_time(),
            "paragraph": fake.paragraph(),
            "Conrad": fake.catch_phrase(),
            "randomdata": random.randint(1000, 2000),
            "abc": random.choice(["a", "b", "c"]),
            # not really random, but good to test
            # NOTE(review): the latitude step uses (lat1 - lat2) while the
            # longitude step uses (lon2 - lon1) — confirm this asymmetry is
            # intended before changing it; tests may rely on these values.
            "geo_lat": lat1 + (lat1 - lat2) / num * x,
            "geo_lon": lon1 + (lon2 - lon1) / num * x,
        }
        for x in range(num)
    ]
    return output