├── .gitignore ├── .github ├── dependabot.yml ├── FUNDING.yml └── workflows │ ├── release.yml │ └── build.yml ├── docker-compose.yml ├── License ├── src ├── odbc_writer │ ├── binary.rs │ ├── boolean.rs │ ├── text.rs │ ├── map_arrow_to_odbc.rs │ └── timestamp.rs ├── reader │ ├── decimal.rs │ ├── binary.rs │ ├── time.rs │ ├── to_record_batch.rs │ ├── concurrent_odbc_reader.rs │ ├── text.rs │ ├── map_odbc_to_arrow.rs │ └── odbc_reader.rs ├── lib.rs ├── error.rs ├── decimal.rs ├── date_time.rs ├── schema.rs ├── reader.rs └── odbc_writer.rs ├── Contributing.md ├── Cargo.toml ├── Readme.md └── CHANGELOG.md /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | - package-ecosystem: github-actions 9 | directory: "/" 10 | schedule: 11 | interval: daily -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | # Docker compose file used for local development 2 | 3 | services: 4 | postgres: 5 | image: postgres:17 6 | ports: 7 | - "5432:5432" 8 | environment: 9 | POSTGRES_DB: test 10 | POSTGRES_USER: test 11 | POSTGRES_PASSWORD: test 12 | 13 | # Microsoft SQL database used for testing 14 | mssql: 15 | image: mcr.microsoft.com/mssql/server:2019-CU5-ubuntu-18.04 16 | ports: 17 | - 1433:1433 18 | 19 | environment: 20 | - MSSQL_SA_PASSWORD=My@Test@Password1 21 | command: ["/opt/mssql/bin/sqlservr", "--accept-eula", "--reset-sa-password"] -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [pacman82] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] -------------------------------------------------------------------------------- /License: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Markus Klein 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this 
permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /src/odbc_writer/binary.rs: -------------------------------------------------------------------------------- 1 | use arrow::array::{Array, BinaryArray}; 2 | use odbc_api::buffers::{AnySliceMut, BufferDesc}; 3 | 4 | use super::{WriteStrategy, WriterError}; 5 | 6 | pub struct VariadicBinary { 7 | buffer_start_size: usize, 8 | } 9 | 10 | impl VariadicBinary { 11 | pub fn new(buffer_start_size: usize) -> Self { 12 | VariadicBinary { buffer_start_size } 13 | } 14 | } 15 | 16 | impl WriteStrategy for VariadicBinary { 17 | fn buffer_desc(&self) -> BufferDesc { 18 | BufferDesc::Binary { 19 | length: self.buffer_start_size, 20 | } 21 | } 22 | 23 | fn write_rows( 24 | &self, 25 | param_offset: usize, 26 | to: AnySliceMut<'_>, 27 | from: &dyn Array, 28 | ) -> Result<(), WriterError> { 29 | let from = from.as_any().downcast_ref::<BinaryArray>().unwrap(); 30 | let mut to = to.as_bin_view().unwrap(); 31 | for (row_index, element) in from.iter().enumerate() { 32 | if let Some(bytes) = element { 33 | to.ensure_max_element_length(bytes.len(), row_index) 34 | .map_err(WriterError::RebindBuffer)?; 35 | to.set_cell(param_offset + row_index, Some(bytes)) 36 | } else { 37 | to.set_cell(param_offset + row_index, None); 38 | } 39 | } 40 | Ok(()) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /Contributing.md: -------------------------------------------------------------------------------- 1 | # Contributions 2 | 3 | Whether they come in the form of code, interesting feature suggestions, design critique, or bug reports, all contributions are welcome. Please start an issue before investing a lot of work. This helps avoid situations where I would feel the need to reject a large body of work, and a lot of your time would have been wasted. `arrow-odbc` is a pet project and a labor of love, which implies that I maintain it in my spare time. Please understand that I may not always react immediately. If you contribute code to fix a bug, please also contribute a test which would have caught it. Happy contributing. 4 | 5 | ## Local build and test setup 6 | 7 | Running local tests currently requires: 8 | 9 | * Docker and Docker compose. 10 | * An ODBC driver manager 11 | * A driver for Microsoft SQL Server 12 | * Rust toolchain (cargo) 13 | 14 | You can install these requirements from here: 15 | 16 | * Install Rust compiler and Cargo. Follow the instructions on [this site](https://www.rust-lang.org/en-US/install.html). 17 | * Install PostgreSQL ODBC drivers 18 | * [Microsoft ODBC Driver 18 for SQL Server](https://docs.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver15). 19 | * An ODBC driver manager, if you are not on Windows. 20 | 21 | With docker installed run: 22 | 23 | ```shell 24 | docker-compose up 25 | ``` 26 | 27 | This starts the relational databases used for testing.
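Before running the tests, it can be worth checking that the ODBC driver manager finds the SQL Server driver and that the container accepts connections. The following is only a minimal connectivity sketch: it assumes the connection string from the crate documentation, and the driver name may differ on your machine.

```rust
use arrow_odbc::odbc_api::{ConnectionOptions, Environment};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let env = Environment::new()?;
    // Connection string mirrors the one used in the crate documentation. Adjust the
    // driver name if your local installation registers it differently.
    let conn = env.connect_with_connection_string(
        "Driver={ODBC Driver 18 for SQL Server};Server=localhost;UID=SA;PWD=My@Test@Password1;",
        ConnectionOptions::default(),
    )?;
    // No parameters, no timeout. If this succeeds, driver and container are reachable.
    conn.execute("SELECT 1", (), None)?;
    Ok(())
}
```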
28 | 29 | We now can execute the tests in Rust typical fashion using: 30 | 31 | ```shell 32 | cargo test 33 | ``` 34 | -------------------------------------------------------------------------------- /src/reader/decimal.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use arrow::array::{ArrayRef, Decimal128Builder}; 4 | use odbc_api::{ 5 | buffers::{AnySlice, BufferDesc}, 6 | decimal_text_to_i128, 7 | }; 8 | 9 | use super::{MappingError, ReadStrategy}; 10 | 11 | pub struct Decimal { 12 | precision: u8, 13 | /// We know scale to be non-negative, yet we can save us some conversions storing it as i8. 14 | scale: i8, 15 | } 16 | 17 | impl Decimal { 18 | pub fn new(precision: u8, scale: i8) -> Self { 19 | Self { precision, scale } 20 | } 21 | } 22 | 23 | impl ReadStrategy for Decimal { 24 | fn buffer_desc(&self) -> BufferDesc { 25 | BufferDesc::Text { 26 | // Must be able to hold num precision digits a sign and a decimal point 27 | max_str_len: self.precision as usize + 2, 28 | } 29 | } 30 | 31 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 32 | let view = column_view.as_text_view().unwrap(); 33 | let mut builder = Decimal128Builder::new(); 34 | let scale = self.scale as usize; 35 | 36 | for opt in view.iter() { 37 | if let Some(text) = opt { 38 | let num = decimal_text_to_i128(text, scale); 39 | builder.append_value(num); 40 | } else { 41 | builder.append_null(); 42 | } 43 | } 44 | 45 | Ok(Arc::new( 46 | builder 47 | .finish() 48 | .with_precision_and_scale(self.precision, self.scale) 49 | .unwrap(), 50 | )) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/odbc_writer/boolean.rs: -------------------------------------------------------------------------------- 1 | use arrow::array::{Array, BooleanArray}; 2 | use odbc_api::{ 3 | Bit, 4 | buffers::{AnySliceMut, BufferDesc}, 5 | }; 6 | 7 | use crate::WriterError; 8 | 9 | use super::WriteStrategy; 10 | 11 | pub fn boolean_to_bit(nullable: bool) -> Box { 12 | if nullable { 13 | Box::new(Nullable) 14 | } else { 15 | Box::new(NonNullable) 16 | } 17 | } 18 | 19 | struct Nullable; 20 | 21 | impl WriteStrategy for Nullable { 22 | fn buffer_desc(&self) -> BufferDesc { 23 | BufferDesc::Bit { nullable: true } 24 | } 25 | 26 | fn write_rows( 27 | &self, 28 | param_offset: usize, 29 | column_buf: AnySliceMut<'_>, 30 | array: &dyn Array, 31 | ) -> Result<(), WriterError> { 32 | let from = array.as_any().downcast_ref::().unwrap(); 33 | let mut to = column_buf.as_nullable_slice::().unwrap(); 34 | for (index, cell) in from.iter().enumerate() { 35 | to.set_cell(index + param_offset, cell.map(Bit::from_bool)) 36 | } 37 | Ok(()) 38 | } 39 | } 40 | 41 | struct NonNullable; 42 | 43 | impl WriteStrategy for NonNullable { 44 | fn buffer_desc(&self) -> BufferDesc { 45 | BufferDesc::Bit { nullable: false } 46 | } 47 | 48 | fn write_rows( 49 | &self, 50 | param_offset: usize, 51 | column_buf: AnySliceMut<'_>, 52 | array: &dyn Array, 53 | ) -> Result<(), WriterError> { 54 | let from = array.as_any().downcast_ref::().unwrap(); 55 | let to = column_buf.as_slice::().unwrap(); 56 | for index in 0..from.len() { 57 | to[index + param_offset] = Bit::from_bool(from.value(index)) 58 | } 59 | Ok(()) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release-plz 2 | 3 | 
permissions: 4 | pull-requests: write 5 | contents: write 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | 14 | # Release unpublished packages. 15 | release-plz-release: 16 | name: Release-plz release 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Install UnixODBC 20 | run: | 21 | apt-get update 22 | ACCEPT_EULA=Y apt-get install -y unixodbc-dev 23 | shell: sudo bash {0} 24 | - name: Install Rust toolchain 25 | uses: dtolnay/rust-toolchain@stable 26 | - name: Checkout repository 27 | uses: actions/checkout@v6 28 | with: 29 | fetch-depth: 0 30 | - name: Run release-plz 31 | uses: MarcoIeni/release-plz-action@v0.5 32 | with: 33 | command: release 34 | env: 35 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 36 | CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} 37 | 38 | # Create a PR with the new versions and changelog, preparing the next release. 39 | release-plz-pr: 40 | name: Release-plz PR 41 | runs-on: ubuntu-latest 42 | concurrency: 43 | group: release-plz-${{ github.ref }} 44 | cancel-in-progress: false 45 | steps: 46 | - name: Install UnixODBC 47 | run: | 48 | apt-get update 49 | ACCEPT_EULA=Y apt-get install -y unixodbc-dev 50 | shell: sudo bash {0} 51 | - name: Install Rust toolchain 52 | uses: dtolnay/rust-toolchain@stable 53 | - name: Checkout repository 54 | uses: actions/checkout@v6 55 | with: 56 | fetch-depth: 0 57 | - name: Run release-plz 58 | uses: MarcoIeni/release-plz-action@v0.5 59 | with: 60 | command: release-pr 61 | env: 62 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 63 | CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} -------------------------------------------------------------------------------- /src/reader/binary.rs: -------------------------------------------------------------------------------- 1 | use std::{convert::TryInto, sync::Arc}; 2 | 3 | use arrow::array::{ArrayRef, BinaryBuilder, FixedSizeBinaryBuilder}; 4 | use odbc_api::buffers::{AnySlice, BufferDesc}; 5 | 6 | use super::{MappingError, ReadStrategy}; 7 | 8 | pub struct Binary { 9 | /// Maximum length in bytes of elements 10 | max_len: usize, 11 | } 12 | 13 | impl Binary { 14 | pub fn new(max_len: usize) -> Self { 15 | Self { max_len } 16 | } 17 | } 18 | 19 | impl ReadStrategy for Binary { 20 | fn buffer_desc(&self) -> BufferDesc { 21 | BufferDesc::Binary { 22 | length: self.max_len, 23 | } 24 | } 25 | 26 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 27 | let view = column_view.as_bin_view().unwrap(); 28 | let mut builder = BinaryBuilder::new(); 29 | for value in view.iter() { 30 | if let Some(bytes) = value { 31 | builder.append_value(bytes); 32 | } else { 33 | builder.append_null(); 34 | } 35 | } 36 | Ok(Arc::new(builder.finish())) 37 | } 38 | } 39 | 40 | pub struct FixedSizedBinary { 41 | /// Length in bytes of elements 42 | len: u32, 43 | } 44 | 45 | impl FixedSizedBinary { 46 | pub fn new(len: u32) -> Self { 47 | Self { len } 48 | } 49 | } 50 | 51 | impl ReadStrategy for FixedSizedBinary { 52 | fn buffer_desc(&self) -> BufferDesc { 53 | BufferDesc::Binary { 54 | length: self.len as usize, 55 | } 56 | } 57 | 58 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 59 | let view = column_view.as_bin_view().unwrap(); 60 | let mut builder = FixedSizeBinaryBuilder::new(self.len.try_into().unwrap()); 61 | for value in view.iter() { 62 | if let Some(bytes) = value { 63 | builder.append_value(bytes).unwrap(); 64 | } else { 65 | builder.append_null(); 66 | } 67 | } 68 | Ok(Arc::new(builder.finish())) 69 | } 70 | } 71 | 
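The two read strategies above only describe how a single column travels from ODBC to Arrow: `buffer_desc` tells the reader which transit buffer to allocate, and `fill_arrow_array` converts each fetched column slice into an Arrow array. The sketch below shows how their output surfaces to a user of the crate; the table and column names are hypothetical, the rest mirrors the crate-level documentation.

```rust
use arrow_odbc::{
    arrow::array::BinaryArray,
    odbc_api::{ConnectionOptions, Environment},
    OdbcReaderBuilder,
};

const CONNECTION_STRING: &str =
    "Driver={ODBC Driver 18 for SQL Server};Server=localhost;UID=SA;PWD=My@Test@Password1;";

fn main() -> Result<(), anyhow::Error> {
    let odbc_environment = Environment::new()?;
    let connection = odbc_environment
        .connect_with_connection_string(CONNECTION_STRING, ConnectionOptions::default())?;
    // Hypothetical table with a VARBINARY column; it is read via the `Binary` strategy.
    let cursor = connection
        .execute("SELECT binary_col FROM HypotheticalTable", (), None)?
        .expect("SELECT statement must produce a cursor");
    let reader = OdbcReaderBuilder::new().build(cursor)?;
    for batch in reader {
        let batch = batch?;
        // Variable length binary data arrives as an Arrow `BinaryArray`.
        let col = batch
            .column(0)
            .as_any()
            .downcast_ref::<BinaryArray>()
            .unwrap();
        for value in col.iter().flatten() {
            println!("{} bytes", value.len());
        }
    }
    Ok(())
}
```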
-------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Fill Apache Arrow arrays from ODBC data sources. 2 | //! 3 | //! ## Usage 4 | //! 5 | //! ```no_run 6 | //! use arrow_odbc::{odbc_api::{Environment, ConnectionOptions}, OdbcReaderBuilder}; 7 | //! 8 | //! const CONNECTION_STRING: &str = "\ 9 | //! Driver={ODBC Driver 18 for SQL Server};\ 10 | //! Server=localhost;\ 11 | //! UID=SA;\ 12 | //! PWD=My@Test@Password1;\ 13 | //! "; 14 | //! 15 | //! fn main() -> Result<(), anyhow::Error> { 16 | //! // Your application is fine if you spin up only one Environment. 17 | //! let odbc_environment = Environment::new()?; 18 | //! 19 | //! // Connect with database. 20 | //! let connection = odbc_environment.connect_with_connection_string( 21 | //! CONNECTION_STRING, 22 | //! ConnectionOptions::default() 23 | //! )?; 24 | //! 25 | //! // This SQL statement does not require any arguments. 26 | //! let parameters = (); 27 | //! 28 | //! // Do not apply any timeout. 29 | //! let timeout_sec = None; 30 | //! 31 | //! // Execute query and create result set 32 | //! let cursor = connection 33 | //! .execute("SELECT * FROM MyTable", parameters, timeout_sec)? 34 | //! .expect("SELECT statement must produce a cursor"); 35 | //! 36 | //! // Read result set as arrow batches. Infer Arrow types automatically using the meta 37 | //! // information of `cursor`. 38 | //! let arrow_record_batches = OdbcReaderBuilder::new().build(cursor)?; 39 | //! 40 | //! for batch in arrow_record_batches { 41 | //! // ... process batch ... 42 | //! } 43 | //! 44 | //! Ok(()) 45 | //! } 46 | //! ``` 47 | mod date_time; 48 | mod decimal; 49 | mod error; 50 | mod odbc_writer; 51 | mod reader; 52 | mod schema; 53 | 54 | // Rexport odbc_api and arrow to make it easier for downstream crates to depend to avoid version 55 | // mismatches 56 | pub use arrow; 57 | pub use odbc_api; 58 | 59 | pub use self::{ 60 | error::Error, 61 | odbc_writer::{OdbcWriter, WriterError, insert_into_table, insert_statement_from_schema}, 62 | reader::{ 63 | BufferAllocationOptions, ColumnFailure, ConcurrentOdbcReader, OdbcReader, 64 | OdbcReaderBuilder, TextEncoding, 65 | }, 66 | schema::arrow_schema_from, 67 | }; 68 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | linux: 7 | name: Build & Test Linux 8 | runs-on: ubuntu-latest 9 | 10 | services: 11 | sqlserver: 12 | image: mcr.microsoft.com/mssql/server:2019-latest 13 | ports: 14 | - 1433:1433 15 | env: 16 | ACCEPT_EULA: Y 17 | SA_PASSWORD: My@Test@Password1 18 | 19 | postgres: 20 | image: postgres:17 21 | ports: 22 | - "5432:5432" 23 | env: 24 | POSTGRES_DB: test 25 | POSTGRES_USER: test 26 | POSTGRES_PASSWORD: test 27 | 28 | steps: 29 | - name: Checkout 30 | uses: actions/checkout@v6 31 | - name: Install latests rust toolchain 32 | uses: actions-rs/toolchain@v1 33 | with: 34 | toolchain: stable 35 | profile: minimal 36 | override: true 37 | # selecting a toolchain either by action or manual `rustup` calls should happen 38 | # before the plugin, as the cache uses the current rustc version as its cache key 39 | - name: Rust build cache 40 | uses: Swatinem/rust-cache@v2 41 | - name: Build 42 | run: cargo build 43 | - name: Install ODBC Drivers 44 | run: | 45 | apt-get 
update 46 | apt-get install -y unixodbc-dev odbc-postgresql msodbcsql18 47 | # Fix PostgreSQL driver paths 48 | sed --in-place 's/psqlodbca.so/\/usr\/lib\/x86_64-linux-gnu\/odbc\/psqlodbca.so/' /etc/odbcinst.ini 49 | sed --in-place 's/psqlodbcw.so/\/usr\/lib\/x86_64-linux-gnu\/odbc\/psqlodbcw.so/' /etc/odbcinst.ini 50 | shell: sudo bash {0} 51 | - name: Print odbcinst.ini 52 | run: cat /etc/odbcinst.ini 53 | - name: Test 54 | run: cargo test 55 | 56 | dependabot: 57 | needs: [linux] 58 | permissions: 59 | pull-requests: write 60 | contents: write 61 | runs-on: ubuntu-latest 62 | if: ${{ github.actor == 'dependabot[bot]' && github.event_name == 'pull_request'}} 63 | steps: 64 | - name: Merge Depenabot Pull Request 65 | run: gh pr merge --auto --rebase "$PR_URL" 66 | env: 67 | PR_URL: ${{github.event.pull_request.html_url}} 68 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 69 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | use crate::reader::ColumnFailure; 4 | 5 | /// A variation of things which can go wrong then creating an [`crate::OdbcReader`]. 6 | #[derive(Error, Debug)] 7 | pub enum Error { 8 | /// Failure to retrieve the number of columns from the result set. 9 | #[error("Unable to retrieve number of columns in result set.\n{0}")] 10 | UnableToRetrieveNumCols(odbc_api::Error), 11 | /// Indicates that the error is related to a specify column. 12 | #[error( 13 | "There is a problem with the SQL type of the column with name: {} and index {}:\n{source}", 14 | name, 15 | index 16 | )] 17 | ColumnFailure { 18 | // Name of the erroneous column 19 | name: String, 20 | // Zero based index of the erroneous column 21 | index: usize, 22 | // Cause of the error 23 | source: ColumnFailure, 24 | }, 25 | /// Failure during constructing an OdbcReader, if it turns out the buffer memory size limit is 26 | /// too small. 27 | #[error( 28 | "The Odbc buffer is limited to a size of {max_bytes_per_batch} bytes. Yet a single row \ 29 | does require up to {bytes_per_row}. This means the buffer is not large enough to hold a \ 30 | single row of data. Please note that the buffers in ODBC must always be able to hold the \ 31 | largest possible value of variadic types. You should either set a higher upper bound for \ 32 | the buffer size, or limit the length of the variadic columns." 33 | )] 34 | OdbcBufferTooSmall { 35 | max_bytes_per_batch: usize, 36 | bytes_per_row: usize, 37 | }, 38 | /// We use UTF-16 encoding on windows by default. Since UTF-8 locals on windows system can not 39 | /// be expected to be the default. Since we use wide methods the ODBC standard demands the 40 | /// encoding to be UTF-16. 41 | #[cfg(any(feature = "wide", all(not(feature = "narrow"), target_os = "windows")))] 42 | #[error( 43 | "Expected the database to return UTF-16, yet what came back was not valid UTF-16. Precise \ 44 | encoding error: {source}. This is likely a bug in your ODBC driver not supporting wide \ 45 | method calls correctly." 46 | )] 47 | EncodingInvalid { source: std::char::DecodeUtf16Error }, 48 | /// We expect UTF-8 to be the default on non-windows platforms. Yet still some systems are 49 | /// configured different. 50 | #[cfg(not(any(feature = "wide", all(not(feature = "narrow"), target_os = "windows"))))] 51 | #[error( 52 | "Expected the database to return UTF-8, yet what came back was not valid UTF-8. 
According \ 53 | to the ODBC standard the encoding is specified by your system locale. So you may want to \ 54 | check your environment and whether it specifies to use an UTF-8 charset. However it is \ 55 | worth noting that drivers take some liberty with the interpretation. Your connection \ 56 | string and other configurations specific to your database may also influence client side \ 57 | encoding. Precise encoding error: {source}" 58 | )] 59 | EncodingInvalid { source: std::string::FromUtf8Error }, 60 | } 61 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "arrow-odbc" 3 | version = "21.0.0" 4 | authors = ["Markus Klein"] 5 | edition = "2024" 6 | license = "MIT" 7 | repository = "https://github.com/pacman82/arrow-odbc" 8 | documentation = "https://docs.rs/arrow-odbc/" 9 | 10 | # A short blurb about the package. This is not rendered in any format when 11 | # uploaded to crates.io (aka this is not markdown). 12 | description = "Read/Write Apache Arrow arrays from/to ODBC data sources." 13 | 14 | # This is a list of up to five keywords that describe this crate. Keywords 15 | # are searchable on crates.io, and you may choose any words that would 16 | # help someone find this crate. 17 | keywords = ["odbc", "database", "sql", "arrow"] 18 | 19 | # This is a list of up to five categories where this crate would fit. 20 | # Categories are a fixed list available at crates.io/category_slugs, and 21 | # they must match exactly. 22 | categories = ["database"] 23 | 24 | # This points to a file under the package root (relative to this `Cargo.toml`). 25 | # The contents of this file are stored and indexed in the registry. 26 | # crates.io will render this file and place the result on the crate's page. 27 | readme = "Readme.md" 28 | 29 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 30 | 31 | [dependencies] 32 | arrow = { version = ">= 29, < 58", default-features = false, features = [ 33 | "chrono-tz", 34 | ] } 35 | # We use chrono for date conversions. 36 | chrono = "0.4.39" 37 | log = "0.4.27" 38 | thiserror = "2.0.12" 39 | 40 | odbc-api = ">= 17, < 21" 41 | atoi = "2.0.0" 42 | 43 | 44 | [dev-dependencies] 45 | anyhow = "1.0.97" 46 | # Function name macro is used to ensure unique table names in test 47 | stdext = "0.3.3" 48 | float_eq = "1.0.1" 49 | 50 | [features] 51 | 52 | # On linux we assume use of a UTF-8 locale. So we set the narrow features implying that for queries, 53 | # connection strings and error messages the driver and driver manager supply utf8-strings. This 54 | # might also be slightly faster since no transcoding between UTF-8 and UTF-16 is required. 55 | # Overall speed always depends on the driver, but it is reasonable to assume this is faster, more 56 | # importantly, UTF-8 is likely to be more battled tested on these platforms, while UTF-16 is "only" 57 | # required by the standard. 58 | 59 | # On windows we can not assume the default locale to be UTF-8, so we compile odbc-api with default 60 | # features implying the use of UTF-16 for queries, connection strings and error messages. This 61 | # should work on any system. However if you would like to use the narrow UTF-8 function calls on 62 | # windows systems you can set this feature flag. 63 | narrow = ["odbc-api/narrow"] 64 | 65 | # On linux we assume use of a UTF-8 locale. 
So we set the narrow features implying that for queries, 66 | # connection strings and error messages the driver and driver manager supply utf8-strings. This 67 | # might also be slightly faster since no transcoding between UTF-8 and UTF-16 is required. 68 | # Overall speed always depends on the driver, but it is reasonable to assume this is faster, more 69 | # importantly, UTF-8 is likely to be more battled tested on these platforms, while UTF-16 is "only" 70 | # required by the standard. However, if you are e.g. faced with a driver which does not use UTF-8, 71 | # but only ascii, or want to use the wide functions calls for any other reason on a non-windows 72 | # system you can set the `wide` feature flag to overwrite this behavior. 73 | wide = ["odbc-api/wide"] 74 | -------------------------------------------------------------------------------- /src/reader/time.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | ops::{Add, Mul}, 3 | sync::Arc, 4 | }; 5 | 6 | use arrow::array::{ 7 | ArrayRef, Time32MillisecondBuilder, Time64MicrosecondBuilder, Time64NanosecondBuilder, 8 | }; 9 | use atoi::FromRadix10; 10 | use odbc_api::{ 11 | buffers::{AnySlice, BufferDesc}, 12 | sys::Time, 13 | }; 14 | 15 | use super::{MappingError, ReadStrategy}; 16 | 17 | pub fn seconds_since_midnight(time: &Time) -> i32 { 18 | (time.hour as i32 * 60 + time.minute as i32) * 60 + time.second as i32 19 | } 20 | 21 | /// Strategy for fetching the time as text and parsing it into an `i32` which represents 22 | /// milliseconds after midnight. 23 | pub struct TimeMsI32; 24 | 25 | impl ReadStrategy for TimeMsI32 { 26 | fn buffer_desc(&self) -> BufferDesc { 27 | BufferDesc::Text { 28 | // Expected format is HH:MM:SS.fff 29 | max_str_len: 12, 30 | } 31 | } 32 | 33 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 34 | let view = column_view.as_text_view().unwrap(); 35 | let mut builder = Time32MillisecondBuilder::new(); 36 | 37 | for opt in view.iter() { 38 | if let Some(text) = opt { 39 | let num = ticks_since_midnights_from_text::(text, 3); 40 | builder.append_value(num); 41 | } else { 42 | builder.append_null(); 43 | } 44 | } 45 | 46 | Ok(Arc::new(builder.finish())) 47 | } 48 | } 49 | 50 | /// Strategy for fetching the time as text and parsing it into an `i32` which represents 51 | /// milliseconds after midnight. 52 | pub struct TimeUsI64; 53 | 54 | impl ReadStrategy for TimeUsI64 { 55 | fn buffer_desc(&self) -> BufferDesc { 56 | BufferDesc::Text { 57 | // Expected format is HH:MM:SS.ffffff 58 | max_str_len: 15, 59 | } 60 | } 61 | 62 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 63 | let view = column_view.as_text_view().unwrap(); 64 | let mut builder = Time64MicrosecondBuilder::new(); 65 | 66 | for opt in view.iter() { 67 | if let Some(text) = opt { 68 | let num = ticks_since_midnights_from_text::(text, 6); 69 | builder.append_value(num); 70 | } else { 71 | builder.append_null(); 72 | } 73 | } 74 | 75 | Ok(Arc::new(builder.finish())) 76 | } 77 | } 78 | 79 | /// Strategy for fetching the time as text and parsing it into an `i32` which represents 80 | /// milliseconds after midnight. 
81 | pub struct TimeNsI64; 82 | 83 | impl ReadStrategy for TimeNsI64 { 84 | fn buffer_desc(&self) -> BufferDesc { 85 | BufferDesc::Text { 86 | // Expected format is HH:MM:SS.fffffffff 87 | max_str_len: 18, 88 | } 89 | } 90 | 91 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 92 | let view = column_view.as_text_view().unwrap(); 93 | let mut builder = Time64NanosecondBuilder::new(); 94 | 95 | for opt in view.iter() { 96 | if let Some(text) = opt { 97 | let num = ticks_since_midnights_from_text::(text, 9); 98 | builder.append_value(num); 99 | } else { 100 | builder.append_null(); 101 | } 102 | } 103 | 104 | Ok(Arc::new(builder.finish())) 105 | } 106 | } 107 | 108 | fn ticks_since_midnights_from_text(text: &[u8], precision: u32) -> I 109 | where 110 | I: Tick, 111 | { 112 | // HH:MM:SS.fff 113 | // 012345678901 114 | let (hours, hours_digits) = I::from_radix_10(&text[0..2]); 115 | debug_assert_eq!(2, hours_digits); 116 | debug_assert_eq!(b':', text[2]); 117 | let (min, min_digits) = I::from_radix_10(&text[3..5]); 118 | debug_assert_eq!(2, min_digits); 119 | debug_assert_eq!(b':', text[5]); 120 | let (sec, sec_digits) = I::from_radix_10(&text[6..8]); 121 | debug_assert_eq!(2, sec_digits); 122 | // check for fractional part 123 | let (frac, frac_digits) = if text.len() > 9 { 124 | I::from_radix_10(&text[9..]) 125 | } else { 126 | (I::ZERO, 0) 127 | }; 128 | let frac = frac * I::TEN.pow(precision - frac_digits as u32); 129 | ((hours * I::SIXTY + min) * I::SIXTY + sec) * I::TEN.pow(precision) + frac 130 | } 131 | 132 | trait Tick: FromRadix10 + Mul + Add { 133 | const ZERO: Self; 134 | const TEN: Self; 135 | const SIXTY: Self; 136 | 137 | fn pow(self, exp: u32) -> Self; 138 | } 139 | 140 | impl Tick for i32 { 141 | const ZERO: Self = 0; 142 | const TEN: Self = 10; 143 | const SIXTY: Self = 60; 144 | 145 | fn pow(self, exp: u32) -> Self { 146 | self.pow(exp) 147 | } 148 | } 149 | 150 | impl Tick for i64 { 151 | const ZERO: Self = 0; 152 | const TEN: Self = 10; 153 | const SIXTY: Self = 60; 154 | 155 | fn pow(self, exp: u32) -> Self { 156 | self.pow(exp) 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/reader/to_record_batch.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use arrow::{ 4 | datatypes::{Schema, SchemaRef}, 5 | record_batch::RecordBatch, 6 | }; 7 | use log::info; 8 | use odbc_api::{ResultSetMetadata, buffers::ColumnarAnyBuffer}; 9 | 10 | use crate::{BufferAllocationOptions, ColumnFailure, Error, arrow_schema_from}; 11 | 12 | use super::{MappingError, ReadStrategy, TextEncoding, choose_column_strategy}; 13 | 14 | /// Transforms batches fetched from an ODBC data source in a 15 | /// [`odbc_api::bufferers::ColumnarAnyBuffer`] into arrow tables of the specified schemas. It also 16 | /// allocates the buffers to hold the ODBC batches with the matching buffer descriptions. 17 | pub struct ToRecordBatch { 18 | /// Must contain one item for each field in [`Self::schema`]. Encapsulates all the column type 19 | /// specific decisions which go into filling an Arrow array from an ODBC data source. 20 | column_strategies: Vec>, 21 | /// Arrow schema describing the arrays we want to fill from the Odbc data source. 
22 | schema: SchemaRef, 23 | } 24 | 25 | impl ToRecordBatch { 26 | pub fn new( 27 | cursor: &mut impl ResultSetMetadata, 28 | schema: Option, 29 | buffer_allocation_options: BufferAllocationOptions, 30 | map_value_errors_to_null: bool, 31 | dbms_name: Option<&str>, 32 | trim_fixed_sized_character_strings: bool, 33 | text_encoding: TextEncoding, 34 | ) -> Result { 35 | // Infer schema if not given by the user 36 | let schema = if let Some(schema) = schema { 37 | schema 38 | } else { 39 | Arc::new(arrow_schema_from( 40 | cursor, 41 | dbms_name, 42 | map_value_errors_to_null, 43 | )?) 44 | }; 45 | 46 | let column_strategies: Vec> = schema 47 | .fields() 48 | .iter() 49 | .enumerate() 50 | .map(|(index, field)| { 51 | let col_index = (index + 1).try_into().unwrap(); 52 | choose_column_strategy( 53 | field, 54 | cursor, 55 | col_index, 56 | buffer_allocation_options, 57 | map_value_errors_to_null, 58 | trim_fixed_sized_character_strings, 59 | text_encoding, 60 | ) 61 | .map_err(|cause| cause.into_crate_error(field.name().clone(), index)) 62 | }) 63 | .collect::>()?; 64 | 65 | Ok(ToRecordBatch { 66 | column_strategies, 67 | schema, 68 | }) 69 | } 70 | 71 | /// Logs buffer description and sizes 72 | pub fn row_size_in_bytes(&self) -> usize { 73 | let mut total_bytes = 0; 74 | for (read, field) in self.column_strategies.iter().zip(self.schema.fields()) { 75 | let name = field.name(); 76 | let desc = read.buffer_desc(); 77 | let bytes_per_row = desc.bytes_per_row(); 78 | info!("Column '{name}'\nBytes used per row: {bytes_per_row}"); 79 | total_bytes += bytes_per_row; 80 | } 81 | info!("Total memory usage per row for single transit buffer: {total_bytes}"); 82 | total_bytes 83 | } 84 | 85 | pub fn allocate_buffer( 86 | &self, 87 | max_batch_size: usize, 88 | fallibale_allocations: bool, 89 | ) -> Result { 90 | let descs = self.column_strategies.iter().map(|cs| cs.buffer_desc()); 91 | 92 | let row_set_buffer = if fallibale_allocations { 93 | ColumnarAnyBuffer::try_from_descs(max_batch_size, descs) 94 | .map_err(|err| map_allocation_error(err, &self.schema))? 
95 | } else { 96 | ColumnarAnyBuffer::from_descs(max_batch_size, descs) 97 | }; 98 | Ok(row_set_buffer) 99 | } 100 | 101 | pub fn schema(&self) -> &SchemaRef { 102 | &self.schema 103 | } 104 | 105 | pub fn buffer_to_record_batch( 106 | &self, 107 | odbc_buffer: &ColumnarAnyBuffer, 108 | ) -> Result { 109 | let arrow_columns = self 110 | .column_strategies 111 | .iter() 112 | .enumerate() 113 | .map(|(index, strat)| { 114 | let column_view = odbc_buffer.column(index); 115 | strat.fill_arrow_array(column_view) 116 | }) 117 | .collect::, _>>()?; 118 | let record_batch = RecordBatch::try_new(self.schema.clone(), arrow_columns).unwrap(); 119 | Ok(record_batch) 120 | } 121 | } 122 | 123 | fn map_allocation_error(error: odbc_api::Error, schema: &Schema) -> Error { 124 | match error { 125 | odbc_api::Error::TooLargeColumnBufferSize { 126 | buffer_index, 127 | num_elements, 128 | element_size, 129 | } => Error::ColumnFailure { 130 | name: schema.field(buffer_index as usize).name().clone(), 131 | index: buffer_index as usize, 132 | source: ColumnFailure::TooLarge { 133 | num_elements, 134 | element_size, 135 | }, 136 | }, 137 | _ => { 138 | panic!("Unexpected error in upstream ODBC api error library") 139 | } 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /src/odbc_writer/text.rs: -------------------------------------------------------------------------------- 1 | use arrow::array::{Array, LargeStringArray, StringArray}; 2 | use odbc_api::buffers::{AnySliceMut, BufferDesc, TextColumnSliceMut}; 3 | 4 | use super::{WriteStrategy, WriterError}; 5 | 6 | #[cfg(not(target_os = "windows"))] 7 | pub type Utf8ToNativeText = Utf8ToNarrow; 8 | 9 | #[cfg(target_os = "windows")] 10 | pub type Utf8ToNativeText = Utf8ToWide; 11 | 12 | #[cfg(not(target_os = "windows"))] 13 | pub type LargeUtf8ToNativeText = LargeUtf8ToNarrow; 14 | 15 | #[cfg(target_os = "windows")] 16 | pub type LargeUtf8ToNativeText = LargeUtf8ToWide; 17 | 18 | #[cfg_attr(target_os = "windows", allow(dead_code))] 19 | pub struct Utf8ToNarrow; 20 | 21 | impl WriteStrategy for Utf8ToNarrow { 22 | fn buffer_desc(&self) -> BufferDesc { 23 | BufferDesc::Text { max_str_len: 1 } 24 | } 25 | 26 | fn write_rows( 27 | &self, 28 | param_offset: usize, 29 | to: AnySliceMut<'_>, 30 | from: &dyn Array, 31 | ) -> Result<(), WriterError> { 32 | let from = from.as_any().downcast_ref::().unwrap(); 33 | let to = to.as_text_view().unwrap(); 34 | insert_into_narrow_slice(from.iter(), to, param_offset)?; 35 | Ok(()) 36 | } 37 | } 38 | 39 | #[cfg_attr(target_os = "windows", allow(dead_code))] 40 | pub struct LargeUtf8ToNarrow; 41 | 42 | impl WriteStrategy for LargeUtf8ToNarrow { 43 | fn buffer_desc(&self) -> BufferDesc { 44 | BufferDesc::Text { max_str_len: 1 } 45 | } 46 | 47 | fn write_rows( 48 | &self, 49 | param_offset: usize, 50 | to: AnySliceMut<'_>, 51 | from: &dyn Array, 52 | ) -> Result<(), WriterError> { 53 | let from = from.as_any().downcast_ref::().unwrap(); 54 | let to = to.as_text_view().unwrap(); 55 | insert_into_narrow_slice(from.iter(), to, param_offset)?; 56 | Ok(()) 57 | } 58 | } 59 | 60 | #[cfg_attr(target_os = "windows", allow(dead_code))] 61 | fn insert_into_narrow_slice<'a>( 62 | from: impl Iterator>, 63 | mut to: TextColumnSliceMut, 64 | param_offset: usize, 65 | ) -> Result<(), WriterError> { 66 | for (row_index, element) in from.enumerate() { 67 | // Total number of rows written into the inserter (`to`). 
This includes the values from the 68 | // current batch (`row_index`), as well as the ones from the previous batches 69 | // (`param_offset`). In case of reallocation, we need to copy all these values. Also, this 70 | // is the index of the element we currently want to write. 71 | let num_rows_written_so_far = param_offset + row_index; 72 | if let Some(text) = element { 73 | to.ensure_max_element_length(text.len(), num_rows_written_so_far) 74 | .map_err(WriterError::RebindBuffer)?; 75 | to.set_cell(num_rows_written_so_far, Some(text.as_bytes())) 76 | } else { 77 | to.set_cell(num_rows_written_so_far, None); 78 | } 79 | } 80 | Ok(()) 81 | } 82 | 83 | pub struct Utf8ToWide; 84 | 85 | impl WriteStrategy for Utf8ToWide { 86 | fn buffer_desc(&self) -> BufferDesc { 87 | BufferDesc::WText { max_str_len: 1 } 88 | } 89 | 90 | fn write_rows( 91 | &self, 92 | param_offset: usize, 93 | to: AnySliceMut<'_>, 94 | from: &dyn Array, 95 | ) -> Result<(), WriterError> { 96 | let from = from.as_any().downcast_ref::().unwrap(); 97 | let to = to.as_w_text_view().unwrap(); 98 | insert_into_wide_slice(from.iter(), to, param_offset)?; 99 | Ok(()) 100 | } 101 | } 102 | 103 | pub struct LargeUtf8ToWide; 104 | 105 | impl WriteStrategy for LargeUtf8ToWide { 106 | fn buffer_desc(&self) -> BufferDesc { 107 | BufferDesc::WText { max_str_len: 1 } 108 | } 109 | 110 | fn write_rows( 111 | &self, 112 | param_offset: usize, 113 | to: AnySliceMut<'_>, 114 | from: &dyn Array, 115 | ) -> Result<(), WriterError> { 116 | let from = from.as_any().downcast_ref::().unwrap(); 117 | let to = to.as_w_text_view().unwrap(); 118 | insert_into_wide_slice(from.iter(), to, param_offset)?; 119 | Ok(()) 120 | } 121 | } 122 | 123 | fn insert_into_wide_slice<'a>( 124 | from: impl Iterator>, 125 | mut to: TextColumnSliceMut, 126 | at: usize, 127 | ) -> Result<(), WriterError> { 128 | // We must first encode the utf8 input to utf16. We reuse this buffer for that in order to avoid 129 | // allocations. 130 | let mut utf_16 = Vec::new(); 131 | for (row_index, element) in from.enumerate() { 132 | // Total number of rows written into the inserter (`to`). This includes the values from the 133 | // current batch (`row_index`), as well as the ones from the previous batches (`at`). In 134 | // case of reallocation, we need to copy all these values. Also, this is the index of the 135 | // element we currently want to write. 
136 | let num_rows_written_so_far = at + row_index; 137 | if let Some(text) = element { 138 | utf_16.extend(text.encode_utf16()); 139 | to.ensure_max_element_length(utf_16.len(), num_rows_written_so_far) 140 | .map_err(WriterError::RebindBuffer)?; 141 | to.set_cell(num_rows_written_so_far, Some(&utf_16)); 142 | utf_16.clear(); 143 | } else { 144 | to.set_cell(num_rows_written_so_far, None); 145 | } 146 | } 147 | Ok(()) 148 | } 149 | -------------------------------------------------------------------------------- /src/decimal.rs: -------------------------------------------------------------------------------- 1 | use arrow::{ 2 | array::{Array, Decimal128Array, Decimal256Array}, 3 | datatypes::{ArrowPrimitiveType, Decimal256Type}, 4 | }; 5 | use odbc_api::buffers::{AnySliceMut, BufferDesc}; 6 | 7 | use crate::{WriterError, odbc_writer::WriteStrategy}; 8 | 9 | pub struct NullableDecimal128AsText { 10 | precision: u8, 11 | scale: i8, 12 | } 13 | 14 | impl NullableDecimal128AsText { 15 | pub fn new(precision: u8, scale: i8) -> Self { 16 | Self { precision, scale } 17 | } 18 | } 19 | 20 | pub struct NullableDecimal256AsText { 21 | precision: u8, 22 | scale: i8, 23 | } 24 | 25 | impl NullableDecimal256AsText { 26 | pub fn new(precision: u8, scale: i8) -> Self { 27 | Self { precision, scale } 28 | } 29 | } 30 | 31 | /// Length of a text representation of a decimal 32 | fn len_text(scale: i8, precision: u8) -> usize { 33 | match scale { 34 | // Precision digits + (- scale zeroes) + sign 35 | i8::MIN..=-1 => (precision as i32 - scale as i32 + 1).try_into().unwrap(), 36 | // Precision digits + sign 37 | 0 => precision as usize + 1, 38 | // Precision digits + radix character (`.`) + sign 39 | 1.. => precision as usize + 1 + 1, 40 | } 41 | } 42 | 43 | impl WriteStrategy for NullableDecimal128AsText { 44 | fn buffer_desc(&self) -> BufferDesc { 45 | BufferDesc::Text { 46 | max_str_len: len_text(self.scale, self.precision), 47 | } 48 | } 49 | 50 | fn write_rows( 51 | &self, 52 | param_offset: usize, 53 | column_buf: AnySliceMut<'_>, 54 | array: &dyn Array, 55 | ) -> Result<(), WriterError> { 56 | let length = len_text(self.scale, self.precision); 57 | 58 | let from = array.as_any().downcast_ref::().unwrap(); 59 | let mut to = column_buf.as_text_view().unwrap(); 60 | 61 | for (index, cell) in from.iter().enumerate() { 62 | if let Some(value) = cell { 63 | let buf = to.set_mut(index + param_offset, length); 64 | write_i128_as_decimal(value, self.precision, self.scale, buf) 65 | } else { 66 | to.set_cell(index + param_offset, None) 67 | } 68 | } 69 | Ok(()) 70 | } 71 | } 72 | 73 | impl WriteStrategy for NullableDecimal256AsText { 74 | fn buffer_desc(&self) -> BufferDesc { 75 | BufferDesc::Text { 76 | max_str_len: len_text(self.scale, self.precision), 77 | } 78 | } 79 | 80 | fn write_rows( 81 | &self, 82 | param_offset: usize, 83 | column_buf: AnySliceMut<'_>, 84 | array: &dyn Array, 85 | ) -> Result<(), WriterError> { 86 | let from = array.as_any().downcast_ref::().unwrap(); 87 | let mut to = column_buf.as_text_view().unwrap(); 88 | let length = len_text(self.scale, self.precision); 89 | 90 | for (index, cell) in from.iter().enumerate() { 91 | if let Some(value) = cell { 92 | let buf = to.set_mut(index + param_offset, length); 93 | write_i256_as_decimal(value, self.precision, self.scale, buf) 94 | } else { 95 | to.set_cell(index + param_offset, None) 96 | } 97 | } 98 | Ok(()) 99 | } 100 | } 101 | 102 | fn write_i128_as_decimal(mut n: i128, precision: u8, scale: i8, text: &mut [u8]) { 103 | if 
n.is_negative() { 104 | n *= n.signum(); 105 | text[0] = b'-'; 106 | } else { 107 | text[0] = b'+'; 108 | } 109 | 110 | // Number of digits + one decimal separator (`.`) 111 | let str_len: i32 = (len_text(scale, precision) - 1).try_into().unwrap(); 112 | 113 | let ten = 10; 114 | for index in (0..str_len).rev() { 115 | // In case of negative scale, fill the last digits with zeroes 116 | let char = if (str_len - index) <= -scale as i32 { 117 | b'0' 118 | // The separator will not be printed in case of scale <= 0 since index is never going to 119 | // reach `precision`. 120 | } else if index == precision as i32 - scale as i32 { 121 | b'.' 122 | } else { 123 | let digit: u8 = (n % ten) as u8; 124 | n /= ten; 125 | b'0' + digit 126 | }; 127 | // +1 offset to make space for sign character 128 | text[index as usize + 1] = char; 129 | } 130 | } 131 | 132 | type I256 = ::Native; 133 | 134 | fn write_i256_as_decimal(mut n: I256, precision: u8, scale: i8, text: &mut [u8]) { 135 | if n.lt(&I256::ZERO) { 136 | n = n.checked_mul(I256::MINUS_ONE).unwrap(); 137 | text[0] = b'-'; 138 | } else { 139 | text[0] = b'+'; 140 | } 141 | 142 | // Number of digits + one decimal separator (`.`) 143 | let str_len: i32 = (len_text(scale, precision) - 1).try_into().unwrap(); 144 | 145 | let ten = I256::from_i128(10); 146 | for index in (0..str_len).rev() { 147 | let char = if (str_len - index) <= -scale as i32 { 148 | b'0' 149 | // The separator will not be printed in case of scale == 0 since index is never going to 150 | // reach `precision`. 151 | } else if index == precision as i32 - scale as i32 { 152 | b'.' 153 | } else { 154 | let digit: u8 = n.checked_rem(ten).unwrap().to_i128().unwrap() as u8; 155 | n = n.checked_div(ten).unwrap(); 156 | b'0' + digit 157 | }; 158 | // +1 offset to make space for sign character 159 | text[index as usize + 1] = char; 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/odbc_writer/map_arrow_to_odbc.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use arrow::{ 4 | array::{Array, PrimitiveArray}, 5 | datatypes::ArrowPrimitiveType, 6 | }; 7 | use odbc_api::buffers::{AnySliceMut, BufferDesc, Item}; 8 | 9 | use crate::WriterError; 10 | 11 | use super::WriteStrategy; 12 | 13 | pub trait MapArrowToOdbc { 14 | type ArrowElement; 15 | 16 | fn map_with( 17 | nullable: bool, 18 | arrow_to_odbc: impl Fn(Self::ArrowElement) -> U + 'static, 19 | ) -> Box 20 | where 21 | U: Item; 22 | 23 | fn identical(nullable: bool) -> Box 24 | where 25 | Self::ArrowElement: Item; 26 | } 27 | 28 | impl MapArrowToOdbc for T 29 | where 30 | T: ArrowPrimitiveType, 31 | { 32 | type ArrowElement = T::Native; 33 | 34 | fn map_with( 35 | nullable: bool, 36 | arrow_to_odbc: impl Fn(Self::ArrowElement) -> U + 'static, 37 | ) -> Box 38 | where 39 | U: Item, 40 | { 41 | if nullable { 42 | Box::new(Nullable::::new(arrow_to_odbc)) 43 | } else { 44 | Box::new(NonNullable::::new(arrow_to_odbc)) 45 | } 46 | } 47 | 48 | fn identical(nullable: bool) -> Box 49 | where 50 | Self::ArrowElement: Item, 51 | { 52 | if nullable { 53 | Box::new(NullableIdentical::::new()) 54 | } else { 55 | Box::new(NonNullableIdentical::::new()) 56 | } 57 | } 58 | } 59 | 60 | struct Nullable { 61 | // We use this type information to correctly downcast from a `&dyn Array`. 62 | _primitive_type: PhantomData
<P>
, 63 | arrow_to_odbc: F, 64 | } 65 | 66 | impl Nullable { 67 | fn new(arrow_to_odbc: F) -> Self { 68 | Self { 69 | _primitive_type: PhantomData, 70 | arrow_to_odbc, 71 | } 72 | } 73 | } 74 | 75 | impl WriteStrategy for Nullable 76 | where 77 | P: ArrowPrimitiveType, 78 | F: Fn(P::Native) -> U, 79 | U: Item, 80 | { 81 | fn buffer_desc(&self) -> BufferDesc { 82 | U::buffer_desc(true) 83 | } 84 | 85 | fn write_rows( 86 | &self, 87 | param_offset: usize, 88 | column_buf: AnySliceMut<'_>, 89 | array: &dyn Array, 90 | ) -> Result<(), WriterError> { 91 | let from = array.as_any().downcast_ref::>().unwrap(); 92 | let mut to = column_buf.as_nullable_slice::().unwrap(); 93 | for (index, cell) in from.iter().enumerate() { 94 | to.set_cell(index + param_offset, cell.map(&self.arrow_to_odbc)) 95 | } 96 | Ok(()) 97 | } 98 | } 99 | 100 | struct NonNullable { 101 | // We use this type information to correctly downcast from a `&dyn Array`. 102 | _primitive_type: PhantomData
<P>
, 103 | arrow_to_odbc: F, 104 | } 105 | 106 | impl NonNullable { 107 | fn new(arrow_to_odbc: F) -> Self { 108 | Self { 109 | _primitive_type: PhantomData, 110 | arrow_to_odbc, 111 | } 112 | } 113 | } 114 | 115 | impl WriteStrategy for NonNullable 116 | where 117 | P: ArrowPrimitiveType, 118 | F: Fn(P::Native) -> U, 119 | U: Item, 120 | { 121 | fn buffer_desc(&self) -> BufferDesc { 122 | U::buffer_desc(false) 123 | } 124 | 125 | fn write_rows( 126 | &self, 127 | param_offset: usize, 128 | column_buf: AnySliceMut<'_>, 129 | array: &dyn Array, 130 | ) -> Result<(), WriterError> { 131 | let from = array.as_any().downcast_ref::>().unwrap(); 132 | let to = column_buf.as_slice::().unwrap(); 133 | for index in 0..from.len() { 134 | to[index + param_offset] = (self.arrow_to_odbc)(from.value(index)) 135 | } 136 | Ok(()) 137 | } 138 | } 139 | 140 | struct NullableIdentical
<P>
{ 141 | _phantom: PhantomData
<P>
, 142 | } 143 | 144 | impl
<P>
NullableIdentical
<P>
{ 145 | pub fn new() -> Self { 146 | Self { 147 | _phantom: PhantomData, 148 | } 149 | } 150 | } 151 | 152 | impl
<P>
WriteStrategy for NullableIdentical
<P>
153 | where 154 | P: ArrowPrimitiveType, 155 | P::Native: Item, 156 | { 157 | fn buffer_desc(&self) -> BufferDesc { 158 | P::Native::buffer_desc(true) 159 | } 160 | 161 | fn write_rows( 162 | &self, 163 | param_offset: usize, 164 | column_buf: AnySliceMut<'_>, 165 | array: &dyn Array, 166 | ) -> Result<(), WriterError> { 167 | let from = array.as_any().downcast_ref::>().unwrap(); 168 | let mut to = column_buf.as_nullable_slice::().unwrap(); 169 | for (index, cell) in from.iter().enumerate() { 170 | to.set_cell(index + param_offset, cell); 171 | } 172 | Ok(()) 173 | } 174 | } 175 | 176 | struct NonNullableIdentical
<P>
{ 177 | _phantom: PhantomData
<P>
, 178 | } 179 | 180 | impl
<P>
NonNullableIdentical
<P>
{ 181 | pub fn new() -> Self { 182 | Self { 183 | _phantom: PhantomData, 184 | } 185 | } 186 | } 187 | 188 | impl
<P>
WriteStrategy for NonNullableIdentical
<P>
189 | where 190 | P: ArrowPrimitiveType, 191 | P::Native: Item, 192 | { 193 | fn buffer_desc(&self) -> BufferDesc { 194 | P::Native::buffer_desc(false) 195 | } 196 | 197 | fn write_rows( 198 | &self, 199 | param_offset: usize, 200 | column_buf: AnySliceMut<'_>, 201 | array: &dyn Array, 202 | ) -> Result<(), WriterError> { 203 | let from = array.as_any().downcast_ref::>().unwrap(); 204 | let to = column_buf.as_slice::().unwrap(); 205 | to[param_offset..(param_offset + from.len())].copy_from_slice(from.values()); 206 | Ok(()) 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/reader/concurrent_odbc_reader.rs: -------------------------------------------------------------------------------- 1 | use arrow::{ 2 | datatypes::SchemaRef, 3 | error::ArrowError, 4 | record_batch::{RecordBatch, RecordBatchReader}, 5 | }; 6 | use odbc_api::{BlockCursor, ConcurrentBlockCursor, Cursor, buffers::ColumnarAnyBuffer}; 7 | 8 | use crate::Error; 9 | 10 | use super::{odbc_reader::odbc_to_arrow_error, to_record_batch::ToRecordBatch}; 11 | 12 | /// Arrow ODBC reader. Implements the [`arrow::record_batch::RecordBatchReader`] trait so it can be 13 | /// used to fill Arrow arrays from an ODBC data source. Similar to [`crate::OdbcReader`], yet 14 | /// [`ConcurrentOdbcReader`] fetches ODBC batches in a second transit buffer eagerly from the 15 | /// database in a dedicated system thread. This allows the allocation of the Arrow arrays and your 16 | /// application logic to run on the main thread, while fetching the batches from the source happens 17 | /// concurrently. You need twice the memory for the transit buffer for this strategy, since one is 18 | /// may be in use by the main thread in order to copy values into arrow arrays, while the other is 19 | /// used to write values from the database. 20 | /// 21 | /// # Example 22 | /// 23 | /// ```no_run 24 | /// use arrow_odbc::{odbc_api::{Environment, ConnectionOptions}, OdbcReaderBuilder}; 25 | /// use std::sync::OnceLock; 26 | /// 27 | /// // In order to fetch in a dedicated system thread we need a cursor with static lifetime, 28 | /// // this implies a static ODBC environment. 29 | /// static ENV: OnceLock = OnceLock::new(); 30 | /// 31 | /// const CONNECTION_STRING: &str = "\ 32 | /// Driver={ODBC Driver 18 for SQL Server};\ 33 | /// Server=localhost;\ 34 | /// UID=SA;\ 35 | /// PWD=My@Test@Password1;\ 36 | /// "; 37 | /// 38 | /// fn main() -> Result<(), anyhow::Error> { 39 | /// 40 | /// let odbc_environment = ENV.get_or_init(|| {Environment::new().unwrap() }); 41 | /// 42 | /// // Connect with database. 43 | /// let connection = odbc_environment.connect_with_connection_string( 44 | /// CONNECTION_STRING, 45 | /// ConnectionOptions::default() 46 | /// )?; 47 | /// 48 | /// // This SQL statement does not require any arguments. 49 | /// let parameters = (); 50 | /// 51 | /// // Do not apply any timout. 52 | /// let timeout_sec = None; 53 | /// 54 | /// // Execute query and create result set 55 | /// let cursor = connection 56 | /// // Using `into_cursor` instead of `execute` takes ownership of the connection and 57 | /// // allows for a cursor with static lifetime. 58 | /// .into_cursor("SELECT * FROM MyTable", parameters, timeout_sec) 59 | /// .map_err(|e| e.error)? 
60 | /// .expect("SELECT statement must produce a cursor"); 61 | /// 62 | /// // Construct ODBC reader and make it concurrent 63 | /// let arrow_record_batches = OdbcReaderBuilder::new().build(cursor)?.into_concurrent()?; 64 | /// 65 | /// for batch in arrow_record_batches { 66 | /// // ... process batch ... 67 | /// } 68 | /// Ok(()) 69 | /// } 70 | /// ``` 71 | pub struct ConcurrentOdbcReader { 72 | /// We fill the buffers using ODBC concurrently. The buffer currently being filled is bound to 73 | /// the Cursor. This is the buffer which is unbound and read by the application to fill the 74 | /// arrow arrays. After being read we will reuse the buffer and bind it to the cursor in order 75 | /// to safe allocations. 76 | buffer: ColumnarAnyBuffer, 77 | /// Converts the content of ODBC buffers into Arrow record batches 78 | converter: ToRecordBatch, 79 | /// Fetches values from the ODBC datasource using columnar batches. Values are streamed batch 80 | /// by batch in order to avoid reallocation of the buffers used for tranistion. 81 | batch_stream: ConcurrentBlockCursor, 82 | } 83 | 84 | impl ConcurrentOdbcReader { 85 | /// The schema implied by `block_cursor` and `converter` must match. Invariant is hard to check 86 | /// in type system, keep this constructor private to this crate. Users should use 87 | /// [`crate::OdbcReader::into_concurrent`] instead. 88 | pub(crate) fn from_block_cursor( 89 | block_cursor: BlockCursor, 90 | converter: ToRecordBatch, 91 | fallibale_allocations: bool, 92 | ) -> Result { 93 | let max_batch_size = block_cursor.row_array_size(); 94 | let batch_stream = ConcurrentBlockCursor::from_block_cursor(block_cursor); 95 | // Note that we delay buffer allocation until after the fetch thread has started and we 96 | // start fetching the first row group concurrently as early, not waiting for the buffer 97 | // allocation to go through. 98 | let buffer = converter.allocate_buffer(max_batch_size, fallibale_allocations)?; 99 | 100 | Ok(Self { 101 | buffer, 102 | converter, 103 | batch_stream, 104 | }) 105 | } 106 | 107 | /// Destroy the ODBC arrow reader and yield the underlyinng cursor object. 108 | /// 109 | /// One application of this is to process more than one result set in case you executed a stored 110 | /// procedure. 111 | /// 112 | /// Due to the concurrent fetching of row groups you can not know how many row groups have been 113 | /// extracted once the cursor is returned. Unless that is that the entire cursor has been 114 | /// consumed i.e. [`Self::next`] returned `None`. 115 | pub fn into_cursor(self) -> Result { 116 | self.batch_stream.into_cursor() 117 | } 118 | } 119 | 120 | impl Iterator for ConcurrentOdbcReader 121 | where 122 | C: Cursor, 123 | { 124 | type Item = Result; 125 | 126 | fn next(&mut self) -> Option { 127 | match self.batch_stream.fetch_into(&mut self.buffer) { 128 | // We successfully fetched a batch from the database. Try to copy it into a record batch 129 | // and forward errors if any. 130 | Ok(true) => { 131 | let result_record_batch = self 132 | .converter 133 | .buffer_to_record_batch(&self.buffer) 134 | .map_err(|mapping_error| ArrowError::ExternalError(Box::new(mapping_error))); 135 | Some(result_record_batch) 136 | } 137 | // We ran out of batches in the result set. End the iterator. 138 | Ok(false) => None, 139 | // We had an error fetching the next batch from the database, let's report it as an 140 | // external error. 
141 | Err(odbc_error) => Some(Err(odbc_to_arrow_error(odbc_error))), 142 | } 143 | } 144 | } 145 | 146 | impl RecordBatchReader for ConcurrentOdbcReader 147 | where 148 | C: Cursor, 149 | { 150 | fn schema(&self) -> SchemaRef { 151 | self.converter.schema().clone() 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/date_time.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | convert::TryInto, 3 | fmt::Display, 4 | io::Write, 5 | marker::PhantomData, 6 | ops::{Div, Mul, Rem}, 7 | }; 8 | 9 | use arrow::{ 10 | array::{Array, PrimitiveArray}, 11 | datatypes::{ 12 | ArrowPrimitiveType, Time32MillisecondType, Time64MicrosecondType, Time64NanosecondType, 13 | }, 14 | }; 15 | use chrono::{Datelike, NaiveDate}; 16 | use odbc_api::{ 17 | buffers::{AnySliceMut, BufferDesc, TextColumnSliceMut}, 18 | sys::{Date, Time, Timestamp}, 19 | }; 20 | 21 | use crate::{WriterError, odbc_writer::WriteStrategy, reader::MappingError}; 22 | 23 | /// Transform date to days since unix epoch as i32 24 | pub fn days_since_epoch(date: &Date) -> i32 { 25 | let unix_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); 26 | let date = 27 | NaiveDate::from_ymd_opt(date.year as i32, date.month as u32, date.day as u32).unwrap(); 28 | let duration = date.signed_duration_since(unix_epoch); 29 | duration.num_days().try_into().unwrap() 30 | } 31 | 32 | pub fn seconds_since_epoch(from: &Timestamp) -> i64 { 33 | let ndt = NaiveDate::from_ymd_opt(from.year as i32, from.month as u32, from.day as u32) 34 | .unwrap() 35 | .and_hms_opt(from.hour as u32, from.minute as u32, from.second as u32) 36 | .unwrap(); 37 | ndt.and_utc().timestamp() 38 | } 39 | 40 | pub fn ms_since_epoch(from: &Timestamp) -> i64 { 41 | let ndt = NaiveDate::from_ymd_opt(from.year as i32, from.month as u32, from.day as u32) 42 | .unwrap() 43 | .and_hms_nano_opt( 44 | from.hour as u32, 45 | from.minute as u32, 46 | from.second as u32, 47 | from.fraction, 48 | ) 49 | .unwrap(); 50 | ndt.and_utc().timestamp_millis() 51 | } 52 | 53 | pub fn us_since_epoch(from: &Timestamp) -> i64 { 54 | let ndt = NaiveDate::from_ymd_opt(from.year as i32, from.month as u32, from.day as u32) 55 | .unwrap() 56 | .and_hms_nano_opt( 57 | from.hour as u32, 58 | from.minute as u32, 59 | from.second as u32, 60 | from.fraction, 61 | ) 62 | .unwrap(); 63 | ndt.and_utc().timestamp_micros() 64 | } 65 | 66 | pub fn ns_since_epoch(from: &Timestamp) -> Result { 67 | let ndt = NaiveDate::from_ymd_opt(from.year as i32, from.month as u32, from.day as u32) 68 | .unwrap() 69 | .and_hms_nano_opt( 70 | from.hour as u32, 71 | from.minute as u32, 72 | from.second as u32, 73 | from.fraction, 74 | ) 75 | .unwrap(); 76 | 77 | // The dates that can be represented as nanoseconds are between 1677-09-21T00:12:44.0 and 78 | // 2262-04-11T23:47:16.854775804 79 | ndt.and_utc() 80 | .timestamp_nanos_opt() 81 | .ok_or(MappingError::OutOfRangeTimestampNs { value: ndt }) 82 | } 83 | 84 | pub fn epoch_to_date(from: i32) -> Date { 85 | // Offset between between ce and unix epoch 86 | const OFFSET: i32 = 719_163; 87 | let nd = NaiveDate::from_num_days_from_ce_opt(from + OFFSET).unwrap(); 88 | Date { 89 | year: nd.year().try_into().unwrap(), 90 | month: nd.month().try_into().unwrap(), 91 | day: nd.day().try_into().unwrap(), 92 | } 93 | } 94 | 95 | pub fn sec_since_midnight_to_time(from: i32) -> Time { 96 | let unit_min = 60; 97 | let unit_hour = unit_min * 60; 98 | let hour = from / unit_hour; 99 | let minute = (from % 
unit_hour) / unit_min; 100 | let second = from % unit_min; 101 | Time { 102 | hour: hour.try_into().unwrap(), 103 | minute: minute.try_into().unwrap(), 104 | second: second.try_into().unwrap(), 105 | } 106 | } 107 | 108 | pub struct NullableTimeAsText
<P> { 109 | _phantom: PhantomData<P>, 110 | } 111 | 112 | impl<P> NullableTimeAsText<P>
{ 113 | pub fn new() -> Self { 114 | Self { 115 | _phantom: PhantomData, 116 | } 117 | } 118 | } 119 | 120 | pub trait TimePrimitive { 121 | type Integer: From<u8> 122 | + Copy 123 | + Mul<Output = Self::Integer> 124 | + Div<Output = Self::Integer> 125 | + Rem<Output = Self::Integer> 126 | + Display; 127 | const SCALE: usize; 128 | const PRECISION_FACTOR: Self::Integer; 129 | const STR_LEN: usize; 130 | 131 | fn insert_at(index: usize, from: Self::Integer, to: &mut TextColumnSliceMut<u8>) { 132 | let sixty: Self::Integer = 60.into(); 133 | let unit_min = sixty * Self::PRECISION_FACTOR; 134 | let unit_hour = unit_min * sixty; 135 | let hour = from / unit_hour; 136 | let minute = (from % unit_hour) / unit_min; 137 | let second = (from % unit_min) / Self::PRECISION_FACTOR; 138 | let fraction = from % Self::PRECISION_FACTOR; 139 | write!( 140 | to.set_mut(index, Self::STR_LEN), 141 | "{hour:02}:{minute:02}:{second:02}.{fraction:0s$}", 142 | s = Self::SCALE 143 | ) 144 | .unwrap(); 145 | } 146 | } 147 | 148 | impl TimePrimitive for Time32MillisecondType { 149 | type Integer = i32; 150 | const SCALE: usize = 3; 151 | const PRECISION_FACTOR: i32 = 1_000; 152 | // Length of text representation of time. HH:MM:SS.fff 153 | const STR_LEN: usize = 12; 154 | } 155 | 156 | impl TimePrimitive for Time64MicrosecondType { 157 | type Integer = i64; 158 | 159 | const SCALE: usize = 6; 160 | const PRECISION_FACTOR: i64 = 1_000_000; 161 | // Length of text representation of time. HH:MM:SS.ffffff 162 | const STR_LEN: usize = 15; 163 | } 164 | 165 | impl TimePrimitive for Time64NanosecondType { 166 | type Integer = i64; 167 | // Times are rendered with full nanosecond precision (nine fractional digits). 168 | const SCALE: usize = 9; 169 | const PRECISION_FACTOR: i64 = 1_000_000_000; 170 | // Length of text representation of time. HH:MM:SS.fffffffff 171 | const STR_LEN: usize = 18; 172 | } 173 | 174 | impl<P>
WriteStrategy for NullableTimeAsText<P>
175 | where 176 | P: ArrowPrimitiveType + TimePrimitive<Integer = <P as ArrowPrimitiveType>::Native>, 177 | { 178 | fn buffer_desc(&self) -> BufferDesc { 179 | BufferDesc::Text { 180 | max_str_len: P::STR_LEN, 181 | } 182 | } 183 | 184 | fn write_rows( 185 | &self, 186 | param_offset: usize, 187 | column_buf: AnySliceMut<'_>, 188 | array: &dyn Array, 189 | ) -> Result<(), WriterError> { 190 | let from = array.as_any().downcast_ref::<PrimitiveArray<P>>().unwrap(); 191 | let mut to = column_buf.as_text_view().unwrap(); 192 | for (index, elapsed_since_midnight) in from.iter().enumerate() { 193 | if let Some(from) = elapsed_since_midnight { 194 | P::insert_at(index + param_offset, from, &mut to) 195 | } else { 196 | to.set_cell(index + param_offset, None) 197 | } 198 | } 199 | Ok(()) 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # arrow-odbc 2 | 3 | [![Docs](https://docs.rs/arrow-odbc/badge.svg)](https://docs.rs/arrow-odbc/) 4 | [![Licence](https://img.shields.io/crates/l/arrow-odbc)](https://github.com/pacman82/arrow-odbc/blob/main/License) 5 | [![Crates.io](https://img.shields.io/crates/v/arrow-odbc)](https://crates.io/crates/arrow-odbc) 6 | 7 | Fill Apache Arrow arrays from ODBC data sources. `arrow-odbc` is built on top of the [`arrow`](https://crates.io/crates/arrow) and [`odbc-api`](https://crates.io/crates/odbc-api) crates and enables you to read the data of an ODBC data source as a sequence of Apache Arrow record batches. `arrow-odbc` can also be used to insert the contents of Arrow record batches into a database table. 8 | 9 | This repository contains the code of the `arrow-odbc` Rust crate. The repository containing the code for the [`arrow-odbc` Python wheel](https://pypi.org/project/arrow-odbc/) resides in the [`arrow-odbc-py` repository](https://github.com/pacman82/arrow-odbc-py). 10 | 11 | ## About Arrow 12 | 13 | > [Apache Arrow](https://arrow.apache.org/) defines a language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware like CPUs and GPUs. The Arrow memory format also supports zero-copy reads for lightning-fast data access without serialization overhead. 14 | 15 | ## About ODBC 16 | 17 | [ODBC](https://docs.microsoft.com/en-us/sql/odbc/microsoft-open-database-connectivity-odbc) (Open DataBase Connectivity) is a standard which enables you to access data from a wide variety of data sources using SQL. 18 | 19 | ## Usage 20 | 21 | ```rust 22 | use arrow_odbc::OdbcReaderBuilder; 23 | // You can use the reexport of odbc_api to make sure the version used by arrow_odbc is in sync with 24 | // the version directly used by your application. 25 | use arrow_odbc::odbc_api as odbc_api; 26 | use odbc_api::{Environment, ConnectionOptions}; 27 | 28 | const CONNECTION_STRING: &str = "\ 29 | Driver={ODBC Driver 18 for SQL Server};\ 30 | Server=localhost;\ 31 | UID=SA;\ 32 | PWD=My@Test@Password1;\ 33 | "; 34 | 35 | fn main() -> Result<(), anyhow::Error> { 36 | 37 | let odbc_environment = Environment::new()?; 38 | 39 | // Connect with database. 40 | let connection = odbc_environment.connect_with_connection_string( 41 | CONNECTION_STRING, 42 | ConnectionOptions::default(), 43 | )?; 44 | 45 | // This SQL statement does not require any arguments. 46 | let parameters = (); 47 | 48 | // Execute query and create result set 49 | let cursor = connection 50 | .execute("SELECT * FROM MyTable", parameters)? 
51 | .expect("SELECT statement must produce a cursor"); 52 | 53 | // Read result set as arrow batches. Infer Arrow types automatically using the meta 54 | // information of `cursor`. 55 | let arrow_record_batches = OdbcReaderBuilder::new() 56 | // Use at most 256 MiB for transit buffer 57 | .with_max_bytes_per_batch(256 * 1024 * 1024) 58 | .build(cursor)?; 59 | 60 | for batch in arrow_record_batches { 61 | // ... process batch ... 62 | } 63 | Ok(()) 64 | } 65 | ``` 66 | 67 | ## Matching of ODBC to Arrow types when querying 68 | 69 | | ODBC | Arrow | 70 | | ------------------------ | -------------------- | 71 | | Numeric(p <= 38) | Decimal128 | 72 | | Decimal(p <= 38, s >= 0) | Decimal128 | 73 | | Integer | Int32 | 74 | | SmallInt | Int16 | 75 | | Real | Float32 | 76 | | Float(p <=24) | Float32 | 77 | | Double | Float64 | 78 | | Float(p > 24) | Float64 | 79 | | Date | Date32 | 80 | | LongVarbinary | Binary | 81 | | Time(p = 0) | Time32Second | 82 | | Time(p = 1..3) | Time32Millisecond | 83 | | Time(p = 4..6) | Time64Microsecond | 84 | | Time(p = 7..9) | Time64Nanosecond | 85 | | Timestamp(p = 0) | TimestampSecond | 86 | | Timestamp(p: 1..3) | TimestampMilliSecond | 87 | | Timestamp(p: 4..6) | TimestampMicroSecond | 88 | | Timestamp(p >= 7 ) | TimestampNanoSecond | 89 | | BigInt | Int64 | 90 | | TinyInt Signed | Int8 | 91 | | TinyInt Unsigned | UInt8 | 92 | | Bit | Boolean | 93 | | Varbinary | Binary | 94 | | Binary | FixedSizedBinary | 95 | | All others | Utf8 | 96 | 97 | ## Matching of Arrow to ODBC types when inserting 98 | 99 | | Arrow | ODBC | 100 | | --------------------- | ------------------ | 101 | | Utf8 | VarChar | 102 | | LargeUtf8 | VarChar | 103 | | Decimal128(p, s = 0) | VarChar(p + 1) | 104 | | Decimal128(p, s != 0) | VarChar(p + 2) | 105 | | Decimal128(p, s < 0) | VarChar(p - s + 1) | 106 | | Decimal256(p, s = 0) | VarChar(p + 1) | 107 | | Decimal256(p, s != 0) | VarChar(p + 2) | 108 | | Decimal256(p, s < 0) | VarChar(p - s + 1) | 109 | | Int8 | TinyInt | 110 | | Int16 | SmallInt | 111 | | Int32 | Integer | 112 | | Int64 | BigInt | 113 | | Float16 | Real | 114 | | Float32 | Real | 115 | | Float64 | Double | 116 | | Timestamp s | Timestamp(7) | 117 | | Timestamp ms | Timestamp(7) | 118 | | Timestamp us | Timestamp(7) | 119 | | Timestamp ns | Timestamp(7) | 120 | | Timestamp with Tz s | VarChar(25) | 121 | | Timestamp with Tz ms | VarChar(29) | 122 | | Timestamp with Tz us | VarChar(32) | 123 | | Timestamp with Tz ns | VarChar(35) | 124 | | Date32 | Date | 125 | | Date64 | Date | 126 | | Time32 s | Time | 127 | | Time32 ms | VarChar(12) | 128 | | Time64 us | VarChar(15) | 129 | | Time64 ns | VarChar(18) | 130 | | Binary | Varbinary | 131 | | FixedBinary(l) | Varbinary(l) | 132 | | All others | Unsupported | 133 | 134 | The mapping for insertion is not optimal yet, but before spending a lot of work on improving it I was curious which use cases would pop up for users. So if something does not work for you, but could work with a better mapping of Arrow to ODBC types, feel free to open an issue. If you do so, please give a lot of context about what you are trying to do. 135 | 136 | ## Build 137 | 138 | To build `arrow-odbc` and compile it as part of your Rust project you need to link against an ODBC driver manager. On Windows this is already part of the system, so there is nothing to do. On Linux and MacOS it is recommended to install UnixODBC. 
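After installing a driver manager via one of the options below, you can check that unixODBC is set up correctly. Assuming its `odbcinst` tool is on your `PATH`, the following prints the location of the configuration files and lists the registered drivers:

```shell
odbcinst -j
odbcinst -q -d
```

This is useful when a driver is installed but the connection still fails with a "driver not found" style error at runtime.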
139 | 140 | ### Ubuntu 141 | 142 | ```shell 143 | sudo apt-get install unixodbc-dev 144 | ``` 145 | 146 | ### Mac OS 147 | 148 | ```shell 149 | brew install unixodbc 150 | ``` 151 | 152 | ### Mac OS ARM 153 | 154 | On MacOS with ARM, brew installs into a directory not found by cargo during linking. There are likely many ways to deal with this. Since the author does not have access to an ARM Mac, here is only a collection of things that have worked for other users. 155 | 156 | * Installing unixODBC itself from source with make/configure instead of brew 157 | * Installing unixODBC with brew and creating a symlink for its binary directory `sudo ln -s /opt/homebrew/lib /Users/<username>/lib` 158 | -------------------------------------------------------------------------------- /src/odbc_writer/timestamp.rs: -------------------------------------------------------------------------------- 1 | //! Logic for inserting timestamps from Arrow Arrays into ODBC databases. 2 | 3 | use std::{io::Write, marker::PhantomData, sync::Arc}; 4 | 5 | use arrow::{ 6 | array::{Array, ArrowPrimitiveType, PrimitiveArray, timezone::Tz}, 7 | datatypes::{ 8 | TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, 9 | TimestampSecondType, 10 | }, 11 | }; 12 | use chrono::{DateTime, Datelike, TimeZone, Timelike}; 13 | use log::debug; 14 | use odbc_api::{ 15 | buffers::{AnySliceMut, BufferDesc}, 16 | sys::Timestamp, 17 | }; 18 | 19 | use super::{WriteStrategy, WriterError, map_arrow_to_odbc::MapArrowToOdbc}; 20 | 21 | pub fn insert_timestamp_strategy( 22 | is_nullable: bool, 23 | time_unit: &TimeUnit, 24 | time_zone: Option<Arc<str>>, 25 | ) -> Result<Box<dyn WriteStrategy>, WriterError> { 26 | let ws = match (time_unit, time_zone) { 27 | (TimeUnit::Second, None) => { 28 | TimestampSecondType::map_with(is_nullable, epoch_to_timestamp_s) 29 | } 30 | (TimeUnit::Millisecond, None) => { 31 | TimestampMillisecondType::map_with(is_nullable, epoch_to_timestamp_ms) 32 | } 33 | (TimeUnit::Microsecond, None) => { 34 | TimestampMicrosecondType::map_with(is_nullable, epoch_to_timestamp_us) 35 | } 36 | (TimeUnit::Nanosecond, None) => TimestampNanosecondType::map_with(is_nullable, |ns| { 37 | // Drop the last two digits of precision, since we bind it with precision 7 and not 9. 38 | epoch_to_timestamp_ns((ns / 100) * 100) 39 | }), 40 | (TimeUnit::Second, Some(tz)) => { 41 | Box::new(TimestampTzToText::<TimestampSecondType>::new(tz)?) 42 | } 43 | (TimeUnit::Millisecond, Some(tz)) => { 44 | Box::new(TimestampTzToText::<TimestampMillisecondType>::new(tz)?) 45 | } 46 | (TimeUnit::Microsecond, Some(tz)) => { 47 | Box::new(TimestampTzToText::<TimestampMicrosecondType>::new(tz)?) 48 | } 49 | (TimeUnit::Nanosecond, Some(tz)) => { 50 | Box::new(TimestampTzToText::<TimestampNanosecondType>::new(tz)?) 
51 | } 52 | }; 53 | Ok(ws) 54 | } 55 | 56 | pub fn epoch_to_timestamp_ns(from: i64) -> Timestamp { 57 | let ndt = DateTime::from_timestamp_nanos(from); 58 | datetime_to_timestamp(ndt) 59 | } 60 | 61 | pub fn epoch_to_timestamp_us(from: i64) -> Timestamp { 62 | let ndt = 63 | DateTime::from_timestamp_micros(from).expect("Timestamp must be in range for microseconds"); 64 | datetime_to_timestamp(ndt) 65 | } 66 | 67 | pub fn epoch_to_timestamp_ms(from: i64) -> Timestamp { 68 | let ndt = 69 | DateTime::from_timestamp_millis(from).expect("Timestamp must be in range for milliseconds"); 70 | datetime_to_timestamp(ndt) 71 | } 72 | 73 | pub fn epoch_to_timestamp_s(from: i64) -> Timestamp { 74 | let ndt = DateTime::from_timestamp_millis(from * 1_000) 75 | .expect("Timestamp must be in range for milliseconds"); 76 | datetime_to_timestamp(ndt) 77 | } 78 | 79 | fn datetime_to_timestamp(ndt: DateTime) -> Timestamp { 80 | let date = ndt.date_naive(); 81 | let time = ndt.time(); 82 | Timestamp { 83 | year: date.year().try_into().unwrap(), 84 | month: date.month().try_into().unwrap(), 85 | day: date.day().try_into().unwrap(), 86 | hour: time.hour().try_into().unwrap(), 87 | minute: time.minute().try_into().unwrap(), 88 | second: time.second().try_into().unwrap(), 89 | fraction: time.nanosecond(), 90 | } 91 | } 92 | 93 | /// Strategy for writing a timestamp with timezone as text into the database. Microsoft SQL Server 94 | /// supports this via `SQL_SS_TIMESTAMPOFFSET`, yet this is an extension of the ODBC standard. So 95 | /// maybe for now we are safer just to write it as a string literal. 96 | pub struct TimestampTzToText
<P> { 97 | time_zone: Tz, 98 | _phantom: PhantomData<P>, 99 | } 100 | 101 | impl<P> TimestampTzToText<P>
{ 102 | pub fn new(time_zone: Arc<str>) -> Result<Self, WriterError> { 103 | let tz = time_zone.parse().map_err(|e| { 104 | debug!("Failed to parse time zone '{time_zone}'. Original error: {e}"); 105 | WriterError::InvalidTimeZone { time_zone } 106 | })?; 107 | Ok(Self { 108 | time_zone: tz, 109 | _phantom: PhantomData, 110 | }) 111 | } 112 | } 113 | 114 | impl<P>
WriteStrategy for TimestampTzToText<P>
115 | where 116 | P: ArrowPrimitiveType + InserableAsTimestampWithTimeZone, 117 | { 118 | fn buffer_desc(&self) -> BufferDesc { 119 | BufferDesc::Text { 120 | max_str_len: P::FORMAT_WITH_TIME_ZONE_LEN, 121 | } 122 | } 123 | 124 | fn write_rows( 125 | &self, 126 | param_offset: usize, 127 | column_buf: AnySliceMut<'_>, 128 | array: &dyn Array, 129 | ) -> Result<(), WriterError> { 130 | let from = array.as_any().downcast_ref::>().unwrap(); 131 | let mut to = column_buf.as_text_view().unwrap(); 132 | for (index, timestamp) in from.iter().enumerate() { 133 | if let Some(timestamp) = timestamp { 134 | let dt = P::to_regional_datetime(timestamp, &self.time_zone); 135 | write!( 136 | to.set_mut(index + param_offset, P::FORMAT_WITH_TIME_ZONE_LEN), 137 | "{}", 138 | dt.format(P::FORMAT_STRING), 139 | ) 140 | .unwrap(); 141 | } else { 142 | to.set_cell(index + param_offset, None) 143 | } 144 | } 145 | Ok(()) 146 | } 147 | } 148 | 149 | trait InserableAsTimestampWithTimeZone { 150 | /// Length of the string representation of a timestamp with time zone, e.g. "2023-10-01 12:34:56.789+02:00" 151 | const FORMAT_WITH_TIME_ZONE_LEN: usize; 152 | const FORMAT_STRING: &'static str; 153 | fn to_regional_datetime(epoch: i64, time_zone: &Tz) -> DateTime; 154 | } 155 | 156 | impl InserableAsTimestampWithTimeZone for TimestampSecondType { 157 | const FORMAT_WITH_TIME_ZONE_LEN: usize = 25; // "YYYY-MM-DD HH:MM:SS+00:00" 158 | const FORMAT_STRING: &'static str = "%Y-%m-%d %H:%M:%S%Z"; 159 | 160 | fn to_regional_datetime(epoch: i64, time_zone: &Tz) -> DateTime { 161 | time_zone 162 | .timestamp_opt(epoch, 0) 163 | .earliest() 164 | .expect("Timestamp must be in range for the timezone") 165 | } 166 | } 167 | 168 | impl InserableAsTimestampWithTimeZone for TimestampMillisecondType { 169 | const FORMAT_WITH_TIME_ZONE_LEN: usize = 29; // "YYYY-MM-DD HH:MM:SS.fff+00:00" 170 | const FORMAT_STRING: &'static str = "%Y-%m-%d %H:%M:%S.%3f%Z"; 171 | 172 | fn to_regional_datetime(epoch: i64, time_zone: &Tz) -> DateTime { 173 | let epoch_sec = epoch / 1_000; 174 | let nano = (epoch % 1_000) * 1_000_000; // Convert milliseconds to nanoseconds 175 | time_zone 176 | .timestamp_opt(epoch_sec, nano as u32) 177 | .earliest() 178 | .expect("Timestamp must be in range for the timezone") 179 | } 180 | } 181 | 182 | impl InserableAsTimestampWithTimeZone for TimestampMicrosecondType { 183 | const FORMAT_WITH_TIME_ZONE_LEN: usize = 32; // "YYYY-MM-DD HH:MM:SS.fff+00:00" 184 | const FORMAT_STRING: &'static str = "%Y-%m-%d %H:%M:%S.%6f%Z"; 185 | 186 | fn to_regional_datetime(epoch: i64, time_zone: &Tz) -> DateTime { 187 | let epoch_sec = epoch / 1_000_000; 188 | let nano = (epoch % 1_000_000) * 1_000; // Convert milliseconds to nanoseconds 189 | time_zone 190 | .timestamp_opt(epoch_sec, nano as u32) 191 | .earliest() 192 | .expect("Timestamp must be in range for the timezone") 193 | } 194 | } 195 | 196 | impl InserableAsTimestampWithTimeZone for TimestampNanosecondType { 197 | const FORMAT_WITH_TIME_ZONE_LEN: usize = 35; // "YYYY-MM-DD HH:MM:SS.fff+00:00" 198 | const FORMAT_STRING: &'static str = "%Y-%m-%d %H:%M:%S.%9f%Z"; 199 | 200 | fn to_regional_datetime(epoch: i64, time_zone: &Tz) -> DateTime { 201 | let epoch_sec = epoch / 1_000_000_000; 202 | let nano = epoch % 1_000_000_000; // Convert milliseconds to nanoseconds 203 | time_zone 204 | .timestamp_opt(epoch_sec, nano as u32) 205 | .earliest() 206 | .expect("Timestamp must be in range for the timezone") 207 | } 208 | } 209 | 
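// --- Illustrative sketch (not part of the upstream crate source) ---
// A minimal example of how the epoch-to-`Timestamp` helpers above are expected to behave.
// It only uses the public helpers defined in this module; the module name and the chosen
// input value are assumptions made for illustration.
#[cfg(test)]
mod conversion_example {
    use super::*;

    #[test]
    fn epoch_ms_maps_to_odbc_timestamp() {
        // 1_609_459_200_123 ms since the Unix epoch is 2021-01-01T00:00:00.123 UTC.
        let ts = epoch_to_timestamp_ms(1_609_459_200_123);
        assert_eq!(ts.year, 2021);
        assert_eq!(ts.month, 1);
        assert_eq!(ts.day, 1);
        assert_eq!(ts.hour, 0);
        assert_eq!(ts.second, 0);
        // The ODBC `Timestamp::fraction` field is expressed in nanoseconds.
        assert_eq!(ts.fraction, 123_000_000);
    }
}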
-------------------------------------------------------------------------------- /src/schema.rs: -------------------------------------------------------------------------------- 1 | use arrow::datatypes::{DataType as ArrowDataType, Field, Schema, TimeUnit}; 2 | use log::debug; 3 | use odbc_api::{ColumnDescription, DataType as OdbcDataType, ResultSetMetadata, sys::SqlDataType}; 4 | use std::convert::TryInto; 5 | 6 | use crate::{ColumnFailure, Error}; 7 | 8 | /// Query the metadata to create an arrow schema. This method is invoked automatically for you by 9 | /// [`crate::OdbcReaderBuilder::build`]. You may want to call this method in situation there you 10 | /// want to create an arrow schema without creating the reader yet. 11 | /// 12 | /// # Parameters 13 | /// 14 | /// * `result_set_metadata`: Used to query metadata about the columns in the result set, which is 15 | /// used to determine the arrow schema. 16 | /// * `dbms_name`: If provided, it is used to account for Database specific behavior than mapping 17 | /// types. Currently it is used to map `TIME` types from 'Microsoft SQL Server' to `Time32` or 18 | /// `Time64` 19 | /// * `map_value_errors_to_null`: In case falliable conversions should result in `NULL` the arrow 20 | /// field must be nullable, even if the source column on the database is not nullable. 21 | /// 22 | /// # Example 23 | /// 24 | /// ``` 25 | /// use anyhow::Error; 26 | /// 27 | /// use arrow_odbc::{arrow_schema_from, arrow::datatypes::Schema, odbc_api::Connection}; 28 | /// 29 | /// fn fetch_schema_for_table( 30 | /// table_name: &str, 31 | /// connection: &Connection<'_> 32 | /// ) -> Result { 33 | /// // Query column with values to get a cursor 34 | /// let sql = format!("SELECT * FROM {}", table_name); 35 | /// let mut prepared = connection.prepare(&sql)?; 36 | /// 37 | /// // Now that we have prepared statement, we want to use it to query metadata. 38 | /// let map_errors_to_null = false; 39 | /// let dbms_name = None; 40 | /// let schema = arrow_schema_from(&mut prepared, dbms_name, map_errors_to_null)?; 41 | /// Ok(schema) 42 | /// } 43 | /// ``` 44 | pub fn arrow_schema_from( 45 | result_set_metadata: &mut impl ResultSetMetadata, 46 | dbms_name: Option<&str>, 47 | map_value_errors_to_null: bool, 48 | ) -> Result { 49 | let num_cols: u16 = result_set_metadata 50 | .num_result_cols() 51 | .map_err(Error::UnableToRetrieveNumCols)? 52 | .try_into() 53 | .unwrap(); 54 | let mut fields = Vec::new(); 55 | for index in 0..num_cols { 56 | let field = arrow_field_from( 57 | result_set_metadata, 58 | dbms_name, 59 | index, 60 | map_value_errors_to_null, 61 | )?; 62 | 63 | fields.push(field) 64 | } 65 | Ok(Schema::new(fields)) 66 | } 67 | 68 | fn arrow_field_from( 69 | resut_set_metadata: &mut impl ResultSetMetadata, 70 | dbms_name: Option<&str>, 71 | index: u16, 72 | map_value_errors_to_null: bool, 73 | ) -> Result { 74 | let mut column_description = ColumnDescription::default(); 75 | resut_set_metadata 76 | .describe_col(index + 1, &mut column_description) 77 | .map_err(|cause| Error::ColumnFailure { 78 | name: "Unknown".to_owned(), 79 | index: index as usize, 80 | source: ColumnFailure::FailedToDescribeColumn(cause), 81 | })?; 82 | let name = column_description 83 | .name_to_string() 84 | .map_err(|source| Error::EncodingInvalid { source })?; 85 | debug!( 86 | "ODBC driver reported for column {index}. 
Relational type: {:?}; Nullability: {:?}; \ 87 | Name: '{name}';", 88 | column_description.data_type, column_description.nullability 89 | ); 90 | let data_type = match column_description.data_type { 91 | OdbcDataType::Numeric { 92 | precision: p @ 0..=38, 93 | scale, 94 | } 95 | | OdbcDataType::Decimal { 96 | precision: p @ 0..=38, 97 | scale, 98 | } => ArrowDataType::Decimal128(p as u8, scale.try_into().unwrap()), 99 | OdbcDataType::Integer => ArrowDataType::Int32, 100 | OdbcDataType::SmallInt => ArrowDataType::Int16, 101 | OdbcDataType::Real | OdbcDataType::Float { precision: 0..=24 } => ArrowDataType::Float32, 102 | OdbcDataType::Float { precision: _ } | OdbcDataType::Double => ArrowDataType::Float64, 103 | OdbcDataType::Date => ArrowDataType::Date32, 104 | OdbcDataType::Timestamp { precision: 0 } => { 105 | ArrowDataType::Timestamp(TimeUnit::Second, None) 106 | } 107 | OdbcDataType::Timestamp { precision: 1..=3 } => { 108 | ArrowDataType::Timestamp(TimeUnit::Millisecond, None) 109 | } 110 | OdbcDataType::Timestamp { precision: 4..=6 } => { 111 | ArrowDataType::Timestamp(TimeUnit::Microsecond, None) 112 | } 113 | OdbcDataType::Timestamp { precision: _ } => { 114 | ArrowDataType::Timestamp(TimeUnit::Nanosecond, None) 115 | } 116 | OdbcDataType::BigInt => ArrowDataType::Int64, 117 | OdbcDataType::TinyInt => { 118 | let is_unsigned = resut_set_metadata 119 | .column_is_unsigned(index + 1) 120 | .map_err(|e| Error::ColumnFailure { 121 | name: name.clone(), 122 | index: index as usize, 123 | source: ColumnFailure::FailedToDescribeColumn(e), 124 | })?; 125 | if is_unsigned { 126 | ArrowDataType::UInt8 127 | } else { 128 | ArrowDataType::Int8 129 | } 130 | } 131 | OdbcDataType::Bit => ArrowDataType::Boolean, 132 | OdbcDataType::Binary { length } => { 133 | let length = length 134 | .ok_or_else(|| Error::ColumnFailure { 135 | name: name.clone(), 136 | index: index as usize, 137 | source: ColumnFailure::ZeroSizedColumn { 138 | sql_type: OdbcDataType::Binary { length }, 139 | }, 140 | })? 141 | .get() 142 | .try_into() 143 | .unwrap(); 144 | ArrowDataType::FixedSizeBinary(length) 145 | } 146 | OdbcDataType::LongVarbinary { length: _ } | OdbcDataType::Varbinary { length: _ } => { 147 | ArrowDataType::Binary 148 | } 149 | OdbcDataType::Time { precision } => precision_to_time(precision), 150 | OdbcDataType::Other { 151 | data_type: SqlDataType(-154), 152 | column_size: _, 153 | decimal_digits, 154 | } => { 155 | if dbms_name.is_some_and(|name| name == "Microsoft SQL Server") { 156 | // SQL Server's -154 is used by Microsoft SQL Server for Timestamps without a time 157 | // zone. 158 | precision_to_time(decimal_digits) 159 | } else { 160 | // Other databases may use -154 for other purposes, so we treat it as a string. 161 | ArrowDataType::Utf8 162 | } 163 | } 164 | OdbcDataType::Other { 165 | data_type: SqlDataType(-98), 166 | column_size: _, 167 | decimal_digits: _, 168 | } => { 169 | // IBM DB2 names seem platform specific. E.g.; "DB2/LINUXX8664" 170 | if dbms_name.is_some_and(|name| name.starts_with("DB2/")) { 171 | // IBM DB2's -98 is used for binary blob types. 172 | ArrowDataType::Binary 173 | } else { 174 | // Other databases may use -98 for other purposes, so we treat it as a string. 175 | ArrowDataType::Utf8 176 | } 177 | } 178 | OdbcDataType::Unknown 179 | | OdbcDataType::Numeric { .. } 180 | | OdbcDataType::Decimal { .. 
} 181 | | OdbcDataType::Other { 182 | data_type: _, 183 | column_size: _, 184 | decimal_digits: _, 185 | } 186 | | OdbcDataType::WChar { length: _ } 187 | | OdbcDataType::Char { length: _ } 188 | | OdbcDataType::WVarchar { length: _ } 189 | | OdbcDataType::WLongVarchar { length: _ } 190 | | OdbcDataType::LongVarchar { length: _ } 191 | | OdbcDataType::Varchar { length: _ } => ArrowDataType::Utf8, 192 | }; 193 | let is_falliable = matches!(data_type, ArrowDataType::Timestamp(TimeUnit::Nanosecond, _)); 194 | let nullable = 195 | column_description.could_be_nullable() || (is_falliable && map_value_errors_to_null); 196 | let field = Field::new(name, data_type, nullable); 197 | Ok(field) 198 | } 199 | 200 | fn precision_to_time(precision: i16) -> ArrowDataType { 201 | match precision { 202 | 0 => ArrowDataType::Time32(TimeUnit::Second), 203 | 1..=3 => ArrowDataType::Time32(TimeUnit::Millisecond), 204 | 4..=6 => ArrowDataType::Time64(TimeUnit::Microsecond), 205 | 7..=9 => ArrowDataType::Time64(TimeUnit::Nanosecond), 206 | _ => ArrowDataType::Utf8, 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/reader/text.rs: -------------------------------------------------------------------------------- 1 | use std::{char::decode_utf16, cmp::min, num::NonZeroUsize, sync::Arc}; 2 | 3 | use arrow::array::{ArrayRef, StringBuilder}; 4 | use odbc_api::{ 5 | DataType as OdbcDataType, 6 | buffers::{AnySlice, BufferDesc}, 7 | }; 8 | 9 | use super::{ColumnFailure, MappingError, ReadStrategy}; 10 | 11 | /// This function decides wether this column will be queried as narrow (assumed to be utf-8) or 12 | /// wide text (assumed to be utf-16). The reason we do not always use narrow is that the encoding 13 | /// dependends on the system locals which is usually not UTF-8 on windows systems. Furthermore we 14 | /// are trying to adapt the buffer size to the maximum string length the column could contain. 15 | pub fn choose_text_strategy( 16 | sql_type: OdbcDataType, 17 | lazy_display_size: impl FnOnce() -> Result, odbc_api::Error>, 18 | max_text_size: Option, 19 | trim_fixed_sized_character_strings: bool, 20 | text_encoding: TextEncoding, 21 | ) -> Result, ColumnFailure> { 22 | let apply_buffer_limit = |len| match (len, max_text_size) { 23 | (None, None) => Err(ColumnFailure::ZeroSizedColumn { sql_type }), 24 | (None, Some(limit)) => Ok(limit), 25 | (Some(len), None) => Ok(len), 26 | (Some(len), Some(limit)) => Ok(min(len, limit)), 27 | }; 28 | let is_fixed_sized_char = matches!( 29 | sql_type, 30 | OdbcDataType::Char { .. } | OdbcDataType::WChar { .. } 31 | ); 32 | let trim = trim_fixed_sized_character_strings && is_fixed_sized_char; 33 | let strategy: Box = if text_encoding.use_utf16() { 34 | let hex_len = sql_type 35 | .utf16_len() 36 | .map(Ok) 37 | .or_else(|| lazy_display_size().transpose()) 38 | .transpose() 39 | .map_err(|source| ColumnFailure::UnknownStringLength { sql_type, source })?; 40 | let hex_len = apply_buffer_limit(hex_len.map(NonZeroUsize::get))?; 41 | wide_text_strategy(hex_len, trim) 42 | } else { 43 | let octet_len = sql_type 44 | .utf8_len() 45 | .map(Ok) 46 | .or_else(|| lazy_display_size().transpose()) 47 | .transpose() 48 | .map_err(|source| ColumnFailure::UnknownStringLength { sql_type, source })?; 49 | let octet_len = apply_buffer_limit(octet_len.map(NonZeroUsize::get))?; 50 | // So far only Linux users seemed to have complained about panics due to garbage indices? 
51 | // Linux usually would use UTF-8, so we only invest work in working around this for narrow 52 | // strategies 53 | narrow_text_strategy(octet_len, trim) 54 | }; 55 | 56 | Ok(strategy) 57 | } 58 | 59 | /// Used to indicate the preferred encoding for text columns. 60 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 61 | pub enum TextEncoding { 62 | /// Evaluates to [`Self::Utf16`] on windows and [`Self::Utf8`] on other systems. We do this, 63 | /// because most systems e.g. MacOs and Linux use UTF-8 as their default encoding, while windows 64 | /// may still use a Latin1 or some other extended ASCII as their narrow encoding. On the other 65 | /// hand many Posix drivers are lacking in their support for wide function calls and UTF-16. So 66 | /// using `Wide` on windows and `Narrow` everythere else is a good starting point. 67 | Auto, 68 | /// Use narrow characters (one byte) to encode text in payloads. ODBC lets the client choose the 69 | /// encoding which should be based on the system local. This is often not what is actually 70 | /// happening though. If we use narrow encoding, we assume the text to be UTF-8 and error if we 71 | /// find that not to be the case. 72 | Utf8, 73 | /// Use wide characters (two bytes) to encode text in payloads. ODBC defines the encoding to 74 | /// be always UTF-16. 75 | Utf16, 76 | } 77 | 78 | impl Default for TextEncoding { 79 | fn default() -> Self { 80 | Self::Auto 81 | } 82 | } 83 | 84 | impl TextEncoding { 85 | pub fn use_utf16(&self) -> bool { 86 | match self { 87 | Self::Auto => cfg!(target_os = "windows"), 88 | Self::Utf8 => false, 89 | Self::Utf16 => true, 90 | } 91 | } 92 | } 93 | 94 | fn wide_text_strategy(u16_len: usize, trim: bool) -> Box { 95 | Box::new(WideText::new(u16_len, trim)) 96 | } 97 | 98 | fn narrow_text_strategy(octet_len: usize, trim: bool) -> Box { 99 | Box::new(NarrowText::new(octet_len, trim)) 100 | } 101 | 102 | /// Strategy requesting the text from the database as UTF-16 (Wide characters) and emmitting it as 103 | /// UTF-8. We use it, since the narrow representation in ODBC is not always guaranteed to be UTF-8, 104 | /// but depends on the local instead. 105 | pub struct WideText { 106 | /// Maximum string length in u16, excluding terminating zero 107 | max_str_len: usize, 108 | /// Wether the string should be trimmed. 109 | trim: bool, 110 | } 111 | 112 | impl WideText { 113 | pub fn new(max_str_len: usize, trim: bool) -> Self { 114 | Self { max_str_len, trim } 115 | } 116 | } 117 | 118 | impl ReadStrategy for WideText { 119 | fn buffer_desc(&self) -> BufferDesc { 120 | BufferDesc::WText { 121 | max_str_len: self.max_str_len, 122 | } 123 | } 124 | 125 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 126 | let view = column_view.as_w_text_view().unwrap(); 127 | let item_capacity = view.len(); 128 | // Any utf-16 character could take up to 4 Bytes if represented as utf-8, but since mostly 129 | // this is 1 to one, and also not every string is likeyl to use its maximum capacity, we 130 | // rather accept the reallocation in these scenarios. 131 | let data_capacity = self.max_str_len * item_capacity; 132 | let mut builder = StringBuilder::with_capacity(item_capacity, data_capacity); 133 | // Buffer used to convert individual values from utf16 to utf8. 
134 | let mut buf_utf8 = String::new(); 135 | for value in view.iter() { 136 | buf_utf8.clear(); 137 | let opt = if let Some(utf16) = value { 138 | for c in decode_utf16(utf16.as_slice().iter().cloned()) { 139 | buf_utf8.push(c.unwrap()); 140 | } 141 | let slice = if self.trim { 142 | buf_utf8.trim() 143 | } else { 144 | buf_utf8.as_str() 145 | }; 146 | Some(slice) 147 | } else { 148 | None 149 | }; 150 | builder.append_option(opt); 151 | } 152 | Ok(Arc::new(builder.finish())) 153 | } 154 | } 155 | 156 | pub struct NarrowText { 157 | /// Maximum string length in u8, excluding terminating zero 158 | max_str_len: usize, 159 | /// Wether the string should be trimmed. 160 | trim: bool, 161 | } 162 | 163 | impl NarrowText { 164 | pub fn new(max_str_len: usize, trim: bool) -> Self { 165 | Self { max_str_len, trim } 166 | } 167 | } 168 | 169 | impl ReadStrategy for NarrowText { 170 | fn buffer_desc(&self) -> BufferDesc { 171 | BufferDesc::Text { 172 | max_str_len: self.max_str_len, 173 | } 174 | } 175 | 176 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 177 | let view = column_view.as_text_view().unwrap(); 178 | let mut builder = StringBuilder::with_capacity(view.len(), self.max_str_len * view.len()); 179 | for value in view.iter() { 180 | builder.append_option( 181 | value 182 | .map(|bytes| { 183 | let untrimmed = 184 | std::str::from_utf8(bytes).map_err(|_| MappingError::InvalidUtf8 { 185 | lossy_value: String::from_utf8_lossy(bytes).into_owned(), 186 | })?; 187 | Ok(if self.trim { 188 | untrimmed.trim() 189 | } else { 190 | untrimmed 191 | }) 192 | }) 193 | .transpose()?, 194 | ); 195 | } 196 | Ok(Arc::new(builder.finish())) 197 | } 198 | } 199 | 200 | #[cfg(test)] 201 | mod tests { 202 | use odbc_api::buffers::{AnySlice, ColumnBuffer, TextColumn}; 203 | 204 | use crate::reader::{MappingError, ReadStrategy as _}; 205 | 206 | use super::NarrowText; 207 | 208 | #[test] 209 | fn must_return_error_for_invalid_utf8() { 210 | // Given a slice with invalid utf-8 211 | let mut column = TextColumn::new(1, 10); 212 | column.set_value(0, Some(&[b'H', b'e', b'l', b'l', b'o', 0xc3])); 213 | let column_view = AnySlice::Text(column.view(1)); 214 | 215 | // When 216 | let strategy = NarrowText::new(5, false); 217 | let result = strategy.fill_arrow_array(column_view); 218 | 219 | // Then 220 | let error = result.unwrap_err(); 221 | let MappingError::InvalidUtf8 { lossy_value } = error else { 222 | panic!("Not an InvalidUtf8 error") 223 | }; 224 | assert_eq!(lossy_value, "Hello�"); 225 | } 226 | } 227 | -------------------------------------------------------------------------------- /src/reader/map_odbc_to_arrow.rs: -------------------------------------------------------------------------------- 1 | use std::{marker::PhantomData, sync::Arc}; 2 | 3 | use arrow::{ 4 | array::{ArrayRef, PrimitiveBuilder}, 5 | datatypes::ArrowPrimitiveType, 6 | }; 7 | use chrono::NaiveDateTime; 8 | use odbc_api::buffers::{AnySlice, BufferDesc, Item}; 9 | use thiserror::Error; 10 | 11 | use super::ReadStrategy; 12 | 13 | /// Extend an arrow primitive type to serve as a builder for Read strategies. 14 | pub trait MapOdbcToArrow { 15 | type ArrowElement; 16 | 17 | /// Use the provided function to convert an element of an ODBC column buffer into the desired 18 | /// element of an arrow array. This method assumes the conversion is falliable. 
19 | fn map_falliable( 20 | nullable: bool, 21 | map_errors_to_null: bool, 22 | odbc_to_arrow: impl Fn(&U) -> Result + 'static + Send, 23 | ) -> Box 24 | where 25 | U: Item + 'static + Send; 26 | 27 | /// Use the infallible function provided to convert an element of an ODBC column buffer into the 28 | /// desired element of an arrow array. 29 | fn map_infalliable( 30 | nullable: bool, 31 | odbc_to_arrow: impl Fn(&U) -> Self::ArrowElement + 'static + Send, 32 | ) -> Box 33 | where 34 | U: Item + 'static + Send; 35 | 36 | /// Should the arrow array element be identical to an item in the ODBC buffer no mapping is 37 | /// needed. We still need to account for nullability. 38 | fn identical(nullable: bool) -> Box 39 | where 40 | Self::ArrowElement: Item; 41 | } 42 | 43 | impl MapOdbcToArrow for T 44 | where 45 | T: ArrowPrimitiveType + Send, 46 | { 47 | type ArrowElement = T::Native; 48 | 49 | fn map_falliable( 50 | nullable: bool, 51 | map_errors_to_null: bool, 52 | odbc_to_arrow: impl Fn(&U) -> Result + 'static + Send, 53 | ) -> Box 54 | where 55 | U: Item + 'static + Send, 56 | { 57 | if map_errors_to_null { 58 | return Box::new(ErrorToNullStrategy::::new(odbc_to_arrow)); 59 | } 60 | 61 | if nullable { 62 | return Box::new(NullableStrategy::::new(odbc_to_arrow)); 63 | } 64 | 65 | Box::new(NonNullableStrategy::::new(odbc_to_arrow)) 66 | } 67 | 68 | fn map_infalliable( 69 | nullable: bool, 70 | odbc_to_arrow: impl Fn(&U) -> Self::ArrowElement + 'static + Send, 71 | ) -> Box 72 | where 73 | U: Item + 'static + Send, 74 | { 75 | if nullable { 76 | Box::new(NullableStrategy::::new(OkWrappedMapped( 77 | odbc_to_arrow, 78 | ))) 79 | } else { 80 | Box::new(NonNullableStrategy::::new(OkWrappedMapped( 81 | odbc_to_arrow, 82 | ))) 83 | } 84 | } 85 | 86 | fn identical(nullable: bool) -> Box 87 | where 88 | Self::ArrowElement: Item, 89 | { 90 | if nullable { 91 | Box::new(NullableDirectStrategy::::new()) 92 | } else { 93 | Box::new(NonNullDirectStrategy::::new()) 94 | } 95 | } 96 | } 97 | 98 | /// We introduce this trait instead of using the Fn(...) trait syntax directly, in order to being 99 | /// able to provide an implementation for `OkWrappedMapped`. Which in turn we need to reuse our 100 | /// Strategy implementations for falliable and infalliable cases. 101 | /// 102 | /// We could save our selves all of this if Rust would be better at figuring out then to promote 103 | /// the lifetimes in closures to higher order liftimes, but at time of writing this, I've not been 104 | /// able to Ok wrapping with a straight forward lambda `|e| Ok(f(e))``. (Current version 1.79). 105 | /// 106 | /// Since Fn traits can not be implemented manually either we introduce this one. 
107 | trait MapElement { 108 | fn map_element(&self, odbc: &O) -> Result; 109 | } 110 | 111 | impl MapElement for T 112 | where 113 | T: Fn(&O) -> Result, 114 | { 115 | fn map_element(&self, odbc: &O) -> Result { 116 | self(odbc) 117 | } 118 | } 119 | 120 | struct OkWrappedMapped(F); 121 | 122 | impl MapElement for OkWrappedMapped 123 | where 124 | F: Fn(&O) -> A, 125 | { 126 | fn map_element(&self, odbc: &O) -> Result { 127 | Ok((self.0)(odbc)) 128 | } 129 | } 130 | 131 | struct NonNullDirectStrategy { 132 | phantom: PhantomData, 133 | } 134 | 135 | impl NonNullDirectStrategy { 136 | fn new() -> Self { 137 | Self { 138 | phantom: PhantomData, 139 | } 140 | } 141 | } 142 | 143 | impl ReadStrategy for NonNullDirectStrategy 144 | where 145 | T: ArrowPrimitiveType + Send, 146 | T::Native: Item, 147 | { 148 | fn buffer_desc(&self) -> BufferDesc { 149 | T::Native::buffer_desc(false) 150 | } 151 | 152 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 153 | let slice = T::Native::as_slice(column_view).unwrap(); 154 | let mut builder = PrimitiveBuilder::::with_capacity(slice.len()); 155 | builder.append_slice(slice); 156 | Ok(Arc::new(builder.finish())) 157 | } 158 | } 159 | 160 | struct NullableDirectStrategy { 161 | phantom: PhantomData, 162 | } 163 | 164 | impl NullableDirectStrategy { 165 | fn new() -> Self { 166 | Self { 167 | phantom: PhantomData, 168 | } 169 | } 170 | } 171 | 172 | impl ReadStrategy for NullableDirectStrategy 173 | where 174 | T: ArrowPrimitiveType + Send, 175 | T::Native: Item, 176 | { 177 | fn buffer_desc(&self) -> BufferDesc { 178 | T::Native::buffer_desc(true) 179 | } 180 | 181 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 182 | let values = T::Native::as_nullable_slice(column_view).unwrap(); 183 | let mut builder = PrimitiveBuilder::::with_capacity(values.len()); 184 | for value in values { 185 | builder.append_option(value.copied()); 186 | } 187 | Ok(Arc::new(builder.finish())) 188 | } 189 | } 190 | 191 | struct NonNullableStrategy { 192 | _primitive_type: PhantomData
<P>
, 193 | _odbc_item: PhantomData, 194 | odbc_to_arrow: F, 195 | } 196 | 197 | impl NonNullableStrategy { 198 | fn new(odbc_to_arrow: F) -> Self { 199 | Self { 200 | _primitive_type: PhantomData, 201 | _odbc_item: PhantomData, 202 | odbc_to_arrow, 203 | } 204 | } 205 | } 206 | 207 | impl ReadStrategy for NonNullableStrategy 208 | where 209 | P: ArrowPrimitiveType + Send, 210 | O: Item + Send, 211 | F: MapElement, 212 | { 213 | fn buffer_desc(&self) -> BufferDesc { 214 | O::buffer_desc(false) 215 | } 216 | 217 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 218 | let slice = column_view.as_slice::().unwrap(); 219 | let mut builder = PrimitiveBuilder::
<P>
::with_capacity(slice.len()); 220 | for odbc_value in slice { 221 | builder.append_value(self.odbc_to_arrow.map_element(odbc_value)?); 222 | } 223 | Ok(Arc::new(builder.finish())) 224 | } 225 | } 226 | 227 | struct NullableStrategy { 228 | _primitive_type: PhantomData
<P>
, 229 | _odbc_item: PhantomData, 230 | odbc_to_arrow: F, 231 | } 232 | 233 | impl NullableStrategy { 234 | fn new(odbc_to_arrow: F) -> Self { 235 | Self { 236 | _primitive_type: PhantomData, 237 | _odbc_item: PhantomData, 238 | odbc_to_arrow, 239 | } 240 | } 241 | } 242 | 243 | impl ReadStrategy for NullableStrategy 244 | where 245 | P: ArrowPrimitiveType + Send, 246 | O: Item + Send, 247 | F: MapElement, 248 | { 249 | fn buffer_desc(&self) -> BufferDesc { 250 | O::buffer_desc(true) 251 | } 252 | 253 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 254 | let opts = column_view.as_nullable_slice::().unwrap(); 255 | let mut builder = PrimitiveBuilder::
<P>
::with_capacity(opts.len()); 256 | for odbc_opt in opts { 257 | builder.append_option( 258 | odbc_opt 259 | .map(|odbc_element| self.odbc_to_arrow.map_element(odbc_element)) 260 | .transpose()?, 261 | ); 262 | } 263 | Ok(Arc::new(builder.finish())) 264 | } 265 | } 266 | 267 | /// Map invalid values to `NULL` rather than emitting a [`MappingError`]`. 268 | struct ErrorToNullStrategy { 269 | _primitive_type: PhantomData
<P>
, 270 | _odbc_item: PhantomData, 271 | odbc_to_arrow: F, 272 | } 273 | 274 | impl ErrorToNullStrategy { 275 | fn new(odbc_to_arrow: F) -> Self { 276 | Self { 277 | _primitive_type: PhantomData, 278 | _odbc_item: PhantomData, 279 | odbc_to_arrow, 280 | } 281 | } 282 | } 283 | 284 | impl ReadStrategy for ErrorToNullStrategy 285 | where 286 | P: ArrowPrimitiveType + Send, 287 | O: Item + Send, 288 | F: Fn(&O) -> Result + Send, 289 | { 290 | fn buffer_desc(&self) -> BufferDesc { 291 | O::buffer_desc(true) 292 | } 293 | 294 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 295 | let opts = column_view.as_nullable_slice::().unwrap(); 296 | let mut builder = PrimitiveBuilder::
<P>
::with_capacity(opts.len()); 297 | for odbc_opt in opts { 298 | builder.append_option(odbc_opt.and_then(|val| (self.odbc_to_arrow)(val).ok())); 299 | } 300 | Ok(Arc::new(builder.finish())) 301 | } 302 | } 303 | 304 | /// The source value returned from the ODBC datasource is out of range and can not be mapped into 305 | /// its Arrow target type. 306 | #[derive(Error, Debug)] 307 | pub enum MappingError { 308 | #[error( 309 | "Timestamp is not representable in arrow: {value}\n\ 310 | Timestamps with nanoseconds precision are represented using a signed 64 Bit integer. This \ 311 | limits their range to values between 1677-09-21 00:12:44 and 2262-04-11 \ 312 | 23:47:16.854775807. The value returned from the database is outside of this range. \ 313 | Suggestions to fix this error either reduce the precision or fetch the values as text." 314 | )] 315 | OutOfRangeTimestampNs { value: NaiveDateTime }, 316 | #[error("Datasource returned invalid UTF-8. Lossy representation of value: {lossy_value}")] 317 | InvalidUtf8 { lossy_value: String }, 318 | } 319 | -------------------------------------------------------------------------------- /src/reader.rs: -------------------------------------------------------------------------------- 1 | use std::{convert::TryInto, sync::Arc}; 2 | 3 | use arrow::{ 4 | array::{ArrayRef, BooleanBuilder}, 5 | datatypes::{ 6 | DataType as ArrowDataType, Date32Type, Field, Float32Type, Float64Type, Int8Type, 7 | Int16Type, Int32Type, Int64Type, Time32SecondType, TimeUnit, TimestampMicrosecondType, 8 | TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt8Type, 9 | }, 10 | }; 11 | 12 | use log::debug; 13 | use odbc_api::{ 14 | Bit, DataType as OdbcDataType, ResultSetMetadata, 15 | buffers::{AnySlice, BufferDesc, Item}, 16 | }; 17 | use thiserror::Error; 18 | use time::{TimeMsI32, TimeNsI64, TimeUsI64, seconds_since_midnight}; 19 | 20 | mod binary; 21 | mod concurrent_odbc_reader; 22 | mod decimal; 23 | mod map_odbc_to_arrow; 24 | mod odbc_reader; 25 | mod text; 26 | mod time; 27 | mod to_record_batch; 28 | 29 | use crate::date_time::{ 30 | days_since_epoch, ms_since_epoch, ns_since_epoch, seconds_since_epoch, us_since_epoch, 31 | }; 32 | 33 | pub use self::{ 34 | binary::{Binary, FixedSizedBinary}, 35 | concurrent_odbc_reader::ConcurrentOdbcReader, 36 | decimal::Decimal, 37 | map_odbc_to_arrow::{MapOdbcToArrow, MappingError}, 38 | odbc_reader::{OdbcReader, OdbcReaderBuilder}, 39 | text::{TextEncoding, choose_text_strategy}, 40 | }; 41 | 42 | /// All decisions needed to copy data from an ODBC buffer to an Arrow Array 43 | pub trait ReadStrategy { 44 | /// Describes the buffer which is bound to the ODBC cursor. 45 | fn buffer_desc(&self) -> BufferDesc; 46 | 47 | /// Create an arrow array from an ODBC buffer described in [`Self::buffer_description`]. 
48 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result; 49 | } 50 | 51 | pub struct NonNullableBoolean; 52 | 53 | impl ReadStrategy for NonNullableBoolean { 54 | fn buffer_desc(&self) -> BufferDesc { 55 | BufferDesc::Bit { nullable: false } 56 | } 57 | 58 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 59 | let values = Bit::as_slice(column_view).unwrap(); 60 | let mut builder = BooleanBuilder::new(); 61 | for bit in values { 62 | builder.append_value(bit.as_bool()); 63 | } 64 | Ok(Arc::new(builder.finish())) 65 | } 66 | } 67 | 68 | pub struct NullableBoolean; 69 | 70 | impl ReadStrategy for NullableBoolean { 71 | fn buffer_desc(&self) -> BufferDesc { 72 | BufferDesc::Bit { nullable: true } 73 | } 74 | 75 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 76 | let values = Bit::as_nullable_slice(column_view).unwrap(); 77 | let mut builder = BooleanBuilder::new(); 78 | for bit in values { 79 | builder.append_option(bit.copied().map(Bit::as_bool)) 80 | } 81 | Ok(Arc::new(builder.finish())) 82 | } 83 | } 84 | 85 | /// Allows setting limits for buffers bound to the ODBC data source. Check this out if you find that 86 | /// you get memory allocation, or zero sized column errors. Used than constructing a reader using 87 | /// [`crate::OdbcReaderBuilder`]. 88 | #[derive(Default, Debug, Clone, Copy)] 89 | pub struct BufferAllocationOptions { 90 | /// An upper limit for the size of buffers bound to variadic text columns of the data source. 91 | /// This limit does not (directly) apply to the size of the created arrow buffers, but rather 92 | /// applies to the buffers used for the data in transit. Use this option if you have e.g. 93 | /// `VARCHAR(MAX)` fields in your database schema. In such a case without an upper limit, the 94 | /// ODBC driver of your data source is asked for the maximum size of an element, and is likely 95 | /// to answer with either `0` or a value which is way larger than any actual entry in the column 96 | /// If you can not adapt your database schema, this limit might be what you are looking for. On 97 | /// windows systems the size is double words (16Bit), as windows utilizes an UTF-16 encoding. So 98 | /// this translates to roughly the size in letters. On non windows systems this is the size in 99 | /// bytes and the datasource is assumed to utilize an UTF-8 encoding. `None` means no upper 100 | /// limit is set and the maximum element size, reported by ODBC is used to determine buffer 101 | /// sizes. 102 | pub max_text_size: Option, 103 | /// An upper limit for the size of buffers bound to variadic binary columns of the data source. 104 | /// This limit does not (directly) apply to the size of the created arrow buffers, but rather 105 | /// applies to the buffers used for the data in transit. Use this option if you have e.g. 106 | /// `VARBINARY(MAX)` fields in your database schema. In such a case without an upper limit, the 107 | /// ODBC driver of your data source is asked for the maximum size of an element, and is likely 108 | /// to answer with either `0` or a value which is way larger than any actual entry in the 109 | /// column. If you can not adapt your database schema, this limit might be what you are looking 110 | /// for. This is the maximum size in bytes of the binary column. 111 | pub max_binary_size: Option, 112 | /// Set to `true` in order to trigger an [`ColumnFailure::TooLarge`] instead of a panic in case 113 | /// the buffers can not be allocated due to their size. 
This might have a performance cost for 114 | /// constructing the reader. `false` by default. 115 | pub fallibale_allocations: bool, 116 | } 117 | 118 | pub fn choose_column_strategy( 119 | field: &Field, 120 | query_metadata: &mut impl ResultSetMetadata, 121 | col_index: u16, 122 | buffer_allocation_options: BufferAllocationOptions, 123 | map_value_errors_to_null: bool, 124 | trim_fixed_sized_character_strings: bool, 125 | text_encoding: TextEncoding, 126 | ) -> Result, ColumnFailure> { 127 | let strat: Box = match field.data_type() { 128 | ArrowDataType::Boolean => { 129 | if field.is_nullable() { 130 | Box::new(NullableBoolean) 131 | } else { 132 | Box::new(NonNullableBoolean) 133 | } 134 | } 135 | ArrowDataType::Int8 => Int8Type::identical(field.is_nullable()), 136 | ArrowDataType::Int16 => Int16Type::identical(field.is_nullable()), 137 | ArrowDataType::Int32 => Int32Type::identical(field.is_nullable()), 138 | ArrowDataType::Int64 => Int64Type::identical(field.is_nullable()), 139 | ArrowDataType::UInt8 => UInt8Type::identical(field.is_nullable()), 140 | ArrowDataType::Float32 => Float32Type::identical(field.is_nullable()), 141 | ArrowDataType::Float64 => Float64Type::identical(field.is_nullable()), 142 | ArrowDataType::Date32 => Date32Type::map_infalliable(field.is_nullable(), days_since_epoch), 143 | ArrowDataType::Time32(TimeUnit::Second) => { 144 | Time32SecondType::map_infalliable(field.is_nullable(), seconds_since_midnight) 145 | } 146 | ArrowDataType::Time32(TimeUnit::Millisecond) => Box::new(TimeMsI32), 147 | ArrowDataType::Time64(TimeUnit::Microsecond) => Box::new(TimeUsI64), 148 | ArrowDataType::Time64(TimeUnit::Nanosecond) => Box::new(TimeNsI64), 149 | ArrowDataType::Utf8 => { 150 | let sql_type = query_metadata 151 | .col_data_type(col_index) 152 | .map_err(ColumnFailure::FailedToDescribeColumn)?; 153 | // Use a zero based index here, because we use it everywhere else there we communicate 154 | // with users. 155 | debug!("Relational type of column {}: {sql_type:?}", col_index - 1); 156 | let lazy_display_size = || query_metadata.col_display_size(col_index); 157 | // Use the SQL type first to determine buffer length. 158 | choose_text_strategy( 159 | sql_type, 160 | lazy_display_size, 161 | buffer_allocation_options.max_text_size, 162 | trim_fixed_sized_character_strings, 163 | text_encoding, 164 | )? 165 | } 166 | ArrowDataType::Decimal128(precision, scale @ 0..) 
=> { 167 | Box::new(Decimal::new(*precision, *scale)) 168 | } 169 | ArrowDataType::Binary => { 170 | let sql_type = query_metadata 171 | .col_data_type(col_index) 172 | .map_err(ColumnFailure::FailedToDescribeColumn)?; 173 | let length = sql_type.column_size(); 174 | let length = match (length, buffer_allocation_options.max_binary_size) { 175 | (None, None) => return Err(ColumnFailure::ZeroSizedColumn { sql_type }), 176 | (None, Some(limit)) => limit, 177 | (Some(len), None) => len.get(), 178 | (Some(len), Some(limit)) => { 179 | if len.get() < limit { 180 | len.get() 181 | } else { 182 | limit 183 | } 184 | } 185 | }; 186 | Box::new(Binary::new(length)) 187 | } 188 | ArrowDataType::Timestamp(TimeUnit::Second, _) => { 189 | TimestampSecondType::map_infalliable(field.is_nullable(), seconds_since_epoch) 190 | } 191 | ArrowDataType::Timestamp(TimeUnit::Millisecond, _) => { 192 | TimestampMillisecondType::map_infalliable(field.is_nullable(), ms_since_epoch) 193 | } 194 | ArrowDataType::Timestamp(TimeUnit::Microsecond, _) => { 195 | TimestampMicrosecondType::map_infalliable(field.is_nullable(), us_since_epoch) 196 | } 197 | ArrowDataType::Timestamp(TimeUnit::Nanosecond, _) => { 198 | TimestampNanosecondType::map_falliable( 199 | field.is_nullable(), 200 | map_value_errors_to_null, 201 | ns_since_epoch, 202 | ) 203 | } 204 | ArrowDataType::FixedSizeBinary(length) => { 205 | Box::new(FixedSizedBinary::new((*length).try_into().unwrap())) 206 | } 207 | unsupported_arrow_type => { 208 | return Err(ColumnFailure::UnsupportedArrowType( 209 | unsupported_arrow_type.clone(), 210 | )); 211 | } 212 | }; 213 | Ok(strat) 214 | } 215 | 216 | /// Read error related to a specific column 217 | #[derive(Error, Debug)] 218 | pub enum ColumnFailure { 219 | /// We are getting a display or column size from ODBC but it is not larger than 0. 220 | #[error( 221 | "The ODBC driver did not specify a sensible upper bound for the column. This usually \ 222 | happens for large variadic types (E.g. VARCHAR(max)). In other cases it can be a \ 223 | shortcoming of the ODBC driver. Try casting the column into a type with a sensible upper \ 224 | bound. `arrow-odbc` also allows the application to specify a generic upper bound, which it \ 225 | would automatically apply. The type of the column causing this error is {:?}.", 226 | sql_type 227 | )] 228 | ZeroSizedColumn { sql_type: OdbcDataType }, 229 | /// Unable to retrieve the column display size for the column. 230 | #[error( 231 | "Unable to deduce the maximum string length for the SQL Data Type reported by the ODBC \ 232 | driver. Reported SQL data type is: {:?}.\n Error fetching column display or octet size: \ 233 | {source}", 234 | sql_type 235 | )] 236 | UnknownStringLength { 237 | sql_type: OdbcDataType, 238 | source: odbc_api::Error, 239 | }, 240 | /// The type specified in the arrow schema is not supported to be fetched from the database. 241 | #[error( 242 | "Unsupported arrow type: `{0}`. This type can currently not be fetched from an ODBC data \ 243 | source by an instance of OdbcReader." 244 | )] 245 | UnsupportedArrowType(ArrowDataType), 246 | /// At ODBC api calls gaining information about the columns did fail. 247 | #[error( 248 | "An error occurred fetching the column description or data type from the metainformation \ 249 | attached to the ODBC result set:\n{0}" 250 | )] 251 | FailedToDescribeColumn(#[source] odbc_api::Error), 252 | #[error( 253 | "Column buffer is too large to be allocated. 
Tried to alloacte {num_elements} elements \ 254 | with {element_size} bytes in size each." 255 | )] 256 | TooLarge { 257 | num_elements: usize, 258 | element_size: usize, 259 | }, 260 | } 261 | 262 | impl ColumnFailure { 263 | /// Provides the error with additional context of Error with column name and index. 264 | pub fn into_crate_error(self, name: String, index: usize) -> crate::Error { 265 | crate::Error::ColumnFailure { 266 | name, 267 | index, 268 | source: self, 269 | } 270 | } 271 | } 272 | -------------------------------------------------------------------------------- /src/odbc_writer.rs: -------------------------------------------------------------------------------- 1 | use std::{borrow::Cow, cmp::min, sync::Arc}; 2 | 3 | use thiserror::Error; 4 | 5 | use arrow::{ 6 | array::Array, 7 | datatypes::{ 8 | DataType, Date32Type, Date64Type, Field, Float16Type, Float32Type, Float64Type, Int8Type, 9 | Int16Type, Int32Type, Int64Type, Schema, Time32MillisecondType, Time32SecondType, 10 | Time64MicrosecondType, Time64NanosecondType, TimeUnit, UInt8Type, 11 | }, 12 | error::ArrowError, 13 | record_batch::{RecordBatch, RecordBatchReader}, 14 | }; 15 | use odbc_api::{ 16 | ColumnarBulkInserter, Connection, ConnectionTransitions, Prepared, 17 | buffers::{AnyBuffer, AnySliceMut, BufferDesc}, 18 | handles::{AsStatementRef, StatementConnection, StatementImpl, StatementParent}, 19 | }; 20 | 21 | use crate::{ 22 | date_time::{NullableTimeAsText, epoch_to_date, sec_since_midnight_to_time}, 23 | decimal::{NullableDecimal128AsText, NullableDecimal256AsText}, 24 | odbc_writer::timestamp::insert_timestamp_strategy, 25 | }; 26 | 27 | use self::{ 28 | binary::VariadicBinary, 29 | boolean::boolean_to_bit, 30 | map_arrow_to_odbc::MapArrowToOdbc, 31 | text::{LargeUtf8ToNativeText, Utf8ToNativeText}, 32 | }; 33 | 34 | mod binary; 35 | mod boolean; 36 | mod map_arrow_to_odbc; 37 | mod text; 38 | mod timestamp; 39 | 40 | /// Fastest and most convinient way to stream the contents of arrow record batches into a database 41 | /// table. For usecase there you want to insert repeatedly into the same table from different 42 | /// streams it is more efficient to create an instance of [`self::OdbcWriter`] and reuse it. 43 | /// 44 | /// **Note:** 45 | /// 46 | /// If table or column names are derived from user input, be sure to sanatize the input in order to 47 | /// prevent SQL injection attacks. 48 | pub fn insert_into_table( 49 | connection: &Connection, 50 | batches: &mut impl RecordBatchReader, 51 | table_name: &str, 52 | batch_size: usize, 53 | ) -> Result<(), WriterError> { 54 | let schema = batches.schema(); 55 | let mut inserter = 56 | OdbcWriter::with_connection(connection, schema.as_ref(), table_name, batch_size)?; 57 | inserter.write_all(batches) 58 | } 59 | 60 | /// Generates an insert statement using the table and column names. 61 | /// 62 | /// `INSERT INTO (, , ...) VALUES (?, ?, ...)` 63 | fn insert_statement_text(table: &str, column_names: &[&'_ str]) -> String { 64 | // Generate statement text from table name and headline 65 | let column_names = column_names 66 | .iter() 67 | .map(|cn| quote_column_name(cn)) 68 | .collect::>(); 69 | let columns = column_names.join(", "); 70 | let values = column_names 71 | .iter() 72 | .map(|_| "?") 73 | .collect::>() 74 | .join(", "); 75 | // Do not finish the statement with a semicolon. There is anecodtical evidence of IBM db2 not 76 | // allowing the command, because it expects now multiple statements. 
77 | // See:
78 | format!("INSERT INTO {table} ({columns}) VALUES ({values})")
79 | }
80 |
81 | /// Wraps column name in quotes, if need be.
82 | fn quote_column_name(column_name: &str) -> Cow<'_, str> {
83 | // We do not want to apply quoting in case the string is already quoted. See:
84 | //
85 | //
86 | // Another approach would have been to apply quoting after detecting keywords. Yet the list of
87 | // reserved keywords is large. There is also the issue with different databases having different
88 | // quoting rules. So the strategy chosen here is to apply quoting in fewer situations rather than
89 | // more, so the user has more control over the final statement. This crate is about arrow and
90 | // odbc, less so about SQL dialects and statement construction.
91 | let is_already_quoted = || {
92 | (column_name.starts_with('"') && column_name.ends_with('"'))
93 | || column_name.starts_with('[') && column_name.ends_with(']')
94 | || column_name.starts_with('`') && column_name.ends_with('`')
95 | };
96 | let contains_invalid_characters = || column_name.contains(|c| !valid_in_column_name(c));
97 | let needs_quotes = contains_invalid_characters() && !is_already_quoted();
98 | if needs_quotes {
99 | Cow::Owned(format!("\"{column_name}\""))
100 | } else {
101 | Cow::Borrowed(column_name)
102 | }
103 | }
104 |
105 | /// Check if this character is allowed in an unquoted column name
106 | fn valid_in_column_name(c: char) -> bool {
107 | // See:
108 | //
109 | c.is_alphanumeric() || c == '@' || c == '$' || c == '#' || c == '_'
110 | }
111 |
112 | /// Creates an SQL insert statement from an arrow schema. The resulting statement will have one
113 | /// placeholder (`?`) for each column in the statement.
114 | ///
115 | /// **Note:**
116 | ///
117 | /// If the column name contains any character which would make it not a valid qualifier for transact
118 | /// SQL it will be wrapped in double quotes (`"`) within the insert schema. Valid names consist of
119 | /// alphanumeric characters, `@`, `$`, `#` and `_`.
120 | ///
121 | /// # Example
122 | ///
123 | /// ```
124 | /// use arrow_odbc::{
125 | /// insert_statement_from_schema,
126 | /// arrow::datatypes::{Field, DataType, Schema},
127 | /// };
128 | ///
129 | /// let field_a = Field::new("a", DataType::Int64, false);
130 | /// let field_b = Field::new("b", DataType::Boolean, false);
131 | ///
132 | /// let schema = Schema::new(vec![field_a, field_b]);
133 | /// let sql = insert_statement_from_schema(&schema, "MyTable");
134 | ///
135 | /// assert_eq!("INSERT INTO MyTable (a, b) VALUES (?, ?)", sql)
136 | /// ```
137 | ///
138 | /// This function is automatically invoked by [`crate::OdbcWriter::with_connection`].
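///
/// For illustration, a sketch of the quoting behavior described above (the column names here
/// are hypothetical and chosen to trigger quoting):
///
/// ```
/// use arrow_odbc::{
///     insert_statement_from_schema,
///     arrow::datatypes::{Field, DataType, Schema},
/// };
///
/// // A space is not valid in an unquoted name, so the first column gets wrapped in double
/// // quotes. The second column is already quoted with square brackets and is left untouched.
/// let field_a = Field::new("my column", DataType::Int64, false);
/// let field_b = Field::new("[b]", DataType::Boolean, false);
///
/// let schema = Schema::new(vec![field_a, field_b]);
/// let sql = insert_statement_from_schema(&schema, "MyTable");
///
/// assert_eq!("INSERT INTO MyTable (\"my column\", [b]) VALUES (?, ?)", sql)
/// ```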
139 | pub fn insert_statement_from_schema(schema: &Schema, table_name: &str) -> String {
140 | let fields = schema.fields();
141 | let num_columns = fields.len();
142 | let column_names: Vec<_> = (0..num_columns)
143 | .map(|i| fields[i].name().as_str())
144 | .collect();
145 | insert_statement_text(table_name, &column_names)
146 | }
147 |
148 | /// Emitted while writing values from arrow arrays into a table in the database
149 | #[derive(Debug, Error)]
150 | pub enum WriterError {
151 | #[error("Failure to bind the array parameter buffers to the statement.\n{0}")]
152 | BindParameterBuffers(#[source] odbc_api::Error),
153 | #[error("Failure to execute the sql statement, sending the data to the database.\n{0}")]
154 | ExecuteStatment(#[source] odbc_api::Error),
155 | #[error("An error occurred rebinding a parameter buffer to the sql statement.\n{0}")]
156 | RebindBuffer(#[source] odbc_api::Error),
157 | #[error("The arrow data type {0} is not supported for insertion.")]
158 | UnsupportedArrowDataType(DataType),
159 | #[error("An error occurred extracting a record batch from the record batch reader.\n{0}")]
160 | ReadingRecordBatch(#[source] ArrowError),
161 | #[error("Unable to parse '{time_zone}' into a valid IANA time zone.")]
162 | InvalidTimeZone { time_zone: Arc<str> },
163 | #[error("An error occurred preparing SQL statement. SQL:\n{sql}\n{source}")]
164 | PreparingInsertStatement {
165 | #[source]
166 | source: odbc_api::Error,
167 | sql: String,
168 | },
169 | }
170 |
171 | /// Inserts batches from an [`arrow::record_batch::RecordBatchReader`] into a database.
172 | pub struct OdbcWriter<S> {
173 | /// Prepared statement with bound array parameter buffers. Data is copied into these buffers
174 | /// until they are full. Then we execute the statement. This is repeated until we run out of
175 | /// data.
176 | inserter: ColumnarBulkInserter<S, AnyBuffer>,
177 | /// For each field in the arrow schema we decide on which buffer to use to send the parameters
178 | /// to the database, and need to remember how to copy the data from an arrow array to an odbc
179 | /// mutable buffer slice for any column.
180 | strategies: Vec<Box<dyn WriteStrategy>>,
181 | }
182 |
183 | impl<S> OdbcWriter<S>
184 | where
185 | S: AsStatementRef,
186 | {
187 | /// Construct a new ODBC writer using an already existing prepared statement. Usually you want to
188 | /// call a higher level constructor like [`Self::with_connection`]. Yet, this constructor is
189 | /// useful in two scenarios.
190 | ///
191 | /// 1. The prepared statement is already constructed and you do not want to spend the time to
192 | /// prepare it again.
193 | /// 2. You want to use the arrow arrays as array parameters for a statement, but that statement
194 | /// is not necessarily an INSERT statement with a simple 1-to-1 mapping of columns between
195 | /// table and arrow schema.
196 | ///
197 | /// # Parameters
198 | ///
199 | /// * `row_capacity`: The number of rows sent to the database in each chunk. With the exception
200 | /// of the last chunk, which may be smaller.
201 | /// * `schema`: Schema needs to have one column for each positional parameter of the statement
202 | /// and match the data which will be supplied to the instance later. Otherwise your code will
203 | /// panic.
204 | /// * `statement`: A prepared statement whose SQL text representation contains one placeholder
205 | /// for each column. The order of the placeholders must correspond to the order of the columns
206 | /// in the `schema`.
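///
/// # Example
///
/// A minimal sketch of constructing a writer from a prepared statement (the table and SQL text
/// are assumptions for illustration; connection setup and error handling are omitted):
///
/// ```ignore
/// use arrow_odbc::{OdbcWriter, arrow::datatypes::{DataType, Field, Schema}};
///
/// let schema = Schema::new(vec![
///     Field::new("a", DataType::Int64, false),
///     Field::new("b", DataType::Utf8, true),
/// ]);
/// // One placeholder per column, in the same order as in the schema.
/// let statement = connection.prepare("INSERT INTO MyTable (a, b) VALUES (?, ?)")?;
/// // Send up to 1000 rows per roundtrip to the database.
/// let mut writer = OdbcWriter::new(1000, &schema, statement)?;
/// ```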
207 | pub fn new(
208 | row_capacity: usize,
209 | schema: &Schema,
210 | statement: Prepared<S>,
211 | ) -> Result<Self, WriterError> {
212 | let strategies: Vec<_> = schema
213 | .fields()
214 | .iter()
215 | .map(|field| field_to_write_strategy(field.as_ref()))
216 | .collect::<Result<_, _>>()?;
217 | let descriptions = strategies.iter().map(|cws| cws.buffer_desc());
218 | let inserter = statement
219 | .into_column_inserter(row_capacity, descriptions)
220 | .map_err(WriterError::BindParameterBuffers)?;
221 |
222 | Ok(Self {
223 | inserter,
224 | strategies,
225 | })
226 | }
227 |
228 | /// Consumes all the batches in the record batch reader and sends them chunk by chunk to the
229 | /// database.
230 | pub fn write_all(
231 | &mut self,
232 | reader: impl Iterator<Item = Result<RecordBatch, ArrowError>>,
233 | ) -> Result<(), WriterError> {
234 | for result in reader {
235 | let record_batch = result.map_err(WriterError::ReadingRecordBatch)?;
236 | self.write_batch(&record_batch)?;
237 | }
238 | self.flush()?;
239 | Ok(())
240 | }
241 |
242 | /// Consumes a single batch and sends it chunk by chunk to the database. The last batch may not
243 | /// be consumed until [`Self::flush`] is called.
244 | pub fn write_batch(&mut self, record_batch: &RecordBatch) -> Result<(), WriterError> {
245 | let capacity = self.inserter.capacity();
246 | let mut remaining_rows = record_batch.num_rows();
247 | // The record batch may contain more rows than the capacity of our writer can hold. So we
248 | // need to be able to fill the buffers multiple times and send them to the database in
249 | // between.
250 | while remaining_rows != 0 {
251 | let chunk_size = min(capacity - self.inserter.num_rows(), remaining_rows);
252 | let param_offset = self.inserter.num_rows();
253 | self.inserter.set_num_rows(param_offset + chunk_size);
254 | let chunk = record_batch.slice(record_batch.num_rows() - remaining_rows, chunk_size);
255 | for (index, (array, strategy)) in chunk
256 | .columns()
257 | .iter()
258 | .zip(self.strategies.iter())
259 | .enumerate()
260 | {
261 | strategy.write_rows(param_offset, self.inserter.column_mut(index), array)?
262 | }
263 |
264 | // If we used up all capacity we send the parameters to the database and reset the
265 | // parameter buffers.
266 | if self.inserter.num_rows() == capacity {
267 | self.flush()?;
268 | }
269 | remaining_rows -= chunk_size;
270 | }
271 |
272 | Ok(())
273 | }
274 |
275 | /// The number of rows in an individual record batch need not match the capacity of
276 | /// the buffers owned by this writer. Therefore records are sometimes not sent to the database
277 | /// immediately, but rather we wait for the buffers to be filled when reading the next batch. Once
278 | /// we reach the last batch however, there is no "next batch" anymore. In that case we call this
279 | /// method in order to send the remainder of the records to the database as well.
280 | pub fn flush(&mut self) -> Result<(), WriterError> {
281 | self.inserter
282 | .execute()
283 | .map_err(WriterError::ExecuteStatment)?;
284 | self.inserter.clear();
285 | Ok(())
286 | }
287 | }
288 |
289 | impl<C> OdbcWriter<StatementConnection<C>>
290 | where
291 | C: StatementParent,
292 | {
293 | /// A writer which takes ownership of the connection and inserts the given schema into a table
294 | /// with matching column names.
295 | ///
296 | /// **Note:**
297 | ///
298 | /// If the column name contains any character which would make it not a valid qualifier for transact
299 | /// SQL it will be wrapped in double quotes (`"`) within the insert schema.
Valid names consist of 300 | /// alpha numeric characters, `@`, `$`, `#` and `_`. 301 | pub fn from_connection( 302 | connection: C2, 303 | schema: &Schema, 304 | table_name: &str, 305 | row_capacity: usize, 306 | ) -> Result 307 | where 308 | C2: ConnectionTransitions, 309 | { 310 | let sql = insert_statement_from_schema(schema, table_name); 311 | let statement = connection 312 | .into_prepared(&sql) 313 | .map_err(|source| WriterError::PreparingInsertStatement { source, sql })?; 314 | Self::new(row_capacity, schema, statement) 315 | } 316 | } 317 | 318 | impl<'o> OdbcWriter> { 319 | /// A writer which borrows the connection and inserts the given schema into a table with 320 | /// matching column names. 321 | /// 322 | /// **Note:** 323 | /// 324 | /// If the column name contains any character which would make it not a valid qualifier for transact 325 | /// SQL it will be wrapped in double quotes (`"`) within the insert schema. Valid names consist of 326 | /// alpha numeric characters, `@`, `$`, `#` and `_`. 327 | pub fn with_connection( 328 | connection: &'o Connection<'o>, 329 | schema: &Schema, 330 | table_name: &str, 331 | row_capacity: usize, 332 | ) -> Result { 333 | let sql = insert_statement_from_schema(schema, table_name); 334 | let statement = connection 335 | .prepare(&sql) 336 | .map_err(|source| WriterError::PreparingInsertStatement { source, sql })?; 337 | Self::new(row_capacity, schema, statement) 338 | } 339 | } 340 | 341 | pub trait WriteStrategy { 342 | /// Describe the buffer used to hold the array parameters for the column 343 | fn buffer_desc(&self) -> BufferDesc; 344 | 345 | /// # Parameters 346 | /// 347 | /// * `param_offset`: Start writing parameters at that position. Number of rows in the parameter 348 | /// buffer before inserting the current chunk. 349 | /// * `column_buf`: Buffer to write the data into 350 | /// * `array`: Buffer to read the data from 351 | fn write_rows( 352 | &self, 353 | param_offset: usize, 354 | column_buf: AnySliceMut<'_>, 355 | array: &dyn Array, 356 | ) -> Result<(), WriterError>; 357 | } 358 | 359 | fn field_to_write_strategy(field: &Field) -> Result, WriterError> { 360 | let is_nullable = field.is_nullable(); 361 | let strategy = match field.data_type() { 362 | DataType::Utf8 => Box::new(Utf8ToNativeText {}), 363 | DataType::Boolean => boolean_to_bit(is_nullable), 364 | DataType::LargeUtf8 => Box::new(LargeUtf8ToNativeText {}), 365 | DataType::Int8 => Int8Type::identical(is_nullable), 366 | DataType::Int16 => Int16Type::identical(is_nullable), 367 | DataType::Int32 => Int32Type::identical(is_nullable), 368 | DataType::Int64 => Int64Type::identical(is_nullable), 369 | DataType::UInt8 => UInt8Type::identical(is_nullable), 370 | DataType::Float16 => Float16Type::map_with(is_nullable, |half| half.to_f32()), 371 | DataType::Float32 => Float32Type::identical(is_nullable), 372 | DataType::Float64 => Float64Type::identical(is_nullable), 373 | DataType::Timestamp(time_unit, time_zone) => { 374 | insert_timestamp_strategy(is_nullable, &time_unit, time_zone.clone())? 
375 | }
376 | DataType::Date32 => Date32Type::map_with(is_nullable, epoch_to_date),
377 | DataType::Date64 => Date64Type::map_with(is_nullable, |days_since_epoch| {
378 | epoch_to_date(days_since_epoch.try_into().unwrap())
379 | }),
380 | DataType::Time32(TimeUnit::Second) => {
381 | Time32SecondType::map_with(is_nullable, sec_since_midnight_to_time)
382 | }
383 | DataType::Time32(TimeUnit::Millisecond) => {
384 | Box::new(NullableTimeAsText::<Time32MillisecondType>::new())
385 | }
386 | DataType::Time64(TimeUnit::Microsecond) => {
387 | Box::new(NullableTimeAsText::<Time64MicrosecondType>::new())
388 | }
389 | DataType::Time64(TimeUnit::Nanosecond) => {
390 | Box::new(NullableTimeAsText::<Time64NanosecondType>::new())
391 | }
392 | DataType::Binary => Box::new(VariadicBinary::new(1)),
393 | DataType::FixedSizeBinary(length) => {
394 | Box::new(VariadicBinary::new((*length).try_into().unwrap()))
395 | }
396 | DataType::Decimal128(precision, scale) => {
397 | Box::new(NullableDecimal128AsText::new(*precision, *scale))
398 | }
399 | DataType::Decimal256(precision, scale) => {
400 | Box::new(NullableDecimal256AsText::new(*precision, *scale))
401 | }
402 | unsupported => return Err(WriterError::UnsupportedArrowDataType(unsupported.clone())),
403 | };
404 | Ok(strategy)
405 | }
406 | --------------------------------------------------------------------------------
/src/reader/odbc_reader.rs:
--------------------------------------------------------------------------------
1 | use std::cmp::min;
2 |
3 | use arrow::{
4 | datatypes::SchemaRef,
5 | error::ArrowError,
6 | record_batch::{RecordBatch, RecordBatchReader},
7 | };
8 | use odbc_api::{BlockCursor, Cursor, buffers::ColumnarAnyBuffer};
9 |
10 | use crate::{BufferAllocationOptions, ConcurrentOdbcReader, Error};
11 |
12 | use super::{TextEncoding, to_record_batch::ToRecordBatch};
13 |
14 | /// Arrow ODBC reader. Implements the [`arrow::record_batch::RecordBatchReader`] trait so it can be
15 | /// used to fill Arrow arrays from an ODBC data source.
16 | ///
17 | /// This reader is generic over the cursor type so it can be used in cases where the cursor only
18 | /// borrows a statement handle (most likely the case when using prepared queries), or owns the
19 | /// statement handle (recommended when using one-shot queries, to have an easier life with the
20 | /// borrow checker).
21 | ///
22 | /// # Example
23 | ///
24 | /// ```no_run
25 | /// use arrow_odbc::{odbc_api::{Environment, ConnectionOptions}, OdbcReaderBuilder};
26 | ///
27 | /// const CONNECTION_STRING: &str = "\
28 | /// Driver={ODBC Driver 18 for SQL Server};\
29 | /// Server=localhost;\
30 | /// UID=SA;\
31 | /// PWD=My@Test@Password1;\
32 | /// ";
33 | ///
34 | /// fn main() -> Result<(), anyhow::Error> {
35 | ///
36 | /// let odbc_environment = Environment::new()?;
37 | ///
38 | /// // Connect with database.
39 | /// let connection = odbc_environment.connect_with_connection_string(
40 | /// CONNECTION_STRING,
41 | /// ConnectionOptions::default()
42 | /// )?;
43 | ///
44 | /// // This SQL statement does not require any arguments.
45 | /// let parameters = ();
46 | ///
47 | /// // Do not apply any timeout.
48 | /// let timeout_sec = None;
49 | ///
50 | /// // Execute query and create result set
51 | /// let cursor = connection
52 | /// .execute("SELECT * FROM MyTable", parameters, timeout_sec)?
53 | /// .expect("SELECT statement must produce a cursor");
54 | ///
55 | /// // Read result set as arrow batches. Infer Arrow types automatically using the meta
56 | /// // information of `cursor`.
57 | /// let arrow_record_batches = OdbcReaderBuilder::new() 58 | /// .build(cursor)?; 59 | /// 60 | /// for batch in arrow_record_batches { 61 | /// // ... process batch ... 62 | /// } 63 | /// Ok(()) 64 | /// } 65 | /// ``` 66 | pub struct OdbcReader { 67 | /// Converts the content of ODBC buffers into Arrow record batches 68 | converter: ToRecordBatch, 69 | /// Fetches values from the ODBC datasource using columnar batches. Values are streamed batch 70 | /// by batch in order to avoid reallocation of the buffers used for tranistion. 71 | batch_stream: BlockCursor, 72 | /// We remember if the user decided to use fallibale allocations or not in case we need to 73 | /// allocate another buffer due to a state transition towards [`ConcurrentOdbcReader`]. 74 | fallibale_allocations: bool, 75 | } 76 | 77 | impl OdbcReader { 78 | /// Consume this instance to create a similar ODBC reader which fetches batches asynchronously. 79 | /// 80 | /// Steals all resources from this [`OdbcReader`] instance, and allocates another buffer for 81 | /// transiting data from the ODBC data source to the application. This way one buffer can be 82 | /// written to by a dedicated system thread, while the other is read by the application. Use 83 | /// this if you want to trade memory for speed. 84 | /// 85 | /// # Example 86 | /// 87 | /// ```no_run 88 | /// use arrow_odbc::{odbc_api::{Environment, ConnectionOptions}, OdbcReaderBuilder}; 89 | /// use std::sync::OnceLock; 90 | /// 91 | /// // In order to fetch in a dedicated system thread we need a cursor with static lifetime, 92 | /// // this implies a static ODBC environment. 93 | /// static ENV: OnceLock = OnceLock::new(); 94 | /// 95 | /// const CONNECTION_STRING: &str = "\ 96 | /// Driver={ODBC Driver 18 for SQL Server};\ 97 | /// Server=localhost;\ 98 | /// UID=SA;\ 99 | /// PWD=My@Test@Password1;\ 100 | /// "; 101 | /// 102 | /// fn main() -> Result<(), anyhow::Error> { 103 | /// 104 | /// let odbc_environment = ENV.get_or_init(|| {Environment::new().unwrap() }); 105 | /// 106 | /// // Connect with database. 107 | /// let connection = odbc_environment.connect_with_connection_string( 108 | /// CONNECTION_STRING, 109 | /// ConnectionOptions::default() 110 | /// )?; 111 | /// 112 | /// // This SQL statement does not require any arguments. 113 | /// let parameters = (); 114 | /// 115 | /// // Do not apply any timeout. 116 | /// let timeout_sec = None; 117 | /// 118 | /// // Execute query and create result set 119 | /// let cursor = connection 120 | /// // Using `into_cursor` instead of `execute` takes ownership of the connection and 121 | /// // allows for a cursor with static lifetime. 122 | /// .into_cursor("SELECT * FROM MyTable", parameters, timeout_sec) 123 | /// .map_err(|e|e.error)? 124 | /// .expect("SELECT statement must produce a cursor"); 125 | /// 126 | /// // Construct ODBC reader ... 127 | /// let arrow_record_batches = OdbcReaderBuilder::new() 128 | /// .build(cursor)? 129 | /// // ... and make it concurrent 130 | /// .into_concurrent()?; 131 | /// 132 | /// for batch in arrow_record_batches { 133 | /// // ... process batch ... 134 | /// } 135 | /// Ok(()) 136 | /// } 137 | /// ``` 138 | pub fn into_concurrent(self) -> Result, Error> 139 | where 140 | C: Send + 'static, 141 | { 142 | ConcurrentOdbcReader::from_block_cursor( 143 | self.batch_stream, 144 | self.converter, 145 | self.fallibale_allocations, 146 | ) 147 | } 148 | 149 | /// Destroy the ODBC arrow reader and yield the underlyinng cursor object. 
150 | /// 151 | /// One application of this is to process more than one result set in case you executed a stored 152 | /// procedure. 153 | pub fn into_cursor(self) -> Result { 154 | let (cursor, _buffer) = self.batch_stream.unbind()?; 155 | Ok(cursor) 156 | } 157 | 158 | /// Size of the internal preallocated buffer bound to the cursor and filled by your ODBC driver 159 | /// in rows. Each record batch will at most have this many rows. Only the last one may have 160 | /// less. 161 | pub fn max_rows_per_batch(&self) -> usize { 162 | self.batch_stream.row_array_size() 163 | } 164 | } 165 | 166 | impl Iterator for OdbcReader 167 | where 168 | C: Cursor, 169 | { 170 | type Item = Result; 171 | 172 | fn next(&mut self) -> Option { 173 | match self.batch_stream.fetch_with_truncation_check(true) { 174 | // We successfully fetched a batch from the database. Try to copy it into a record batch 175 | // and forward errors if any. 176 | Ok(Some(batch)) => { 177 | let result_record_batch = self 178 | .converter 179 | .buffer_to_record_batch(batch) 180 | .map_err(|mapping_error| ArrowError::ExternalError(Box::new(mapping_error))); 181 | Some(result_record_batch) 182 | } 183 | // We ran out of batches in the result set. End the iterator. 184 | Ok(None) => None, 185 | // We had an error fetching the next batch from the database, let's report it as an 186 | // external error. 187 | Err(odbc_error) => Some(Err(odbc_to_arrow_error(odbc_error))), 188 | } 189 | } 190 | } 191 | 192 | impl RecordBatchReader for OdbcReader 193 | where 194 | C: Cursor, 195 | { 196 | fn schema(&self) -> SchemaRef { 197 | self.converter.schema().clone() 198 | } 199 | } 200 | 201 | /// Creates instances of [`OdbcReader`] based on [`odbc_api::Cursor`]. 202 | /// 203 | /// Using a builder pattern instead of passing structs with all required arguments to the 204 | /// constructors of [`OdbcReader`] allows `arrow_odbc` to introduce new paramters to fine tune the 205 | /// creation and behavior of the readers without breaking the code of downstream applications. 206 | #[derive(Default, Clone)] 207 | pub struct OdbcReaderBuilder { 208 | /// `Some` implies the user has set this explicitly using 209 | /// [`OdbcReaderBuilder::with_max_num_rows_per_batch`]. `None` implies that we have to choose 210 | /// for the user. 211 | max_num_rows_per_batch: usize, 212 | max_bytes_per_batch: usize, 213 | schema: Option, 214 | max_text_size: Option, 215 | max_binary_size: Option, 216 | map_value_errors_to_null: bool, 217 | dbms_name: Option, 218 | fallibale_allocations: bool, 219 | trim_fixed_sized_character_strings: bool, 220 | text_encoding: TextEncoding, 221 | } 222 | 223 | impl OdbcReaderBuilder { 224 | pub fn new() -> Self { 225 | // In the abscence of an explicit row limit set by the user we choose u16 MAX (65535). This 226 | // is a reasonable high value to allow for siginificantly reducing IO overhead as opposed to 227 | // row by row fetching already. Likely for many database schemas a memory limitation will 228 | // kick in before this limit. If not however it can still be dangerous to go beyond this 229 | // number. Some drivers use a 16Bit integer to count rows and you can run into overflow 230 | // errors if you use one of them. Once such issue occurred with SAP anywhere. 
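// As a purely illustrative calculation (numbers are hypothetical): with the 512 MiB default
// byte budget below, a row size of 1 KiB would allow for roughly 524,288 rows, so the
// 65,535 row cap is the limit which actually takes effect; for very wide rows it is the
// other way around. `buffer_size_in_rows` takes the minimum of both limits.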
231 | const DEFAULT_MAX_ROWS_PER_BATCH: usize = u16::MAX as usize;
232 | const DEFAULT_MAX_BYTES_PER_BATCH: usize = 512 * 1024 * 1024;
233 |
234 | OdbcReaderBuilder {
235 | max_num_rows_per_batch: DEFAULT_MAX_ROWS_PER_BATCH,
236 | max_bytes_per_batch: DEFAULT_MAX_BYTES_PER_BATCH,
237 | schema: None,
238 | max_text_size: None,
239 | max_binary_size: None,
240 | fallibale_allocations: false,
241 | map_value_errors_to_null: false,
242 | dbms_name: None,
243 | trim_fixed_sized_character_strings: false,
244 | text_encoding: TextEncoding::Auto,
245 | }
246 | }
247 |
248 | /// Limits the maximum number of rows which are fetched in a single roundtrip to the datasource.
249 | /// Higher numbers lower the IO overhead and may speed up your runtime, but also require larger
250 | /// preallocated buffers and use more memory. This value defaults to `65535` which is `u16` max.
251 | /// Some ODBC drivers use a 16Bit integer to count rows so this can avoid overflows. The
252 | /// improvements in saving IO overhead going above that number are estimated to be small. Your
253 | /// mileage may vary of course.
254 | pub fn with_max_num_rows_per_batch(&mut self, max_num_rows_per_batch: usize) -> &mut Self {
255 | self.max_num_rows_per_batch = max_num_rows_per_batch;
256 | self
257 | }
258 |
259 | /// In addition to a row size limit you may specify an upper bound in bytes for allocating the
260 | /// transit buffer. This is useful if you do not know the database schema, or your code has to
261 | /// work with different ones, but you know the amount of memory in your machine. This limit is
262 | /// applied in addition to [`OdbcReaderBuilder::with_max_num_rows_per_batch`]. Whichever of
263 | /// these leads to a smaller buffer is used. This defaults to 512 MiB.
264 | pub fn with_max_bytes_per_batch(&mut self, max_bytes_per_batch: usize) -> &mut Self {
265 | self.max_bytes_per_batch = max_bytes_per_batch;
266 | self
267 | }
268 |
269 | /// Describes the types of the Arrow Arrays in the record batches. It is also used to determine
270 | /// the CData type requested from the data source. If this is not explicitly set the type is inferred
271 | /// from the schema information provided by the ODBC driver. A reason for setting this
272 | /// explicitly could be that you have superior knowledge about your data compared to the ODBC
273 | /// driver. E.g. a type for an unsigned byte (`u8`) is not part of the ODBC standard. Therefore
274 | /// the driver might at best be able to tell you that this is an `i8`. If you want to still
275 | /// have `u8`s in the resulting array you need to specify the schema manually. Also many drivers
276 | /// struggle with reporting nullability correctly and just report every column as nullable.
277 | /// Explicitly specifying a schema can also compensate for such shortcomings if it turns out to
278 | /// be relevant.
279 | pub fn with_schema(&mut self, schema: SchemaRef) -> &mut Self {
280 | self.schema = Some(schema);
281 | self
282 | }
283 |
284 | /// In order for fast bulk fetching to work, `arrow-odbc` needs to know the size of the largest
285 | /// possible field in each column. It will do so itself automatically by considering the schema
286 | /// information. However, trouble arises if the schema contains unbounded variadic fields like
287 | /// `VARCHAR(MAX)` which can hold really large values. These have a very high upper element
288 | /// size, if any.
In order to work with such schemas we need a limit on what an upper
289 | /// bound of the actual values in the column is, as opposed to the largest value the
290 | /// column could theoretically store. There is no need for this to be very precise, but just
291 | /// knowing that a value would never exceed 4KiB rather than 2GiB is enough to allow for
292 | /// tremendous efficiency gains. The size of the text is specified in UTF-8 encoded bytes if
293 | /// using a narrow encoding (typically all non-windows systems) and in UTF-16 encoded pairs of
294 | /// bytes on systems using a wide encoding (typically windows). This is roughly the size in
295 | /// letters, yet if you are using a lot of emojis or other special characters this number might
296 | /// need to be larger.
297 | pub fn with_max_text_size(&mut self, max_text_size: usize) -> &mut Self {
298 | self.max_text_size = Some(max_text_size);
299 | self
300 | }
301 |
302 | /// An upper limit for the size of buffers bound to variadic binary columns of the data source.
303 | /// This limit does not (directly) apply to the size of the created arrow buffers, but rather
304 | /// applies to the buffers used for the data in transit. Use this option if you have e.g.
305 | /// `VARBINARY(MAX)` fields in your database schema. In such a case without an upper limit, the
306 | /// ODBC driver of your data source is asked for the maximum size of an element, and is likely
307 | /// to answer with either `0` or a value which is way larger than any actual entry in the
308 | /// column. If you cannot adapt your database schema, this limit might be what you are looking
309 | /// for. This is the maximum size in bytes of the binary column. If this method is not called no
310 | /// upper limit is set and the maximum element size reported by ODBC is used to determine
311 | /// buffer sizes.
312 | pub fn with_max_binary_size(&mut self, max_binary_size: usize) -> &mut Self {
313 | self.max_binary_size = Some(max_binary_size);
314 | self
315 | }
316 |
317 | /// Set to `true` in order to trigger a [`crate::ColumnFailure::TooLarge`] instead of a panic
318 | /// in case the buffers can not be allocated due to their size. This might have a performance
319 | /// cost for constructing the reader. `false` by default.
320 | pub fn with_fallibale_allocations(&mut self, fallibale_allocations: bool) -> &mut Self {
321 | self.fallibale_allocations = fallibale_allocations;
322 | self
323 | }
324 |
325 | /// Set to `true` in order to map a value in the database which can not be successfully
326 | /// converted into its target type to NULL, rather than emitting an external Arrow Error.
327 | /// E.g. currently mapping errors can happen if a datetime value is not in the range
328 | /// representable by arrow. Default is `false`.
329 | pub fn value_errors_as_null(&mut self, map_value_errors_to_null: bool) -> &mut Self {
330 | self.map_value_errors_to_null = map_value_errors_to_null;
331 | self
332 | }
333 |
334 | /// If set to `true` text in fixed sized character columns like e.g. CHAR is trimmed of
335 | /// whitespace before being converted into Arrow UTF-8 arrays. Default is `false`.
336 | pub fn trim_fixed_sized_characters(
337 | &mut self,
338 | fixed_sized_character_strings_are_trimmed: bool,
339 | ) -> &mut Self {
340 | self.trim_fixed_sized_character_strings = fixed_sized_character_strings_are_trimmed;
341 | self
342 | }
343 |
344 | /// Controls the encoding used for transferring text data from the ODBC data source to the
345 | /// application.
The resulting Arrow arrays will still be UTF-8 encoded. If you get garbage
346 | /// characters or invalid UTF-8 errors on non-windows systems, you may want to set the
347 | /// encoding to [`TextEncoding::Utf16`]. On windows systems you may want to set this to
348 | /// [`TextEncoding::Utf8`] to gain performance benefits, after you have verified that your
349 | /// system locale is set to UTF-8. The default is [`TextEncoding::Auto`].
350 | pub fn with_payload_text_encoding(&mut self, text_encoding: TextEncoding) -> &mut Self {
351 | self.text_encoding = text_encoding;
352 | self
353 | }
354 |
355 | /// If provided the name of the database management system (DBMS) is used to account for
356 | /// database specific behavior when determining the arrow schema.
357 | ///
358 | /// To determine the name of the DBMS you can call
359 | /// [`odbc_api::Connection::database_management_system_name`].
360 | pub fn with_dbms_name(&mut self, dbms_name: String) -> &mut Self {
361 | self.dbms_name = Some(dbms_name);
362 | self
363 | }
364 |
365 | /// No matter whether the user explicitly specified a limit in row size, a memory limit, both or
366 | /// neither: in order to construct a reader we need to decide on the buffer size in rows.
367 | fn buffer_size_in_rows(&self, bytes_per_row: usize) -> Result<usize, Error> {
368 | // If schema is empty, return before division by zero error.
369 | if bytes_per_row == 0 {
370 | return Ok(self.max_bytes_per_batch);
371 | }
372 | let rows_per_batch = self.max_bytes_per_batch / bytes_per_row;
373 | if rows_per_batch == 0 {
374 | Err(Error::OdbcBufferTooSmall {
375 | max_bytes_per_batch: self.max_bytes_per_batch,
376 | bytes_per_row,
377 | })
378 | } else {
379 | Ok(min(self.max_num_rows_per_batch, rows_per_batch))
380 | }
381 | }
382 |
383 | /// Constructs an [`OdbcReader`] which consumes the given cursor. The cursor will also be used
384 | /// to infer the Arrow schema if it has not been supplied explicitly.
385 | ///
386 | /// # Parameters
387 | ///
388 | /// * `cursor`: ODBC cursor used to fetch batches from the data source. The constructor will
389 | /// bind buffers to this cursor in order to perform bulk fetches from the source. This is
390 | /// usually faster than fetching results row by row as it saves roundtrips to the database.
391 | /// The type of these buffers will be inferred from the arrow schema. Not every arrow type is
392 | /// supported though.
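///
/// A sketch of a typical invocation (the cursor and the chosen limits are assumptions for
/// illustration; see the example at the top of this module for the connection setup):
///
/// ```ignore
/// let reader = OdbcReaderBuilder::new()
///     // Cap the transit buffer both by row count and by total size.
///     .with_max_num_rows_per_batch(10_000)
///     .with_max_bytes_per_batch(256 * 1024 * 1024)
///     // Needed if the result set contains e.g. VARCHAR(MAX) columns.
///     .with_max_text_size(4096)
///     .build(cursor)?;
/// ```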
393 | pub fn build(&self, mut cursor: C) -> Result, Error> 394 | where 395 | C: Cursor, 396 | { 397 | let buffer_allocation_options = BufferAllocationOptions { 398 | max_text_size: self.max_text_size, 399 | max_binary_size: self.max_binary_size, 400 | fallibale_allocations: self.fallibale_allocations, 401 | }; 402 | let converter = ToRecordBatch::new( 403 | &mut cursor, 404 | self.schema.clone(), 405 | buffer_allocation_options, 406 | self.map_value_errors_to_null, 407 | self.dbms_name.as_deref(), 408 | self.trim_fixed_sized_character_strings, 409 | self.text_encoding, 410 | )?; 411 | let bytes_per_row = converter.row_size_in_bytes(); 412 | let buffer_size_in_rows = self.buffer_size_in_rows(bytes_per_row)?; 413 | let row_set_buffer = 414 | converter.allocate_buffer(buffer_size_in_rows, self.fallibale_allocations)?; 415 | let batch_stream = cursor.bind_buffer(row_set_buffer).unwrap(); 416 | 417 | Ok(OdbcReader { 418 | converter, 419 | batch_stream, 420 | fallibale_allocations: self.fallibale_allocations, 421 | }) 422 | } 423 | } 424 | 425 | pub fn odbc_to_arrow_error(odbc_error: odbc_api::Error) -> ArrowError { 426 | ArrowError::from_external_error(Box::new(odbc_error)) 427 | } 428 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ## [21.0.0](https://github.com/pacman82/arrow-odbc/compare/v20.1.1...v21.0.0) - 2025-10-26 11 | 12 | ### Added 13 | 14 | - [**breaking**] Fetch Db2 Blobs as binary data and not text 15 | 16 | ### Other 17 | 18 | - introduce blob_on_db2 for local development 19 | 20 | ## [20.1.1](https://github.com/pacman82/arrow-odbc/compare/v20.1.0...v20.1.1) - 2025-10-25 21 | 22 | ### Other 23 | 24 | - Support arrow 57 25 | 26 | ## [20.1.0](https://github.com/pacman82/arrow-odbc/compare/v20.0.1...v20.1.0) - 2025-09-21 27 | 28 | ### Added 29 | 30 | 31 | - Then auto generating the insert statement into a table, column names which are already quoted, i.e. are enclosed in either rectangular brackets (`[`, `]`), double quotes (`"`) or backticks (`````) will not be quoted. This way if a column is named after a reserved keyword e.g. `values` or `from`, users could rename the column in the source to their quoted representation in order to avoid syntax errors in the statement. 32 | 33 | ### Other 34 | 35 | - Explain reasoning behind `is_already_quoted` in comment. 
36 | - Mention GitHub issue triggering the change to column name 37 | - Test for insterting into a table with a column using a reserved 38 | - Replace lazy_static with std::once_lock 39 | 40 | ## [20.0.1](https://github.com/pacman82/arrow-odbc/compare/v20.0.0...v20.0.1) - 2025-09-08 41 | 42 | ### Other 43 | 44 | - Support `odbc-api 20` 45 | 46 | ## [20.0.0](https://github.com/pacman82/arrow-odbc/compare/v19.1.1...v20.0.0) - 2025-08-21 47 | 48 | ### Added 49 | 50 | - [**breaking**] Support OdbcWriter taking ownership of Arc> 51 | 52 | ## [19.1.1](https://github.com/pacman82/arrow-odbc/compare/v19.1.0...v19.1.1) - 2025-08-21 53 | 54 | ### Other 55 | 56 | - *(deps)* update odbc-api requirement from >= 15, < 17 to >= 15, < 18 57 | 58 | ## [19.1.0](https://github.com/pacman82/arrow-odbc/compare/v19.0.0...v19.1.0) - 2025-08-17 59 | 60 | ### Added 61 | 62 | - Support `odbc-api 16` 63 | 64 | ### Fixed 65 | 66 | - missing other entry in Changelog 67 | - Missing changelog entry for 19.0.0 68 | 69 | ## [19.0.0](https://github.com/pacman82/arrow-odbc/compare/v18.1.2...v19.0.0) - 2025-08-14 70 | 71 | ### Other 72 | 73 | - [**breaking**] Support for `odbc-api 15`. Dropped support for older versions. 74 | 75 | ## [18.1.2](https://github.com/pacman82/arrow-odbc/compare/v18.1.1...v18.1.2) - 2025-08-05 76 | 77 | ### Other 78 | 79 | - Move automerge logic into build.yml. 80 | - *(deps)* update arrow requirement from >= 29, < 56 to >= 29, < 57 81 | - Auto merge dependabot PRs 82 | # Changelog 83 | 84 | ## [18.1.1](https://github.com/pacman82/arrow-odbc/compare/v18.1.0...v18.1.1) - 2025-06-27 85 | 86 | ### Other 87 | 88 | - *(deps)* update odbc-api requirement from >= 12, < 14 to >= 12, < 15 89 | 90 | ## [18.1.0](https://github.com/pacman82/arrow-odbc/compare/v18.0.0...v18.1.0) - 2025-06-24 91 | 92 | ### Added 93 | 94 | - Time64 is inserted with ns precision rather than with 7 fractional digits. 
95 | 96 | ## [18.0.0](https://github.com/pacman82/arrow-odbc/compare/v17.0.1...v18.0.0) - 2025-06-22 97 | 98 | ### Added 99 | 100 | - Inserting timestamps with timezones 101 | - [**breaking**] Add variant `WriterError::InvalidTimeZone` 102 | 103 | ## [17.0.1](https://github.com/pacman82/arrow-odbc/compare/v17.0.0...v17.0.1) - 2025-05-30 104 | 105 | ### Fixed 106 | 107 | - Remove eprintln statement, which had been left over from debugging 108 | 109 | ## [17.0.0](https://github.com/pacman82/arrow-odbc/compare/v16.0.2...v17.0.0) - 2025-05-30 110 | 111 | ### Added 112 | 113 | - [**breaking**] Provide possibility to provide dbms name, in order to autodetect time columns for MSSQL databases 114 | - [**breaking**] Support for Times64 Nanoseconds 115 | - [**breaking**] Support for wallclock time with milliseconds precision 116 | - [**breaking**] Support for time32 second 117 | 118 | ### Other 119 | 120 | - fix typos 121 | - time with precision 2 & 3 122 | - Human readable test assertion for fetch_time_1_psql 123 | - fetch_time_1_psql 124 | - change pssql time test to use Time(0) 125 | - replaced mssql connection string with psql in _psql test 126 | - Fetching time column from PostgreSQL 127 | - Use setup empty table with PostgreSQL 128 | - Introduce Dbms trait 129 | - Test for fetching time 130 | 131 | ## [16.0.2](https://github.com/pacman82/arrow-odbc/compare/v16.0.1...v16.0.2) - 2025-05-18 132 | 133 | ### Other 134 | 135 | - Support odbc-api 13 136 | - Integration test for fetching VARCHAR(1000) from PostgreSQL with special characters 137 | - Elaborate on bug cause 138 | - psql_varchar test now also reproduces behaviour then run in Windows 139 | - Trying to reproduce too large value for buffer. 140 | - Add PostgreSQL to test pipeline 141 | 142 | ## [16.0.1](https://github.com/pacman82/arrow-odbc/compare/v16.0.0...v16.0.1) - 2025-04-17 143 | 144 | ### Other 145 | 146 | - Support arrow 55 147 | 148 | ## [16.0.0](https://github.com/pacman82/arrow-odbc/compare/v15.1.1...v16.0.0) - 2025-04-05 149 | 150 | ### Added 151 | 152 | - [**breaking**] Update to support odbc-api 12. 153 | 154 | ### Other 155 | 156 | - Remove pin from chrono 157 | - update dependencies 158 | 159 | ## [15.1.1](https://github.com/pacman82/arrow-odbc/compare/v15.1.0...v15.1.1) - 2025-03-08 160 | 161 | ### Other 162 | 163 | - Fix docstrings for TextEncoding 164 | 165 | ## [15.1.0](https://github.com/pacman82/arrow-odbc/compare/v15.0.0...v15.1.0) - 2025-03-08 166 | 167 | ### Added 168 | 169 | - Support for explicitly choosing the transfer encoding. 
170 | 171 | ### Fixed 172 | 173 | - Pin chrono version to 0.4.39 to fix build 174 | 175 | ### Other 176 | 177 | - use input parameters rather than literals to setup preconditions for most tests 178 | - Fix "String or binary data would be truncated in table" on linux systems 179 | - fix test when block in fetch_narrow_data 180 | 181 | ## [15.0.0](https://github.com/pacman82/arrow-odbc/compare/v14.2.0...v15.0.0) - 2025-02-23 182 | 183 | ### Added 184 | 185 | - [**breaking**] Dedicated Mapping error for InvalidUtf8 186 | 187 | ### Other 188 | 189 | - [**breaking**] Update to edition 2024 190 | - Rewrite docs for with_max_text_size 191 | 192 | ## [14.2.0](https://github.com/pacman82/arrow-odbc/compare/v14.1.0...v14.2.0) - 2025-02-16 193 | 194 | ### Added 195 | 196 | - Support odbc-api 11 197 | 198 | ### Other 199 | 200 | - Use ODBC driver 18 for tests 201 | - Install unixODBC in release plz workflow 202 | - Install unixodbc-dev 203 | 204 | ## [14.1.0](https://github.com/pacman82/arrow-odbc/compare/v14.0.1...v14.1.0) - 2025-01-01 205 | 206 | ### Added 207 | 208 | - Support arrow 53 209 | 210 | ## [14.0.1](https://github.com/pacman82/arrow-odbc/compare/v14.0.0...v14.0.1) - 2024-12-09 211 | 212 | ### Fixed 213 | 214 | - Inserting multiple small batches now works, even if the second batch triggers rebinding the buffer due to element size. Previously in this scenario not all values already inserted were correctly copied into the new buffer. This caused strings to be replaced with `null` bytes. 215 | 216 | ## [14.0.0](https://github.com/pacman82/arrow-odbc/compare/v13.0.2...v14.0.0) - 2024-11-25 217 | 218 | ### Added 219 | 220 | - You can now compile with using the wide (i.e. UTF-16) character set versions of ODBC functions calls on non-windows platforms by specifying the `wide` feature. Similarly you can now complie using the narrow character set on windows platforms by specifying the `narrow` feature. The default remains `wide` on windows and `narrow` on non-windows targets. This required updating using `odbc-api 10.0.0`. The only thing thing changing however from `odbc-api` 9 to 10 are the default compilation of feature flags, so your code should just continue working. 221 | 222 | - [**breaking**] Update odbc-api `>= 9, < 10` -> `>= 10, < 11` 223 | 224 | ## [13.0.2](https://github.com/pacman82/arrow-odbc/compare/v13.0.1...v13.0.2) - 2024-11-24 225 | 226 | ### Fixed 227 | 228 | - Overflow in epoch to timestamp is fixed. It is now possible to insert 1600-06-18 23:12:44.123 into a database with ms precision 229 | 230 | ## [13.0.1](https://github.com/pacman82/arrow-odbc/compare/v13.0.0...v13.0.1) - 2024-11-20 231 | 232 | ### Fixed 233 | 234 | - Timestamps with fractional seconds now work even if they are older than unix epoch. 235 | 236 | ### Other 237 | 238 | - setup release-plz 239 | - use uppercase for changelog 240 | - Update thiserror requirement from 1.0.65 to 2.0.0 241 | 242 | ## 13.0.0 243 | 244 | - Update odbc-api `>= 6, < 9` -> `>= 9, < 10` 245 | 246 | ## 12.2.0 247 | 248 | - Update arrow `>= 29, < 53` -> `>= 29, < 54` 249 | 250 | ## 12.1.0 251 | 252 | - Enabling trimming of fixed sized character data via `OdbcReaderBuilder::trim_fixed_sized_characters`. 253 | 254 | ## 12.0.0 255 | 256 | - Enable mapping out of ranges dates to `NULL`. You can do so using `OdbcReaderBuilder::value_errors_as_null`. 257 | - Breaking: `arrow_schema_from` now takes an additional boolean parameter `map_value_errors_to_null`. 
258 | 259 | ## 11.2.0 260 | 261 | - Update odbc-api `>= 6, < 8` -> `>= 6, < 9` 262 | 263 | ## 11.1.0 264 | 265 | - Update arrow `>= 29, < 52` -> `>= 29, < 53` 266 | 267 | ## 11.0.0 268 | 269 | - Unsigned Tinyint are now mapped to `UInt8` instead of `Int8`. 270 | 271 | ## 10.0.0 272 | 273 | - Removed quirk `indicators_returned_from_bulk_fetch_are_memory_garbage`. Turns out the issue with IBM DB/2 drivers which triggered this can better be solved using a version of their ODBC driver which ends in `o` and is compiled with a 64Bit size for `SQLLEN`. 274 | - Remove `Quirks`. 275 | 276 | ## 9.0.0 277 | 278 | - Then generating the insert statement on behalf of the user quote column names which are not valid transact SQL qualifiers using double quotes (`"`) 279 | 280 | ## 8.3.0 281 | 282 | - Update odbc-api `>= 6, < 7` -> `>= 6, < 8` 283 | 284 | ## 8.2.0 285 | 286 | - `ConcurrentOdbcReader` is now `Send`. 287 | 288 | ## 8.1.0 289 | 290 | - Update arrow `>= 29, < 51` -> `>= 29, < 52` 291 | 292 | ## 8.0.0 293 | 294 | - Replace `odbc_api::Quirks` with `arrow_odbc::Quirks`. 295 | 296 | ## 7.0.0 297 | 298 | - Update odbc-api `>= 5, < 6` -> `>= 6, < 7` 299 | 300 | ## 6.1.0 301 | 302 | - Update arrow `>= 29, < 50` -> `>= 29, < 51` 303 | 304 | ## 6.0.0 305 | 306 | - Update odbc-api `>= 4, < 5` -> `>= 5, < 6` 307 | 308 | ## 5.0.3 309 | 310 | - Decimal parsing is now more robust. It does no longer require the text representation to have all trailing zeroes explicit in order to figure out the correct scale of the decimal. E.g. for a decimal with scale 5 a text representation of `10` would have been interpreted as `000.1` for scale five. Decimal parsing relied on databases making all trailing zeroes explicit e.g. `10.00000`. Oracle however does not do this, so parsing has been adopted to be more robust. 311 | 312 | ## 5.0.2 313 | 314 | - Fixes a bug introduced in 5.0.1, causing negative decimals not to be parsed correctly and to be returned as non-negative values. 315 | 316 | ## 5.0.1 317 | 318 | - Decimal parsing logic is now more robust and also works if the decimal point is not an actual point but a `,`. 319 | 320 | ## 5.0.0 321 | 322 | - Fixes a panic which occurred if database returned column names with invalid encodings. 323 | - Introduces new `Error` variant `EncodingInvalid`, which is returned in case a column name can not be interpreted as UTF-16 on windows platforms or UTF-8 on non-windows platforms. 324 | - Removes deprecated `WriteError::TimeZoneNotSupported`, `OdbcConcurrentReader::new`, `OdbcConcurrentReader::with_arrow_schema`, `OdbcConcurrentReader::with`, `OdbcReader::new`, `OdbcReader::with_arrow_schema`, `OdbcReader::with`. 325 | 326 | ## 4.1.1 327 | 328 | - In order to work with mandatory columns workaround for IBM DB2 returning memory garbage now no longer maps empty strings to zero. 329 | 330 | ## 4.1.0 331 | 332 | - Update odbc-api `>= 4, < 5` -> `>= 4.1, < 5` 333 | - Support for fetching text from IBM DB2. This has been difficult because of a bug in the IBM DB2 driver which causes it to return garbage memory instead of string lengths. A workaround can now be activated using `with_shims` on `OdbcReaderBuilder`. 334 | 335 | ## 4.0.0 336 | 337 | - Update odbc-api `>= 2.2, < 4` -> `>= 4, < 5` 338 | 339 | ## 3.1.2 340 | 341 | - An assumption has been removed, that unknown column types are always representable in ASCII. Now on Linux the system encoding is used which is assumed to be UTF-8 and on windows UTF-16. The same as for other text columns. 
342 | - MySQL seems to report negative display sizes for JSON columns (-4). This is normally used to indicate no upper bound in other parts of the ODBC standard. Arrow ODBC will now return a `ColumnFailure::ZeroSizedColumn` in these scenarios, if no buffer limit has been specified. 343 | 344 | ## 3.1.1 345 | 346 | - Prevent division by zero errors when using `OdbcReaderBuilder::buffer_size_in_rows` on empty schemas. 347 | 348 | ## 3.1.0 349 | 350 | - Update arrow `>= 29, < 49` -> `>= 29, < 50` 351 | 352 | ## 3.0.0 353 | 354 | - Introduce `OdbcReaderBuilder` as the prefered way to create instances of `OdbcReader`. 355 | - Allow for limiting ODBC buffer sizes using a memory limit expressed in bytes using `OdbcReaderBuilder::max_bytes_per_batch`. 356 | - Add new variant `Error::OdbcBufferTooSmall`. 357 | 358 | ## 2.3.0 359 | 360 | - Log memory usage per row 361 | 362 | ## 2.2.0 363 | 364 | - Update odbc-api `>= 2.2, < 3` -> `>= 2.2, < 4` 365 | 366 | ## 2.1.0 367 | 368 | - Update arrow `>= 29, < 48` -> `>= 29, < 49` 369 | 370 | ## 2.0.0 371 | 372 | - Update odbc-api `>= 0.56.1, < 3` -> `>= 2.2, < 3` 373 | 374 | ## 1.3.0 375 | 376 | - Add `ConcurrentOdbcReader` to allow fetching ODBC row groups concurrently. 377 | 378 | ## 1.2.1 379 | 380 | - Additional debug messages emmitted to indicate relational types reported by ODBC 381 | 382 | ## 1.2.0 383 | 384 | - Update odbc-api `>= 0.56.1, < 2` -> `>= 0.56.1, < 3` 385 | 386 | ## 1.1.0 387 | 388 | - Update arrow `>= 29, < 47` -> `>= 29, < 48` 389 | 390 | ## 1.0.0 391 | 392 | - Update odbc-api `>= 0.56.1, < 0.58.0` -> `>= 0.56.1, < 2` 393 | 394 | ## 0.28.12 395 | 396 | - `insert_statement_from_schema` will no longer end statements with a semicolon (`;`) as to not confuse an IBM db2 driver into thinking that multiple statements are intended to be executed. Thanks to @rosscoleman for reporting the issue and spending a lot of effort reproducing the issue. 397 | 398 | ## 0.28.11 399 | 400 | - Fix: Emit an error if nanoprecision timestamps are outside of valid range, rather than overflowing silently. 401 | - Update arrow `>= 29, < 46` -> `>= 29, < 47` 402 | 403 | ## 0.28.10 404 | 405 | - Update arrow `>= 29, < 45` -> `>= 29, < 46` 406 | 407 | ## 0.28.9 408 | 409 | - Better error messages which contain the original error emitted by `odbc-api` even then printed using the `Display` trait. 410 | 411 | ## 0.28.8 412 | 413 | - Update arrow `>= 29, < 44` -> `>= 29, < 45` 414 | 415 | ## 0.28.7 416 | 417 | - Update arrow `>= 29, < 43` -> `>= 29, < 44` 418 | 419 | ## 0.28.6 420 | 421 | - Update arrow `>= 29, < 42` -> `>= 29, < 43` 422 | 423 | ## 0.28.5 424 | 425 | - Update arrow `>= 29, < 39` -> `>= 29, < 42` 426 | 427 | ## 0.28.4 428 | 429 | - Update arrow `>= 29, < 39` -> `>= 29, < 41` 430 | 431 | ## 0.28.3 432 | 433 | - Update arrow `>= 29, < 39` -> `>= 29, < 40` 434 | 435 | ## 0.28.2 436 | 437 | - Update arrow `>= 29, < 38` -> `>= 37, < 39` 438 | 439 | ## 0.28.1 440 | 441 | - Update odbc-api `>= 0.56.1, < 0.57.0` -> `>= 0.56.1, < 0.58.0` 442 | 443 | ## 0.28.0 444 | 445 | - Update arrow `>= 29, < 37` -> `>= 37, < 38` 446 | 447 | ## 0.27.0 448 | 449 | - Update odbc-api `>= 0.52.3, < 0.57.0` -> `>= 0.56.1, < 0.57.0` 450 | - Introduced `OdbcReader::into_cursor` in order to enable processing stored procedures returning multiple result sets. 451 | 452 | ## 0.26.12 453 | 454 | - Update odbc-api `>= 0.52.3, < 0.56.0` -> `>= 0.52.3, < 0.57.0` 455 | 456 | ## 0.26.11 457 | 458 | - Support for `LargeUtf8` then inserting data. 
459 | 460 | ## 0.26.10 461 | 462 | - Update arrow `>= 29, < 36` -> `>= 29, < 37` 463 | 464 | ## 0.26.9 465 | 466 | - Fix code sample in Readme 467 | 468 | ## 0.26.8 469 | 470 | - Update odbc-api `>= 0.52.3, < 0.55.0` -> `>= 0.52.3, < 0.56.0` 471 | 472 | ## 0.26.7 473 | 474 | - Fix crate version for release 475 | 476 | ## 0.26.6 477 | 478 | - Update arrow `>= 29, < 34` -> `>= 29, < 36` 479 | 480 | ## 0.26.5 481 | 482 | - Update arrow `>= 29, < 33` -> `>= 29, < 34` 483 | 484 | ## 0.26.4 485 | 486 | - Update arrow `>= 29, < 31` -> `>= 29, < 33` 487 | - Depreacte `WriterError::TimeZonesNotSupported` in favor of `WriterError::UnsupportedArrowDataType`. 488 | 489 | ## 0.26.3 490 | 491 | - Update arrow `>= 29, < 31` -> `>= 29, < 32` 492 | 493 | ## 0.26.2 494 | 495 | - Update odbc-api `>= 0.52.3, < 0.54.0` -> `>= 0.52.3, < 0.55.0` 496 | 497 | ## 0.26.1 498 | 499 | - Update arrow `>= 29, < 30` -> `>= 29, < 31` 500 | 501 | ## 0.26.0 502 | 503 | - Update arrow `>= 28, < 30` -> `>= 29, < 30` 504 | - Update odbc-api `>= 0.52.3, < 0.53.0` -> `>= 0.52.3, < 0.54.0` 505 | 506 | ## 0.25.1 507 | 508 | - Update arrow `>= 25, < 29` -> `>= 28, < 30` 509 | 510 | ## 0.25.0 511 | 512 | - Update arrow `>= 25, < 28` -> `>= 28, < 29` 513 | 514 | ## 0.24.0 515 | 516 | - Update odbc-api `>= 0.50.0, < 0.53.0` -> `>= 0.52.3, < 0.53.0` 517 | 518 | ## 0.23.4 519 | 520 | - Update arrow `>=25, < 27` -> `>= 25, < 28` 521 | 522 | ## 0.23.3 523 | 524 | - Update odbc-api `>= 0.50.0, < 0.52.0` -> `>= 0.50.0, < 0.53.0` 525 | 526 | ## 0.23.2 527 | 528 | - Update odbc-api `>= 0.50.0, < 0.51.0` -> `>= 0.50.0, < 0.52.0` 529 | 530 | ## 0.23.1 531 | 532 | - Update arrow `>= 25, < 26` -> `>=25, < 27` 533 | 534 | ## 0.23.0 535 | 536 | - Update odbc-api `>= 0.45.0, < 0.51.0` -> `>= 0.50.0, < 0.51.0` 537 | - Update arrow `>= 22, < 25` -> `>= 25, < 26` 538 | 539 | ## 0.22.3 540 | 541 | - Update odbc-api `>= 0.45.0, < 0.50.0` -> `>= 0.45.0, < 0.51.0` 542 | 543 | ## 0.22.2 544 | 545 | - Update arrow `>= 22, < 23` -> `>= 22, < 25` 546 | 547 | ## 0.22.1 548 | 549 | - Update arrow `>= 22, < 23` -> `>= 22, < 24` 550 | 551 | ## 0.22.0 552 | 553 | - Update arrow `>= 21, < 22` -> `>= 22, < 23` 554 | 555 | ## 0.21.1 556 | 557 | - Update odbc-api `>= 0.45.0, < 0.49.0` -> `>= 0.45.0, < 0.50.0` 558 | 559 | ## 0.21.0 560 | 561 | - Update arrow `>= 20, < 21` -> `>= 21, < 22` 562 | - Fix: `OdbcWriter::inserter` had only been public by accident. 563 | 564 | ## 0.20.0 565 | 566 | - Use `narrow` text on non-windows platforms by default. Connection strings, queries and error messages are assumed to be UTF-8 and not transcoded to and from UTF-16. 567 | 568 | ## 0.19.3 569 | 570 | - Update odbc-api `>= 0.45.0, < 0.48.0` -> `>= 0.45.0, < 0.49.0` 571 | 572 | ## 0.19.2 573 | 574 | - Update odbc-api `>= 0.45.0, < 0.46.0` -> `>= 0.45.0, < 0.48.0` 575 | 576 | ## 0.19.1 577 | 578 | - Update odbc-api `>= 0.45.0, < 0.46.0` -> `>= 0.45.0, < 0.47.0` 579 | 580 | ## 0.19.0 581 | 582 | - Update arrow `>= 19, < 20` -> `>= 20, < 21` 583 | 584 | ## 0.18.1 585 | 586 | - Support for inserting `Decimal256`. 587 | 588 | ## 0.18.0 589 | 590 | - Update arrow `>= 7.0.0, < 19` -> `>= 19, < 20` 591 | 592 | ## 0.17.2 593 | 594 | - Update arrow `>= 7.0.0, < 18` -> `>= 7.0.0, < 19` 595 | 596 | ## 0.17.1 597 | 598 | - Update arrow `>= 7.0.0, < 17` -> `>= 7.0.0, < 18` 599 | 600 | ## 0.17.0 601 | 602 | - Update odbc-api `>= 0.44.3, < 0.45` -> `>= 0.45.0, < 0.46.0` 603 | - Allow for creating an `OdbcWriter` which takes ownership of the connection using `OdbcWriter::from_connection`. 
604 | 605 | ## 0.16.0 606 | 607 | - Support for inserting `RecordBatch`es into a database table. 608 | 609 | ## 0.15.0 610 | 611 | - Update odbc-api `>= 0.40.2, < 0.45` -> `>= 0.44.3, < 0.45` 612 | - `unstable`: prototype for inserting arrow arrays into ODBC 613 | - Update arrow `>= 7.0.0, < 16` -> `>= 7.0.0, < 17` 614 | 615 | ## 0.14.0 616 | 617 | - `arrow_schema_from` now requires an exclusive reference (`&mut`) to `ResultSetMetadata`. 618 | - Update odbc-api `>= 0.40.2, < 0.44` -> `>= 0.40.2, < 0.45` 619 | 620 | ## 0.13.5 621 | 622 | - Update odbc-api `>= 0.40.2, < 0.43` -> `>= 0.40.2, < 0.44` 623 | 624 | ## 0.13.4 625 | 626 | - Update arrow `>= 7.0.0, < 15` -> `>= 7.0.0, < 16` 627 | 628 | ## 0.13.3 629 | 630 | - Update odbc-api = `>= 0.40.2, < 0.42` -> `>= 0.40.2, < 0.43` 631 | 632 | ## 0.13.2 633 | 634 | - Update odbc-api `>= 0.40 < 0.41` -> `>= 0.40.2, < 0.42` 635 | 636 | ## 0.13.1 637 | 638 | - Update arrow `>= 7.0.0, < 14` -> `>= 7.0.0, < 15` 639 | 640 | ## 0.13.0 641 | 642 | - `panic` is now default behaviour on allocation errors. Activate `fallibale_allocations` in the `BufferAllocationOptions` in order to get a recoverable error instead. 643 | 644 | ## 0.12.0 645 | 646 | - Update odbc-api `>= 0.39, < 0.40` -> `>= 0.40 < 0.41` 647 | 648 | ## 0.11.0 649 | 650 | - Update odbc-api `>= 0.38, < 0.39` -> `>= 0.39, < 0.40` 651 | - Support for fetching values from `VARCHAR(max)` and `VARBINARY(max)` columns, through specifying upper limits using `BufferAllocationOptions` in `OdbcReader::with`. 652 | 653 | ## 0.10.0 654 | 655 | - Update odbc-api `>= 0.36, < 0.37` -> `>= 0.38, < 0.39` 656 | - Recoverable errors if allocation for binary or text columns fails. 657 | 658 | ## 0.9.2 659 | 660 | - Update arrow `>= 7.0.0, < 10` -> `>= 7.0.0, < 13` 661 | 662 | ## 0.9.1 663 | 664 | - Update arrow `>= 7.0.0, < 10` -> `>= 7.0.0, < 12` 665 | 666 | ## 0.9.0 667 | 668 | - Update odbc-api `>= 0.33.0, < 0.36` -> `0.36 < 0.37` 669 | 670 | ## 0.8.5 671 | 672 | - Update arrow `>= 7.0.0, < 10` -> `>= 7.0.0, < 11` 673 | 674 | ## 0.8.4 675 | 676 | - Update odbc-api `>= 0.33.0, < 0.35` -> `>= 0.33.0, < 0.36` 677 | 678 | ## 0.8.3 679 | 680 | - Update arrow `>= 7.0.0, < 8` -> `>= 7.0.0, < 10` 681 | 682 | ## 0.8.2 683 | 684 | - Update odbc-api `>= 0.31.0, < 0.33` -> `>= 0.33.0, < 0.35` 685 | 686 | ## 0.8.1 687 | 688 | - Update arrow `>= 6.1.0, < 7` -> `>= 7.0.0, < 8` 689 | 690 | ## 0.8.0 691 | 692 | - Use Rust edition 2021 693 | - Update arrow `>= 6.1.0, < 7` -> `>= 7.0.0, < 8` 694 | - Update odbc-api `>= 0.31.0, < 0.33` -> `>= 0.33.0, < 0.34` 695 | 696 | ## 0.7.2 697 | 698 | - Fix: Formatting of error message for `ZeroSizedColumn`. 699 | 700 | ## 0.7.1 701 | 702 | - `Error::ColumnFailure` now prints also the errors cause. 703 | 704 | ## 0.7.0 705 | 706 | - `InvalidDisplaySize` replaced with `ZeroSizedColumn`. 707 | - Refactored error handling, to have separate variant for column specific errors. 708 | 709 | ## 0.6.4 710 | 711 | - Base allocations of text columns on column size instead of octet length. 712 | 713 | ## 0.6.3 714 | 715 | - Fixed an issue there not enough memory to hold the maximum string size has been allocated, if querying a VARCHAR column on windows or an NVARCHAR column on a non-windows platform. 716 | 717 | ## 0.6.2 718 | 719 | - Update arrow v6.0.0 -> `>= 6.1.0, < 7` 720 | - Update odbc-api v0.31.0 -> `>= 0.31.0, < 0.33` 721 | 722 | ## 0.6.1 723 | 724 | - Fix: There had been issue causing an overflow for timestamps with Microseconds precision. 
725 |
726 | ## 0.6.0
727 |
728 | - Update odbc-api v0.30.0 -> v0.31.0
729 |
730 | ## 0.5.0
731 |
732 | - Update arrow v6.0.0 -> v6.1.0
733 | - Update odbc-api v0.29.0 -> v0.30.0
734 | - Introduced `arrow_schema_from` to support inferring arrow schemas without creating an `OdbcReader`.
735 |
736 | ## 0.4.1
737 |
738 | - Estimate memory usage of text columns more accurately.
739 |
740 | ## 0.4.0
741 |
742 | - Update arrow v5.4.0 -> v6.0.0
743 |
744 | ## 0.3.0
745 |
746 | - Update arrow v5.4.0 -> v5.5.0
747 | - Update odbc-api v0.28.0 -> v0.29.0
748 |
749 | ## 0.2.1
750 |
751 | - Updated code examples to use the safe Environment construction introduced in `odbc-api` version 0.28.3
752 |
753 | ## 0.2.0
754 |
755 | - `odbc-api` version 0.28.0
756 | - `arrow` version 5.4.0
757 |
758 | ## 0.1.2
759 |
760 | - Support fixed sized binary types.
761 |
762 | ## 0.1.1
763 |
764 | - Add Readme path to manifest
765 |
766 | ## 0.1.0
767 |
768 | Initial release
769 |
770 | Allows for fetching arrow batches from ODBC data sources
771 |
772 | - `arrow` version 5.3.0
773 | - `odbc-api` version 0.27.3
774 | --------------------------------------------------------------------------------