├── .gitignore ├── .github ├── dependabot.yml ├── FUNDING.yml └── workflows │ ├── release.yml │ └── build.yml ├── docker-compose.yml ├── License ├── src ├── odbc_writer │ ├── binary.rs │ ├── boolean.rs │ ├── text.rs │ ├── map_arrow_to_odbc.rs │ └── timestamp.rs ├── reader │ ├── decimal.rs │ ├── binary.rs │ ├── time.rs │ ├── to_record_batch.rs │ ├── concurrent_odbc_reader.rs │ ├── text.rs │ ├── map_odbc_to_arrow.rs │ └── odbc_reader.rs ├── lib.rs ├── error.rs ├── decimal.rs ├── date_time.rs ├── schema.rs ├── reader.rs └── odbc_writer.rs ├── Contributing.md ├── Cargo.toml ├── Readme.md └── CHANGELOG.md /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | - package-ecosystem: github-actions 9 | directory: "/" 10 | schedule: 11 | interval: daily -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | # Docker compose file used for local development 2 | 3 | services: 4 | postgres: 5 | image: postgres:17 6 | ports: 7 | - "5432:5432" 8 | environment: 9 | POSTGRES_DB: test 10 | POSTGRES_USER: test 11 | POSTGRES_PASSWORD: test 12 | 13 | # Microsoft SQL database used for testing 14 | mssql: 15 | image: mcr.microsoft.com/mssql/server:2019-CU5-ubuntu-18.04 16 | ports: 17 | - 1433:1433 18 | 19 | environment: 20 | - MSSQL_SA_PASSWORD=My@Test@Password1 21 | command: ["/opt/mssql/bin/sqlservr", "--accept-eula", "--reset-sa-password"] -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [pacman82] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] -------------------------------------------------------------------------------- /License: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Markus Klein 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this 
permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /src/odbc_writer/binary.rs: -------------------------------------------------------------------------------- 1 | use arrow::array::{Array, BinaryArray}; 2 | use odbc_api::buffers::{AnySliceMut, BufferDesc}; 3 | 4 | use super::{WriteStrategy, WriterError}; 5 | 6 | pub struct VariadicBinary { 7 | buffer_start_size: usize, 8 | } 9 | 10 | impl VariadicBinary { 11 | pub fn new(buffer_start_size: usize) -> Self { 12 | VariadicBinary { buffer_start_size } 13 | } 14 | } 15 | 16 | impl WriteStrategy for VariadicBinary { 17 | fn buffer_desc(&self) -> BufferDesc { 18 | BufferDesc::Binary { 19 | length: self.buffer_start_size, 20 | } 21 | } 22 | 23 | fn write_rows( 24 | &self, 25 | param_offset: usize, 26 | to: AnySliceMut<'_>, 27 | from: &dyn Array, 28 | ) -> Result<(), WriterError> { 29 | let from = from.as_any().downcast_ref::<BinaryArray>().unwrap(); 30 | let mut to = to.as_bin_view().unwrap(); 31 | for (row_index, element) in from.iter().enumerate() { 32 | if let Some(bytes) = element { 33 | to.ensure_max_element_length(bytes.len(), row_index) 34 | .map_err(WriterError::RebindBuffer)?; 35 | to.set_cell(param_offset + row_index, Some(bytes)) 36 | } else { 37 | to.set_cell(param_offset + row_index, None); 38 | } 39 | } 40 | Ok(()) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /Contributing.md: -------------------------------------------------------------------------------- 1 | # Contributions 2 | 3 | Whether they come in the form of code, interesting feature suggestions, design critique, or bug reports, all contributions are welcome. Please start an issue before investing a lot of work. This helps avoid situations where I would feel the need to reject a large body of work, and a lot of your time would have been wasted. `arrow-odbc` is a pet project and a labor of love, which implies that I maintain it in my spare time. Please understand that I may not always react immediately. If you contribute code to fix a bug, please also contribute a test which would have caught it. Happy contributing. 4 | 5 | ## Local build and test setup 6 | 7 | Running local tests currently requires: 8 | 9 | * Docker and Docker compose. 10 | * An ODBC driver manager 11 | * A driver for Microsoft SQL Server 12 | * Rust toolchain (cargo) 13 | 14 | You can install these requirements from here: 15 | 16 | * Install Rust compiler and Cargo. Follow the instructions on [this site](https://www.rust-lang.org/en-US/install.html). 17 | * Install PostgreSQL ODBC drivers 18 | * [Microsoft ODBC Driver 18 for SQL Server](https://docs.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver15). 19 | * An ODBC driver manager, if you are not on Windows. 20 | 21 | With docker installed run: 22 | 23 | ```shell 24 | docker-compose up 25 | ``` 26 | 27 | This starts the relational databases used for testing.
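Before running the tests, it can be worth checking that the ODBC driver manager finds the SQL Server driver and that the container accepts connections. The following is only a minimal connectivity sketch: it assumes the connection string from the crate documentation, and the driver name may differ on your machine.

```rust
use arrow_odbc::odbc_api::{ConnectionOptions, Environment};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let env = Environment::new()?;
    // Connection string mirrors the one used in the crate documentation. Adjust the
    // driver name if your local installation registers it differently.
    let conn = env.connect_with_connection_string(
        "Driver={ODBC Driver 18 for SQL Server};Server=localhost;UID=SA;PWD=My@Test@Password1;",
        ConnectionOptions::default(),
    )?;
    // No parameters, no timeout. If this succeeds, driver and container are reachable.
    conn.execute("SELECT 1", (), None)?;
    Ok(())
}
```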
28 | 29 | We now can execute the tests in Rust typical fashion using: 30 | 31 | ```shell 32 | cargo test 33 | ``` 34 | -------------------------------------------------------------------------------- /src/reader/decimal.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use arrow::array::{ArrayRef, Decimal128Builder}; 4 | use odbc_api::{ 5 | buffers::{AnySlice, BufferDesc}, 6 | decimal_text_to_i128, 7 | }; 8 | 9 | use super::{MappingError, ReadStrategy}; 10 | 11 | pub struct Decimal { 12 | precision: u8, 13 | /// We know scale to be non-negative, yet we can save us some conversions storing it as i8. 14 | scale: i8, 15 | } 16 | 17 | impl Decimal { 18 | pub fn new(precision: u8, scale: i8) -> Self { 19 | Self { precision, scale } 20 | } 21 | } 22 | 23 | impl ReadStrategy for Decimal { 24 | fn buffer_desc(&self) -> BufferDesc { 25 | BufferDesc::Text { 26 | // Must be able to hold num precision digits a sign and a decimal point 27 | max_str_len: self.precision as usize + 2, 28 | } 29 | } 30 | 31 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 32 | let view = column_view.as_text_view().unwrap(); 33 | let mut builder = Decimal128Builder::new(); 34 | let scale = self.scale as usize; 35 | 36 | for opt in view.iter() { 37 | if let Some(text) = opt { 38 | let num = decimal_text_to_i128(text, scale); 39 | builder.append_value(num); 40 | } else { 41 | builder.append_null(); 42 | } 43 | } 44 | 45 | Ok(Arc::new( 46 | builder 47 | .finish() 48 | .with_precision_and_scale(self.precision, self.scale) 49 | .unwrap(), 50 | )) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/odbc_writer/boolean.rs: -------------------------------------------------------------------------------- 1 | use arrow::array::{Array, BooleanArray}; 2 | use odbc_api::{ 3 | Bit, 4 | buffers::{AnySliceMut, BufferDesc}, 5 | }; 6 | 7 | use crate::WriterError; 8 | 9 | use super::WriteStrategy; 10 | 11 | pub fn boolean_to_bit(nullable: bool) -> Box { 12 | if nullable { 13 | Box::new(Nullable) 14 | } else { 15 | Box::new(NonNullable) 16 | } 17 | } 18 | 19 | struct Nullable; 20 | 21 | impl WriteStrategy for Nullable { 22 | fn buffer_desc(&self) -> BufferDesc { 23 | BufferDesc::Bit { nullable: true } 24 | } 25 | 26 | fn write_rows( 27 | &self, 28 | param_offset: usize, 29 | column_buf: AnySliceMut<'_>, 30 | array: &dyn Array, 31 | ) -> Result<(), WriterError> { 32 | let from = array.as_any().downcast_ref::().unwrap(); 33 | let mut to = column_buf.as_nullable_slice::().unwrap(); 34 | for (index, cell) in from.iter().enumerate() { 35 | to.set_cell(index + param_offset, cell.map(Bit::from_bool)) 36 | } 37 | Ok(()) 38 | } 39 | } 40 | 41 | struct NonNullable; 42 | 43 | impl WriteStrategy for NonNullable { 44 | fn buffer_desc(&self) -> BufferDesc { 45 | BufferDesc::Bit { nullable: false } 46 | } 47 | 48 | fn write_rows( 49 | &self, 50 | param_offset: usize, 51 | column_buf: AnySliceMut<'_>, 52 | array: &dyn Array, 53 | ) -> Result<(), WriterError> { 54 | let from = array.as_any().downcast_ref::().unwrap(); 55 | let to = column_buf.as_slice::().unwrap(); 56 | for index in 0..from.len() { 57 | to[index + param_offset] = Bit::from_bool(from.value(index)) 58 | } 59 | Ok(()) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release-plz 2 | 3 | 
permissions: 4 | pull-requests: write 5 | contents: write 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | 14 | # Release unpublished packages. 15 | release-plz-release: 16 | name: Release-plz release 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Install UnixODBC 20 | run: | 21 | apt-get update 22 | ACCEPT_EULA=Y apt-get install -y unixodbc-dev 23 | shell: sudo bash {0} 24 | - name: Install Rust toolchain 25 | uses: dtolnay/rust-toolchain@stable 26 | - name: Checkout repository 27 | uses: actions/checkout@v6 28 | with: 29 | fetch-depth: 0 30 | - name: Run release-plz 31 | uses: MarcoIeni/release-plz-action@v0.5 32 | with: 33 | command: release 34 | env: 35 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 36 | CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} 37 | 38 | # Create a PR with the new versions and changelog, preparing the next release. 39 | release-plz-pr: 40 | name: Release-plz PR 41 | runs-on: ubuntu-latest 42 | concurrency: 43 | group: release-plz-${{ github.ref }} 44 | cancel-in-progress: false 45 | steps: 46 | - name: Install UnixODBC 47 | run: | 48 | apt-get update 49 | ACCEPT_EULA=Y apt-get install -y unixodbc-dev 50 | shell: sudo bash {0} 51 | - name: Install Rust toolchain 52 | uses: dtolnay/rust-toolchain@stable 53 | - name: Checkout repository 54 | uses: actions/checkout@v6 55 | with: 56 | fetch-depth: 0 57 | - name: Run release-plz 58 | uses: MarcoIeni/release-plz-action@v0.5 59 | with: 60 | command: release-pr 61 | env: 62 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 63 | CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} -------------------------------------------------------------------------------- /src/reader/binary.rs: -------------------------------------------------------------------------------- 1 | use std::{convert::TryInto, sync::Arc}; 2 | 3 | use arrow::array::{ArrayRef, BinaryBuilder, FixedSizeBinaryBuilder}; 4 | use odbc_api::buffers::{AnySlice, BufferDesc}; 5 | 6 | use super::{MappingError, ReadStrategy}; 7 | 8 | pub struct Binary { 9 | /// Maximum length in bytes of elements 10 | max_len: usize, 11 | } 12 | 13 | impl Binary { 14 | pub fn new(max_len: usize) -> Self { 15 | Self { max_len } 16 | } 17 | } 18 | 19 | impl ReadStrategy for Binary { 20 | fn buffer_desc(&self) -> BufferDesc { 21 | BufferDesc::Binary { 22 | length: self.max_len, 23 | } 24 | } 25 | 26 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 27 | let view = column_view.as_bin_view().unwrap(); 28 | let mut builder = BinaryBuilder::new(); 29 | for value in view.iter() { 30 | if let Some(bytes) = value { 31 | builder.append_value(bytes); 32 | } else { 33 | builder.append_null(); 34 | } 35 | } 36 | Ok(Arc::new(builder.finish())) 37 | } 38 | } 39 | 40 | pub struct FixedSizedBinary { 41 | /// Length in bytes of elements 42 | len: u32, 43 | } 44 | 45 | impl FixedSizedBinary { 46 | pub fn new(len: u32) -> Self { 47 | Self { len } 48 | } 49 | } 50 | 51 | impl ReadStrategy for FixedSizedBinary { 52 | fn buffer_desc(&self) -> BufferDesc { 53 | BufferDesc::Binary { 54 | length: self.len as usize, 55 | } 56 | } 57 | 58 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 59 | let view = column_view.as_bin_view().unwrap(); 60 | let mut builder = FixedSizeBinaryBuilder::new(self.len.try_into().unwrap()); 61 | for value in view.iter() { 62 | if let Some(bytes) = value { 63 | builder.append_value(bytes).unwrap(); 64 | } else { 65 | builder.append_null(); 66 | } 67 | } 68 | Ok(Arc::new(builder.finish())) 69 | } 70 | } 71 | 
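The two read strategies above only describe how a single column travels from ODBC to Arrow: `buffer_desc` tells the reader which transit buffer to allocate, and `fill_arrow_array` converts each fetched column slice into an Arrow array. The sketch below shows how their output surfaces to a user of the crate; the table and column names are hypothetical, the rest mirrors the crate-level documentation.

```rust
use arrow_odbc::{
    arrow::array::BinaryArray,
    odbc_api::{ConnectionOptions, Environment},
    OdbcReaderBuilder,
};

const CONNECTION_STRING: &str =
    "Driver={ODBC Driver 18 for SQL Server};Server=localhost;UID=SA;PWD=My@Test@Password1;";

fn main() -> Result<(), anyhow::Error> {
    let odbc_environment = Environment::new()?;
    let connection = odbc_environment
        .connect_with_connection_string(CONNECTION_STRING, ConnectionOptions::default())?;
    // Hypothetical table with a VARBINARY column; it is read via the `Binary` strategy.
    let cursor = connection
        .execute("SELECT binary_col FROM HypotheticalTable", (), None)?
        .expect("SELECT statement must produce a cursor");
    let reader = OdbcReaderBuilder::new().build(cursor)?;
    for batch in reader {
        let batch = batch?;
        // Variable length binary data arrives as an Arrow `BinaryArray`.
        let col = batch
            .column(0)
            .as_any()
            .downcast_ref::<BinaryArray>()
            .unwrap();
        for value in col.iter().flatten() {
            println!("{} bytes", value.len());
        }
    }
    Ok(())
}
```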
-------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Fill Apache Arrow arrays from ODBC data sources. 2 | //! 3 | //! ## Usage 4 | //! 5 | //! ```no_run 6 | //! use arrow_odbc::{odbc_api::{Environment, ConnectionOptions}, OdbcReaderBuilder}; 7 | //! 8 | //! const CONNECTION_STRING: &str = "\ 9 | //! Driver={ODBC Driver 18 for SQL Server};\ 10 | //! Server=localhost;\ 11 | //! UID=SA;\ 12 | //! PWD=My@Test@Password1;\ 13 | //! "; 14 | //! 15 | //! fn main() -> Result<(), anyhow::Error> { 16 | //! // Your application is fine if you spin up only one Environment. 17 | //! let odbc_environment = Environment::new()?; 18 | //! 19 | //! // Connect with database. 20 | //! let connection = odbc_environment.connect_with_connection_string( 21 | //! CONNECTION_STRING, 22 | //! ConnectionOptions::default() 23 | //! )?; 24 | //! 25 | //! // This SQL statement does not require any arguments. 26 | //! let parameters = (); 27 | //! 28 | //! // Do not apply any timeout. 29 | //! let timeout_sec = None; 30 | //! 31 | //! // Execute query and create result set 32 | //! let cursor = connection 33 | //! .execute("SELECT * FROM MyTable", parameters, timeout_sec)? 34 | //! .expect("SELECT statement must produce a cursor"); 35 | //! 36 | //! // Read result set as arrow batches. Infer Arrow types automatically using the meta 37 | //! // information of `cursor`. 38 | //! let arrow_record_batches = OdbcReaderBuilder::new().build(cursor)?; 39 | //! 40 | //! for batch in arrow_record_batches { 41 | //! // ... process batch ... 42 | //! } 43 | //! 44 | //! Ok(()) 45 | //! } 46 | //! ``` 47 | mod date_time; 48 | mod decimal; 49 | mod error; 50 | mod odbc_writer; 51 | mod reader; 52 | mod schema; 53 | 54 | // Rexport odbc_api and arrow to make it easier for downstream crates to depend to avoid version 55 | // mismatches 56 | pub use arrow; 57 | pub use odbc_api; 58 | 59 | pub use self::{ 60 | error::Error, 61 | odbc_writer::{OdbcWriter, WriterError, insert_into_table, insert_statement_from_schema}, 62 | reader::{ 63 | BufferAllocationOptions, ColumnFailure, ConcurrentOdbcReader, OdbcReader, 64 | OdbcReaderBuilder, TextEncoding, 65 | }, 66 | schema::arrow_schema_from, 67 | }; 68 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | linux: 7 | name: Build & Test Linux 8 | runs-on: ubuntu-latest 9 | 10 | services: 11 | sqlserver: 12 | image: mcr.microsoft.com/mssql/server:2019-latest 13 | ports: 14 | - 1433:1433 15 | env: 16 | ACCEPT_EULA: Y 17 | SA_PASSWORD: My@Test@Password1 18 | 19 | postgres: 20 | image: postgres:17 21 | ports: 22 | - "5432:5432" 23 | env: 24 | POSTGRES_DB: test 25 | POSTGRES_USER: test 26 | POSTGRES_PASSWORD: test 27 | 28 | steps: 29 | - name: Checkout 30 | uses: actions/checkout@v6 31 | - name: Install latests rust toolchain 32 | uses: actions-rs/toolchain@v1 33 | with: 34 | toolchain: stable 35 | profile: minimal 36 | override: true 37 | # selecting a toolchain either by action or manual `rustup` calls should happen 38 | # before the plugin, as the cache uses the current rustc version as its cache key 39 | - name: Rust build cache 40 | uses: Swatinem/rust-cache@v2 41 | - name: Build 42 | run: cargo build 43 | - name: Install ODBC Drivers 44 | run: | 45 | apt-get 
update 46 | apt-get install -y unixodbc-dev odbc-postgresql msodbcsql18 47 | # Fix PostgreSQL driver paths 48 | sed --in-place 's/psqlodbca.so/\/usr\/lib\/x86_64-linux-gnu\/odbc\/psqlodbca.so/' /etc/odbcinst.ini 49 | sed --in-place 's/psqlodbcw.so/\/usr\/lib\/x86_64-linux-gnu\/odbc\/psqlodbcw.so/' /etc/odbcinst.ini 50 | shell: sudo bash {0} 51 | - name: Print odbcinst.ini 52 | run: cat /etc/odbcinst.ini 53 | - name: Test 54 | run: cargo test 55 | 56 | dependabot: 57 | needs: [linux] 58 | permissions: 59 | pull-requests: write 60 | contents: write 61 | runs-on: ubuntu-latest 62 | if: ${{ github.actor == 'dependabot[bot]' && github.event_name == 'pull_request'}} 63 | steps: 64 | - name: Merge Depenabot Pull Request 65 | run: gh pr merge --auto --rebase "$PR_URL" 66 | env: 67 | PR_URL: ${{github.event.pull_request.html_url}} 68 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 69 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | use crate::reader::ColumnFailure; 4 | 5 | /// A variation of things which can go wrong then creating an [`crate::OdbcReader`]. 6 | #[derive(Error, Debug)] 7 | pub enum Error { 8 | /// Failure to retrieve the number of columns from the result set. 9 | #[error("Unable to retrieve number of columns in result set.\n{0}")] 10 | UnableToRetrieveNumCols(odbc_api::Error), 11 | /// Indicates that the error is related to a specify column. 12 | #[error( 13 | "There is a problem with the SQL type of the column with name: {} and index {}:\n{source}", 14 | name, 15 | index 16 | )] 17 | ColumnFailure { 18 | // Name of the erroneous column 19 | name: String, 20 | // Zero based index of the erroneous column 21 | index: usize, 22 | // Cause of the error 23 | source: ColumnFailure, 24 | }, 25 | /// Failure during constructing an OdbcReader, if it turns out the buffer memory size limit is 26 | /// too small. 27 | #[error( 28 | "The Odbc buffer is limited to a size of {max_bytes_per_batch} bytes. Yet a single row \ 29 | does require up to {bytes_per_row}. This means the buffer is not large enough to hold a \ 30 | single row of data. Please note that the buffers in ODBC must always be able to hold the \ 31 | largest possible value of variadic types. You should either set a higher upper bound for \ 32 | the buffer size, or limit the length of the variadic columns." 33 | )] 34 | OdbcBufferTooSmall { 35 | max_bytes_per_batch: usize, 36 | bytes_per_row: usize, 37 | }, 38 | /// We use UTF-16 encoding on windows by default. Since UTF-8 locals on windows system can not 39 | /// be expected to be the default. Since we use wide methods the ODBC standard demands the 40 | /// encoding to be UTF-16. 41 | #[cfg(any(feature = "wide", all(not(feature = "narrow"), target_os = "windows")))] 42 | #[error( 43 | "Expected the database to return UTF-16, yet what came back was not valid UTF-16. Precise \ 44 | encoding error: {source}. This is likely a bug in your ODBC driver not supporting wide \ 45 | method calls correctly." 46 | )] 47 | EncodingInvalid { source: std::char::DecodeUtf16Error }, 48 | /// We expect UTF-8 to be the default on non-windows platforms. Yet still some systems are 49 | /// configured different. 50 | #[cfg(not(any(feature = "wide", all(not(feature = "narrow"), target_os = "windows"))))] 51 | #[error( 52 | "Expected the database to return UTF-8, yet what came back was not valid UTF-8. 
According \ 53 | to the ODBC standard the encoding is specified by your system locale. So you may want to \ 54 | check your environment and whether it specifies to use an UTF-8 charset. However it is \ 55 | worth noting that drivers take some liberty with the interpretation. Your connection \ 56 | string and other configurations specific to your database may also influence client side \ 57 | encoding. Precise encoding error: {source}" 58 | )] 59 | EncodingInvalid { source: std::string::FromUtf8Error }, 60 | } 61 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "arrow-odbc" 3 | version = "21.0.0" 4 | authors = ["Markus Klein"] 5 | edition = "2024" 6 | license = "MIT" 7 | repository = "https://github.com/pacman82/arrow-odbc" 8 | documentation = "https://docs.rs/arrow-odbc/" 9 | 10 | # A short blurb about the package. This is not rendered in any format when 11 | # uploaded to crates.io (aka this is not markdown). 12 | description = "Read/Write Apache Arrow arrays from/to ODBC data sources." 13 | 14 | # This is a list of up to five keywords that describe this crate. Keywords 15 | # are searchable on crates.io, and you may choose any words that would 16 | # help someone find this crate. 17 | keywords = ["odbc", "database", "sql", "arrow"] 18 | 19 | # This is a list of up to five categories where this crate would fit. 20 | # Categories are a fixed list available at crates.io/category_slugs, and 21 | # they must match exactly. 22 | categories = ["database"] 23 | 24 | # This points to a file under the package root (relative to this `Cargo.toml`). 25 | # The contents of this file are stored and indexed in the registry. 26 | # crates.io will render this file and place the result on the crate's page. 27 | readme = "Readme.md" 28 | 29 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 30 | 31 | [dependencies] 32 | arrow = { version = ">= 29, < 58", default-features = false, features = [ 33 | "chrono-tz", 34 | ] } 35 | # We use chrono for date conversions. 36 | chrono = "0.4.39" 37 | log = "0.4.27" 38 | thiserror = "2.0.12" 39 | 40 | odbc-api = ">= 17, < 21" 41 | atoi = "2.0.0" 42 | 43 | 44 | [dev-dependencies] 45 | anyhow = "1.0.97" 46 | # Function name macro is used to ensure unique table names in test 47 | stdext = "0.3.3" 48 | float_eq = "1.0.1" 49 | 50 | [features] 51 | 52 | # On linux we assume use of a UTF-8 locale. So we set the narrow features implying that for queries, 53 | # connection strings and error messages the driver and driver manager supply utf8-strings. This 54 | # might also be slightly faster since no transcoding between UTF-8 and UTF-16 is required. 55 | # Overall speed always depends on the driver, but it is reasonable to assume this is faster, more 56 | # importantly, UTF-8 is likely to be more battled tested on these platforms, while UTF-16 is "only" 57 | # required by the standard. 58 | 59 | # On windows we can not assume the default locale to be UTF-8, so we compile odbc-api with default 60 | # features implying the use of UTF-16 for queries, connection strings and error messages. This 61 | # should work on any system. However if you would like to use the narrow UTF-8 function calls on 62 | # windows systems you can set this feature flag. 63 | narrow = ["odbc-api/narrow"] 64 | 65 | # On linux we assume use of a UTF-8 locale. 
So we set the narrow features implying that for queries, 66 | # connection strings and error messages the driver and driver manager supply utf8-strings. This 67 | # might also be slightly faster since no transcoding between UTF-8 and UTF-16 is required. 68 | # Overall speed always depends on the driver, but it is reasonable to assume this is faster, more 69 | # importantly, UTF-8 is likely to be more battled tested on these platforms, while UTF-16 is "only" 70 | # required by the standard. However, if you are e.g. faced with a driver which does not use UTF-8, 71 | # but only ascii, or want to use the wide functions calls for any other reason on a non-windows 72 | # system you can set the `wide` feature flag to overwrite this behavior. 73 | wide = ["odbc-api/wide"] 74 | -------------------------------------------------------------------------------- /src/reader/time.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | ops::{Add, Mul}, 3 | sync::Arc, 4 | }; 5 | 6 | use arrow::array::{ 7 | ArrayRef, Time32MillisecondBuilder, Time64MicrosecondBuilder, Time64NanosecondBuilder, 8 | }; 9 | use atoi::FromRadix10; 10 | use odbc_api::{ 11 | buffers::{AnySlice, BufferDesc}, 12 | sys::Time, 13 | }; 14 | 15 | use super::{MappingError, ReadStrategy}; 16 | 17 | pub fn seconds_since_midnight(time: &Time) -> i32 { 18 | (time.hour as i32 * 60 + time.minute as i32) * 60 + time.second as i32 19 | } 20 | 21 | /// Strategy for fetching the time as text and parsing it into an `i32` which represents 22 | /// milliseconds after midnight. 23 | pub struct TimeMsI32; 24 | 25 | impl ReadStrategy for TimeMsI32 { 26 | fn buffer_desc(&self) -> BufferDesc { 27 | BufferDesc::Text { 28 | // Expected format is HH:MM:SS.fff 29 | max_str_len: 12, 30 | } 31 | } 32 | 33 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 34 | let view = column_view.as_text_view().unwrap(); 35 | let mut builder = Time32MillisecondBuilder::new(); 36 | 37 | for opt in view.iter() { 38 | if let Some(text) = opt { 39 | let num = ticks_since_midnights_from_text::(text, 3); 40 | builder.append_value(num); 41 | } else { 42 | builder.append_null(); 43 | } 44 | } 45 | 46 | Ok(Arc::new(builder.finish())) 47 | } 48 | } 49 | 50 | /// Strategy for fetching the time as text and parsing it into an `i32` which represents 51 | /// milliseconds after midnight. 52 | pub struct TimeUsI64; 53 | 54 | impl ReadStrategy for TimeUsI64 { 55 | fn buffer_desc(&self) -> BufferDesc { 56 | BufferDesc::Text { 57 | // Expected format is HH:MM:SS.ffffff 58 | max_str_len: 15, 59 | } 60 | } 61 | 62 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 63 | let view = column_view.as_text_view().unwrap(); 64 | let mut builder = Time64MicrosecondBuilder::new(); 65 | 66 | for opt in view.iter() { 67 | if let Some(text) = opt { 68 | let num = ticks_since_midnights_from_text::(text, 6); 69 | builder.append_value(num); 70 | } else { 71 | builder.append_null(); 72 | } 73 | } 74 | 75 | Ok(Arc::new(builder.finish())) 76 | } 77 | } 78 | 79 | /// Strategy for fetching the time as text and parsing it into an `i32` which represents 80 | /// milliseconds after midnight. 
81 | pub struct TimeNsI64; 82 | 83 | impl ReadStrategy for TimeNsI64 { 84 | fn buffer_desc(&self) -> BufferDesc { 85 | BufferDesc::Text { 86 | // Expected format is HH:MM:SS.fffffffff 87 | max_str_len: 18, 88 | } 89 | } 90 | 91 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 92 | let view = column_view.as_text_view().unwrap(); 93 | let mut builder = Time64NanosecondBuilder::new(); 94 | 95 | for opt in view.iter() { 96 | if let Some(text) = opt { 97 | let num = ticks_since_midnights_from_text::(text, 9); 98 | builder.append_value(num); 99 | } else { 100 | builder.append_null(); 101 | } 102 | } 103 | 104 | Ok(Arc::new(builder.finish())) 105 | } 106 | } 107 | 108 | fn ticks_since_midnights_from_text(text: &[u8], precision: u32) -> I 109 | where 110 | I: Tick, 111 | { 112 | // HH:MM:SS.fff 113 | // 012345678901 114 | let (hours, hours_digits) = I::from_radix_10(&text[0..2]); 115 | debug_assert_eq!(2, hours_digits); 116 | debug_assert_eq!(b':', text[2]); 117 | let (min, min_digits) = I::from_radix_10(&text[3..5]); 118 | debug_assert_eq!(2, min_digits); 119 | debug_assert_eq!(b':', text[5]); 120 | let (sec, sec_digits) = I::from_radix_10(&text[6..8]); 121 | debug_assert_eq!(2, sec_digits); 122 | // check for fractional part 123 | let (frac, frac_digits) = if text.len() > 9 { 124 | I::from_radix_10(&text[9..]) 125 | } else { 126 | (I::ZERO, 0) 127 | }; 128 | let frac = frac * I::TEN.pow(precision - frac_digits as u32); 129 | ((hours * I::SIXTY + min) * I::SIXTY + sec) * I::TEN.pow(precision) + frac 130 | } 131 | 132 | trait Tick: FromRadix10 + Mul + Add { 133 | const ZERO: Self; 134 | const TEN: Self; 135 | const SIXTY: Self; 136 | 137 | fn pow(self, exp: u32) -> Self; 138 | } 139 | 140 | impl Tick for i32 { 141 | const ZERO: Self = 0; 142 | const TEN: Self = 10; 143 | const SIXTY: Self = 60; 144 | 145 | fn pow(self, exp: u32) -> Self { 146 | self.pow(exp) 147 | } 148 | } 149 | 150 | impl Tick for i64 { 151 | const ZERO: Self = 0; 152 | const TEN: Self = 10; 153 | const SIXTY: Self = 60; 154 | 155 | fn pow(self, exp: u32) -> Self { 156 | self.pow(exp) 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/reader/to_record_batch.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use arrow::{ 4 | datatypes::{Schema, SchemaRef}, 5 | record_batch::RecordBatch, 6 | }; 7 | use log::info; 8 | use odbc_api::{ResultSetMetadata, buffers::ColumnarAnyBuffer}; 9 | 10 | use crate::{BufferAllocationOptions, ColumnFailure, Error, arrow_schema_from}; 11 | 12 | use super::{MappingError, ReadStrategy, TextEncoding, choose_column_strategy}; 13 | 14 | /// Transforms batches fetched from an ODBC data source in a 15 | /// [`odbc_api::bufferers::ColumnarAnyBuffer`] into arrow tables of the specified schemas. It also 16 | /// allocates the buffers to hold the ODBC batches with the matching buffer descriptions. 17 | pub struct ToRecordBatch { 18 | /// Must contain one item for each field in [`Self::schema`]. Encapsulates all the column type 19 | /// specific decisions which go into filling an Arrow array from an ODBC data source. 20 | column_strategies: Vec>, 21 | /// Arrow schema describing the arrays we want to fill from the Odbc data source. 
22 | schema: SchemaRef, 23 | } 24 | 25 | impl ToRecordBatch { 26 | pub fn new( 27 | cursor: &mut impl ResultSetMetadata, 28 | schema: Option, 29 | buffer_allocation_options: BufferAllocationOptions, 30 | map_value_errors_to_null: bool, 31 | dbms_name: Option<&str>, 32 | trim_fixed_sized_character_strings: bool, 33 | text_encoding: TextEncoding, 34 | ) -> Result { 35 | // Infer schema if not given by the user 36 | let schema = if let Some(schema) = schema { 37 | schema 38 | } else { 39 | Arc::new(arrow_schema_from( 40 | cursor, 41 | dbms_name, 42 | map_value_errors_to_null, 43 | )?) 44 | }; 45 | 46 | let column_strategies: Vec> = schema 47 | .fields() 48 | .iter() 49 | .enumerate() 50 | .map(|(index, field)| { 51 | let col_index = (index + 1).try_into().unwrap(); 52 | choose_column_strategy( 53 | field, 54 | cursor, 55 | col_index, 56 | buffer_allocation_options, 57 | map_value_errors_to_null, 58 | trim_fixed_sized_character_strings, 59 | text_encoding, 60 | ) 61 | .map_err(|cause| cause.into_crate_error(field.name().clone(), index)) 62 | }) 63 | .collect::>()?; 64 | 65 | Ok(ToRecordBatch { 66 | column_strategies, 67 | schema, 68 | }) 69 | } 70 | 71 | /// Logs buffer description and sizes 72 | pub fn row_size_in_bytes(&self) -> usize { 73 | let mut total_bytes = 0; 74 | for (read, field) in self.column_strategies.iter().zip(self.schema.fields()) { 75 | let name = field.name(); 76 | let desc = read.buffer_desc(); 77 | let bytes_per_row = desc.bytes_per_row(); 78 | info!("Column '{name}'\nBytes used per row: {bytes_per_row}"); 79 | total_bytes += bytes_per_row; 80 | } 81 | info!("Total memory usage per row for single transit buffer: {total_bytes}"); 82 | total_bytes 83 | } 84 | 85 | pub fn allocate_buffer( 86 | &self, 87 | max_batch_size: usize, 88 | fallibale_allocations: bool, 89 | ) -> Result { 90 | let descs = self.column_strategies.iter().map(|cs| cs.buffer_desc()); 91 | 92 | let row_set_buffer = if fallibale_allocations { 93 | ColumnarAnyBuffer::try_from_descs(max_batch_size, descs) 94 | .map_err(|err| map_allocation_error(err, &self.schema))? 
95 | } else { 96 | ColumnarAnyBuffer::from_descs(max_batch_size, descs) 97 | }; 98 | Ok(row_set_buffer) 99 | } 100 | 101 | pub fn schema(&self) -> &SchemaRef { 102 | &self.schema 103 | } 104 | 105 | pub fn buffer_to_record_batch( 106 | &self, 107 | odbc_buffer: &ColumnarAnyBuffer, 108 | ) -> Result { 109 | let arrow_columns = self 110 | .column_strategies 111 | .iter() 112 | .enumerate() 113 | .map(|(index, strat)| { 114 | let column_view = odbc_buffer.column(index); 115 | strat.fill_arrow_array(column_view) 116 | }) 117 | .collect::, _>>()?; 118 | let record_batch = RecordBatch::try_new(self.schema.clone(), arrow_columns).unwrap(); 119 | Ok(record_batch) 120 | } 121 | } 122 | 123 | fn map_allocation_error(error: odbc_api::Error, schema: &Schema) -> Error { 124 | match error { 125 | odbc_api::Error::TooLargeColumnBufferSize { 126 | buffer_index, 127 | num_elements, 128 | element_size, 129 | } => Error::ColumnFailure { 130 | name: schema.field(buffer_index as usize).name().clone(), 131 | index: buffer_index as usize, 132 | source: ColumnFailure::TooLarge { 133 | num_elements, 134 | element_size, 135 | }, 136 | }, 137 | _ => { 138 | panic!("Unexpected error in upstream ODBC api error library") 139 | } 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /src/odbc_writer/text.rs: -------------------------------------------------------------------------------- 1 | use arrow::array::{Array, LargeStringArray, StringArray}; 2 | use odbc_api::buffers::{AnySliceMut, BufferDesc, TextColumnSliceMut}; 3 | 4 | use super::{WriteStrategy, WriterError}; 5 | 6 | #[cfg(not(target_os = "windows"))] 7 | pub type Utf8ToNativeText = Utf8ToNarrow; 8 | 9 | #[cfg(target_os = "windows")] 10 | pub type Utf8ToNativeText = Utf8ToWide; 11 | 12 | #[cfg(not(target_os = "windows"))] 13 | pub type LargeUtf8ToNativeText = LargeUtf8ToNarrow; 14 | 15 | #[cfg(target_os = "windows")] 16 | pub type LargeUtf8ToNativeText = LargeUtf8ToWide; 17 | 18 | #[cfg_attr(target_os = "windows", allow(dead_code))] 19 | pub struct Utf8ToNarrow; 20 | 21 | impl WriteStrategy for Utf8ToNarrow { 22 | fn buffer_desc(&self) -> BufferDesc { 23 | BufferDesc::Text { max_str_len: 1 } 24 | } 25 | 26 | fn write_rows( 27 | &self, 28 | param_offset: usize, 29 | to: AnySliceMut<'_>, 30 | from: &dyn Array, 31 | ) -> Result<(), WriterError> { 32 | let from = from.as_any().downcast_ref::().unwrap(); 33 | let to = to.as_text_view().unwrap(); 34 | insert_into_narrow_slice(from.iter(), to, param_offset)?; 35 | Ok(()) 36 | } 37 | } 38 | 39 | #[cfg_attr(target_os = "windows", allow(dead_code))] 40 | pub struct LargeUtf8ToNarrow; 41 | 42 | impl WriteStrategy for LargeUtf8ToNarrow { 43 | fn buffer_desc(&self) -> BufferDesc { 44 | BufferDesc::Text { max_str_len: 1 } 45 | } 46 | 47 | fn write_rows( 48 | &self, 49 | param_offset: usize, 50 | to: AnySliceMut<'_>, 51 | from: &dyn Array, 52 | ) -> Result<(), WriterError> { 53 | let from = from.as_any().downcast_ref::().unwrap(); 54 | let to = to.as_text_view().unwrap(); 55 | insert_into_narrow_slice(from.iter(), to, param_offset)?; 56 | Ok(()) 57 | } 58 | } 59 | 60 | #[cfg_attr(target_os = "windows", allow(dead_code))] 61 | fn insert_into_narrow_slice<'a>( 62 | from: impl Iterator>, 63 | mut to: TextColumnSliceMut, 64 | param_offset: usize, 65 | ) -> Result<(), WriterError> { 66 | for (row_index, element) in from.enumerate() { 67 | // Total number of rows written into the inserter (`to`). 
This includes the values from the 68 | // current batch (`row_index`), as well as the ones from the previous batches 69 | // (`param_offset`). In case of reallocation, we need to copy all these values. Also, this 70 | // is the index of the element we currently want to write. 71 | let num_rows_written_so_far = param_offset + row_index; 72 | if let Some(text) = element { 73 | to.ensure_max_element_length(text.len(), num_rows_written_so_far) 74 | .map_err(WriterError::RebindBuffer)?; 75 | to.set_cell(num_rows_written_so_far, Some(text.as_bytes())) 76 | } else { 77 | to.set_cell(num_rows_written_so_far, None); 78 | } 79 | } 80 | Ok(()) 81 | } 82 | 83 | pub struct Utf8ToWide; 84 | 85 | impl WriteStrategy for Utf8ToWide { 86 | fn buffer_desc(&self) -> BufferDesc { 87 | BufferDesc::WText { max_str_len: 1 } 88 | } 89 | 90 | fn write_rows( 91 | &self, 92 | param_offset: usize, 93 | to: AnySliceMut<'_>, 94 | from: &dyn Array, 95 | ) -> Result<(), WriterError> { 96 | let from = from.as_any().downcast_ref::().unwrap(); 97 | let to = to.as_w_text_view().unwrap(); 98 | insert_into_wide_slice(from.iter(), to, param_offset)?; 99 | Ok(()) 100 | } 101 | } 102 | 103 | pub struct LargeUtf8ToWide; 104 | 105 | impl WriteStrategy for LargeUtf8ToWide { 106 | fn buffer_desc(&self) -> BufferDesc { 107 | BufferDesc::WText { max_str_len: 1 } 108 | } 109 | 110 | fn write_rows( 111 | &self, 112 | param_offset: usize, 113 | to: AnySliceMut<'_>, 114 | from: &dyn Array, 115 | ) -> Result<(), WriterError> { 116 | let from = from.as_any().downcast_ref::().unwrap(); 117 | let to = to.as_w_text_view().unwrap(); 118 | insert_into_wide_slice(from.iter(), to, param_offset)?; 119 | Ok(()) 120 | } 121 | } 122 | 123 | fn insert_into_wide_slice<'a>( 124 | from: impl Iterator>, 125 | mut to: TextColumnSliceMut, 126 | at: usize, 127 | ) -> Result<(), WriterError> { 128 | // We must first encode the utf8 input to utf16. We reuse this buffer for that in order to avoid 129 | // allocations. 130 | let mut utf_16 = Vec::new(); 131 | for (row_index, element) in from.enumerate() { 132 | // Total number of rows written into the inserter (`to`). This includes the values from the 133 | // current batch (`row_index`), as well as the ones from the previous batches (`at`). In 134 | // case of reallocation, we need to copy all these values. Also, this is the index of the 135 | // element we currently want to write. 
136 | let num_rows_written_so_far = at + row_index; 137 | if let Some(text) = element { 138 | utf_16.extend(text.encode_utf16()); 139 | to.ensure_max_element_length(utf_16.len(), num_rows_written_so_far) 140 | .map_err(WriterError::RebindBuffer)?; 141 | to.set_cell(num_rows_written_so_far, Some(&utf_16)); 142 | utf_16.clear(); 143 | } else { 144 | to.set_cell(num_rows_written_so_far, None); 145 | } 146 | } 147 | Ok(()) 148 | } 149 | -------------------------------------------------------------------------------- /src/decimal.rs: -------------------------------------------------------------------------------- 1 | use arrow::{ 2 | array::{Array, Decimal128Array, Decimal256Array}, 3 | datatypes::{ArrowPrimitiveType, Decimal256Type}, 4 | }; 5 | use odbc_api::buffers::{AnySliceMut, BufferDesc}; 6 | 7 | use crate::{WriterError, odbc_writer::WriteStrategy}; 8 | 9 | pub struct NullableDecimal128AsText { 10 | precision: u8, 11 | scale: i8, 12 | } 13 | 14 | impl NullableDecimal128AsText { 15 | pub fn new(precision: u8, scale: i8) -> Self { 16 | Self { precision, scale } 17 | } 18 | } 19 | 20 | pub struct NullableDecimal256AsText { 21 | precision: u8, 22 | scale: i8, 23 | } 24 | 25 | impl NullableDecimal256AsText { 26 | pub fn new(precision: u8, scale: i8) -> Self { 27 | Self { precision, scale } 28 | } 29 | } 30 | 31 | /// Length of a text representation of a decimal 32 | fn len_text(scale: i8, precision: u8) -> usize { 33 | match scale { 34 | // Precision digits + (- scale zeroes) + sign 35 | i8::MIN..=-1 => (precision as i32 - scale as i32 + 1).try_into().unwrap(), 36 | // Precision digits + sign 37 | 0 => precision as usize + 1, 38 | // Precision digits + radix character (`.`) + sign 39 | 1.. => precision as usize + 1 + 1, 40 | } 41 | } 42 | 43 | impl WriteStrategy for NullableDecimal128AsText { 44 | fn buffer_desc(&self) -> BufferDesc { 45 | BufferDesc::Text { 46 | max_str_len: len_text(self.scale, self.precision), 47 | } 48 | } 49 | 50 | fn write_rows( 51 | &self, 52 | param_offset: usize, 53 | column_buf: AnySliceMut<'_>, 54 | array: &dyn Array, 55 | ) -> Result<(), WriterError> { 56 | let length = len_text(self.scale, self.precision); 57 | 58 | let from = array.as_any().downcast_ref::().unwrap(); 59 | let mut to = column_buf.as_text_view().unwrap(); 60 | 61 | for (index, cell) in from.iter().enumerate() { 62 | if let Some(value) = cell { 63 | let buf = to.set_mut(index + param_offset, length); 64 | write_i128_as_decimal(value, self.precision, self.scale, buf) 65 | } else { 66 | to.set_cell(index + param_offset, None) 67 | } 68 | } 69 | Ok(()) 70 | } 71 | } 72 | 73 | impl WriteStrategy for NullableDecimal256AsText { 74 | fn buffer_desc(&self) -> BufferDesc { 75 | BufferDesc::Text { 76 | max_str_len: len_text(self.scale, self.precision), 77 | } 78 | } 79 | 80 | fn write_rows( 81 | &self, 82 | param_offset: usize, 83 | column_buf: AnySliceMut<'_>, 84 | array: &dyn Array, 85 | ) -> Result<(), WriterError> { 86 | let from = array.as_any().downcast_ref::().unwrap(); 87 | let mut to = column_buf.as_text_view().unwrap(); 88 | let length = len_text(self.scale, self.precision); 89 | 90 | for (index, cell) in from.iter().enumerate() { 91 | if let Some(value) = cell { 92 | let buf = to.set_mut(index + param_offset, length); 93 | write_i256_as_decimal(value, self.precision, self.scale, buf) 94 | } else { 95 | to.set_cell(index + param_offset, None) 96 | } 97 | } 98 | Ok(()) 99 | } 100 | } 101 | 102 | fn write_i128_as_decimal(mut n: i128, precision: u8, scale: i8, text: &mut [u8]) { 103 | if 
n.is_negative() { 104 | n *= n.signum(); 105 | text[0] = b'-'; 106 | } else { 107 | text[0] = b'+'; 108 | } 109 | 110 | // Number of digits + one decimal separator (`.`) 111 | let str_len: i32 = (len_text(scale, precision) - 1).try_into().unwrap(); 112 | 113 | let ten = 10; 114 | for index in (0..str_len).rev() { 115 | // In case of negative scale, fill the last digits with zeroes 116 | let char = if (str_len - index) <= -scale as i32 { 117 | b'0' 118 | // The separator will not be printed in case of scale <= 0 since index is never going to 119 | // reach `precision`. 120 | } else if index == precision as i32 - scale as i32 { 121 | b'.' 122 | } else { 123 | let digit: u8 = (n % ten) as u8; 124 | n /= ten; 125 | b'0' + digit 126 | }; 127 | // +1 offset to make space for sign character 128 | text[index as usize + 1] = char; 129 | } 130 | } 131 | 132 | type I256 = ::Native; 133 | 134 | fn write_i256_as_decimal(mut n: I256, precision: u8, scale: i8, text: &mut [u8]) { 135 | if n.lt(&I256::ZERO) { 136 | n = n.checked_mul(I256::MINUS_ONE).unwrap(); 137 | text[0] = b'-'; 138 | } else { 139 | text[0] = b'+'; 140 | } 141 | 142 | // Number of digits + one decimal separator (`.`) 143 | let str_len: i32 = (len_text(scale, precision) - 1).try_into().unwrap(); 144 | 145 | let ten = I256::from_i128(10); 146 | for index in (0..str_len).rev() { 147 | let char = if (str_len - index) <= -scale as i32 { 148 | b'0' 149 | // The separator will not be printed in case of scale == 0 since index is never going to 150 | // reach `precision`. 151 | } else if index == precision as i32 - scale as i32 { 152 | b'.' 153 | } else { 154 | let digit: u8 = n.checked_rem(ten).unwrap().to_i128().unwrap() as u8; 155 | n = n.checked_div(ten).unwrap(); 156 | b'0' + digit 157 | }; 158 | // +1 offset to make space for sign character 159 | text[index as usize + 1] = char; 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/odbc_writer/map_arrow_to_odbc.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use arrow::{ 4 | array::{Array, PrimitiveArray}, 5 | datatypes::ArrowPrimitiveType, 6 | }; 7 | use odbc_api::buffers::{AnySliceMut, BufferDesc, Item}; 8 | 9 | use crate::WriterError; 10 | 11 | use super::WriteStrategy; 12 | 13 | pub trait MapArrowToOdbc { 14 | type ArrowElement; 15 | 16 | fn map_with( 17 | nullable: bool, 18 | arrow_to_odbc: impl Fn(Self::ArrowElement) -> U + 'static, 19 | ) -> Box 20 | where 21 | U: Item; 22 | 23 | fn identical(nullable: bool) -> Box 24 | where 25 | Self::ArrowElement: Item; 26 | } 27 | 28 | impl MapArrowToOdbc for T 29 | where 30 | T: ArrowPrimitiveType, 31 | { 32 | type ArrowElement = T::Native; 33 | 34 | fn map_with( 35 | nullable: bool, 36 | arrow_to_odbc: impl Fn(Self::ArrowElement) -> U + 'static, 37 | ) -> Box 38 | where 39 | U: Item, 40 | { 41 | if nullable { 42 | Box::new(Nullable::::new(arrow_to_odbc)) 43 | } else { 44 | Box::new(NonNullable::::new(arrow_to_odbc)) 45 | } 46 | } 47 | 48 | fn identical(nullable: bool) -> Box 49 | where 50 | Self::ArrowElement: Item, 51 | { 52 | if nullable { 53 | Box::new(NullableIdentical::::new()) 54 | } else { 55 | Box::new(NonNullableIdentical::::new()) 56 | } 57 | } 58 | } 59 | 60 | struct Nullable { 61 | // We use this type information to correctly downcast from a `&dyn Array`. 62 | _primitive_type: PhantomData
<P>
, 63 | arrow_to_odbc: F, 64 | } 65 | 66 | impl Nullable { 67 | fn new(arrow_to_odbc: F) -> Self { 68 | Self { 69 | _primitive_type: PhantomData, 70 | arrow_to_odbc, 71 | } 72 | } 73 | } 74 | 75 | impl WriteStrategy for Nullable 76 | where 77 | P: ArrowPrimitiveType, 78 | F: Fn(P::Native) -> U, 79 | U: Item, 80 | { 81 | fn buffer_desc(&self) -> BufferDesc { 82 | U::buffer_desc(true) 83 | } 84 | 85 | fn write_rows( 86 | &self, 87 | param_offset: usize, 88 | column_buf: AnySliceMut<'_>, 89 | array: &dyn Array, 90 | ) -> Result<(), WriterError> { 91 | let from = array.as_any().downcast_ref::>().unwrap(); 92 | let mut to = column_buf.as_nullable_slice::().unwrap(); 93 | for (index, cell) in from.iter().enumerate() { 94 | to.set_cell(index + param_offset, cell.map(&self.arrow_to_odbc)) 95 | } 96 | Ok(()) 97 | } 98 | } 99 | 100 | struct NonNullable { 101 | // We use this type information to correctly downcast from a `&dyn Array`. 102 | _primitive_type: PhantomData
<P>
, 103 | arrow_to_odbc: F, 104 | } 105 | 106 | impl NonNullable { 107 | fn new(arrow_to_odbc: F) -> Self { 108 | Self { 109 | _primitive_type: PhantomData, 110 | arrow_to_odbc, 111 | } 112 | } 113 | } 114 | 115 | impl WriteStrategy for NonNullable 116 | where 117 | P: ArrowPrimitiveType, 118 | F: Fn(P::Native) -> U, 119 | U: Item, 120 | { 121 | fn buffer_desc(&self) -> BufferDesc { 122 | U::buffer_desc(false) 123 | } 124 | 125 | fn write_rows( 126 | &self, 127 | param_offset: usize, 128 | column_buf: AnySliceMut<'_>, 129 | array: &dyn Array, 130 | ) -> Result<(), WriterError> { 131 | let from = array.as_any().downcast_ref::>().unwrap(); 132 | let to = column_buf.as_slice::().unwrap(); 133 | for index in 0..from.len() { 134 | to[index + param_offset] = (self.arrow_to_odbc)(from.value(index)) 135 | } 136 | Ok(()) 137 | } 138 | } 139 | 140 | struct NullableIdentical
<P>
{ 141 | _phantom: PhantomData
<P>
, 142 | } 143 | 144 | impl
<P>
NullableIdentical
<P>
{ 145 | pub fn new() -> Self { 146 | Self { 147 | _phantom: PhantomData, 148 | } 149 | } 150 | } 151 | 152 | impl
<P>
WriteStrategy for NullableIdentical
<P>
153 | where 154 | P: ArrowPrimitiveType, 155 | P::Native: Item, 156 | { 157 | fn buffer_desc(&self) -> BufferDesc { 158 | P::Native::buffer_desc(true) 159 | } 160 | 161 | fn write_rows( 162 | &self, 163 | param_offset: usize, 164 | column_buf: AnySliceMut<'_>, 165 | array: &dyn Array, 166 | ) -> Result<(), WriterError> { 167 | let from = array.as_any().downcast_ref::>().unwrap(); 168 | let mut to = column_buf.as_nullable_slice::().unwrap(); 169 | for (index, cell) in from.iter().enumerate() { 170 | to.set_cell(index + param_offset, cell); 171 | } 172 | Ok(()) 173 | } 174 | } 175 | 176 | struct NonNullableIdentical
<P>
{ 177 | _phantom: PhantomData
<P>
, 178 | } 179 | 180 | impl
<P>
NonNullableIdentical
<P>
{ 181 | pub fn new() -> Self { 182 | Self { 183 | _phantom: PhantomData, 184 | } 185 | } 186 | } 187 | 188 | impl
<P>
WriteStrategy for NonNullableIdentical
<P>
189 | where 190 | P: ArrowPrimitiveType, 191 | P::Native: Item, 192 | { 193 | fn buffer_desc(&self) -> BufferDesc { 194 | P::Native::buffer_desc(false) 195 | } 196 | 197 | fn write_rows( 198 | &self, 199 | param_offset: usize, 200 | column_buf: AnySliceMut<'_>, 201 | array: &dyn Array, 202 | ) -> Result<(), WriterError> { 203 | let from = array.as_any().downcast_ref::>().unwrap(); 204 | let to = column_buf.as_slice::().unwrap(); 205 | to[param_offset..(param_offset + from.len())].copy_from_slice(from.values()); 206 | Ok(()) 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/reader/concurrent_odbc_reader.rs: -------------------------------------------------------------------------------- 1 | use arrow::{ 2 | datatypes::SchemaRef, 3 | error::ArrowError, 4 | record_batch::{RecordBatch, RecordBatchReader}, 5 | }; 6 | use odbc_api::{BlockCursor, ConcurrentBlockCursor, Cursor, buffers::ColumnarAnyBuffer}; 7 | 8 | use crate::Error; 9 | 10 | use super::{odbc_reader::odbc_to_arrow_error, to_record_batch::ToRecordBatch}; 11 | 12 | /// Arrow ODBC reader. Implements the [`arrow::record_batch::RecordBatchReader`] trait so it can be 13 | /// used to fill Arrow arrays from an ODBC data source. Similar to [`crate::OdbcReader`], yet 14 | /// [`ConcurrentOdbcReader`] fetches ODBC batches in a second transit buffer eagerly from the 15 | /// database in a dedicated system thread. This allows the allocation of the Arrow arrays and your 16 | /// application logic to run on the main thread, while fetching the batches from the source happens 17 | /// concurrently. You need twice the memory for the transit buffer for this strategy, since one is 18 | /// may be in use by the main thread in order to copy values into arrow arrays, while the other is 19 | /// used to write values from the database. 20 | /// 21 | /// # Example 22 | /// 23 | /// ```no_run 24 | /// use arrow_odbc::{odbc_api::{Environment, ConnectionOptions}, OdbcReaderBuilder}; 25 | /// use std::sync::OnceLock; 26 | /// 27 | /// // In order to fetch in a dedicated system thread we need a cursor with static lifetime, 28 | /// // this implies a static ODBC environment. 29 | /// static ENV: OnceLock = OnceLock::new(); 30 | /// 31 | /// const CONNECTION_STRING: &str = "\ 32 | /// Driver={ODBC Driver 18 for SQL Server};\ 33 | /// Server=localhost;\ 34 | /// UID=SA;\ 35 | /// PWD=My@Test@Password1;\ 36 | /// "; 37 | /// 38 | /// fn main() -> Result<(), anyhow::Error> { 39 | /// 40 | /// let odbc_environment = ENV.get_or_init(|| {Environment::new().unwrap() }); 41 | /// 42 | /// // Connect with database. 43 | /// let connection = odbc_environment.connect_with_connection_string( 44 | /// CONNECTION_STRING, 45 | /// ConnectionOptions::default() 46 | /// )?; 47 | /// 48 | /// // This SQL statement does not require any arguments. 49 | /// let parameters = (); 50 | /// 51 | /// // Do not apply any timout. 52 | /// let timeout_sec = None; 53 | /// 54 | /// // Execute query and create result set 55 | /// let cursor = connection 56 | /// // Using `into_cursor` instead of `execute` takes ownership of the connection and 57 | /// // allows for a cursor with static lifetime. 58 | /// .into_cursor("SELECT * FROM MyTable", parameters, timeout_sec) 59 | /// .map_err(|e| e.error)? 
60 | /// .expect("SELECT statement must produce a cursor"); 61 | /// 62 | /// // Construct ODBC reader and make it concurrent 63 | /// let arrow_record_batches = OdbcReaderBuilder::new().build(cursor)?.into_concurrent()?; 64 | /// 65 | /// for batch in arrow_record_batches { 66 | /// // ... process batch ... 67 | /// } 68 | /// Ok(()) 69 | /// } 70 | /// ``` 71 | pub struct ConcurrentOdbcReader { 72 | /// We fill the buffers using ODBC concurrently. The buffer currently being filled is bound to 73 | /// the Cursor. This is the buffer which is unbound and read by the application to fill the 74 | /// arrow arrays. After being read we will reuse the buffer and bind it to the cursor in order 75 | /// to safe allocations. 76 | buffer: ColumnarAnyBuffer, 77 | /// Converts the content of ODBC buffers into Arrow record batches 78 | converter: ToRecordBatch, 79 | /// Fetches values from the ODBC datasource using columnar batches. Values are streamed batch 80 | /// by batch in order to avoid reallocation of the buffers used for tranistion. 81 | batch_stream: ConcurrentBlockCursor, 82 | } 83 | 84 | impl ConcurrentOdbcReader { 85 | /// The schema implied by `block_cursor` and `converter` must match. Invariant is hard to check 86 | /// in type system, keep this constructor private to this crate. Users should use 87 | /// [`crate::OdbcReader::into_concurrent`] instead. 88 | pub(crate) fn from_block_cursor( 89 | block_cursor: BlockCursor, 90 | converter: ToRecordBatch, 91 | fallibale_allocations: bool, 92 | ) -> Result { 93 | let max_batch_size = block_cursor.row_array_size(); 94 | let batch_stream = ConcurrentBlockCursor::from_block_cursor(block_cursor); 95 | // Note that we delay buffer allocation until after the fetch thread has started and we 96 | // start fetching the first row group concurrently as early, not waiting for the buffer 97 | // allocation to go through. 98 | let buffer = converter.allocate_buffer(max_batch_size, fallibale_allocations)?; 99 | 100 | Ok(Self { 101 | buffer, 102 | converter, 103 | batch_stream, 104 | }) 105 | } 106 | 107 | /// Destroy the ODBC arrow reader and yield the underlyinng cursor object. 108 | /// 109 | /// One application of this is to process more than one result set in case you executed a stored 110 | /// procedure. 111 | /// 112 | /// Due to the concurrent fetching of row groups you can not know how many row groups have been 113 | /// extracted once the cursor is returned. Unless that is that the entire cursor has been 114 | /// consumed i.e. [`Self::next`] returned `None`. 115 | pub fn into_cursor(self) -> Result { 116 | self.batch_stream.into_cursor() 117 | } 118 | } 119 | 120 | impl Iterator for ConcurrentOdbcReader 121 | where 122 | C: Cursor, 123 | { 124 | type Item = Result; 125 | 126 | fn next(&mut self) -> Option { 127 | match self.batch_stream.fetch_into(&mut self.buffer) { 128 | // We successfully fetched a batch from the database. Try to copy it into a record batch 129 | // and forward errors if any. 130 | Ok(true) => { 131 | let result_record_batch = self 132 | .converter 133 | .buffer_to_record_batch(&self.buffer) 134 | .map_err(|mapping_error| ArrowError::ExternalError(Box::new(mapping_error))); 135 | Some(result_record_batch) 136 | } 137 | // We ran out of batches in the result set. End the iterator. 138 | Ok(false) => None, 139 | // We had an error fetching the next batch from the database, let's report it as an 140 | // external error. 
141 | Err(odbc_error) => Some(Err(odbc_to_arrow_error(odbc_error))), 142 | } 143 | } 144 | } 145 | 146 | impl RecordBatchReader for ConcurrentOdbcReader 147 | where 148 | C: Cursor, 149 | { 150 | fn schema(&self) -> SchemaRef { 151 | self.converter.schema().clone() 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/date_time.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | convert::TryInto, 3 | fmt::Display, 4 | io::Write, 5 | marker::PhantomData, 6 | ops::{Div, Mul, Rem}, 7 | }; 8 | 9 | use arrow::{ 10 | array::{Array, PrimitiveArray}, 11 | datatypes::{ 12 | ArrowPrimitiveType, Time32MillisecondType, Time64MicrosecondType, Time64NanosecondType, 13 | }, 14 | }; 15 | use chrono::{Datelike, NaiveDate}; 16 | use odbc_api::{ 17 | buffers::{AnySliceMut, BufferDesc, TextColumnSliceMut}, 18 | sys::{Date, Time, Timestamp}, 19 | }; 20 | 21 | use crate::{WriterError, odbc_writer::WriteStrategy, reader::MappingError}; 22 | 23 | /// Transform date to days since unix epoch as i32 24 | pub fn days_since_epoch(date: &Date) -> i32 { 25 | let unix_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); 26 | let date = 27 | NaiveDate::from_ymd_opt(date.year as i32, date.month as u32, date.day as u32).unwrap(); 28 | let duration = date.signed_duration_since(unix_epoch); 29 | duration.num_days().try_into().unwrap() 30 | } 31 | 32 | pub fn seconds_since_epoch(from: &Timestamp) -> i64 { 33 | let ndt = NaiveDate::from_ymd_opt(from.year as i32, from.month as u32, from.day as u32) 34 | .unwrap() 35 | .and_hms_opt(from.hour as u32, from.minute as u32, from.second as u32) 36 | .unwrap(); 37 | ndt.and_utc().timestamp() 38 | } 39 | 40 | pub fn ms_since_epoch(from: &Timestamp) -> i64 { 41 | let ndt = NaiveDate::from_ymd_opt(from.year as i32, from.month as u32, from.day as u32) 42 | .unwrap() 43 | .and_hms_nano_opt( 44 | from.hour as u32, 45 | from.minute as u32, 46 | from.second as u32, 47 | from.fraction, 48 | ) 49 | .unwrap(); 50 | ndt.and_utc().timestamp_millis() 51 | } 52 | 53 | pub fn us_since_epoch(from: &Timestamp) -> i64 { 54 | let ndt = NaiveDate::from_ymd_opt(from.year as i32, from.month as u32, from.day as u32) 55 | .unwrap() 56 | .and_hms_nano_opt( 57 | from.hour as u32, 58 | from.minute as u32, 59 | from.second as u32, 60 | from.fraction, 61 | ) 62 | .unwrap(); 63 | ndt.and_utc().timestamp_micros() 64 | } 65 | 66 | pub fn ns_since_epoch(from: &Timestamp) -> Result { 67 | let ndt = NaiveDate::from_ymd_opt(from.year as i32, from.month as u32, from.day as u32) 68 | .unwrap() 69 | .and_hms_nano_opt( 70 | from.hour as u32, 71 | from.minute as u32, 72 | from.second as u32, 73 | from.fraction, 74 | ) 75 | .unwrap(); 76 | 77 | // The dates that can be represented as nanoseconds are between 1677-09-21T00:12:44.0 and 78 | // 2262-04-11T23:47:16.854775804 79 | ndt.and_utc() 80 | .timestamp_nanos_opt() 81 | .ok_or(MappingError::OutOfRangeTimestampNs { value: ndt }) 82 | } 83 | 84 | pub fn epoch_to_date(from: i32) -> Date { 85 | // Offset between between ce and unix epoch 86 | const OFFSET: i32 = 719_163; 87 | let nd = NaiveDate::from_num_days_from_ce_opt(from + OFFSET).unwrap(); 88 | Date { 89 | year: nd.year().try_into().unwrap(), 90 | month: nd.month().try_into().unwrap(), 91 | day: nd.day().try_into().unwrap(), 92 | } 93 | } 94 | 95 | pub fn sec_since_midnight_to_time(from: i32) -> Time { 96 | let unit_min = 60; 97 | let unit_hour = unit_min * 60; 98 | let hour = from / unit_hour; 99 | let minute = (from % 
unit_hour) / unit_min; 100 | let second = from % unit_min; 101 | Time { 102 | hour: hour.try_into().unwrap(), 103 | minute: minute.try_into().unwrap(), 104 | second: second.try_into().unwrap(), 105 | } 106 | } 107 | 108 | pub struct NullableTimeAsText
<P> { 109 | _phantom: PhantomData<P>, 110 | } 111 | 112 | impl<P> NullableTimeAsText<P>
{ 113 | pub fn new() -> Self { 114 | Self { 115 | _phantom: PhantomData, 116 | } 117 | } 118 | } 119 | 120 | pub trait TimePrimitive { 121 | type Integer: From<u8> 122 | + Copy 123 | + Mul<Output = Self::Integer> 124 | + Div<Output = Self::Integer> 125 | + Rem<Output = Self::Integer> 126 | + Display; 127 | const SCALE: usize; 128 | const PRECISION_FACTOR: Self::Integer; 129 | const STR_LEN: usize; 130 | 131 | fn insert_at(index: usize, from: Self::Integer, to: &mut TextColumnSliceMut<u8>) { 132 | let sixty: Self::Integer = 60.into(); 133 | let unit_min = sixty * Self::PRECISION_FACTOR; 134 | let unit_hour = unit_min * sixty; 135 | let hour = from / unit_hour; 136 | let minute = (from % unit_hour) / unit_min; 137 | let second = (from % unit_min) / Self::PRECISION_FACTOR; 138 | let fraction = from % Self::PRECISION_FACTOR; 139 | write!( 140 | to.set_mut(index, Self::STR_LEN), 141 | "{hour:02}:{minute:02}:{second:02}.{fraction:0s$}", 142 | s = Self::SCALE 143 | ) 144 | .unwrap(); 145 | } 146 | } 147 | 148 | impl TimePrimitive for Time32MillisecondType { 149 | type Integer = i32; 150 | const SCALE: usize = 3; 151 | const PRECISION_FACTOR: i32 = 1_000; 152 | // Length of text representation of time. HH:MM:SS.fff 153 | const STR_LEN: usize = 12; 154 | } 155 | 156 | impl TimePrimitive for Time64MicrosecondType { 157 | type Integer = i64; 158 | 159 | const SCALE: usize = 6; 160 | const PRECISION_FACTOR: i64 = 1_000_000; 161 | // Length of text representation of time. HH:MM:SS.ffffff 162 | const STR_LEN: usize = 15; 163 | } 164 | 165 | impl TimePrimitive for Time64NanosecondType { 166 | type Integer = i64; 167 | // Times are rendered with full nanosecond precision (nine fractional digits). 168 | const SCALE: usize = 9; 169 | const PRECISION_FACTOR: i64 = 1_000_000_000; 170 | // Length of text representation of time. HH:MM:SS.fffffffff 171 | const STR_LEN: usize = 18; 172 | } 173 | 174 | impl<P>
WriteStrategy for NullableTimeAsText<P>
175 | where 176 | P: ArrowPrimitiveType + TimePrimitive<Integer = <P as ArrowPrimitiveType>::Native>, 177 | { 178 | fn buffer_desc(&self) -> BufferDesc { 179 | BufferDesc::Text { 180 | max_str_len: P::STR_LEN, 181 | } 182 | } 183 | 184 | fn write_rows( 185 | &self, 186 | param_offset: usize, 187 | column_buf: AnySliceMut<'_>, 188 | array: &dyn Array, 189 | ) -> Result<(), WriterError> { 190 | let from = array.as_any().downcast_ref::<PrimitiveArray<P>>().unwrap(); 191 | let mut to = column_buf.as_text_view().unwrap(); 192 | for (index, elapsed_since_midnight) in from.iter().enumerate() { 193 | if let Some(from) = elapsed_since_midnight { 194 | P::insert_at(index + param_offset, from, &mut to) 195 | } else { 196 | to.set_cell(index + param_offset, None) 197 | } 198 | } 199 | Ok(()) 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # arrow-odbc 2 | 3 | [![Docs](https://docs.rs/arrow-odbc/badge.svg)](https://docs.rs/arrow-odbc/) 4 | [![Licence](https://img.shields.io/crates/l/arrow-odbc)](https://github.com/pacman82/arrow-odbc/blob/main/License) 5 | [![Crates.io](https://img.shields.io/crates/v/arrow-odbc)](https://crates.io/crates/arrow-odbc) 6 | 7 | Fill Apache Arrow arrays from ODBC data sources. `arrow-odbc` is built on top of the [`arrow`](https://crates.io/crates/arrow) and [`odbc-api`](https://crates.io/crates/odbc-api) crates and enables you to read the data of an ODBC data source as a sequence of Apache Arrow record batches. `arrow-odbc` can also be used to insert the contents of Arrow record batches into a database table. 8 | 9 | This repository contains the code of the `arrow-odbc` Rust crate. The repository containing the code for the [`arrow-odbc` Python wheel](https://pypi.org/project/arrow-odbc/) resides in the [`arrow-odbc-py` repository](https://github.com/pacman82/arrow-odbc-py). 10 | 11 | ## About Arrow 12 | 13 | > [Apache Arrow](https://arrow.apache.org/) defines a language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware like CPUs and GPUs. The Arrow memory format also supports zero-copy reads for lightning-fast data access without serialization overhead. 14 | 15 | ## About ODBC 16 | 17 | [ODBC](https://docs.microsoft.com/en-us/sql/odbc/microsoft-open-database-connectivity-odbc) (Open DataBase Connectivity) is a standard which enables you to access data from a wide variety of data sources using SQL. 18 | 19 | ## Usage 20 | 21 | ```rust 22 | use arrow_odbc::OdbcReaderBuilder; 23 | // You can use the reexport of odbc_api to make sure the version used by arrow_odbc is in sync with 24 | // the version directly used by your application. 25 | use arrow_odbc::odbc_api as odbc_api; 26 | use odbc_api::{Environment, ConnectionOptions}; 27 | 28 | const CONNECTION_STRING: &str = "\ 29 | Driver={ODBC Driver 18 for SQL Server};\ 30 | Server=localhost;\ 31 | UID=SA;\ 32 | PWD=My@Test@Password1;\ 33 | "; 34 | 35 | fn main() -> Result<(), anyhow::Error> { 36 | 37 | let odbc_environment = Environment::new()?; 38 | 39 | // Connect with database. 40 | let connection = odbc_environment.connect_with_connection_string( 41 | CONNECTION_STRING, 42 | ConnectionOptions::default(), 43 | )?; 44 | 45 | // This SQL statement does not require any arguments. 46 | let parameters = (); 47 | 48 | // Execute query and create result set 49 | let cursor = connection 50 | .execute("SELECT * FROM MyTable", parameters)? 
51 | .expect("SELECT statement must produce a cursor"); 52 | 53 | // Read result set as arrow batches. Infer Arrow types automatically using the meta 54 | // information of `cursor`. 55 | let arrow_record_batches = OdbcReaderBuilder::new() 56 | // Use at most 256 MiB for transit buffer 57 | .with_max_bytes_per_batch(256 * 1024 * 1024) 58 | .build(cursor)?; 59 | 60 | for batch in arrow_record_batches { 61 | // ... process batch ... 62 | } 63 | Ok(()) 64 | } 65 | ``` 66 | 67 | ## Matching of ODBC to Arrow types when querying 68 | 69 | | ODBC | Arrow | 70 | | ------------------------ | -------------------- | 71 | | Numeric(p <= 38) | Decimal128 | 72 | | Decimal(p <= 38, s >= 0) | Decimal128 | 73 | | Integer | Int32 | 74 | | SmallInt | Int16 | 75 | | Real | Float32 | 76 | | Float(p <=24) | Float32 | 77 | | Double | Float64 | 78 | | Float(p > 24) | Float64 | 79 | | Date | Date32 | 80 | | LongVarbinary | Binary | 81 | | Time(p = 0) | Time32Second | 82 | | Time(p = 1..3) | Time32Millisecond | 83 | | Time(p = 4..6) | Time64Microsecond | 84 | | Time(p = 7..9) | Time64Nanosecond | 85 | | Timestamp(p = 0) | TimestampSecond | 86 | | Timestamp(p: 1..3) | TimestampMilliSecond | 87 | | Timestamp(p: 4..6) | TimestampMicroSecond | 88 | | Timestamp(p >= 7 ) | TimestampNanoSecond | 89 | | BigInt | Int64 | 90 | | TinyInt Signed | Int8 | 91 | | TinyInt Unsigned | UInt8 | 92 | | Bit | Boolean | 93 | | Varbinary | Binary | 94 | | Binary | FixedSizedBinary | 95 | | All others | Utf8 | 96 | 97 | ## Matching of Arrow to ODBC types when inserting 98 | 99 | | Arrow | ODBC | 100 | | --------------------- | ------------------ | 101 | | Utf8 | VarChar | 102 | | LargeUtf8 | VarChar | 103 | | Decimal128(p, s = 0) | VarChar(p + 1) | 104 | | Decimal128(p, s != 0) | VarChar(p + 2) | 105 | | Decimal128(p, s < 0) | VarChar(p - s + 1) | 106 | | Decimal256(p, s = 0) | VarChar(p + 1) | 107 | | Decimal256(p, s != 0) | VarChar(p + 2) | 108 | | Decimal256(p, s < 0) | VarChar(p - s + 1) | 109 | | Int8 | TinyInt | 110 | | Int16 | SmallInt | 111 | | Int32 | Integer | 112 | | Int64 | BigInt | 113 | | Float16 | Real | 114 | | Float32 | Real | 115 | | Float64 | Double | 116 | | Timestamp s | Timestamp(7) | 117 | | Timestamp ms | Timestamp(7) | 118 | | Timestamp us | Timestamp(7) | 119 | | Timestamp ns | Timestamp(7) | 120 | | Timestamp with Tz s | VarChar(25) | 121 | | Timestamp with Tz ms | VarChar(29) | 122 | | Timestamp with Tz us | VarChar(32) | 123 | | Timestamp with Tz ns | VarChar(35) | 124 | | Date32 | Date | 125 | | Date64 | Date | 126 | | Time32 s | Time | 127 | | Time32 ms | VarChar(12) | 128 | | Time64 us | VarChar(15) | 129 | | Time64 ns | VarChar(18) | 130 | | Binary | Varbinary | 131 | | FixedBinary(l) | Varbinary(l) | 132 | | All others | Unsupported | 133 | 134 | The mapping for insertion is not optimal yet, but before spending a lot of work on improving it I was curious which use cases would pop up for users. So if something does not work for you, but could work with a better mapping of Arrow to ODBC types, feel free to open an issue. If you do so, please give a lot of context about what you are trying to do. 135 | 136 | ## Build 137 | 138 | To build `arrow-odbc` and compile it as part of your Rust project you need to link against an ODBC driver manager. On Windows this is already part of the system, so there is nothing to do. On Linux and MacOS it is recommended to install UnixODBC. 
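After installing a driver manager via one of the options below, you can check that unixODBC is set up correctly. Assuming its `odbcinst` tool is on your `PATH`, the following prints the location of the configuration files and lists the registered drivers:

```shell
odbcinst -j
odbcinst -q -d
```

This is useful when a driver is installed but the connection still fails with a "driver not found" style error at runtime.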
139 | 140 | ### Ubuntu 141 | 142 | ```shell 143 | sudo apt-get install unixodbc-dev 144 | ``` 145 | 146 | ### Mac OS 147 | 148 | ```shell 149 | brew install unixodbc 150 | ``` 151 | 152 | ### Mac OS ARM 153 | 154 | On MacOS with ARM, brew installs into a directory not found by cargo during linking. There are likely many ways to deal with this. Since the author does not have access to an ARM Mac, here is only a collection of things that have worked for other users. 155 | 156 | * Installing unixODBC itself from source with make/configure instead of brew 157 | * Installing unixODBC with brew and creating a symlink for its binary directory `sudo ln -s /opt/homebrew/lib /Users/<username>/lib` 158 | -------------------------------------------------------------------------------- /src/odbc_writer/timestamp.rs: -------------------------------------------------------------------------------- 1 | //! Logic for inserting timestamps from Arrow Arrays into ODBC databases. 2 | 3 | use std::{io::Write, marker::PhantomData, sync::Arc}; 4 | 5 | use arrow::{ 6 | array::{Array, ArrowPrimitiveType, PrimitiveArray, timezone::Tz}, 7 | datatypes::{ 8 | TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, 9 | TimestampSecondType, 10 | }, 11 | }; 12 | use chrono::{DateTime, Datelike, TimeZone, Timelike}; 13 | use log::debug; 14 | use odbc_api::{ 15 | buffers::{AnySliceMut, BufferDesc}, 16 | sys::Timestamp, 17 | }; 18 | 19 | use super::{WriteStrategy, WriterError, map_arrow_to_odbc::MapArrowToOdbc}; 20 | 21 | pub fn insert_timestamp_strategy( 22 | is_nullable: bool, 23 | time_unit: &TimeUnit, 24 | time_zone: Option<Arc<str>>, 25 | ) -> Result<Box<dyn WriteStrategy>, WriterError> { 26 | let ws = match (time_unit, time_zone) { 27 | (TimeUnit::Second, None) => { 28 | TimestampSecondType::map_with(is_nullable, epoch_to_timestamp_s) 29 | } 30 | (TimeUnit::Millisecond, None) => { 31 | TimestampMillisecondType::map_with(is_nullable, epoch_to_timestamp_ms) 32 | } 33 | (TimeUnit::Microsecond, None) => { 34 | TimestampMicrosecondType::map_with(is_nullable, epoch_to_timestamp_us) 35 | } 36 | (TimeUnit::Nanosecond, None) => TimestampNanosecondType::map_with(is_nullable, |ns| { 37 | // Drop the last two digits of precision, since we bind it with precision 7 and not 9. 38 | epoch_to_timestamp_ns((ns / 100) * 100) 39 | }), 40 | (TimeUnit::Second, Some(tz)) => { 41 | Box::new(TimestampTzToText::<TimestampSecondType>::new(tz)?) 42 | } 43 | (TimeUnit::Millisecond, Some(tz)) => { 44 | Box::new(TimestampTzToText::<TimestampMillisecondType>::new(tz)?) 45 | } 46 | (TimeUnit::Microsecond, Some(tz)) => { 47 | Box::new(TimestampTzToText::<TimestampMicrosecondType>::new(tz)?) 48 | } 49 | (TimeUnit::Nanosecond, Some(tz)) => { 50 | Box::new(TimestampTzToText::<TimestampNanosecondType>::new(tz)?) 
51 | } 52 | }; 53 | Ok(ws) 54 | } 55 | 56 | pub fn epoch_to_timestamp_ns(from: i64) -> Timestamp { 57 | let ndt = DateTime::from_timestamp_nanos(from); 58 | datetime_to_timestamp(ndt) 59 | } 60 | 61 | pub fn epoch_to_timestamp_us(from: i64) -> Timestamp { 62 | let ndt = 63 | DateTime::from_timestamp_micros(from).expect("Timestamp must be in range for microseconds"); 64 | datetime_to_timestamp(ndt) 65 | } 66 | 67 | pub fn epoch_to_timestamp_ms(from: i64) -> Timestamp { 68 | let ndt = 69 | DateTime::from_timestamp_millis(from).expect("Timestamp must be in range for milliseconds"); 70 | datetime_to_timestamp(ndt) 71 | } 72 | 73 | pub fn epoch_to_timestamp_s(from: i64) -> Timestamp { 74 | let ndt = DateTime::from_timestamp_millis(from * 1_000) 75 | .expect("Timestamp must be in range for milliseconds"); 76 | datetime_to_timestamp(ndt) 77 | } 78 | 79 | fn datetime_to_timestamp(ndt: DateTime) -> Timestamp { 80 | let date = ndt.date_naive(); 81 | let time = ndt.time(); 82 | Timestamp { 83 | year: date.year().try_into().unwrap(), 84 | month: date.month().try_into().unwrap(), 85 | day: date.day().try_into().unwrap(), 86 | hour: time.hour().try_into().unwrap(), 87 | minute: time.minute().try_into().unwrap(), 88 | second: time.second().try_into().unwrap(), 89 | fraction: time.nanosecond(), 90 | } 91 | } 92 | 93 | /// Strategy for writing a timestamp with timezone as text into the database. Microsoft SQL Server 94 | /// supports this via `SQL_SS_TIMESTAMPOFFSET`, yet this is an extension of the ODBC standard. So 95 | /// maybe for now we are safer just to write it as a string literal. 96 | pub struct TimestampTzToText
<P> { 97 | time_zone: Tz, 98 | _phantom: PhantomData<P>, 99 | } 100 | 101 | impl<P> TimestampTzToText<P>
{ 102 | pub fn new(time_zone: Arc<str>) -> Result<Self, WriterError> { 103 | let tz = time_zone.parse().map_err(|e| { 104 | debug!("Failed to parse time zone '{time_zone}'. Original error: {e}"); 105 | WriterError::InvalidTimeZone { time_zone } 106 | })?; 107 | Ok(Self { 108 | time_zone: tz, 109 | _phantom: PhantomData, 110 | }) 111 | } 112 | } 113 | 114 | impl<P>
WriteStrategy for TimestampTzToText<P>
115 | where 116 | P: ArrowPrimitiveType + InserableAsTimestampWithTimeZone, 117 | { 118 | fn buffer_desc(&self) -> BufferDesc { 119 | BufferDesc::Text { 120 | max_str_len: P::FORMAT_WITH_TIME_ZONE_LEN, 121 | } 122 | } 123 | 124 | fn write_rows( 125 | &self, 126 | param_offset: usize, 127 | column_buf: AnySliceMut<'_>, 128 | array: &dyn Array, 129 | ) -> Result<(), WriterError> { 130 | let from = array.as_any().downcast_ref::>().unwrap(); 131 | let mut to = column_buf.as_text_view().unwrap(); 132 | for (index, timestamp) in from.iter().enumerate() { 133 | if let Some(timestamp) = timestamp { 134 | let dt = P::to_regional_datetime(timestamp, &self.time_zone); 135 | write!( 136 | to.set_mut(index + param_offset, P::FORMAT_WITH_TIME_ZONE_LEN), 137 | "{}", 138 | dt.format(P::FORMAT_STRING), 139 | ) 140 | .unwrap(); 141 | } else { 142 | to.set_cell(index + param_offset, None) 143 | } 144 | } 145 | Ok(()) 146 | } 147 | } 148 | 149 | trait InserableAsTimestampWithTimeZone { 150 | /// Length of the string representation of a timestamp with time zone, e.g. "2023-10-01 12:34:56.789+02:00" 151 | const FORMAT_WITH_TIME_ZONE_LEN: usize; 152 | const FORMAT_STRING: &'static str; 153 | fn to_regional_datetime(epoch: i64, time_zone: &Tz) -> DateTime; 154 | } 155 | 156 | impl InserableAsTimestampWithTimeZone for TimestampSecondType { 157 | const FORMAT_WITH_TIME_ZONE_LEN: usize = 25; // "YYYY-MM-DD HH:MM:SS+00:00" 158 | const FORMAT_STRING: &'static str = "%Y-%m-%d %H:%M:%S%Z"; 159 | 160 | fn to_regional_datetime(epoch: i64, time_zone: &Tz) -> DateTime { 161 | time_zone 162 | .timestamp_opt(epoch, 0) 163 | .earliest() 164 | .expect("Timestamp must be in range for the timezone") 165 | } 166 | } 167 | 168 | impl InserableAsTimestampWithTimeZone for TimestampMillisecondType { 169 | const FORMAT_WITH_TIME_ZONE_LEN: usize = 29; // "YYYY-MM-DD HH:MM:SS.fff+00:00" 170 | const FORMAT_STRING: &'static str = "%Y-%m-%d %H:%M:%S.%3f%Z"; 171 | 172 | fn to_regional_datetime(epoch: i64, time_zone: &Tz) -> DateTime { 173 | let epoch_sec = epoch / 1_000; 174 | let nano = (epoch % 1_000) * 1_000_000; // Convert milliseconds to nanoseconds 175 | time_zone 176 | .timestamp_opt(epoch_sec, nano as u32) 177 | .earliest() 178 | .expect("Timestamp must be in range for the timezone") 179 | } 180 | } 181 | 182 | impl InserableAsTimestampWithTimeZone for TimestampMicrosecondType { 183 | const FORMAT_WITH_TIME_ZONE_LEN: usize = 32; // "YYYY-MM-DD HH:MM:SS.fff+00:00" 184 | const FORMAT_STRING: &'static str = "%Y-%m-%d %H:%M:%S.%6f%Z"; 185 | 186 | fn to_regional_datetime(epoch: i64, time_zone: &Tz) -> DateTime { 187 | let epoch_sec = epoch / 1_000_000; 188 | let nano = (epoch % 1_000_000) * 1_000; // Convert milliseconds to nanoseconds 189 | time_zone 190 | .timestamp_opt(epoch_sec, nano as u32) 191 | .earliest() 192 | .expect("Timestamp must be in range for the timezone") 193 | } 194 | } 195 | 196 | impl InserableAsTimestampWithTimeZone for TimestampNanosecondType { 197 | const FORMAT_WITH_TIME_ZONE_LEN: usize = 35; // "YYYY-MM-DD HH:MM:SS.fff+00:00" 198 | const FORMAT_STRING: &'static str = "%Y-%m-%d %H:%M:%S.%9f%Z"; 199 | 200 | fn to_regional_datetime(epoch: i64, time_zone: &Tz) -> DateTime { 201 | let epoch_sec = epoch / 1_000_000_000; 202 | let nano = epoch % 1_000_000_000; // Convert milliseconds to nanoseconds 203 | time_zone 204 | .timestamp_opt(epoch_sec, nano as u32) 205 | .earliest() 206 | .expect("Timestamp must be in range for the timezone") 207 | } 208 | } 209 | 
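// --- Illustrative sketch (not part of the upstream crate source) ---
// A minimal example of how the epoch-to-`Timestamp` helpers above are expected to behave.
// It only uses the public helpers defined in this module; the module name and the chosen
// input value are assumptions made for illustration.
#[cfg(test)]
mod conversion_example {
    use super::*;

    #[test]
    fn epoch_ms_maps_to_odbc_timestamp() {
        // 1_609_459_200_123 ms since the Unix epoch is 2021-01-01T00:00:00.123 UTC.
        let ts = epoch_to_timestamp_ms(1_609_459_200_123);
        assert_eq!(ts.year, 2021);
        assert_eq!(ts.month, 1);
        assert_eq!(ts.day, 1);
        assert_eq!(ts.hour, 0);
        assert_eq!(ts.second, 0);
        // The ODBC `Timestamp::fraction` field is expressed in nanoseconds.
        assert_eq!(ts.fraction, 123_000_000);
    }
}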
-------------------------------------------------------------------------------- /src/schema.rs: -------------------------------------------------------------------------------- 1 | use arrow::datatypes::{DataType as ArrowDataType, Field, Schema, TimeUnit}; 2 | use log::debug; 3 | use odbc_api::{ColumnDescription, DataType as OdbcDataType, ResultSetMetadata, sys::SqlDataType}; 4 | use std::convert::TryInto; 5 | 6 | use crate::{ColumnFailure, Error}; 7 | 8 | /// Query the metadata to create an arrow schema. This method is invoked automatically for you by 9 | /// [`crate::OdbcReaderBuilder::build`]. You may want to call this method in situation there you 10 | /// want to create an arrow schema without creating the reader yet. 11 | /// 12 | /// # Parameters 13 | /// 14 | /// * `result_set_metadata`: Used to query metadata about the columns in the result set, which is 15 | /// used to determine the arrow schema. 16 | /// * `dbms_name`: If provided, it is used to account for Database specific behavior than mapping 17 | /// types. Currently it is used to map `TIME` types from 'Microsoft SQL Server' to `Time32` or 18 | /// `Time64` 19 | /// * `map_value_errors_to_null`: In case falliable conversions should result in `NULL` the arrow 20 | /// field must be nullable, even if the source column on the database is not nullable. 21 | /// 22 | /// # Example 23 | /// 24 | /// ``` 25 | /// use anyhow::Error; 26 | /// 27 | /// use arrow_odbc::{arrow_schema_from, arrow::datatypes::Schema, odbc_api::Connection}; 28 | /// 29 | /// fn fetch_schema_for_table( 30 | /// table_name: &str, 31 | /// connection: &Connection<'_> 32 | /// ) -> Result { 33 | /// // Query column with values to get a cursor 34 | /// let sql = format!("SELECT * FROM {}", table_name); 35 | /// let mut prepared = connection.prepare(&sql)?; 36 | /// 37 | /// // Now that we have prepared statement, we want to use it to query metadata. 38 | /// let map_errors_to_null = false; 39 | /// let dbms_name = None; 40 | /// let schema = arrow_schema_from(&mut prepared, dbms_name, map_errors_to_null)?; 41 | /// Ok(schema) 42 | /// } 43 | /// ``` 44 | pub fn arrow_schema_from( 45 | result_set_metadata: &mut impl ResultSetMetadata, 46 | dbms_name: Option<&str>, 47 | map_value_errors_to_null: bool, 48 | ) -> Result { 49 | let num_cols: u16 = result_set_metadata 50 | .num_result_cols() 51 | .map_err(Error::UnableToRetrieveNumCols)? 52 | .try_into() 53 | .unwrap(); 54 | let mut fields = Vec::new(); 55 | for index in 0..num_cols { 56 | let field = arrow_field_from( 57 | result_set_metadata, 58 | dbms_name, 59 | index, 60 | map_value_errors_to_null, 61 | )?; 62 | 63 | fields.push(field) 64 | } 65 | Ok(Schema::new(fields)) 66 | } 67 | 68 | fn arrow_field_from( 69 | resut_set_metadata: &mut impl ResultSetMetadata, 70 | dbms_name: Option<&str>, 71 | index: u16, 72 | map_value_errors_to_null: bool, 73 | ) -> Result { 74 | let mut column_description = ColumnDescription::default(); 75 | resut_set_metadata 76 | .describe_col(index + 1, &mut column_description) 77 | .map_err(|cause| Error::ColumnFailure { 78 | name: "Unknown".to_owned(), 79 | index: index as usize, 80 | source: ColumnFailure::FailedToDescribeColumn(cause), 81 | })?; 82 | let name = column_description 83 | .name_to_string() 84 | .map_err(|source| Error::EncodingInvalid { source })?; 85 | debug!( 86 | "ODBC driver reported for column {index}. 
Relational type: {:?}; Nullability: {:?}; \ 87 | Name: '{name}';", 88 | column_description.data_type, column_description.nullability 89 | ); 90 | let data_type = match column_description.data_type { 91 | OdbcDataType::Numeric { 92 | precision: p @ 0..=38, 93 | scale, 94 | } 95 | | OdbcDataType::Decimal { 96 | precision: p @ 0..=38, 97 | scale, 98 | } => ArrowDataType::Decimal128(p as u8, scale.try_into().unwrap()), 99 | OdbcDataType::Integer => ArrowDataType::Int32, 100 | OdbcDataType::SmallInt => ArrowDataType::Int16, 101 | OdbcDataType::Real | OdbcDataType::Float { precision: 0..=24 } => ArrowDataType::Float32, 102 | OdbcDataType::Float { precision: _ } | OdbcDataType::Double => ArrowDataType::Float64, 103 | OdbcDataType::Date => ArrowDataType::Date32, 104 | OdbcDataType::Timestamp { precision: 0 } => { 105 | ArrowDataType::Timestamp(TimeUnit::Second, None) 106 | } 107 | OdbcDataType::Timestamp { precision: 1..=3 } => { 108 | ArrowDataType::Timestamp(TimeUnit::Millisecond, None) 109 | } 110 | OdbcDataType::Timestamp { precision: 4..=6 } => { 111 | ArrowDataType::Timestamp(TimeUnit::Microsecond, None) 112 | } 113 | OdbcDataType::Timestamp { precision: _ } => { 114 | ArrowDataType::Timestamp(TimeUnit::Nanosecond, None) 115 | } 116 | OdbcDataType::BigInt => ArrowDataType::Int64, 117 | OdbcDataType::TinyInt => { 118 | let is_unsigned = resut_set_metadata 119 | .column_is_unsigned(index + 1) 120 | .map_err(|e| Error::ColumnFailure { 121 | name: name.clone(), 122 | index: index as usize, 123 | source: ColumnFailure::FailedToDescribeColumn(e), 124 | })?; 125 | if is_unsigned { 126 | ArrowDataType::UInt8 127 | } else { 128 | ArrowDataType::Int8 129 | } 130 | } 131 | OdbcDataType::Bit => ArrowDataType::Boolean, 132 | OdbcDataType::Binary { length } => { 133 | let length = length 134 | .ok_or_else(|| Error::ColumnFailure { 135 | name: name.clone(), 136 | index: index as usize, 137 | source: ColumnFailure::ZeroSizedColumn { 138 | sql_type: OdbcDataType::Binary { length }, 139 | }, 140 | })? 141 | .get() 142 | .try_into() 143 | .unwrap(); 144 | ArrowDataType::FixedSizeBinary(length) 145 | } 146 | OdbcDataType::LongVarbinary { length: _ } | OdbcDataType::Varbinary { length: _ } => { 147 | ArrowDataType::Binary 148 | } 149 | OdbcDataType::Time { precision } => precision_to_time(precision), 150 | OdbcDataType::Other { 151 | data_type: SqlDataType(-154), 152 | column_size: _, 153 | decimal_digits, 154 | } => { 155 | if dbms_name.is_some_and(|name| name == "Microsoft SQL Server") { 156 | // SQL Server's -154 is used by Microsoft SQL Server for Timestamps without a time 157 | // zone. 158 | precision_to_time(decimal_digits) 159 | } else { 160 | // Other databases may use -154 for other purposes, so we treat it as a string. 161 | ArrowDataType::Utf8 162 | } 163 | } 164 | OdbcDataType::Other { 165 | data_type: SqlDataType(-98), 166 | column_size: _, 167 | decimal_digits: _, 168 | } => { 169 | // IBM DB2 names seem platform specific. E.g.; "DB2/LINUXX8664" 170 | if dbms_name.is_some_and(|name| name.starts_with("DB2/")) { 171 | // IBM DB2's -98 is used for binary blob types. 172 | ArrowDataType::Binary 173 | } else { 174 | // Other databases may use -98 for other purposes, so we treat it as a string. 175 | ArrowDataType::Utf8 176 | } 177 | } 178 | OdbcDataType::Unknown 179 | | OdbcDataType::Numeric { .. } 180 | | OdbcDataType::Decimal { .. 
} 181 | | OdbcDataType::Other { 182 | data_type: _, 183 | column_size: _, 184 | decimal_digits: _, 185 | } 186 | | OdbcDataType::WChar { length: _ } 187 | | OdbcDataType::Char { length: _ } 188 | | OdbcDataType::WVarchar { length: _ } 189 | | OdbcDataType::WLongVarchar { length: _ } 190 | | OdbcDataType::LongVarchar { length: _ } 191 | | OdbcDataType::Varchar { length: _ } => ArrowDataType::Utf8, 192 | }; 193 | let is_falliable = matches!(data_type, ArrowDataType::Timestamp(TimeUnit::Nanosecond, _)); 194 | let nullable = 195 | column_description.could_be_nullable() || (is_falliable && map_value_errors_to_null); 196 | let field = Field::new(name, data_type, nullable); 197 | Ok(field) 198 | } 199 | 200 | fn precision_to_time(precision: i16) -> ArrowDataType { 201 | match precision { 202 | 0 => ArrowDataType::Time32(TimeUnit::Second), 203 | 1..=3 => ArrowDataType::Time32(TimeUnit::Millisecond), 204 | 4..=6 => ArrowDataType::Time64(TimeUnit::Microsecond), 205 | 7..=9 => ArrowDataType::Time64(TimeUnit::Nanosecond), 206 | _ => ArrowDataType::Utf8, 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/reader/text.rs: -------------------------------------------------------------------------------- 1 | use std::{char::decode_utf16, cmp::min, num::NonZeroUsize, sync::Arc}; 2 | 3 | use arrow::array::{ArrayRef, StringBuilder}; 4 | use odbc_api::{ 5 | DataType as OdbcDataType, 6 | buffers::{AnySlice, BufferDesc}, 7 | }; 8 | 9 | use super::{ColumnFailure, MappingError, ReadStrategy}; 10 | 11 | /// This function decides wether this column will be queried as narrow (assumed to be utf-8) or 12 | /// wide text (assumed to be utf-16). The reason we do not always use narrow is that the encoding 13 | /// dependends on the system locals which is usually not UTF-8 on windows systems. Furthermore we 14 | /// are trying to adapt the buffer size to the maximum string length the column could contain. 15 | pub fn choose_text_strategy( 16 | sql_type: OdbcDataType, 17 | lazy_display_size: impl FnOnce() -> Result, odbc_api::Error>, 18 | max_text_size: Option, 19 | trim_fixed_sized_character_strings: bool, 20 | text_encoding: TextEncoding, 21 | ) -> Result, ColumnFailure> { 22 | let apply_buffer_limit = |len| match (len, max_text_size) { 23 | (None, None) => Err(ColumnFailure::ZeroSizedColumn { sql_type }), 24 | (None, Some(limit)) => Ok(limit), 25 | (Some(len), None) => Ok(len), 26 | (Some(len), Some(limit)) => Ok(min(len, limit)), 27 | }; 28 | let is_fixed_sized_char = matches!( 29 | sql_type, 30 | OdbcDataType::Char { .. } | OdbcDataType::WChar { .. } 31 | ); 32 | let trim = trim_fixed_sized_character_strings && is_fixed_sized_char; 33 | let strategy: Box = if text_encoding.use_utf16() { 34 | let hex_len = sql_type 35 | .utf16_len() 36 | .map(Ok) 37 | .or_else(|| lazy_display_size().transpose()) 38 | .transpose() 39 | .map_err(|source| ColumnFailure::UnknownStringLength { sql_type, source })?; 40 | let hex_len = apply_buffer_limit(hex_len.map(NonZeroUsize::get))?; 41 | wide_text_strategy(hex_len, trim) 42 | } else { 43 | let octet_len = sql_type 44 | .utf8_len() 45 | .map(Ok) 46 | .or_else(|| lazy_display_size().transpose()) 47 | .transpose() 48 | .map_err(|source| ColumnFailure::UnknownStringLength { sql_type, source })?; 49 | let octet_len = apply_buffer_limit(octet_len.map(NonZeroUsize::get))?; 50 | // So far only Linux users seemed to have complained about panics due to garbage indices? 
51 | // Linux usually would use UTF-8, so we only invest work in working around this for narrow 52 | // strategies 53 | narrow_text_strategy(octet_len, trim) 54 | }; 55 | 56 | Ok(strategy) 57 | } 58 | 59 | /// Used to indicate the preferred encoding for text columns. 60 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 61 | pub enum TextEncoding { 62 | /// Evaluates to [`Self::Utf16`] on windows and [`Self::Utf8`] on other systems. We do this, 63 | /// because most systems e.g. MacOs and Linux use UTF-8 as their default encoding, while windows 64 | /// may still use a Latin1 or some other extended ASCII as their narrow encoding. On the other 65 | /// hand many Posix drivers are lacking in their support for wide function calls and UTF-16. So 66 | /// using `Wide` on windows and `Narrow` everythere else is a good starting point. 67 | Auto, 68 | /// Use narrow characters (one byte) to encode text in payloads. ODBC lets the client choose the 69 | /// encoding which should be based on the system local. This is often not what is actually 70 | /// happening though. If we use narrow encoding, we assume the text to be UTF-8 and error if we 71 | /// find that not to be the case. 72 | Utf8, 73 | /// Use wide characters (two bytes) to encode text in payloads. ODBC defines the encoding to 74 | /// be always UTF-16. 75 | Utf16, 76 | } 77 | 78 | impl Default for TextEncoding { 79 | fn default() -> Self { 80 | Self::Auto 81 | } 82 | } 83 | 84 | impl TextEncoding { 85 | pub fn use_utf16(&self) -> bool { 86 | match self { 87 | Self::Auto => cfg!(target_os = "windows"), 88 | Self::Utf8 => false, 89 | Self::Utf16 => true, 90 | } 91 | } 92 | } 93 | 94 | fn wide_text_strategy(u16_len: usize, trim: bool) -> Box { 95 | Box::new(WideText::new(u16_len, trim)) 96 | } 97 | 98 | fn narrow_text_strategy(octet_len: usize, trim: bool) -> Box { 99 | Box::new(NarrowText::new(octet_len, trim)) 100 | } 101 | 102 | /// Strategy requesting the text from the database as UTF-16 (Wide characters) and emmitting it as 103 | /// UTF-8. We use it, since the narrow representation in ODBC is not always guaranteed to be UTF-8, 104 | /// but depends on the local instead. 105 | pub struct WideText { 106 | /// Maximum string length in u16, excluding terminating zero 107 | max_str_len: usize, 108 | /// Wether the string should be trimmed. 109 | trim: bool, 110 | } 111 | 112 | impl WideText { 113 | pub fn new(max_str_len: usize, trim: bool) -> Self { 114 | Self { max_str_len, trim } 115 | } 116 | } 117 | 118 | impl ReadStrategy for WideText { 119 | fn buffer_desc(&self) -> BufferDesc { 120 | BufferDesc::WText { 121 | max_str_len: self.max_str_len, 122 | } 123 | } 124 | 125 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 126 | let view = column_view.as_w_text_view().unwrap(); 127 | let item_capacity = view.len(); 128 | // Any utf-16 character could take up to 4 Bytes if represented as utf-8, but since mostly 129 | // this is 1 to one, and also not every string is likeyl to use its maximum capacity, we 130 | // rather accept the reallocation in these scenarios. 131 | let data_capacity = self.max_str_len * item_capacity; 132 | let mut builder = StringBuilder::with_capacity(item_capacity, data_capacity); 133 | // Buffer used to convert individual values from utf16 to utf8. 
134 | let mut buf_utf8 = String::new(); 135 | for value in view.iter() { 136 | buf_utf8.clear(); 137 | let opt = if let Some(utf16) = value { 138 | for c in decode_utf16(utf16.as_slice().iter().cloned()) { 139 | buf_utf8.push(c.unwrap()); 140 | } 141 | let slice = if self.trim { 142 | buf_utf8.trim() 143 | } else { 144 | buf_utf8.as_str() 145 | }; 146 | Some(slice) 147 | } else { 148 | None 149 | }; 150 | builder.append_option(opt); 151 | } 152 | Ok(Arc::new(builder.finish())) 153 | } 154 | } 155 | 156 | pub struct NarrowText { 157 | /// Maximum string length in u8, excluding terminating zero 158 | max_str_len: usize, 159 | /// Wether the string should be trimmed. 160 | trim: bool, 161 | } 162 | 163 | impl NarrowText { 164 | pub fn new(max_str_len: usize, trim: bool) -> Self { 165 | Self { max_str_len, trim } 166 | } 167 | } 168 | 169 | impl ReadStrategy for NarrowText { 170 | fn buffer_desc(&self) -> BufferDesc { 171 | BufferDesc::Text { 172 | max_str_len: self.max_str_len, 173 | } 174 | } 175 | 176 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 177 | let view = column_view.as_text_view().unwrap(); 178 | let mut builder = StringBuilder::with_capacity(view.len(), self.max_str_len * view.len()); 179 | for value in view.iter() { 180 | builder.append_option( 181 | value 182 | .map(|bytes| { 183 | let untrimmed = 184 | std::str::from_utf8(bytes).map_err(|_| MappingError::InvalidUtf8 { 185 | lossy_value: String::from_utf8_lossy(bytes).into_owned(), 186 | })?; 187 | Ok(if self.trim { 188 | untrimmed.trim() 189 | } else { 190 | untrimmed 191 | }) 192 | }) 193 | .transpose()?, 194 | ); 195 | } 196 | Ok(Arc::new(builder.finish())) 197 | } 198 | } 199 | 200 | #[cfg(test)] 201 | mod tests { 202 | use odbc_api::buffers::{AnySlice, ColumnBuffer, TextColumn}; 203 | 204 | use crate::reader::{MappingError, ReadStrategy as _}; 205 | 206 | use super::NarrowText; 207 | 208 | #[test] 209 | fn must_return_error_for_invalid_utf8() { 210 | // Given a slice with invalid utf-8 211 | let mut column = TextColumn::new(1, 10); 212 | column.set_value(0, Some(&[b'H', b'e', b'l', b'l', b'o', 0xc3])); 213 | let column_view = AnySlice::Text(column.view(1)); 214 | 215 | // When 216 | let strategy = NarrowText::new(5, false); 217 | let result = strategy.fill_arrow_array(column_view); 218 | 219 | // Then 220 | let error = result.unwrap_err(); 221 | let MappingError::InvalidUtf8 { lossy_value } = error else { 222 | panic!("Not an InvalidUtf8 error") 223 | }; 224 | assert_eq!(lossy_value, "Hello�"); 225 | } 226 | } 227 | -------------------------------------------------------------------------------- /src/reader/map_odbc_to_arrow.rs: -------------------------------------------------------------------------------- 1 | use std::{marker::PhantomData, sync::Arc}; 2 | 3 | use arrow::{ 4 | array::{ArrayRef, PrimitiveBuilder}, 5 | datatypes::ArrowPrimitiveType, 6 | }; 7 | use chrono::NaiveDateTime; 8 | use odbc_api::buffers::{AnySlice, BufferDesc, Item}; 9 | use thiserror::Error; 10 | 11 | use super::ReadStrategy; 12 | 13 | /// Extend an arrow primitive type to serve as a builder for Read strategies. 14 | pub trait MapOdbcToArrow { 15 | type ArrowElement; 16 | 17 | /// Use the provided function to convert an element of an ODBC column buffer into the desired 18 | /// element of an arrow array. This method assumes the conversion is falliable. 
19 | fn map_falliable( 20 | nullable: bool, 21 | map_errors_to_null: bool, 22 | odbc_to_arrow: impl Fn(&U) -> Result + 'static + Send, 23 | ) -> Box 24 | where 25 | U: Item + 'static + Send; 26 | 27 | /// Use the infallible function provided to convert an element of an ODBC column buffer into the 28 | /// desired element of an arrow array. 29 | fn map_infalliable( 30 | nullable: bool, 31 | odbc_to_arrow: impl Fn(&U) -> Self::ArrowElement + 'static + Send, 32 | ) -> Box 33 | where 34 | U: Item + 'static + Send; 35 | 36 | /// Should the arrow array element be identical to an item in the ODBC buffer no mapping is 37 | /// needed. We still need to account for nullability. 38 | fn identical(nullable: bool) -> Box 39 | where 40 | Self::ArrowElement: Item; 41 | } 42 | 43 | impl MapOdbcToArrow for T 44 | where 45 | T: ArrowPrimitiveType + Send, 46 | { 47 | type ArrowElement = T::Native; 48 | 49 | fn map_falliable( 50 | nullable: bool, 51 | map_errors_to_null: bool, 52 | odbc_to_arrow: impl Fn(&U) -> Result + 'static + Send, 53 | ) -> Box 54 | where 55 | U: Item + 'static + Send, 56 | { 57 | if map_errors_to_null { 58 | return Box::new(ErrorToNullStrategy::::new(odbc_to_arrow)); 59 | } 60 | 61 | if nullable { 62 | return Box::new(NullableStrategy::::new(odbc_to_arrow)); 63 | } 64 | 65 | Box::new(NonNullableStrategy::::new(odbc_to_arrow)) 66 | } 67 | 68 | fn map_infalliable( 69 | nullable: bool, 70 | odbc_to_arrow: impl Fn(&U) -> Self::ArrowElement + 'static + Send, 71 | ) -> Box 72 | where 73 | U: Item + 'static + Send, 74 | { 75 | if nullable { 76 | Box::new(NullableStrategy::::new(OkWrappedMapped( 77 | odbc_to_arrow, 78 | ))) 79 | } else { 80 | Box::new(NonNullableStrategy::::new(OkWrappedMapped( 81 | odbc_to_arrow, 82 | ))) 83 | } 84 | } 85 | 86 | fn identical(nullable: bool) -> Box 87 | where 88 | Self::ArrowElement: Item, 89 | { 90 | if nullable { 91 | Box::new(NullableDirectStrategy::::new()) 92 | } else { 93 | Box::new(NonNullDirectStrategy::::new()) 94 | } 95 | } 96 | } 97 | 98 | /// We introduce this trait instead of using the Fn(...) trait syntax directly, in order to being 99 | /// able to provide an implementation for `OkWrappedMapped`. Which in turn we need to reuse our 100 | /// Strategy implementations for falliable and infalliable cases. 101 | /// 102 | /// We could save our selves all of this if Rust would be better at figuring out then to promote 103 | /// the lifetimes in closures to higher order liftimes, but at time of writing this, I've not been 104 | /// able to Ok wrapping with a straight forward lambda `|e| Ok(f(e))``. (Current version 1.79). 105 | /// 106 | /// Since Fn traits can not be implemented manually either we introduce this one. 
107 | trait MapElement { 108 | fn map_element(&self, odbc: &O) -> Result; 109 | } 110 | 111 | impl MapElement for T 112 | where 113 | T: Fn(&O) -> Result, 114 | { 115 | fn map_element(&self, odbc: &O) -> Result { 116 | self(odbc) 117 | } 118 | } 119 | 120 | struct OkWrappedMapped(F); 121 | 122 | impl MapElement for OkWrappedMapped 123 | where 124 | F: Fn(&O) -> A, 125 | { 126 | fn map_element(&self, odbc: &O) -> Result { 127 | Ok((self.0)(odbc)) 128 | } 129 | } 130 | 131 | struct NonNullDirectStrategy { 132 | phantom: PhantomData, 133 | } 134 | 135 | impl NonNullDirectStrategy { 136 | fn new() -> Self { 137 | Self { 138 | phantom: PhantomData, 139 | } 140 | } 141 | } 142 | 143 | impl ReadStrategy for NonNullDirectStrategy 144 | where 145 | T: ArrowPrimitiveType + Send, 146 | T::Native: Item, 147 | { 148 | fn buffer_desc(&self) -> BufferDesc { 149 | T::Native::buffer_desc(false) 150 | } 151 | 152 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 153 | let slice = T::Native::as_slice(column_view).unwrap(); 154 | let mut builder = PrimitiveBuilder::::with_capacity(slice.len()); 155 | builder.append_slice(slice); 156 | Ok(Arc::new(builder.finish())) 157 | } 158 | } 159 | 160 | struct NullableDirectStrategy { 161 | phantom: PhantomData, 162 | } 163 | 164 | impl NullableDirectStrategy { 165 | fn new() -> Self { 166 | Self { 167 | phantom: PhantomData, 168 | } 169 | } 170 | } 171 | 172 | impl ReadStrategy for NullableDirectStrategy 173 | where 174 | T: ArrowPrimitiveType + Send, 175 | T::Native: Item, 176 | { 177 | fn buffer_desc(&self) -> BufferDesc { 178 | T::Native::buffer_desc(true) 179 | } 180 | 181 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 182 | let values = T::Native::as_nullable_slice(column_view).unwrap(); 183 | let mut builder = PrimitiveBuilder::::with_capacity(values.len()); 184 | for value in values { 185 | builder.append_option(value.copied()); 186 | } 187 | Ok(Arc::new(builder.finish())) 188 | } 189 | } 190 | 191 | struct NonNullableStrategy { 192 | _primitive_type: PhantomData
<P>
, 193 | _odbc_item: PhantomData, 194 | odbc_to_arrow: F, 195 | } 196 | 197 | impl NonNullableStrategy { 198 | fn new(odbc_to_arrow: F) -> Self { 199 | Self { 200 | _primitive_type: PhantomData, 201 | _odbc_item: PhantomData, 202 | odbc_to_arrow, 203 | } 204 | } 205 | } 206 | 207 | impl ReadStrategy for NonNullableStrategy 208 | where 209 | P: ArrowPrimitiveType + Send, 210 | O: Item + Send, 211 | F: MapElement, 212 | { 213 | fn buffer_desc(&self) -> BufferDesc { 214 | O::buffer_desc(false) 215 | } 216 | 217 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 218 | let slice = column_view.as_slice::().unwrap(); 219 | let mut builder = PrimitiveBuilder::
<P>
::with_capacity(slice.len()); 220 | for odbc_value in slice { 221 | builder.append_value(self.odbc_to_arrow.map_element(odbc_value)?); 222 | } 223 | Ok(Arc::new(builder.finish())) 224 | } 225 | } 226 | 227 | struct NullableStrategy { 228 | _primitive_type: PhantomData
<P>
, 229 | _odbc_item: PhantomData, 230 | odbc_to_arrow: F, 231 | } 232 | 233 | impl NullableStrategy { 234 | fn new(odbc_to_arrow: F) -> Self { 235 | Self { 236 | _primitive_type: PhantomData, 237 | _odbc_item: PhantomData, 238 | odbc_to_arrow, 239 | } 240 | } 241 | } 242 | 243 | impl ReadStrategy for NullableStrategy 244 | where 245 | P: ArrowPrimitiveType + Send, 246 | O: Item + Send, 247 | F: MapElement, 248 | { 249 | fn buffer_desc(&self) -> BufferDesc { 250 | O::buffer_desc(true) 251 | } 252 | 253 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 254 | let opts = column_view.as_nullable_slice::().unwrap(); 255 | let mut builder = PrimitiveBuilder::
<P>
::with_capacity(opts.len()); 256 | for odbc_opt in opts { 257 | builder.append_option( 258 | odbc_opt 259 | .map(|odbc_element| self.odbc_to_arrow.map_element(odbc_element)) 260 | .transpose()?, 261 | ); 262 | } 263 | Ok(Arc::new(builder.finish())) 264 | } 265 | } 266 | 267 | /// Map invalid values to `NULL` rather than emitting a [`MappingError`]`. 268 | struct ErrorToNullStrategy { 269 | _primitive_type: PhantomData
<P>
, 270 | _odbc_item: PhantomData, 271 | odbc_to_arrow: F, 272 | } 273 | 274 | impl ErrorToNullStrategy { 275 | fn new(odbc_to_arrow: F) -> Self { 276 | Self { 277 | _primitive_type: PhantomData, 278 | _odbc_item: PhantomData, 279 | odbc_to_arrow, 280 | } 281 | } 282 | } 283 | 284 | impl ReadStrategy for ErrorToNullStrategy 285 | where 286 | P: ArrowPrimitiveType + Send, 287 | O: Item + Send, 288 | F: Fn(&O) -> Result + Send, 289 | { 290 | fn buffer_desc(&self) -> BufferDesc { 291 | O::buffer_desc(true) 292 | } 293 | 294 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 295 | let opts = column_view.as_nullable_slice::().unwrap(); 296 | let mut builder = PrimitiveBuilder::
<P>
::with_capacity(opts.len()); 297 | for odbc_opt in opts { 298 | builder.append_option(odbc_opt.and_then(|val| (self.odbc_to_arrow)(val).ok())); 299 | } 300 | Ok(Arc::new(builder.finish())) 301 | } 302 | } 303 | 304 | /// The source value returned from the ODBC datasource is out of range and can not be mapped into 305 | /// its Arrow target type. 306 | #[derive(Error, Debug)] 307 | pub enum MappingError { 308 | #[error( 309 | "Timestamp is not representable in arrow: {value}\n\ 310 | Timestamps with nanoseconds precision are represented using a signed 64 Bit integer. This \ 311 | limits their range to values between 1677-09-21 00:12:44 and 2262-04-11 \ 312 | 23:47:16.854775807. The value returned from the database is outside of this range. \ 313 | Suggestions to fix this error either reduce the precision or fetch the values as text." 314 | )] 315 | OutOfRangeTimestampNs { value: NaiveDateTime }, 316 | #[error("Datasource returned invalid UTF-8. Lossy representation of value: {lossy_value}")] 317 | InvalidUtf8 { lossy_value: String }, 318 | } 319 | -------------------------------------------------------------------------------- /src/reader.rs: -------------------------------------------------------------------------------- 1 | use std::{convert::TryInto, sync::Arc}; 2 | 3 | use arrow::{ 4 | array::{ArrayRef, BooleanBuilder}, 5 | datatypes::{ 6 | DataType as ArrowDataType, Date32Type, Field, Float32Type, Float64Type, Int8Type, 7 | Int16Type, Int32Type, Int64Type, Time32SecondType, TimeUnit, TimestampMicrosecondType, 8 | TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt8Type, 9 | }, 10 | }; 11 | 12 | use log::debug; 13 | use odbc_api::{ 14 | Bit, DataType as OdbcDataType, ResultSetMetadata, 15 | buffers::{AnySlice, BufferDesc, Item}, 16 | }; 17 | use thiserror::Error; 18 | use time::{TimeMsI32, TimeNsI64, TimeUsI64, seconds_since_midnight}; 19 | 20 | mod binary; 21 | mod concurrent_odbc_reader; 22 | mod decimal; 23 | mod map_odbc_to_arrow; 24 | mod odbc_reader; 25 | mod text; 26 | mod time; 27 | mod to_record_batch; 28 | 29 | use crate::date_time::{ 30 | days_since_epoch, ms_since_epoch, ns_since_epoch, seconds_since_epoch, us_since_epoch, 31 | }; 32 | 33 | pub use self::{ 34 | binary::{Binary, FixedSizedBinary}, 35 | concurrent_odbc_reader::ConcurrentOdbcReader, 36 | decimal::Decimal, 37 | map_odbc_to_arrow::{MapOdbcToArrow, MappingError}, 38 | odbc_reader::{OdbcReader, OdbcReaderBuilder}, 39 | text::{TextEncoding, choose_text_strategy}, 40 | }; 41 | 42 | /// All decisions needed to copy data from an ODBC buffer to an Arrow Array 43 | pub trait ReadStrategy { 44 | /// Describes the buffer which is bound to the ODBC cursor. 45 | fn buffer_desc(&self) -> BufferDesc; 46 | 47 | /// Create an arrow array from an ODBC buffer described in [`Self::buffer_description`]. 
48 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result; 49 | } 50 | 51 | pub struct NonNullableBoolean; 52 | 53 | impl ReadStrategy for NonNullableBoolean { 54 | fn buffer_desc(&self) -> BufferDesc { 55 | BufferDesc::Bit { nullable: false } 56 | } 57 | 58 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 59 | let values = Bit::as_slice(column_view).unwrap(); 60 | let mut builder = BooleanBuilder::new(); 61 | for bit in values { 62 | builder.append_value(bit.as_bool()); 63 | } 64 | Ok(Arc::new(builder.finish())) 65 | } 66 | } 67 | 68 | pub struct NullableBoolean; 69 | 70 | impl ReadStrategy for NullableBoolean { 71 | fn buffer_desc(&self) -> BufferDesc { 72 | BufferDesc::Bit { nullable: true } 73 | } 74 | 75 | fn fill_arrow_array(&self, column_view: AnySlice) -> Result { 76 | let values = Bit::as_nullable_slice(column_view).unwrap(); 77 | let mut builder = BooleanBuilder::new(); 78 | for bit in values { 79 | builder.append_option(bit.copied().map(Bit::as_bool)) 80 | } 81 | Ok(Arc::new(builder.finish())) 82 | } 83 | } 84 | 85 | /// Allows setting limits for buffers bound to the ODBC data source. Check this out if you find that 86 | /// you get memory allocation, or zero sized column errors. Used than constructing a reader using 87 | /// [`crate::OdbcReaderBuilder`]. 88 | #[derive(Default, Debug, Clone, Copy)] 89 | pub struct BufferAllocationOptions { 90 | /// An upper limit for the size of buffers bound to variadic text columns of the data source. 91 | /// This limit does not (directly) apply to the size of the created arrow buffers, but rather 92 | /// applies to the buffers used for the data in transit. Use this option if you have e.g. 93 | /// `VARCHAR(MAX)` fields in your database schema. In such a case without an upper limit, the 94 | /// ODBC driver of your data source is asked for the maximum size of an element, and is likely 95 | /// to answer with either `0` or a value which is way larger than any actual entry in the column 96 | /// If you can not adapt your database schema, this limit might be what you are looking for. On 97 | /// windows systems the size is double words (16Bit), as windows utilizes an UTF-16 encoding. So 98 | /// this translates to roughly the size in letters. On non windows systems this is the size in 99 | /// bytes and the datasource is assumed to utilize an UTF-8 encoding. `None` means no upper 100 | /// limit is set and the maximum element size, reported by ODBC is used to determine buffer 101 | /// sizes. 102 | pub max_text_size: Option, 103 | /// An upper limit for the size of buffers bound to variadic binary columns of the data source. 104 | /// This limit does not (directly) apply to the size of the created arrow buffers, but rather 105 | /// applies to the buffers used for the data in transit. Use this option if you have e.g. 106 | /// `VARBINARY(MAX)` fields in your database schema. In such a case without an upper limit, the 107 | /// ODBC driver of your data source is asked for the maximum size of an element, and is likely 108 | /// to answer with either `0` or a value which is way larger than any actual entry in the 109 | /// column. If you can not adapt your database schema, this limit might be what you are looking 110 | /// for. This is the maximum size in bytes of the binary column. 111 | pub max_binary_size: Option, 112 | /// Set to `true` in order to trigger an [`ColumnFailure::TooLarge`] instead of a panic in case 113 | /// the buffers can not be allocated due to their size. 
This might have a performance cost for 114 | /// constructing the reader. `false` by default. 115 | pub fallibale_allocations: bool, 116 | } 117 | 118 | pub fn choose_column_strategy( 119 | field: &Field, 120 | query_metadata: &mut impl ResultSetMetadata, 121 | col_index: u16, 122 | buffer_allocation_options: BufferAllocationOptions, 123 | map_value_errors_to_null: bool, 124 | trim_fixed_sized_character_strings: bool, 125 | text_encoding: TextEncoding, 126 | ) -> Result, ColumnFailure> { 127 | let strat: Box = match field.data_type() { 128 | ArrowDataType::Boolean => { 129 | if field.is_nullable() { 130 | Box::new(NullableBoolean) 131 | } else { 132 | Box::new(NonNullableBoolean) 133 | } 134 | } 135 | ArrowDataType::Int8 => Int8Type::identical(field.is_nullable()), 136 | ArrowDataType::Int16 => Int16Type::identical(field.is_nullable()), 137 | ArrowDataType::Int32 => Int32Type::identical(field.is_nullable()), 138 | ArrowDataType::Int64 => Int64Type::identical(field.is_nullable()), 139 | ArrowDataType::UInt8 => UInt8Type::identical(field.is_nullable()), 140 | ArrowDataType::Float32 => Float32Type::identical(field.is_nullable()), 141 | ArrowDataType::Float64 => Float64Type::identical(field.is_nullable()), 142 | ArrowDataType::Date32 => Date32Type::map_infalliable(field.is_nullable(), days_since_epoch), 143 | ArrowDataType::Time32(TimeUnit::Second) => { 144 | Time32SecondType::map_infalliable(field.is_nullable(), seconds_since_midnight) 145 | } 146 | ArrowDataType::Time32(TimeUnit::Millisecond) => Box::new(TimeMsI32), 147 | ArrowDataType::Time64(TimeUnit::Microsecond) => Box::new(TimeUsI64), 148 | ArrowDataType::Time64(TimeUnit::Nanosecond) => Box::new(TimeNsI64), 149 | ArrowDataType::Utf8 => { 150 | let sql_type = query_metadata 151 | .col_data_type(col_index) 152 | .map_err(ColumnFailure::FailedToDescribeColumn)?; 153 | // Use a zero based index here, because we use it everywhere else there we communicate 154 | // with users. 155 | debug!("Relational type of column {}: {sql_type:?}", col_index - 1); 156 | let lazy_display_size = || query_metadata.col_display_size(col_index); 157 | // Use the SQL type first to determine buffer length. 158 | choose_text_strategy( 159 | sql_type, 160 | lazy_display_size, 161 | buffer_allocation_options.max_text_size, 162 | trim_fixed_sized_character_strings, 163 | text_encoding, 164 | )? 165 | } 166 | ArrowDataType::Decimal128(precision, scale @ 0..) 
=> { 167 | Box::new(Decimal::new(*precision, *scale)) 168 | } 169 | ArrowDataType::Binary => { 170 | let sql_type = query_metadata 171 | .col_data_type(col_index) 172 | .map_err(ColumnFailure::FailedToDescribeColumn)?; 173 | let length = sql_type.column_size(); 174 | let length = match (length, buffer_allocation_options.max_binary_size) { 175 | (None, None) => return Err(ColumnFailure::ZeroSizedColumn { sql_type }), 176 | (None, Some(limit)) => limit, 177 | (Some(len), None) => len.get(), 178 | (Some(len), Some(limit)) => { 179 | if len.get() < limit { 180 | len.get() 181 | } else { 182 | limit 183 | } 184 | } 185 | }; 186 | Box::new(Binary::new(length)) 187 | } 188 | ArrowDataType::Timestamp(TimeUnit::Second, _) => { 189 | TimestampSecondType::map_infalliable(field.is_nullable(), seconds_since_epoch) 190 | } 191 | ArrowDataType::Timestamp(TimeUnit::Millisecond, _) => { 192 | TimestampMillisecondType::map_infalliable(field.is_nullable(), ms_since_epoch) 193 | } 194 | ArrowDataType::Timestamp(TimeUnit::Microsecond, _) => { 195 | TimestampMicrosecondType::map_infalliable(field.is_nullable(), us_since_epoch) 196 | } 197 | ArrowDataType::Timestamp(TimeUnit::Nanosecond, _) => { 198 | TimestampNanosecondType::map_falliable( 199 | field.is_nullable(), 200 | map_value_errors_to_null, 201 | ns_since_epoch, 202 | ) 203 | } 204 | ArrowDataType::FixedSizeBinary(length) => { 205 | Box::new(FixedSizedBinary::new((*length).try_into().unwrap())) 206 | } 207 | unsupported_arrow_type => { 208 | return Err(ColumnFailure::UnsupportedArrowType( 209 | unsupported_arrow_type.clone(), 210 | )); 211 | } 212 | }; 213 | Ok(strat) 214 | } 215 | 216 | /// Read error related to a specific column 217 | #[derive(Error, Debug)] 218 | pub enum ColumnFailure { 219 | /// We are getting a display or column size from ODBC but it is not larger than 0. 220 | #[error( 221 | "The ODBC driver did not specify a sensible upper bound for the column. This usually \ 222 | happens for large variadic types (E.g. VARCHAR(max)). In other cases it can be a \ 223 | shortcoming of the ODBC driver. Try casting the column into a type with a sensible upper \ 224 | bound. `arrow-odbc` also allows the application to specify a generic upper bound, which it \ 225 | would automatically apply. The type of the column causing this error is {:?}.", 226 | sql_type 227 | )] 228 | ZeroSizedColumn { sql_type: OdbcDataType }, 229 | /// Unable to retrieve the column display size for the column. 230 | #[error( 231 | "Unable to deduce the maximum string length for the SQL Data Type reported by the ODBC \ 232 | driver. Reported SQL data type is: {:?}.\n Error fetching column display or octet size: \ 233 | {source}", 234 | sql_type 235 | )] 236 | UnknownStringLength { 237 | sql_type: OdbcDataType, 238 | source: odbc_api::Error, 239 | }, 240 | /// The type specified in the arrow schema is not supported to be fetched from the database. 241 | #[error( 242 | "Unsupported arrow type: `{0}`. This type can currently not be fetched from an ODBC data \ 243 | source by an instance of OdbcReader." 244 | )] 245 | UnsupportedArrowType(ArrowDataType), 246 | /// At ODBC api calls gaining information about the columns did fail. 247 | #[error( 248 | "An error occurred fetching the column description or data type from the metainformation \ 249 | attached to the ODBC result set:\n{0}" 250 | )] 251 | FailedToDescribeColumn(#[source] odbc_api::Error), 252 | #[error( 253 | "Column buffer is too large to be allocated. 
Tried to alloacte {num_elements} elements \ 254 | with {element_size} bytes in size each." 255 | )] 256 | TooLarge { 257 | num_elements: usize, 258 | element_size: usize, 259 | }, 260 | } 261 | 262 | impl ColumnFailure { 263 | /// Provides the error with additional context of Error with column name and index. 264 | pub fn into_crate_error(self, name: String, index: usize) -> crate::Error { 265 | crate::Error::ColumnFailure { 266 | name, 267 | index, 268 | source: self, 269 | } 270 | } 271 | } 272 | -------------------------------------------------------------------------------- /src/odbc_writer.rs: -------------------------------------------------------------------------------- 1 | use std::{borrow::Cow, cmp::min, sync::Arc}; 2 | 3 | use thiserror::Error; 4 | 5 | use arrow::{ 6 | array::Array, 7 | datatypes::{ 8 | DataType, Date32Type, Date64Type, Field, Float16Type, Float32Type, Float64Type, Int8Type, 9 | Int16Type, Int32Type, Int64Type, Schema, Time32MillisecondType, Time32SecondType, 10 | Time64MicrosecondType, Time64NanosecondType, TimeUnit, UInt8Type, 11 | }, 12 | error::ArrowError, 13 | record_batch::{RecordBatch, RecordBatchReader}, 14 | }; 15 | use odbc_api::{ 16 | ColumnarBulkInserter, Connection, ConnectionTransitions, Prepared, 17 | buffers::{AnyBuffer, AnySliceMut, BufferDesc}, 18 | handles::{AsStatementRef, StatementConnection, StatementImpl, StatementParent}, 19 | }; 20 | 21 | use crate::{ 22 | date_time::{NullableTimeAsText, epoch_to_date, sec_since_midnight_to_time}, 23 | decimal::{NullableDecimal128AsText, NullableDecimal256AsText}, 24 | odbc_writer::timestamp::insert_timestamp_strategy, 25 | }; 26 | 27 | use self::{ 28 | binary::VariadicBinary, 29 | boolean::boolean_to_bit, 30 | map_arrow_to_odbc::MapArrowToOdbc, 31 | text::{LargeUtf8ToNativeText, Utf8ToNativeText}, 32 | }; 33 | 34 | mod binary; 35 | mod boolean; 36 | mod map_arrow_to_odbc; 37 | mod text; 38 | mod timestamp; 39 | 40 | /// Fastest and most convinient way to stream the contents of arrow record batches into a database 41 | /// table. For usecase there you want to insert repeatedly into the same table from different 42 | /// streams it is more efficient to create an instance of [`self::OdbcWriter`] and reuse it. 43 | /// 44 | /// **Note:** 45 | /// 46 | /// If table or column names are derived from user input, be sure to sanatize the input in order to 47 | /// prevent SQL injection attacks. 48 | pub fn insert_into_table( 49 | connection: &Connection, 50 | batches: &mut impl RecordBatchReader, 51 | table_name: &str, 52 | batch_size: usize, 53 | ) -> Result<(), WriterError> { 54 | let schema = batches.schema(); 55 | let mut inserter = 56 | OdbcWriter::with_connection(connection, schema.as_ref(), table_name, batch_size)?; 57 | inserter.write_all(batches) 58 | } 59 | 60 | /// Generates an insert statement using the table and column names. 61 | /// 62 | /// `INSERT INTO (, , ...) VALUES (?, ?, ...)` 63 | fn insert_statement_text(table: &str, column_names: &[&'_ str]) -> String { 64 | // Generate statement text from table name and headline 65 | let column_names = column_names 66 | .iter() 67 | .map(|cn| quote_column_name(cn)) 68 | .collect::>(); 69 | let columns = column_names.join(", "); 70 | let values = column_names 71 | .iter() 72 | .map(|_| "?") 73 | .collect::>() 74 | .join(", "); 75 | // Do not finish the statement with a semicolon. There is anecodtical evidence of IBM db2 not 76 | // allowing the command, because it expects now multiple statements. 
77 | // See:
78 | format!("INSERT INTO {table} ({columns}) VALUES ({values})")
79 | }
80 |
81 | /// Wraps column name in quotes, if need be.
82 | fn quote_column_name(column_name: &str) -> Cow<'_, str> {
83 | // We do not want to apply quoting in case the string is already quoted. See:
84 | //
85 | //
86 | // Another approach would have been to apply quoting after detecting keywords. Yet the list of
87 | // reserved keywords is large. There is also the issue with different databases having different
88 | // quoting rules. So the strategy chosen here is to apply quoting in fewer situations rather than
89 | // more, so the user has more control over the final statement. This crate is about arrow and
90 | // odbc, less so about SQL dialects and statement construction.
91 | let is_already_quoted = || {
92 | (column_name.starts_with('"') && column_name.ends_with('"'))
93 | || column_name.starts_with('[') && column_name.ends_with(']')
94 | || column_name.starts_with('`') && column_name.ends_with('`')
95 | };
96 | let contains_invalid_characters = || column_name.contains(|c| !valid_in_column_name(c));
97 | let needs_quotes = contains_invalid_characters() && !is_already_quoted();
98 | if needs_quotes {
99 | Cow::Owned(format!("\"{column_name}\""))
100 | } else {
101 | Cow::Borrowed(column_name)
102 | }
103 | }
104 |
105 | /// Check if this character is allowed in an unquoted column name
106 | fn valid_in_column_name(c: char) -> bool {
107 | // See:
108 | //
109 | c.is_alphanumeric() || c == '@' || c == '$' || c == '#' || c == '_'
110 | }
111 |
112 | /// Creates an SQL insert statement from an arrow schema. The resulting statement will have one
113 | /// placeholder (`?`) for each column in the statement.
114 | ///
115 | /// **Note:**
116 | ///
117 | /// If the column name contains any character which would make it not a valid qualifier for transact
118 | /// SQL it will be wrapped in double quotes (`"`) within the insert schema. Valid names consist of
119 | /// alphanumeric characters, `@`, `$`, `#` and `_`.
120 | ///
121 | /// # Example
122 | ///
123 | /// ```
124 | /// use arrow_odbc::{
125 | /// insert_statement_from_schema,
126 | /// arrow::datatypes::{Field, DataType, Schema},
127 | /// };
128 | ///
129 | /// let field_a = Field::new("a", DataType::Int64, false);
130 | /// let field_b = Field::new("b", DataType::Boolean, false);
131 | ///
132 | /// let schema = Schema::new(vec![field_a, field_b]);
133 | /// let sql = insert_statement_from_schema(&schema, "MyTable");
134 | ///
135 | /// assert_eq!("INSERT INTO MyTable (a, b) VALUES (?, ?)", sql)
136 | /// ```
137 | ///
138 | /// This function is automatically invoked by [`crate::OdbcWriter::with_connection`].
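///
/// For illustration, a sketch of the quoting behavior described above (the column names here
/// are hypothetical and chosen to trigger quoting):
///
/// ```
/// use arrow_odbc::{
///     insert_statement_from_schema,
///     arrow::datatypes::{Field, DataType, Schema},
/// };
///
/// // A space is not valid in an unquoted name, so the first column gets wrapped in double
/// // quotes. The second column is already quoted with square brackets and is left untouched.
/// let field_a = Field::new("my column", DataType::Int64, false);
/// let field_b = Field::new("[b]", DataType::Boolean, false);
///
/// let schema = Schema::new(vec![field_a, field_b]);
/// let sql = insert_statement_from_schema(&schema, "MyTable");
///
/// assert_eq!("INSERT INTO MyTable (\"my column\", [b]) VALUES (?, ?)", sql)
/// ```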
139 | pub fn insert_statement_from_schema(schema: &Schema, table_name: &str) -> String {
140 | let fields = schema.fields();
141 | let num_columns = fields.len();
142 | let column_names: Vec<_> = (0..num_columns)
143 | .map(|i| fields[i].name().as_str())
144 | .collect();
145 | insert_statement_text(table_name, &column_names)
146 | }
147 |
148 | /// Emitted while writing values from arrow arrays into a table in the database
149 | #[derive(Debug, Error)]
150 | pub enum WriterError {
151 | #[error("Failure to bind the array parameter buffers to the statement.\n{0}")]
152 | BindParameterBuffers(#[source] odbc_api::Error),
153 | #[error("Failure to execute the sql statement, sending the data to the database.\n{0}")]
154 | ExecuteStatment(#[source] odbc_api::Error),
155 | #[error("An error occurred rebinding a parameter buffer to the sql statement.\n{0}")]
156 | RebindBuffer(#[source] odbc_api::Error),
157 | #[error("The arrow data type {0} is not supported for insertion.")]
158 | UnsupportedArrowDataType(DataType),
159 | #[error("An error occurred extracting a record batch from the record batch reader.\n{0}")]
160 | ReadingRecordBatch(#[source] ArrowError),
161 | #[error("Unable to parse '{time_zone}' into a valid IANA time zone.")]
162 | InvalidTimeZone { time_zone: Arc<str> },
163 | #[error("An error occurred preparing SQL statement. SQL:\n{sql}\n{source}")]
164 | PreparingInsertStatement {
165 | #[source]
166 | source: odbc_api::Error,
167 | sql: String,
168 | },
169 | }
170 |
171 | /// Inserts batches from an [`arrow::record_batch::RecordBatchReader`] into a database.
172 | pub struct OdbcWriter<S> {
173 | /// Prepared statement with bound array parameter buffers. Data is copied into these buffers
174 | /// until they are full. Then we execute the statement. This is repeated until we run out of
175 | /// data.
176 | inserter: ColumnarBulkInserter<S, AnyBuffer>,
177 | /// For each field in the arrow schema we decide on which buffer to use to send the parameters
178 | /// to the database, and need to remember how to copy the data from an arrow array to an odbc
179 | /// mutable buffer slice for any column.
180 | strategies: Vec<Box<dyn WriteStrategy>>,
181 | }
182 |
183 | impl<S> OdbcWriter<S>
184 | where
185 | S: AsStatementRef,
186 | {
187 | /// Construct a new ODBC writer using an already existing prepared statement. Usually you want to
188 | /// call a higher level constructor like [`Self::with_connection`]. Yet, this constructor is
189 | /// useful in two scenarios.
190 | ///
191 | /// 1. The prepared statement is already constructed and you do not want to spend the time to
192 | /// prepare it again.
193 | /// 2. You want to use the arrow arrays as array parameters for a statement, but that statement
194 | /// is not necessarily an INSERT statement with a simple 1-to-1 mapping of columns between
195 | /// table and arrow schema.
196 | ///
197 | /// # Parameters
198 | ///
199 | /// * `row_capacity`: The number of rows sent to the database in each chunk. With the exception
200 | /// of the last chunk, which may be smaller.
201 | /// * `schema`: Schema needs to have one column for each positional parameter of the statement
202 | /// and match the data which will be supplied to the instance later. Otherwise your code will
203 | /// panic.
204 | /// * `statement`: A prepared statement whose SQL text representation contains one placeholder
205 | /// for each column. The order of the placeholders must correspond to the order of the columns
206 | /// in the `schema`.
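///
/// # Example
///
/// A minimal sketch of constructing a writer from a prepared statement (the table and SQL text
/// are assumptions for illustration; connection setup and error handling are omitted):
///
/// ```ignore
/// use arrow_odbc::{OdbcWriter, arrow::datatypes::{DataType, Field, Schema}};
///
/// let schema = Schema::new(vec![
///     Field::new("a", DataType::Int64, false),
///     Field::new("b", DataType::Utf8, true),
/// ]);
/// // One placeholder per column, in the same order as in the schema.
/// let statement = connection.prepare("INSERT INTO MyTable (a, b) VALUES (?, ?)")?;
/// // Send up to 1000 rows per roundtrip to the database.
/// let mut writer = OdbcWriter::new(1000, &schema, statement)?;
/// ```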
207 | pub fn new(
208 | row_capacity: usize,
209 | schema: &Schema,
210 | statement: Prepared<S>,
211 | ) -> Result<Self, WriterError> {
212 | let strategies: Vec<_> = schema
213 | .fields()
214 | .iter()
215 | .map(|field| field_to_write_strategy(field.as_ref()))
216 | .collect::<Result<_, _>>()?;
217 | let descriptions = strategies.iter().map(|cws| cws.buffer_desc());
218 | let inserter = statement
219 | .into_column_inserter(row_capacity, descriptions)
220 | .map_err(WriterError::BindParameterBuffers)?;
221 |
222 | Ok(Self {
223 | inserter,
224 | strategies,
225 | })
226 | }
227 |
228 | /// Consumes all the batches in the record batch reader and sends them chunk by chunk to the
229 | /// database.
230 | pub fn write_all(
231 | &mut self,
232 | reader: impl Iterator<Item = Result<RecordBatch, ArrowError>>,
233 | ) -> Result<(), WriterError> {
234 | for result in reader {
235 | let record_batch = result.map_err(WriterError::ReadingRecordBatch)?;
236 | self.write_batch(&record_batch)?;
237 | }
238 | self.flush()?;
239 | Ok(())
240 | }
241 |
242 | /// Consumes a single batch and sends it chunk by chunk to the database. The last batch may not
243 | /// be consumed until [`Self::flush`] is called.
244 | pub fn write_batch(&mut self, record_batch: &RecordBatch) -> Result<(), WriterError> {
245 | let capacity = self.inserter.capacity();
246 | let mut remaining_rows = record_batch.num_rows();
247 | // The record batch may contain more rows than the capacity of our writer can hold. So we
248 | // need to be able to fill the buffers multiple times and send them to the database in
249 | // between.
250 | while remaining_rows != 0 {
251 | let chunk_size = min(capacity - self.inserter.num_rows(), remaining_rows);
252 | let param_offset = self.inserter.num_rows();
253 | self.inserter.set_num_rows(param_offset + chunk_size);
254 | let chunk = record_batch.slice(record_batch.num_rows() - remaining_rows, chunk_size);
255 | for (index, (array, strategy)) in chunk
256 | .columns()
257 | .iter()
258 | .zip(self.strategies.iter())
259 | .enumerate()
260 | {
261 | strategy.write_rows(param_offset, self.inserter.column_mut(index), array)?
262 | }
263 |
264 | // If we used up all capacity we send the parameters to the database and reset the
265 | // parameter buffers.
266 | if self.inserter.num_rows() == capacity {
267 | self.flush()?;
268 | }
269 | remaining_rows -= chunk_size;
270 | }
271 |
272 | Ok(())
273 | }
274 |
275 | /// The number of rows in an individual record batch need not match the capacity of
276 | /// the buffers owned by this writer. Therefore records are sometimes not sent to the database
277 | /// immediately, but rather we wait for the buffers to be filled when reading the next batch. Once
278 | /// we reach the last batch however, there is no "next batch" anymore. In that case we call this
279 | /// method in order to send the remainder of the records to the database as well.
280 | pub fn flush(&mut self) -> Result<(), WriterError> {
281 | self.inserter
282 | .execute()
283 | .map_err(WriterError::ExecuteStatment)?;
284 | self.inserter.clear();
285 | Ok(())
286 | }
287 | }
288 |
289 | impl<C> OdbcWriter<StatementConnection<C>>
290 | where
291 | C: StatementParent,
292 | {
293 | /// A writer which takes ownership of the connection and inserts the given schema into a table
294 | /// with matching column names.
295 | ///
296 | /// **Note:**
297 | ///
298 | /// If the column name contains any character which would make it not a valid qualifier for transact
299 | /// SQL it will be wrapped in double quotes (`"`) within the insert schema.
Valid names consist of 300 | /// alpha numeric characters, `@`, `$`, `#` and `_`. 301 | pub fn from_connection( 302 | connection: C2, 303 | schema: &Schema, 304 | table_name: &str, 305 | row_capacity: usize, 306 | ) -> Result 307 | where 308 | C2: ConnectionTransitions, 309 | { 310 | let sql = insert_statement_from_schema(schema, table_name); 311 | let statement = connection 312 | .into_prepared(&sql) 313 | .map_err(|source| WriterError::PreparingInsertStatement { source, sql })?; 314 | Self::new(row_capacity, schema, statement) 315 | } 316 | } 317 | 318 | impl<'o> OdbcWriter> { 319 | /// A writer which borrows the connection and inserts the given schema into a table with 320 | /// matching column names. 321 | /// 322 | /// **Note:** 323 | /// 324 | /// If the column name contains any character which would make it not a valid qualifier for transact 325 | /// SQL it will be wrapped in double quotes (`"`) within the insert schema. Valid names consist of 326 | /// alpha numeric characters, `@`, `$`, `#` and `_`. 327 | pub fn with_connection( 328 | connection: &'o Connection<'o>, 329 | schema: &Schema, 330 | table_name: &str, 331 | row_capacity: usize, 332 | ) -> Result { 333 | let sql = insert_statement_from_schema(schema, table_name); 334 | let statement = connection 335 | .prepare(&sql) 336 | .map_err(|source| WriterError::PreparingInsertStatement { source, sql })?; 337 | Self::new(row_capacity, schema, statement) 338 | } 339 | } 340 | 341 | pub trait WriteStrategy { 342 | /// Describe the buffer used to hold the array parameters for the column 343 | fn buffer_desc(&self) -> BufferDesc; 344 | 345 | /// # Parameters 346 | /// 347 | /// * `param_offset`: Start writing parameters at that position. Number of rows in the parameter 348 | /// buffer before inserting the current chunk. 349 | /// * `column_buf`: Buffer to write the data into 350 | /// * `array`: Buffer to read the data from 351 | fn write_rows( 352 | &self, 353 | param_offset: usize, 354 | column_buf: AnySliceMut<'_>, 355 | array: &dyn Array, 356 | ) -> Result<(), WriterError>; 357 | } 358 | 359 | fn field_to_write_strategy(field: &Field) -> Result, WriterError> { 360 | let is_nullable = field.is_nullable(); 361 | let strategy = match field.data_type() { 362 | DataType::Utf8 => Box::new(Utf8ToNativeText {}), 363 | DataType::Boolean => boolean_to_bit(is_nullable), 364 | DataType::LargeUtf8 => Box::new(LargeUtf8ToNativeText {}), 365 | DataType::Int8 => Int8Type::identical(is_nullable), 366 | DataType::Int16 => Int16Type::identical(is_nullable), 367 | DataType::Int32 => Int32Type::identical(is_nullable), 368 | DataType::Int64 => Int64Type::identical(is_nullable), 369 | DataType::UInt8 => UInt8Type::identical(is_nullable), 370 | DataType::Float16 => Float16Type::map_with(is_nullable, |half| half.to_f32()), 371 | DataType::Float32 => Float32Type::identical(is_nullable), 372 | DataType::Float64 => Float64Type::identical(is_nullable), 373 | DataType::Timestamp(time_unit, time_zone) => { 374 | insert_timestamp_strategy(is_nullable, &time_unit, time_zone.clone())? 
375 | }
376 | DataType::Date32 => Date32Type::map_with(is_nullable, epoch_to_date),
377 | DataType::Date64 => Date64Type::map_with(is_nullable, |days_since_epoch| {
378 | epoch_to_date(days_since_epoch.try_into().unwrap())
379 | }),
380 | DataType::Time32(TimeUnit::Second) => {
381 | Time32SecondType::map_with(is_nullable, sec_since_midnight_to_time)
382 | }
383 | DataType::Time32(TimeUnit::Millisecond) => {
384 | Box::new(NullableTimeAsText::<Time32MillisecondType>::new())
385 | }
386 | DataType::Time64(TimeUnit::Microsecond) => {
387 | Box::new(NullableTimeAsText::<Time64MicrosecondType>::new())
388 | }
389 | DataType::Time64(TimeUnit::Nanosecond) => {
390 | Box::new(NullableTimeAsText::<Time64NanosecondType>::new())
391 | }
392 | DataType::Binary => Box::new(VariadicBinary::new(1)),
393 | DataType::FixedSizeBinary(length) => {
394 | Box::new(VariadicBinary::new((*length).try_into().unwrap()))
395 | }
396 | DataType::Decimal128(precision, scale) => {
397 | Box::new(NullableDecimal128AsText::new(*precision, *scale))
398 | }
399 | DataType::Decimal256(precision, scale) => {
400 | Box::new(NullableDecimal256AsText::new(*precision, *scale))
401 | }
402 | unsupported => return Err(WriterError::UnsupportedArrowDataType(unsupported.clone())),
403 | };
404 | Ok(strategy)
405 | }
406 | --------------------------------------------------------------------------------
/src/reader/odbc_reader.rs:
--------------------------------------------------------------------------------
1 | use std::cmp::min;
2 |
3 | use arrow::{
4 | datatypes::SchemaRef,
5 | error::ArrowError,
6 | record_batch::{RecordBatch, RecordBatchReader},
7 | };
8 | use odbc_api::{BlockCursor, Cursor, buffers::ColumnarAnyBuffer};
9 |
10 | use crate::{BufferAllocationOptions, ConcurrentOdbcReader, Error};
11 |
12 | use super::{TextEncoding, to_record_batch::ToRecordBatch};
13 |
14 | /// Arrow ODBC reader. Implements the [`arrow::record_batch::RecordBatchReader`] trait so it can be
15 | /// used to fill Arrow arrays from an ODBC data source.
16 | ///
17 | /// This reader is generic over the cursor type so it can be used in cases where the cursor only
18 | /// borrows a statement handle (most likely the case when using prepared queries), or owns the
19 | /// statement handle (recommended when using one-shot queries, to have an easier life with the
20 | /// borrow checker).
21 | ///
22 | /// # Example
23 | ///
24 | /// ```no_run
25 | /// use arrow_odbc::{odbc_api::{Environment, ConnectionOptions}, OdbcReaderBuilder};
26 | ///
27 | /// const CONNECTION_STRING: &str = "\
28 | /// Driver={ODBC Driver 18 for SQL Server};\
29 | /// Server=localhost;\
30 | /// UID=SA;\
31 | /// PWD=My@Test@Password1;\
32 | /// ";
33 | ///
34 | /// fn main() -> Result<(), anyhow::Error> {
35 | ///
36 | /// let odbc_environment = Environment::new()?;
37 | ///
38 | /// // Connect with database.
39 | /// let connection = odbc_environment.connect_with_connection_string(
40 | /// CONNECTION_STRING,
41 | /// ConnectionOptions::default()
42 | /// )?;
43 | ///
44 | /// // This SQL statement does not require any arguments.
45 | /// let parameters = ();
46 | ///
47 | /// // Do not apply any timeout.
48 | /// let timeout_sec = None;
49 | ///
50 | /// // Execute query and create result set
51 | /// let cursor = connection
52 | /// .execute("SELECT * FROM MyTable", parameters, timeout_sec)?
53 | /// .expect("SELECT statement must produce a cursor");
54 | ///
55 | /// // Read result set as arrow batches. Infer Arrow types automatically using the meta
56 | /// // information of `cursor`.
57 | /// let arrow_record_batches = OdbcReaderBuilder::new() 58 | /// .build(cursor)?; 59 | /// 60 | /// for batch in arrow_record_batches { 61 | /// // ... process batch ... 62 | /// } 63 | /// Ok(()) 64 | /// } 65 | /// ``` 66 | pub struct OdbcReader { 67 | /// Converts the content of ODBC buffers into Arrow record batches 68 | converter: ToRecordBatch, 69 | /// Fetches values from the ODBC datasource using columnar batches. Values are streamed batch 70 | /// by batch in order to avoid reallocation of the buffers used for tranistion. 71 | batch_stream: BlockCursor, 72 | /// We remember if the user decided to use fallibale allocations or not in case we need to 73 | /// allocate another buffer due to a state transition towards [`ConcurrentOdbcReader`]. 74 | fallibale_allocations: bool, 75 | } 76 | 77 | impl OdbcReader { 78 | /// Consume this instance to create a similar ODBC reader which fetches batches asynchronously. 79 | /// 80 | /// Steals all resources from this [`OdbcReader`] instance, and allocates another buffer for 81 | /// transiting data from the ODBC data source to the application. This way one buffer can be 82 | /// written to by a dedicated system thread, while the other is read by the application. Use 83 | /// this if you want to trade memory for speed. 84 | /// 85 | /// # Example 86 | /// 87 | /// ```no_run 88 | /// use arrow_odbc::{odbc_api::{Environment, ConnectionOptions}, OdbcReaderBuilder}; 89 | /// use std::sync::OnceLock; 90 | /// 91 | /// // In order to fetch in a dedicated system thread we need a cursor with static lifetime, 92 | /// // this implies a static ODBC environment. 93 | /// static ENV: OnceLock = OnceLock::new(); 94 | /// 95 | /// const CONNECTION_STRING: &str = "\ 96 | /// Driver={ODBC Driver 18 for SQL Server};\ 97 | /// Server=localhost;\ 98 | /// UID=SA;\ 99 | /// PWD=My@Test@Password1;\ 100 | /// "; 101 | /// 102 | /// fn main() -> Result<(), anyhow::Error> { 103 | /// 104 | /// let odbc_environment = ENV.get_or_init(|| {Environment::new().unwrap() }); 105 | /// 106 | /// // Connect with database. 107 | /// let connection = odbc_environment.connect_with_connection_string( 108 | /// CONNECTION_STRING, 109 | /// ConnectionOptions::default() 110 | /// )?; 111 | /// 112 | /// // This SQL statement does not require any arguments. 113 | /// let parameters = (); 114 | /// 115 | /// // Do not apply any timeout. 116 | /// let timeout_sec = None; 117 | /// 118 | /// // Execute query and create result set 119 | /// let cursor = connection 120 | /// // Using `into_cursor` instead of `execute` takes ownership of the connection and 121 | /// // allows for a cursor with static lifetime. 122 | /// .into_cursor("SELECT * FROM MyTable", parameters, timeout_sec) 123 | /// .map_err(|e|e.error)? 124 | /// .expect("SELECT statement must produce a cursor"); 125 | /// 126 | /// // Construct ODBC reader ... 127 | /// let arrow_record_batches = OdbcReaderBuilder::new() 128 | /// .build(cursor)? 129 | /// // ... and make it concurrent 130 | /// .into_concurrent()?; 131 | /// 132 | /// for batch in arrow_record_batches { 133 | /// // ... process batch ... 134 | /// } 135 | /// Ok(()) 136 | /// } 137 | /// ``` 138 | pub fn into_concurrent(self) -> Result, Error> 139 | where 140 | C: Send + 'static, 141 | { 142 | ConcurrentOdbcReader::from_block_cursor( 143 | self.batch_stream, 144 | self.converter, 145 | self.fallibale_allocations, 146 | ) 147 | } 148 | 149 | /// Destroy the ODBC arrow reader and yield the underlyinng cursor object. 
150 | /// 151 | /// One application of this is to process more than one result set in case you executed a stored 152 | /// procedure. 153 | pub fn into_cursor(self) -> Result { 154 | let (cursor, _buffer) = self.batch_stream.unbind()?; 155 | Ok(cursor) 156 | } 157 | 158 | /// Size of the internal preallocated buffer bound to the cursor and filled by your ODBC driver 159 | /// in rows. Each record batch will at most have this many rows. Only the last one may have 160 | /// less. 161 | pub fn max_rows_per_batch(&self) -> usize { 162 | self.batch_stream.row_array_size() 163 | } 164 | } 165 | 166 | impl Iterator for OdbcReader 167 | where 168 | C: Cursor, 169 | { 170 | type Item = Result; 171 | 172 | fn next(&mut self) -> Option { 173 | match self.batch_stream.fetch_with_truncation_check(true) { 174 | // We successfully fetched a batch from the database. Try to copy it into a record batch 175 | // and forward errors if any. 176 | Ok(Some(batch)) => { 177 | let result_record_batch = self 178 | .converter 179 | .buffer_to_record_batch(batch) 180 | .map_err(|mapping_error| ArrowError::ExternalError(Box::new(mapping_error))); 181 | Some(result_record_batch) 182 | } 183 | // We ran out of batches in the result set. End the iterator. 184 | Ok(None) => None, 185 | // We had an error fetching the next batch from the database, let's report it as an 186 | // external error. 187 | Err(odbc_error) => Some(Err(odbc_to_arrow_error(odbc_error))), 188 | } 189 | } 190 | } 191 | 192 | impl RecordBatchReader for OdbcReader 193 | where 194 | C: Cursor, 195 | { 196 | fn schema(&self) -> SchemaRef { 197 | self.converter.schema().clone() 198 | } 199 | } 200 | 201 | /// Creates instances of [`OdbcReader`] based on [`odbc_api::Cursor`]. 202 | /// 203 | /// Using a builder pattern instead of passing structs with all required arguments to the 204 | /// constructors of [`OdbcReader`] allows `arrow_odbc` to introduce new paramters to fine tune the 205 | /// creation and behavior of the readers without breaking the code of downstream applications. 206 | #[derive(Default, Clone)] 207 | pub struct OdbcReaderBuilder { 208 | /// `Some` implies the user has set this explicitly using 209 | /// [`OdbcReaderBuilder::with_max_num_rows_per_batch`]. `None` implies that we have to choose 210 | /// for the user. 211 | max_num_rows_per_batch: usize, 212 | max_bytes_per_batch: usize, 213 | schema: Option, 214 | max_text_size: Option, 215 | max_binary_size: Option, 216 | map_value_errors_to_null: bool, 217 | dbms_name: Option, 218 | fallibale_allocations: bool, 219 | trim_fixed_sized_character_strings: bool, 220 | text_encoding: TextEncoding, 221 | } 222 | 223 | impl OdbcReaderBuilder { 224 | pub fn new() -> Self { 225 | // In the abscence of an explicit row limit set by the user we choose u16 MAX (65535). This 226 | // is a reasonable high value to allow for siginificantly reducing IO overhead as opposed to 227 | // row by row fetching already. Likely for many database schemas a memory limitation will 228 | // kick in before this limit. If not however it can still be dangerous to go beyond this 229 | // number. Some drivers use a 16Bit integer to count rows and you can run into overflow 230 | // errors if you use one of them. Once such issue occurred with SAP anywhere. 
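// As a purely illustrative calculation (numbers are hypothetical): with the 512 MiB default
// byte budget below, a row size of 1 KiB would allow for roughly 524,288 rows, so the
// 65,535 row cap is the limit which actually takes effect; for very wide rows it is the
// other way around. `buffer_size_in_rows` takes the minimum of both limits.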
231 | const DEFAULT_MAX_ROWS_PER_BATCH: usize = u16::MAX as usize;
232 | const DEFAULT_MAX_BYTES_PER_BATCH: usize = 512 * 1024 * 1024;
233 |
234 | OdbcReaderBuilder {
235 | max_num_rows_per_batch: DEFAULT_MAX_ROWS_PER_BATCH,
236 | max_bytes_per_batch: DEFAULT_MAX_BYTES_PER_BATCH,
237 | schema: None,
238 | max_text_size: None,
239 | max_binary_size: None,
240 | fallibale_allocations: false,
241 | map_value_errors_to_null: false,
242 | dbms_name: None,
243 | trim_fixed_sized_character_strings: false,
244 | text_encoding: TextEncoding::Auto,
245 | }
246 | }
247 |
248 | /// Limits the maximum number of rows which are fetched in a single roundtrip to the datasource.
249 | /// Higher numbers lower the IO overhead and may speed up your runtime, but also require larger
250 | /// preallocated buffers and use more memory. This value defaults to `65535` which is `u16` max.
251 | /// Some ODBC drivers use a 16Bit integer to count rows so this can avoid overflows. The
252 | /// improvements in saving IO overhead going above that number are estimated to be small. Your
253 | /// mileage may vary of course.
254 | pub fn with_max_num_rows_per_batch(&mut self, max_num_rows_per_batch: usize) -> &mut Self {
255 | self.max_num_rows_per_batch = max_num_rows_per_batch;
256 | self
257 | }
258 |
259 | /// In addition to a row size limit you may specify an upper bound in bytes for allocating the
260 | /// transit buffer. This is useful if you do not know the database schema, or your code has to
261 | /// work with different ones, but you know the amount of memory in your machine. This limit is
262 | /// applied in addition to [`OdbcReaderBuilder::with_max_num_rows_per_batch`]. Whichever of
263 | /// these leads to a smaller buffer is used. This defaults to 512 MiB.
264 | pub fn with_max_bytes_per_batch(&mut self, max_bytes_per_batch: usize) -> &mut Self {
265 | self.max_bytes_per_batch = max_bytes_per_batch;
266 | self
267 | }
268 |
269 | /// Describes the types of the Arrow Arrays in the record batches. It is also used to determine
270 | /// the CData type requested from the data source. If this is not explicitly set the type is inferred
271 | /// from the schema information provided by the ODBC driver. A reason for setting this
272 | /// explicitly could be that you have superior knowledge about your data compared to the ODBC
273 | /// driver. E.g. a type for an unsigned byte (`u8`) is not part of the ODBC standard. Therefore
274 | /// the driver might at best be able to tell you that this is an `i8`. If you want to still
275 | /// have `u8`s in the resulting array you need to specify the schema manually. Also many drivers
276 | /// struggle with reporting nullability correctly and just report every column as nullable.
277 | /// Explicitly specifying a schema can also compensate for such shortcomings if it turns out to
278 | /// be relevant.
279 | pub fn with_schema(&mut self, schema: SchemaRef) -> &mut Self {
280 | self.schema = Some(schema);
281 | self
282 | }
283 |
284 | /// In order for fast bulk fetching to work, `arrow-odbc` needs to know the size of the largest
285 | /// possible field in each column. It will do so itself automatically by considering the schema
286 | /// information. However, trouble arises if the schema contains unbounded variadic fields like
287 | /// `VARCHAR(MAX)` which can hold really large values. These have a very high upper element
288 | /// size, if any.
In order to work with such schemas we need a limit on what an upper
289 | /// bound of the actual values in the column is, as opposed to the largest value the
290 | /// column could theoretically store. There is no need for this to be very precise, but just
291 | /// knowing that a value would never exceed 4KiB rather than 2GiB is enough to allow for
292 | /// tremendous efficiency gains. The size of the text is specified in UTF-8 encoded bytes if
293 | /// using a narrow encoding (typically all non-windows systems) and in UTF-16 encoded pairs of
294 | /// bytes on systems using a wide encoding (typically windows). This is roughly the size in
295 | /// letters, yet if you are using a lot of emojis or other special characters this number might
296 | /// need to be larger.
297 | pub fn with_max_text_size(&mut self, max_text_size: usize) -> &mut Self {
298 | self.max_text_size = Some(max_text_size);
299 | self
300 | }
301 |
302 | /// An upper limit for the size of buffers bound to variadic binary columns of the data source.
303 | /// This limit does not (directly) apply to the size of the created arrow buffers, but rather
304 | /// applies to the buffers used for the data in transit. Use this option if you have e.g.
305 | /// `VARBINARY(MAX)` fields in your database schema. In such a case without an upper limit, the
306 | /// ODBC driver of your data source is asked for the maximum size of an element, and is likely
307 | /// to answer with either `0` or a value which is way larger than any actual entry in the
308 | /// column. If you cannot adapt your database schema, this limit might be what you are looking
309 | /// for. This is the maximum size in bytes of the binary column. If this method is not called no
310 | /// upper limit is set and the maximum element size reported by ODBC is used to determine
311 | /// buffer sizes.
312 | pub fn with_max_binary_size(&mut self, max_binary_size: usize) -> &mut Self {
313 | self.max_binary_size = Some(max_binary_size);
314 | self
315 | }
316 |
317 | /// Set to `true` in order to trigger a [`crate::ColumnFailure::TooLarge`] instead of a panic
318 | /// in case the buffers can not be allocated due to their size. This might have a performance
319 | /// cost for constructing the reader. `false` by default.
320 | pub fn with_fallibale_allocations(&mut self, fallibale_allocations: bool) -> &mut Self {
321 | self.fallibale_allocations = fallibale_allocations;
322 | self
323 | }
324 |
325 | /// Set to `true` in order to map a value in the database which can not be successfully
326 | /// converted into its target type to NULL, rather than emitting an external Arrow Error.
327 | /// E.g. currently mapping errors can happen if a datetime value is not in the range
328 | /// representable by arrow. Default is `false`.
329 | pub fn value_errors_as_null(&mut self, map_value_errors_to_null: bool) -> &mut Self {
330 | self.map_value_errors_to_null = map_value_errors_to_null;
331 | self
332 | }
333 |
334 | /// If set to `true` text in fixed sized character columns like e.g. CHAR is trimmed of
335 | /// whitespace before being converted into Arrow UTF-8 arrays. Default is `false`.
336 | pub fn trim_fixed_sized_characters(
337 | &mut self,
338 | fixed_sized_character_strings_are_trimmed: bool,
339 | ) -> &mut Self {
340 | self.trim_fixed_sized_character_strings = fixed_sized_character_strings_are_trimmed;
341 | self
342 | }
343 |
344 | /// Controls the encoding used for transferring text data from the ODBC data source to the
345 | /// application.
The resulting Arrow arrays will still be UTF-8 encoded. If you get garbage
346 | /// characters or invalid UTF-8 errors on non-windows systems, you may want to set the
347 | /// encoding to [`TextEncoding::Utf16`]. On windows systems you may want to set this to
348 | /// [`TextEncoding::Utf8`] to gain performance benefits, after you have verified that your
349 | /// system locale is set to UTF-8. The default is [`TextEncoding::Auto`].
350 | pub fn with_payload_text_encoding(&mut self, text_encoding: TextEncoding) -> &mut Self {
351 | self.text_encoding = text_encoding;
352 | self
353 | }
354 |
355 | /// If provided the name of the database management system (DBMS) is used to account for
356 | /// database specific behavior when determining the arrow schema.
357 | ///
358 | /// To determine the name of the DBMS you can call
359 | /// [`odbc_api::Connection::database_management_system_name`].
360 | pub fn with_dbms_name(&mut self, dbms_name: String) -> &mut Self {
361 | self.dbms_name = Some(dbms_name);
362 | self
363 | }
364 |
365 | /// No matter whether the user explicitly specified a limit in row size, a memory limit, both or
366 | /// neither: in order to construct a reader we need to decide on the buffer size in rows.
367 | fn buffer_size_in_rows(&self, bytes_per_row: usize) -> Result<usize, Error> {
368 | // If schema is empty, return before division by zero error.
369 | if bytes_per_row == 0 {
370 | return Ok(self.max_bytes_per_batch);
371 | }
372 | let rows_per_batch = self.max_bytes_per_batch / bytes_per_row;
373 | if rows_per_batch == 0 {
374 | Err(Error::OdbcBufferTooSmall {
375 | max_bytes_per_batch: self.max_bytes_per_batch,
376 | bytes_per_row,
377 | })
378 | } else {
379 | Ok(min(self.max_num_rows_per_batch, rows_per_batch))
380 | }
381 | }
382 |
383 | /// Constructs an [`OdbcReader`] which consumes the given cursor. The cursor will also be used
384 | /// to infer the Arrow schema if it has not been supplied explicitly.
385 | ///
386 | /// # Parameters
387 | ///
388 | /// * `cursor`: ODBC cursor used to fetch batches from the data source. The constructor will
389 | /// bind buffers to this cursor in order to perform bulk fetches from the source. This is
390 | /// usually faster than fetching results row by row as it saves roundtrips to the database.
391 | /// The type of these buffers will be inferred from the arrow schema. Not every arrow type is
392 | /// supported though.
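///
/// A sketch of a typical invocation (the cursor and the chosen limits are assumptions for
/// illustration; see the example at the top of this module for the connection setup):
///
/// ```ignore
/// let reader = OdbcReaderBuilder::new()
///     // Cap the transit buffer both by row count and by total size.
///     .with_max_num_rows_per_batch(10_000)
///     .with_max_bytes_per_batch(256 * 1024 * 1024)
///     // Needed if the result set contains e.g. VARCHAR(MAX) columns.
///     .with_max_text_size(4096)
///     .build(cursor)?;
/// ```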
393 | pub fn build(&self, mut cursor: C) -> Result, Error> 394 | where 395 | C: Cursor, 396 | { 397 | let buffer_allocation_options = BufferAllocationOptions { 398 | max_text_size: self.max_text_size, 399 | max_binary_size: self.max_binary_size, 400 | fallibale_allocations: self.fallibale_allocations, 401 | }; 402 | let converter = ToRecordBatch::new( 403 | &mut cursor, 404 | self.schema.clone(), 405 | buffer_allocation_options, 406 | self.map_value_errors_to_null, 407 | self.dbms_name.as_deref(), 408 | self.trim_fixed_sized_character_strings, 409 | self.text_encoding, 410 | )?; 411 | let bytes_per_row = converter.row_size_in_bytes(); 412 | let buffer_size_in_rows = self.buffer_size_in_rows(bytes_per_row)?; 413 | let row_set_buffer = 414 | converter.allocate_buffer(buffer_size_in_rows, self.fallibale_allocations)?; 415 | let batch_stream = cursor.bind_buffer(row_set_buffer).unwrap(); 416 | 417 | Ok(OdbcReader { 418 | converter, 419 | batch_stream, 420 | fallibale_allocations: self.fallibale_allocations, 421 | }) 422 | } 423 | } 424 | 425 | pub fn odbc_to_arrow_error(odbc_error: odbc_api::Error) -> ArrowError { 426 | ArrowError::from_external_error(Box::new(odbc_error)) 427 | } 428 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ## [21.0.0](https://github.com/pacman82/arrow-odbc/compare/v20.1.1...v21.0.0) - 2025-10-26 11 | 12 | ### Added 13 | 14 | - [**breaking**] Fetch Db2 Blobs as binary data and not text 15 | 16 | ### Other 17 | 18 | - introduce blob_on_db2 for local development 19 | 20 | ## [20.1.1](https://github.com/pacman82/arrow-odbc/compare/v20.1.0...v20.1.1) - 2025-10-25 21 | 22 | ### Other 23 | 24 | - Support arrow 57 25 | 26 | ## [20.1.0](https://github.com/pacman82/arrow-odbc/compare/v20.0.1...v20.1.0) - 2025-09-21 27 | 28 | ### Added 29 | 30 | 31 | - Then auto generating the insert statement into a table, column names which are already quoted, i.e. are enclosed in either rectangular brackets (`[`, `]`), double quotes (`"`) or backticks (`````) will not be quoted. This way if a column is named after a reserved keyword e.g. `values` or `from`, users could rename the column in the source to their quoted representation in order to avoid syntax errors in the statement. 32 | 33 | ### Other 34 | 35 | - Explain reasoning behind `is_already_quoted` in comment. 
36 | - Mention GitHub issue triggering the change to column name 37 | - Test for insterting into a table with a column using a reserved 38 | - Replace lazy_static with std::once_lock 39 | 40 | ## [20.0.1](https://github.com/pacman82/arrow-odbc/compare/v20.0.0...v20.0.1) - 2025-09-08 41 | 42 | ### Other 43 | 44 | - Support `odbc-api 20` 45 | 46 | ## [20.0.0](https://github.com/pacman82/arrow-odbc/compare/v19.1.1...v20.0.0) - 2025-08-21 47 | 48 | ### Added 49 | 50 | - [**breaking**] Support OdbcWriter taking ownership of Arc> 51 | 52 | ## [19.1.1](https://github.com/pacman82/arrow-odbc/compare/v19.1.0...v19.1.1) - 2025-08-21 53 | 54 | ### Other 55 | 56 | - *(deps)* update odbc-api requirement from >= 15, < 17 to >= 15, < 18 57 | 58 | ## [19.1.0](https://github.com/pacman82/arrow-odbc/compare/v19.0.0...v19.1.0) - 2025-08-17 59 | 60 | ### Added 61 | 62 | - Support `odbc-api 16` 63 | 64 | ### Fixed 65 | 66 | - missing other entry in Changelog 67 | - Missing changelog entry for 19.0.0 68 | 69 | ## [19.0.0](https://github.com/pacman82/arrow-odbc/compare/v18.1.2...v19.0.0) - 2025-08-14 70 | 71 | ### Other 72 | 73 | - [**breaking**] Support for `odbc-api 15`. Dropped support for older versions. 74 | 75 | ## [18.1.2](https://github.com/pacman82/arrow-odbc/compare/v18.1.1...v18.1.2) - 2025-08-05 76 | 77 | ### Other 78 | 79 | - Move automerge logic into build.yml. 80 | - *(deps)* update arrow requirement from >= 29, < 56 to >= 29, < 57 81 | - Auto merge dependabot PRs 82 | # Changelog 83 | 84 | ## [18.1.1](https://github.com/pacman82/arrow-odbc/compare/v18.1.0...v18.1.1) - 2025-06-27 85 | 86 | ### Other 87 | 88 | - *(deps)* update odbc-api requirement from >= 12, < 14 to >= 12, < 15 89 | 90 | ## [18.1.0](https://github.com/pacman82/arrow-odbc/compare/v18.0.0...v18.1.0) - 2025-06-24 91 | 92 | ### Added 93 | 94 | - Time64 is inserted with ns precision rather than with 7 fractional digits. 
95 | 96 | ## [18.0.0](https://github.com/pacman82/arrow-odbc/compare/v17.0.1...v18.0.0) - 2025-06-22 97 | 98 | ### Added 99 | 100 | - Inserting timestamps with timezones 101 | - [**breaking**] Add variant `WriterError::InvalidTimeZone` 102 | 103 | ## [17.0.1](https://github.com/pacman82/arrow-odbc/compare/v17.0.0...v17.0.1) - 2025-05-30 104 | 105 | ### Fixed 106 | 107 | - Remove eprintln statement, which had been left over from debugging 108 | 109 | ## [17.0.0](https://github.com/pacman82/arrow-odbc/compare/v16.0.2...v17.0.0) - 2025-05-30 110 | 111 | ### Added 112 | 113 | - [**breaking**] Provide possibility to provide dbms name, in order to autodetect time columns for MSSQL databases 114 | - [**breaking**] Support for Times64 Nanoseconds 115 | - [**breaking**] Support for wallclock time with milliseconds precision 116 | - [**breaking**] Support for time32 second 117 | 118 | ### Other 119 | 120 | - fix typos 121 | - time with precision 2 & 3 122 | - Human readable test assertion for fetch_time_1_psql 123 | - fetch_time_1_psql 124 | - change pssql time test to use Time(0) 125 | - replaced mssql connection string with psql in _psql test 126 | - Fetching time column from PostgreSQL 127 | - Use setup empty table with PostgreSQL 128 | - Introduce Dbms trait 129 | - Test for fetching time 130 | 131 | ## [16.0.2](https://github.com/pacman82/arrow-odbc/compare/v16.0.1...v16.0.2) - 2025-05-18 132 | 133 | ### Other 134 | 135 | - Support odbc-api 13 136 | - Integration test for fetching VARCHAR(1000) from PostgreSQL with special characters 137 | - Elaborate on bug cause 138 | - psql_varchar test now also reproduces behaviour then run in Windows 139 | - Trying to reproduce too large value for buffer. 140 | - Add PostgreSQL to test pipeline 141 | 142 | ## [16.0.1](https://github.com/pacman82/arrow-odbc/compare/v16.0.0...v16.0.1) - 2025-04-17 143 | 144 | ### Other 145 | 146 | - Support arrow 55 147 | 148 | ## [16.0.0](https://github.com/pacman82/arrow-odbc/compare/v15.1.1...v16.0.0) - 2025-04-05 149 | 150 | ### Added 151 | 152 | - [**breaking**] Update to support odbc-api 12. 153 | 154 | ### Other 155 | 156 | - Remove pin from chrono 157 | - update dependencies 158 | 159 | ## [15.1.1](https://github.com/pacman82/arrow-odbc/compare/v15.1.0...v15.1.1) - 2025-03-08 160 | 161 | ### Other 162 | 163 | - Fix docstrings for TextEncoding 164 | 165 | ## [15.1.0](https://github.com/pacman82/arrow-odbc/compare/v15.0.0...v15.1.0) - 2025-03-08 166 | 167 | ### Added 168 | 169 | - Support for explicitly choosing the transfer encoding. 
170 | 171 | ### Fixed 172 | 173 | - Pin chrono version to 0.4.39 to fix build 174 | 175 | ### Other 176 | 177 | - use input parameters rather than literals to setup preconditions for most tests 178 | - Fix "String or binary data would be truncated in table" on linux systems 179 | - fix test when block in fetch_narrow_data 180 | 181 | ## [15.0.0](https://github.com/pacman82/arrow-odbc/compare/v14.2.0...v15.0.0) - 2025-02-23 182 | 183 | ### Added 184 | 185 | - [**breaking**] Dedicated Mapping error for InvalidUtf8 186 | 187 | ### Other 188 | 189 | - [**breaking**] Update to edition 2024 190 | - Rewrite docs for with_max_text_size 191 | 192 | ## [14.2.0](https://github.com/pacman82/arrow-odbc/compare/v14.1.0...v14.2.0) - 2025-02-16 193 | 194 | ### Added 195 | 196 | - Support odbc-api 11 197 | 198 | ### Other 199 | 200 | - Use ODBC driver 18 for tests 201 | - Install unixODBC in release plz workflow 202 | - Install unixodbc-dev 203 | 204 | ## [14.1.0](https://github.com/pacman82/arrow-odbc/compare/v14.0.1...v14.1.0) - 2025-01-01 205 | 206 | ### Added 207 | 208 | - Support arrow 53 209 | 210 | ## [14.0.1](https://github.com/pacman82/arrow-odbc/compare/v14.0.0...v14.0.1) - 2024-12-09 211 | 212 | ### Fixed 213 | 214 | - Inserting multiple small batches now works, even if the second batch triggers rebinding the buffer due to element size. Previously in this scenario not all values already inserted were correctly copied into the new buffer. This caused strings to be replaced with `null` bytes. 215 | 216 | ## [14.0.0](https://github.com/pacman82/arrow-odbc/compare/v13.0.2...v14.0.0) - 2024-11-25 217 | 218 | ### Added 219 | 220 | - You can now compile with using the wide (i.e. UTF-16) character set versions of ODBC functions calls on non-windows platforms by specifying the `wide` feature. Similarly you can now complie using the narrow character set on windows platforms by specifying the `narrow` feature. The default remains `wide` on windows and `narrow` on non-windows targets. This required updating using `odbc-api 10.0.0`. The only thing thing changing however from `odbc-api` 9 to 10 are the default compilation of feature flags, so your code should just continue working. 221 | 222 | - [**breaking**] Update odbc-api `>= 9, < 10` -> `>= 10, < 11` 223 | 224 | ## [13.0.2](https://github.com/pacman82/arrow-odbc/compare/v13.0.1...v13.0.2) - 2024-11-24 225 | 226 | ### Fixed 227 | 228 | - Overflow in epoch to timestamp is fixed. It is now possible to insert 1600-06-18 23:12:44.123 into a database with ms precision 229 | 230 | ## [13.0.1](https://github.com/pacman82/arrow-odbc/compare/v13.0.0...v13.0.1) - 2024-11-20 231 | 232 | ### Fixed 233 | 234 | - Timestamps with fractional seconds now work even if they are older than unix epoch. 235 | 236 | ### Other 237 | 238 | - setup release-plz 239 | - use uppercase for changelog 240 | - Update thiserror requirement from 1.0.65 to 2.0.0 241 | 242 | ## 13.0.0 243 | 244 | - Update odbc-api `>= 6, < 9` -> `>= 9, < 10` 245 | 246 | ## 12.2.0 247 | 248 | - Update arrow `>= 29, < 53` -> `>= 29, < 54` 249 | 250 | ## 12.1.0 251 | 252 | - Enabling trimming of fixed sized character data via `OdbcReaderBuilder::trim_fixed_sized_characters`. 253 | 254 | ## 12.0.0 255 | 256 | - Enable mapping out of ranges dates to `NULL`. You can do so using `OdbcReaderBuilder::value_errors_as_null`. 257 | - Breaking: `arrow_schema_from` now takes an additional boolean parameter `map_value_errors_to_null`. 
258 | 259 | ## 11.2.0 260 | 261 | - Update odbc-api `>= 6, < 8` -> `>= 6, < 9` 262 | 263 | ## 11.1.0 264 | 265 | - Update arrow `>= 29, < 52` -> `>= 29, < 53` 266 | 267 | ## 11.0.0 268 | 269 | - Unsigned Tinyint are now mapped to `UInt8` instead of `Int8`. 270 | 271 | ## 10.0.0 272 | 273 | - Removed quirk `indicators_returned_from_bulk_fetch_are_memory_garbage`. Turns out the issue with IBM DB/2 drivers which triggered this can better be solved using a version of their ODBC driver which ends in `o` and is compiled with a 64Bit size for `SQLLEN`. 274 | - Remove `Quirks`. 275 | 276 | ## 9.0.0 277 | 278 | - Then generating the insert statement on behalf of the user quote column names which are not valid transact SQL qualifiers using double quotes (`"`) 279 | 280 | ## 8.3.0 281 | 282 | - Update odbc-api `>= 6, < 7` -> `>= 6, < 8` 283 | 284 | ## 8.2.0 285 | 286 | - `ConcurrentOdbcReader` is now `Send`. 287 | 288 | ## 8.1.0 289 | 290 | - Update arrow `>= 29, < 51` -> `>= 29, < 52` 291 | 292 | ## 8.0.0 293 | 294 | - Replace `odbc_api::Quirks` with `arrow_odbc::Quirks`. 295 | 296 | ## 7.0.0 297 | 298 | - Update odbc-api `>= 5, < 6` -> `>= 6, < 7` 299 | 300 | ## 6.1.0 301 | 302 | - Update arrow `>= 29, < 50` -> `>= 29, < 51` 303 | 304 | ## 6.0.0 305 | 306 | - Update odbc-api `>= 4, < 5` -> `>= 5, < 6` 307 | 308 | ## 5.0.3 309 | 310 | - Decimal parsing is now more robust. It does no longer require the text representation to have all trailing zeroes explicit in order to figure out the correct scale of the decimal. E.g. for a decimal with scale 5 a text representation of `10` would have been interpreted as `000.1` for scale five. Decimal parsing relied on databases making all trailing zeroes explicit e.g. `10.00000`. Oracle however does not do this, so parsing has been adopted to be more robust. 311 | 312 | ## 5.0.2 313 | 314 | - Fixes a bug introduced in 5.0.1, causing negative decimals not to be parsed correctly and to be returned as non-negative values. 315 | 316 | ## 5.0.1 317 | 318 | - Decimal parsing logic is now more robust and also works if the decimal point is not an actual point but a `,`. 319 | 320 | ## 5.0.0 321 | 322 | - Fixes a panic which occurred if database returned column names with invalid encodings. 323 | - Introduces new `Error` variant `EncodingInvalid`, which is returned in case a column name can not be interpreted as UTF-16 on windows platforms or UTF-8 on non-windows platforms. 324 | - Removes deprecated `WriteError::TimeZoneNotSupported`, `OdbcConcurrentReader::new`, `OdbcConcurrentReader::with_arrow_schema`, `OdbcConcurrentReader::with`, `OdbcReader::new`, `OdbcReader::with_arrow_schema`, `OdbcReader::with`. 325 | 326 | ## 4.1.1 327 | 328 | - In order to work with mandatory columns workaround for IBM DB2 returning memory garbage now no longer maps empty strings to zero. 329 | 330 | ## 4.1.0 331 | 332 | - Update odbc-api `>= 4, < 5` -> `>= 4.1, < 5` 333 | - Support for fetching text from IBM DB2. This has been difficult because of a bug in the IBM DB2 driver which causes it to return garbage memory instead of string lengths. A workaround can now be activated using `with_shims` on `OdbcReaderBuilder`. 334 | 335 | ## 4.0.0 336 | 337 | - Update odbc-api `>= 2.2, < 4` -> `>= 4, < 5` 338 | 339 | ## 3.1.2 340 | 341 | - An assumption has been removed, that unknown column types are always representable in ASCII. Now on Linux the system encoding is used which is assumed to be UTF-8 and on windows UTF-16. The same as for other text columns. 
342 | - MySQL seems to report negative display sizes for JSON columns (-4). This is normally used to indicate no upper bound in other parts of the ODBC standard. Arrow ODBC will now return a `ColumnFailure::ZeroSizedColumn` in these scenarios, if no buffer limit has been specified. 343 | 344 | ## 3.1.1 345 | 346 | - Prevent division by zero errors when using `OdbcReaderBuilder::buffer_size_in_rows` on empty schemas. 347 | 348 | ## 3.1.0 349 | 350 | - Update arrow `>= 29, < 49` -> `>= 29, < 50` 351 | 352 | ## 3.0.0 353 | 354 | - Introduce `OdbcReaderBuilder` as the prefered way to create instances of `OdbcReader`. 355 | - Allow for limiting ODBC buffer sizes using a memory limit expressed in bytes using `OdbcReaderBuilder::max_bytes_per_batch`. 356 | - Add new variant `Error::OdbcBufferTooSmall`. 357 | 358 | ## 2.3.0 359 | 360 | - Log memory usage per row 361 | 362 | ## 2.2.0 363 | 364 | - Update odbc-api `>= 2.2, < 3` -> `>= 2.2, < 4` 365 | 366 | ## 2.1.0 367 | 368 | - Update arrow `>= 29, < 48` -> `>= 29, < 49` 369 | 370 | ## 2.0.0 371 | 372 | - Update odbc-api `>= 0.56.1, < 3` -> `>= 2.2, < 3` 373 | 374 | ## 1.3.0 375 | 376 | - Add `ConcurrentOdbcReader` to allow fetching ODBC row groups concurrently. 377 | 378 | ## 1.2.1 379 | 380 | - Additional debug messages emmitted to indicate relational types reported by ODBC 381 | 382 | ## 1.2.0 383 | 384 | - Update odbc-api `>= 0.56.1, < 2` -> `>= 0.56.1, < 3` 385 | 386 | ## 1.1.0 387 | 388 | - Update arrow `>= 29, < 47` -> `>= 29, < 48` 389 | 390 | ## 1.0.0 391 | 392 | - Update odbc-api `>= 0.56.1, < 0.58.0` -> `>= 0.56.1, < 2` 393 | 394 | ## 0.28.12 395 | 396 | - `insert_statement_from_schema` will no longer end statements with a semicolon (`;`) as to not confuse an IBM db2 driver into thinking that multiple statements are intended to be executed. Thanks to @rosscoleman for reporting the issue and spending a lot of effort reproducing the issue. 397 | 398 | ## 0.28.11 399 | 400 | - Fix: Emit an error if nanoprecision timestamps are outside of valid range, rather than overflowing silently. 401 | - Update arrow `>= 29, < 46` -> `>= 29, < 47` 402 | 403 | ## 0.28.10 404 | 405 | - Update arrow `>= 29, < 45` -> `>= 29, < 46` 406 | 407 | ## 0.28.9 408 | 409 | - Better error messages which contain the original error emitted by `odbc-api` even then printed using the `Display` trait. 410 | 411 | ## 0.28.8 412 | 413 | - Update arrow `>= 29, < 44` -> `>= 29, < 45` 414 | 415 | ## 0.28.7 416 | 417 | - Update arrow `>= 29, < 43` -> `>= 29, < 44` 418 | 419 | ## 0.28.6 420 | 421 | - Update arrow `>= 29, < 42` -> `>= 29, < 43` 422 | 423 | ## 0.28.5 424 | 425 | - Update arrow `>= 29, < 39` -> `>= 29, < 42` 426 | 427 | ## 0.28.4 428 | 429 | - Update arrow `>= 29, < 39` -> `>= 29, < 41` 430 | 431 | ## 0.28.3 432 | 433 | - Update arrow `>= 29, < 39` -> `>= 29, < 40` 434 | 435 | ## 0.28.2 436 | 437 | - Update arrow `>= 29, < 38` -> `>= 37, < 39` 438 | 439 | ## 0.28.1 440 | 441 | - Update odbc-api `>= 0.56.1, < 0.57.0` -> `>= 0.56.1, < 0.58.0` 442 | 443 | ## 0.28.0 444 | 445 | - Update arrow `>= 29, < 37` -> `>= 37, < 38` 446 | 447 | ## 0.27.0 448 | 449 | - Update odbc-api `>= 0.52.3, < 0.57.0` -> `>= 0.56.1, < 0.57.0` 450 | - Introduced `OdbcReader::into_cursor` in order to enable processing stored procedures returning multiple result sets. 451 | 452 | ## 0.26.12 453 | 454 | - Update odbc-api `>= 0.52.3, < 0.56.0` -> `>= 0.52.3, < 0.57.0` 455 | 456 | ## 0.26.11 457 | 458 | - Support for `LargeUtf8` then inserting data. 
459 | 460 | ## 0.26.10 461 | 462 | - Update arrow `>= 29, < 36` -> `>= 29, < 37` 463 | 464 | ## 0.26.9 465 | 466 | - Fix code sample in Readme 467 | 468 | ## 0.26.8 469 | 470 | - Update odbc-api `>= 0.52.3, < 0.55.0` -> `>= 0.52.3, < 0.56.0` 471 | 472 | ## 0.26.7 473 | 474 | - Fix crate version for release 475 | 476 | ## 0.26.6 477 | 478 | - Update arrow `>= 29, < 34` -> `>= 29, < 36` 479 | 480 | ## 0.26.5 481 | 482 | - Update arrow `>= 29, < 33` -> `>= 29, < 34` 483 | 484 | ## 0.26.4 485 | 486 | - Update arrow `>= 29, < 31` -> `>= 29, < 33` 487 | - Depreacte `WriterError::TimeZonesNotSupported` in favor of `WriterError::UnsupportedArrowDataType`. 488 | 489 | ## 0.26.3 490 | 491 | - Update arrow `>= 29, < 31` -> `>= 29, < 32` 492 | 493 | ## 0.26.2 494 | 495 | - Update odbc-api `>= 0.52.3, < 0.54.0` -> `>= 0.52.3, < 0.55.0` 496 | 497 | ## 0.26.1 498 | 499 | - Update arrow `>= 29, < 30` -> `>= 29, < 31` 500 | 501 | ## 0.26.0 502 | 503 | - Update arrow `>= 28, < 30` -> `>= 29, < 30` 504 | - Update odbc-api `>= 0.52.3, < 0.53.0` -> `>= 0.52.3, < 0.54.0` 505 | 506 | ## 0.25.1 507 | 508 | - Update arrow `>= 25, < 29` -> `>= 28, < 30` 509 | 510 | ## 0.25.0 511 | 512 | - Update arrow `>= 25, < 28` -> `>= 28, < 29` 513 | 514 | ## 0.24.0 515 | 516 | - Update odbc-api `>= 0.50.0, < 0.53.0` -> `>= 0.52.3, < 0.53.0` 517 | 518 | ## 0.23.4 519 | 520 | - Update arrow `>=25, < 27` -> `>= 25, < 28` 521 | 522 | ## 0.23.3 523 | 524 | - Update odbc-api `>= 0.50.0, < 0.52.0` -> `>= 0.50.0, < 0.53.0` 525 | 526 | ## 0.23.2 527 | 528 | - Update odbc-api `>= 0.50.0, < 0.51.0` -> `>= 0.50.0, < 0.52.0` 529 | 530 | ## 0.23.1 531 | 532 | - Update arrow `>= 25, < 26` -> `>=25, < 27` 533 | 534 | ## 0.23.0 535 | 536 | - Update odbc-api `>= 0.45.0, < 0.51.0` -> `>= 0.50.0, < 0.51.0` 537 | - Update arrow `>= 22, < 25` -> `>= 25, < 26` 538 | 539 | ## 0.22.3 540 | 541 | - Update odbc-api `>= 0.45.0, < 0.50.0` -> `>= 0.45.0, < 0.51.0` 542 | 543 | ## 0.22.2 544 | 545 | - Update arrow `>= 22, < 23` -> `>= 22, < 25` 546 | 547 | ## 0.22.1 548 | 549 | - Update arrow `>= 22, < 23` -> `>= 22, < 24` 550 | 551 | ## 0.22.0 552 | 553 | - Update arrow `>= 21, < 22` -> `>= 22, < 23` 554 | 555 | ## 0.21.1 556 | 557 | - Update odbc-api `>= 0.45.0, < 0.49.0` -> `>= 0.45.0, < 0.50.0` 558 | 559 | ## 0.21.0 560 | 561 | - Update arrow `>= 20, < 21` -> `>= 21, < 22` 562 | - Fix: `OdbcWriter::inserter` had only been public by accident. 563 | 564 | ## 0.20.0 565 | 566 | - Use `narrow` text on non-windows platforms by default. Connection strings, queries and error messages are assumed to be UTF-8 and not transcoded to and from UTF-16. 567 | 568 | ## 0.19.3 569 | 570 | - Update odbc-api `>= 0.45.0, < 0.48.0` -> `>= 0.45.0, < 0.49.0` 571 | 572 | ## 0.19.2 573 | 574 | - Update odbc-api `>= 0.45.0, < 0.46.0` -> `>= 0.45.0, < 0.48.0` 575 | 576 | ## 0.19.1 577 | 578 | - Update odbc-api `>= 0.45.0, < 0.46.0` -> `>= 0.45.0, < 0.47.0` 579 | 580 | ## 0.19.0 581 | 582 | - Update arrow `>= 19, < 20` -> `>= 20, < 21` 583 | 584 | ## 0.18.1 585 | 586 | - Support for inserting `Decimal256`. 587 | 588 | ## 0.18.0 589 | 590 | - Update arrow `>= 7.0.0, < 19` -> `>= 19, < 20` 591 | 592 | ## 0.17.2 593 | 594 | - Update arrow `>= 7.0.0, < 18` -> `>= 7.0.0, < 19` 595 | 596 | ## 0.17.1 597 | 598 | - Update arrow `>= 7.0.0, < 17` -> `>= 7.0.0, < 18` 599 | 600 | ## 0.17.0 601 | 602 | - Update odbc-api `>= 0.44.3, < 0.45` -> `>= 0.45.0, < 0.46.0` 603 | - Allow for creating an `OdbcWriter` which takes ownership of the connection using `OdbcWriter::from_connection`. 
604 | 605 | ## 0.16.0 606 | 607 | - Support for inserting `RecordBatch`es into a database table. 608 | 609 | ## 0.15.0 610 | 611 | - Update odbc-api `>= 0.40.2, < 0.45` -> `>= 0.44.3, < 0.45` 612 | - `unstable`: prototype for inserting arrow arrays into ODBC 613 | - Update arrow `>= 7.0.0, < 16` -> `>= 7.0.0, < 17` 614 | 615 | ## 0.14.0 616 | 617 | - `arrow_schema_from` now requires an exclusive reference (`&mut`) to `ResultSetMetadata`. 618 | - Update odbc-api `>= 0.40.2, < 0.44` -> `>= 0.40.2, < 0.45` 619 | 620 | ## 0.13.5 621 | 622 | - Update odbc-api `>= 0.40.2, < 0.43` -> `>= 0.40.2, < 0.44` 623 | 624 | ## 0.13.4 625 | 626 | - Update arrow `>= 7.0.0, < 15` -> `>= 7.0.0, < 16` 627 | 628 | ## 0.13.3 629 | 630 | - Update odbc-api = `>= 0.40.2, < 0.42` -> `>= 0.40.2, < 0.43` 631 | 632 | ## 0.13.2 633 | 634 | - Update odbc-api `>= 0.40 < 0.41` -> `>= 0.40.2, < 0.42` 635 | 636 | ## 0.13.1 637 | 638 | - Update arrow `>= 7.0.0, < 14` -> `>= 7.0.0, < 15` 639 | 640 | ## 0.13.0 641 | 642 | - `panic` is now default behaviour on allocation errors. Activate `fallibale_allocations` in the `BufferAllocationOptions` in order to get a recoverable error instead. 643 | 644 | ## 0.12.0 645 | 646 | - Update odbc-api `>= 0.39, < 0.40` -> `>= 0.40 < 0.41` 647 | 648 | ## 0.11.0 649 | 650 | - Update odbc-api `>= 0.38, < 0.39` -> `>= 0.39, < 0.40` 651 | - Support for fetching values from `VARCHAR(max)` and `VARBINARY(max)` columns, through specifying upper limits using `BufferAllocationOptions` in `OdbcReader::with`. 652 | 653 | ## 0.10.0 654 | 655 | - Update odbc-api `>= 0.36, < 0.37` -> `>= 0.38, < 0.39` 656 | - Recoverable errors if allocation for binary or text columns fails. 657 | 658 | ## 0.9.2 659 | 660 | - Update arrow `>= 7.0.0, < 10` -> `>= 7.0.0, < 13` 661 | 662 | ## 0.9.1 663 | 664 | - Update arrow `>= 7.0.0, < 10` -> `>= 7.0.0, < 12` 665 | 666 | ## 0.9.0 667 | 668 | - Update odbc-api `>= 0.33.0, < 0.36` -> `0.36 < 0.37` 669 | 670 | ## 0.8.5 671 | 672 | - Update arrow `>= 7.0.0, < 10` -> `>= 7.0.0, < 11` 673 | 674 | ## 0.8.4 675 | 676 | - Update odbc-api `>= 0.33.0, < 0.35` -> `>= 0.33.0, < 0.36` 677 | 678 | ## 0.8.3 679 | 680 | - Update arrow `>= 7.0.0, < 8` -> `>= 7.0.0, < 10` 681 | 682 | ## 0.8.2 683 | 684 | - Update odbc-api `>= 0.31.0, < 0.33` -> `>= 0.33.0, < 0.35` 685 | 686 | ## 0.8.1 687 | 688 | - Update arrow `>= 6.1.0, < 7` -> `>= 7.0.0, < 8` 689 | 690 | ## 0.8.0 691 | 692 | - Use Rust edition 2021 693 | - Update arrow `>= 6.1.0, < 7` -> `>= 7.0.0, < 8` 694 | - Update odbc-api `>= 0.31.0, < 0.33` -> `>= 0.33.0, < 0.34` 695 | 696 | ## 0.7.2 697 | 698 | - Fix: Formatting of error message for `ZeroSizedColumn`. 699 | 700 | ## 0.7.1 701 | 702 | - `Error::ColumnFailure` now prints also the errors cause. 703 | 704 | ## 0.7.0 705 | 706 | - `InvalidDisplaySize` replaced with `ZeroSizedColumn`. 707 | - Refactored error handling, to have separate variant for column specific errors. 708 | 709 | ## 0.6.4 710 | 711 | - Base allocations of text columns on column size instead of octet length. 712 | 713 | ## 0.6.3 714 | 715 | - Fixed an issue there not enough memory to hold the maximum string size has been allocated, if querying a VARCHAR column on windows or an NVARCHAR column on a non-windows platform. 716 | 717 | ## 0.6.2 718 | 719 | - Update arrow v6.0.0 -> `>= 6.1.0, < 7` 720 | - Update odbc-api v0.31.0 -> `>= 0.31.0, < 0.33` 721 | 722 | ## 0.6.1 723 | 724 | - Fix: There had been issue causing an overflow for timestamps with Microseconds precision. 
725 |
726 | ## 0.6.0
727 |
728 | - Update odbc-api v0.30.0 -> v0.31.0
729 |
730 | ## 0.5.0
731 |
732 | - Update arrow v6.0.0 -> v6.1.0
733 | - Update odbc-api v0.29.0 -> v0.30.0
734 | - Introduced `arrow_schema_from` to support inferring arrow schemas without creating an `OdbcReader`.
735 |
736 | ## 0.4.1
737 |
738 | - Estimate memory usage of text columns more accurately.
739 |
740 | ## 0.4.0
741 |
742 | - Update arrow v5.4.0 -> v6.0.0
743 |
744 | ## 0.3.0
745 |
746 | - Update arrow v5.4.0 -> v5.5.0
747 | - Update odbc-api v0.28.0 -> v0.29.0
748 |
749 | ## 0.2.1
750 |
751 | - Updated code examples to use the safe Environment construction introduced in `odbc-api` version 0.28.3
752 |
753 | ## 0.2.0
754 |
755 | - `odbc-api` version 0.28.0
756 | - `arrow` version 5.4.0
757 |
758 | ## 0.1.2
759 |
760 | - Support fixed sized binary types.
761 |
762 | ## 0.1.1
763 |
764 | - Add Readme path to manifest
765 |
766 | ## 0.1.0
767 |
768 | Initial release
769 |
770 | Allows for fetching arrow batches from ODBC data sources
771 |
772 | - `arrow` version 5.3.0
773 | - `odbc-api` version 0.27.3
774 | --------------------------------------------------------------------------------