├── .github └── workflows │ └── ci.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── dtparse └── __init__.py ├── setup.cfg ├── setup.py ├── src └── lib.rs └── tests ├── test_call_signature.py ├── test_invalid_inputs.py ├── test_performance.py └── test_valid_inputs.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | tags: 8 | - '**' 9 | pull_request: {} 10 | 11 | jobs: 12 | build: 13 | name: build py${{ matrix.python-version }} on ${{ matrix.platform || matrix.os }} 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | os: 18 | - ubuntu 19 | - macos 20 | - windows 21 | python-version: 22 | - '3.6' 23 | - '3.7' 24 | - '3.8' 25 | - '3.9' 26 | wheels-dir: 27 | - '.wheels' 28 | include: 29 | - os: ubuntu 30 | platform: linux 31 | - os: windows 32 | ls: dir 33 | - python-version: 3.6 34 | cibw-version: cp36 35 | - python-version: 3.7 36 | cibw-version: cp37 37 | - python-version: 3.8 38 | cibw-version: cp38 39 | - python-version: 3.9 40 | cibw-version: cp39 41 | 42 | runs-on: ${{ format('{0}-latest', matrix.os) }} 43 | steps: 44 | - uses: actions/checkout@v2 45 | 46 | - name: set up python 47 | uses: actions/setup-python@v1 48 | with: 49 | python-version: ${{ matrix.python-version }} 50 | 51 | - name: set up rust 52 | uses: actions-rs/toolchain@v1 53 | with: 54 | toolchain: 1.50.0 55 | profile: minimal 56 | default: true 57 | override: true 58 | 59 | - name: install python dependencies 60 | run: | 61 | pip install -U setuptools setuptools-rust wheel cibuildwheel==1.7.0 62 | 63 | - name: build sdist 64 | run: | 65 | python setup.py sdist 66 | 67 | - name: create wheels dir 68 | run: | 69 | mkdir ${{ matrix.wheels-dir }} 70 | 71 | - name: build ${{ matrix.platform || matrix.os }} binaries 72 | run: python -m cibuildwheel --output-dir ${{ matrix.wheels-dir }} 73 | env: 74 | CIBW_BUILD: '${{ matrix.cibw-version }}-*' 75 | CIBW_SKIP: 
'*-win32' 76 | CIBW_PLATFORM: ${{ matrix.platform || matrix.os }} 77 | CIBW_ENVIRONMENT: 'PATH="$HOME/.cargo/bin:$PATH"' 78 | CIBW_ENVIRONMENT_WINDOWS: 'PATH="$UserProfile\.cargo\bin;$PATH"' 79 | CIBW_BEFORE_BUILD: > 80 | rustup show 81 | CIBW_BEFORE_BUILD_LINUX: > 82 | curl https://sh.rustup.rs -sSf | sh -s -- --profile=minimal -y && 83 | rustup show 84 | CIBW_TEST_COMMAND: "pytest {project}/tests" 85 | CIBW_TEST_EXTRAS: test 86 | 87 | - name: build windows 32bit binaries 88 | if: matrix.os == 'windows' 89 | run: cibuildwheel --output-dir ${{ matrix.wheels-dir }} 90 | env: 91 | CIBW_BUILD: '${{ matrix.cibw-version }}-win32' 92 | CIBW_PLATFORM: windows 93 | CIBW_ENVIRONMENT: 'PATH="$UserProfile\.cargo\bin;$PATH"' 94 | CIBW_BEFORE_BUILD: > 95 | rustup default stable-i686-pc-windows-msvc && 96 | rustup show 97 | CIBW_TEST_COMMAND: "pytest {project}/tests" 98 | CIBW_TEST_EXTRAS: test 99 | 100 | - name: list wheels 101 | run: ${{ matrix.ls || 'ls -lh' }} ${{ matrix.wheels-dir }} 102 | 103 | - name: twine check 104 | run: | 105 | pip install -U twine 106 | twine check ${{ matrix.wheels-dir }}/* 107 | 108 | - name: Upload to github releases 109 | if: startsWith(github.ref, 'refs/tags/') 110 | uses: svenstaro/upload-release-action@v2 111 | with: 112 | repo_token: ${{ secrets.GITHUB_TOKEN }} 113 | file: ${{ matrix.wheels-dir }}/* 114 | file_glob: true 115 | tag: ${{ github.ref }} 116 | 117 | - name: upload to pypi 118 | if: startsWith(github.ref, 'refs/tags/') 119 | run: twine upload ${{ matrix.wheels-dir }}/* 120 | env: 121 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 122 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 123 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | **/*.rs.bk 3 | .idea 4 | .python-version 5 | __pycache__ 6 | *.so 7 | Cargo.lock 8 | .benchmarks/ 9 | .cache/ 10 | *.egg-info/ 11 | projectFilesBackup/ 12 | 
benchmark*.svg 13 | build/ 14 | *py[cdo] 15 | venv/ 16 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Konstantin Gukov "] 3 | name = "python-dtparse" 4 | version = "1.3.2" 5 | 6 | [dependencies] 7 | pyo3 = "0.13.1" 8 | chrono = "0.4.19" 9 | 10 | [lib] 11 | name = "dtparse" 12 | crate-type = ["cdylib"] 13 | 14 | [features] 15 | default = ["pyo3/extension-module"] 16 | 17 | [profile.dev] 18 | opt-level = 3 19 | 20 | [profile.release] 21 | opt-level = 3 22 | 23 | [profile.test] 24 | opt-level = 3 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Konstantin Gukov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dtparse 2 | 3 | Fast datetime parser for Python written in Rust. Parses 10x-15x faster than `datetime.strptime`. 4 | 5 | It isn't a drop-in replacement for `datetime.strptime`, although they work similarly most of the time. 6 | Instead, think of it as a library of its own. 7 | 8 | The full list of supported specifiers (`%Y`, `%m`, `%d`, ...) can be found in the [chrono documentation](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html). 9 | They are a bit different from [Python's](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes). 10 | 11 | ## Notable differences from `datetime.strptime` 12 | 13 | ### Required parts of the template 14 | 15 | `dtparse.parse` requires hours, minutes, and the full date to be present in the template. 16 | 17 | For `datetime.strptime`, __all__ parts of the template are optional. 18 | 19 | ```python 20 | datetime.strptime('2018/01/02 12', '%Y/%m/%d %H') # no minutes in the template 21 | # datetime(2018, 1, 2, 12, 0) 22 | 23 | dtparse.parse('2018/01/02 12', '%Y/%m/%d %H') 24 | # ValueError: input is not enough for unique date and time 25 | ``` 26 | 27 | ### Specifiers for sub-second precision 28 | 29 | Python uses `%f` as a specifier for microseconds, zero-padded on the left. 30 | Chrono's `%f` is different - it is the whole number of nanoseconds since the last second, so `123456` is read as 123 microseconds. 
31 | 32 | ```python 33 | datetime.strptime('2004-12-01 13:02:47.123456', '%Y-%m-%d %H:%M:%S.%f') 34 | # datetime(2004, 12, 1, 13, 2, 47, 123456) 35 | 36 | dtparse.parse('2004-12-01 13:02:47.123456', '%Y-%m-%d %H:%M:%S.%f') 37 | # datetime(2004, 12, 1, 13, 2, 47, 123) 38 | ``` 39 | 40 | On the other hand, Python's `.%f` works very similarly to chrono's `%.f`. 41 | The only difference is that chrono accepts up to 9 digits, because it parses 42 | nanoseconds, not microseconds. Digits beyond microsecond precision are dropped from the resulting `datetime`. 43 | 44 | ```python 45 | datetime.strptime('2004-12-01 13:02:47.123456', '%Y-%m-%d %H:%M:%S.%f') 46 | # datetime(2004, 12, 1, 13, 2, 47, 123456) 47 | 48 | dtparse.parse('2004-12-01 13:02:47.123456', '%Y-%m-%d %H:%M:%S%.f') 49 | # datetime(2004, 12, 1, 13, 2, 47, 123456) 50 | ``` 51 | ```python 52 | datetime.strptime('2004-12-01 13:02:47.123456789', '%Y-%m-%d %H:%M:%S.%f') 53 | # ValueError: unconverted data remains: 789 54 | 55 | dtparse.parse('2004-12-01 13:02:47.123456789', '%Y-%m-%d %H:%M:%S%.f') 56 | # datetime(2004, 12, 1, 13, 2, 47, 123456) 57 | ``` 58 | 59 | 60 | ## How does it work? 61 | 62 | It uses the [chrono](https://github.com/chronotope/chrono) library under the hood. 63 | Binds Rust to Python via [PyO3](https://github.com/PyO3/pyo3). 64 | Wheels are built and distributed using GitHub Actions, see the workflow 65 | [here](.github/workflows/ci.yml). 66 | 67 | ## Origins 68 | 69 | This library was written for my talk on Rust extensions at [Yandex Pytup](https://events.yandex.ru/events/meetings/28-03-2018/). 70 | It is intended to be both a stable library people can use, and a reference for those interested in 71 | extending Python with Rust. 72 | -------------------------------------------------------------------------------- /dtparse/__init__.py: -------------------------------------------------------------------------------- 1 | """Fast datetime parser for Python written in Rust. 
2 | 3 | Signature: 4 | 5 | def parse(str_datetime: str, fmt: str) -> datetime: 6 | pass 7 | 8 | For example: 9 | 10 | parse('2004-12-01 13:02:47', '%Y-%m-%d %H:%M:%S') 11 | or 12 | parse('2004-12-01 13:02:47.123456', '%Y-%m-%d %H:%M:%S%.f') 13 | 14 | The format string is built a bit differently than for datetime.strptime. 15 | Please see the list of allowed format specifiers (%Y, %m, %d, ...) here: 16 | https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html 17 | """ 18 | from __future__ import absolute_import 19 | 20 | from ._dtparse import parse # import from the compiled library 21 | 22 | __all__ = ['parse'] 23 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | addopts = --benchmark-histogram --benchmark-min-time=0.00000001 --benchmark-calibration-precision=1 3 | 4 | [bdist_wheel] 5 | universal = 1 6 | 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from setuptools import setup 4 | from setuptools.command.test import test as TestCommand 5 | 6 | try: 7 | from setuptools_rust import RustExtension 8 | except ImportError: 9 | import subprocess 10 | errno = subprocess.call([sys.executable, '-m', 'pip', 'install', 'setuptools-rust']) 11 | if errno: 12 | print("Please install setuptools-rust package") 13 | raise SystemExit(errno) 14 | else: 15 | from setuptools_rust import RustExtension 16 | 17 | 18 | class PyTest(TestCommand): 19 | user_options = [] 20 | 21 | def run(self): 22 | self.run_command("test_rust") 23 | 24 | import subprocess 25 | import sys 26 | errno = subprocess.call([sys.executable, '-m', 'pytest', 'tests']) 27 | raise SystemExit(errno) 28 | 29 | 30 | setup_requires = ['setuptools-rust>=0.10.3'] 31 | install_requires = [] 32 | tests_require = 
install_requires + ['ciso8601', 'pytest', 'pytest-benchmark[histogram]'] 33 | 34 | setup( 35 | name='dtparse', 36 | version='1.3.2', 37 | classifiers=[ 38 | 'License :: OSI Approved :: MIT License', 39 | 'Development Status :: 3 - Alpha', 40 | 'Intended Audience :: Developers', 41 | 'Programming Language :: Python', 42 | 'Programming Language :: Rust', 43 | ], 44 | packages=['dtparse'], 45 | rust_extensions=[RustExtension('dtparse._dtparse', 'Cargo.toml')], 46 | install_requires=install_requires, 47 | tests_require=tests_require, 48 | extras_require={'test': tests_require}, 49 | setup_requires=setup_requires, 50 | include_package_data=True, 51 | zip_safe=False, 52 | cmdclass=dict(test=PyTest) 53 | ) 54 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate pyo3; 2 | 3 | extern crate chrono; 4 | 5 | use chrono::prelude::*; 6 | use pyo3::exceptions::*; 7 | use pyo3::prelude::*; 8 | use pyo3::types::*; 9 | 10 | // https://pyo3.rs/v0.12.4/module.html 11 | // This macro makes Rust compile a _dtparse.so binary in Python-compatible format. 12 | // Such a binary can be imported from Python just like a regular Python module. 13 | #[pymodule(_dtparse)] 14 | fn init_mod(_py: Python, m: &PyModule) -> PyResult<()> { 15 | // We fill this module with everything we want to make visible from Python. 16 | 17 | #[pyfn(m, "parse")] 18 | fn parse<'a>(_py: Python<'a>, str_datetime: &str, fmt: &str) -> PyResult<&'a PyDateTime> { 19 | // Call chrono and ask it to parse the datetime for us 20 | let chrono_dt = Utc.datetime_from_str(str_datetime, fmt); 21 | 22 | match chrono_dt { 23 | // In case everything's fine, get Rust datetime out of the result and transform 24 | // it into a Python datetime. 
25 | Ok(dt) => { 26 | let microsecond = dt.nanosecond() / 1000; 27 | PyDateTime::new( 28 | _py, 29 | dt.year(), 30 | dt.month() as u8, 31 | dt.day() as u8, 32 | dt.hour() as u8, 33 | dt.minute() as u8, 34 | dt.second() as u8, 35 | microsecond as u32, 36 | None, 37 | ) 38 | } 39 | // In case chrono couldn't parse a datetime, raise a ValueError with chrono's error message. 40 | // Because there are no exceptions in Rust, we return a PyValueError instance here. 41 | // By convention, it will make PyO3 wrapper raise an exception in Python interpreter. 42 | // https://pyo3.rs/v0.12.4/exception.html 43 | Err(e) => Err(PyValueError::new_err(e.to_string())), 44 | } 45 | } 46 | Ok(()) 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_call_signature.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | 4 | from datetime import datetime 5 | 6 | from dtparse import parse 7 | 8 | 9 | def test_call_signature(): 10 | string = '2004-12-01 13:02:47.123456' 11 | dt_format = '%Y-%m-%d %H:%M:%S%.f' 12 | expected = datetime(2004, 12, 1, 13, 2, 47, 123456) 13 | 14 | assert parse(string, dt_format) == expected 15 | assert parse(string, fmt=dt_format) == expected 16 | assert parse(str_datetime=string, fmt=dt_format) == expected 17 | 18 | assert parse(*[string, dt_format]) == expected 19 | assert parse(**dict(str_datetime=string, fmt=dt_format)) == expected 20 | -------------------------------------------------------------------------------- /tests/test_invalid_inputs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | 4 | import pytest 5 | 6 | from dtparse import parse 7 | 8 | 9 | @pytest.mark.parametrize( 10 | 'str_datetime,fmt', [ 11 | ["", ""], 12 | ["abc", ""], 13 | ["", "abc"], 14 | ["abc", "abc"], 15 | ['2018/01/02', 
'%Y/%m/%d'], # only date 16 | ['2018/01/02 12', '%Y/%m/%d %H'], # no minutes in the template 17 | ['2018/01/02 12', '%Y/%m/%d %M'], # no hours in the template 18 | ['2004-12-01 13:02:47 ', '%Y-%m-%d %H:%M:%S'], # extra whitespace in the string 19 | ['Fri Nov 28 12:00:09', '%a %b %e %T %Y'], # the year is missing in the string 20 | ['Fri Nov 28 12:00:09', '%a %b %e %T'], # the year is missing in the template 21 | ['Sat Nov 28 12:00:09 2014', '%a %b %e %T %Y'], # the weekday is incorrect 22 | ] 23 | ) 24 | def test_throws_value_error(str_datetime, fmt): 25 | with pytest.raises(ValueError): 26 | parse(str_datetime=str_datetime, fmt=fmt) 27 | 28 | 29 | @pytest.mark.parametrize( 30 | 'args', [ 31 | [], 32 | [''], 33 | [1], 34 | [None], 35 | [None, None], 36 | [None, None, None], 37 | [1, '%Y-%m-%d %H:%M:%S'], 38 | [None, '%Y-%m-%d %H:%M:%S'], 39 | ['2004-12-01 13:02:47', 1], 40 | ['2004-12-01 13:02:47', None], 41 | ['2004-12-01 13:02:47', b'%Y-%m-%d %H:%M:%S'], 42 | [b'2004-12-01 13:02:47', '%Y-%m-%d %H:%M:%S'], 43 | [b'2004-12-01 13:02:47', b'%Y-%m-%d %H:%M:%S'], 44 | ['2004-12-01 13:02:47', '%Y-%m-%d %H:%M:%S', ""], 45 | ['2004-12-01 13:02:47', '%Y-%m-%d %H:%M:%S', None], 46 | ] 47 | ) 48 | def test_throws_type_error(args): 49 | with pytest.raises(TypeError): 50 | parse(*args) 51 | -------------------------------------------------------------------------------- /tests/test_performance.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | 4 | import ciso8601 5 | from datetime import datetime 6 | 7 | import dtparse 8 | 9 | 10 | def test_ciso8601(benchmark): 11 | assert benchmark.pedantic( 12 | ciso8601.parse_datetime, args=('2018-12-31T23:59:58', ), 13 | rounds=10 ** 6, iterations=100 14 | ) == datetime(2018, 12, 31, 23, 59, 58) 15 | 16 | 17 | def test_rust(benchmark): 18 | assert benchmark.pedantic( 19 | dtparse.parse, args=('2018-12-31T23:59:58', 
'%Y-%m-%dT%H:%M:%S'), 20 | rounds=10 ** 1, iterations=100 21 | ) == datetime(2018, 12, 31, 23, 59, 58) 22 | 23 | 24 | def test_py(benchmark): 25 | assert benchmark.pedantic( 26 | datetime.strptime, args=('2018-12-31T23:59:58', '%Y-%m-%dT%H:%M:%S'), 27 | rounds=10 ** 1, iterations=10 28 | ) == datetime(2018, 12, 31, 23, 59, 58) 29 | -------------------------------------------------------------------------------- /tests/test_valid_inputs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | 4 | from datetime import datetime 5 | 6 | import pytest 7 | 8 | from dtparse import parse 9 | 10 | 11 | @pytest.mark.parametrize( 12 | 'str_datetime,fmt,expected', [ 13 | ['2004-12-01 13:02:47', '%Y-%m-%d %H:%M:%S', datetime(2004, 12, 1, 13, 2, 47)], 14 | ['2004-12-01 13:02:47', '%Y-%m-%d %H:%M:%S ', datetime(2004, 12, 1, 13, 2, 47)], # extra whitespace in the template 15 | ['13:02:47XXX2004-12-01', '%H:%M:%SXXX%Y-%m-%d', datetime(2004, 12, 1, 13, 2, 47)], 16 | ['2004-12-01 13:02:47.123456', '%Y-%m-%d %H:%M:%S%.f', datetime(2004, 12, 1, 13, 2, 47, 123456)], 17 | ['2004-12-01 13:02:47.123456789', '%Y-%m-%d %H:%M:%S%.f', datetime(2004, 12, 1, 13, 2, 47, 123456)], 18 | ['Fri, 28 Nov 2014 21:00:09', '%a, %d %b %Y %H:%M:%S', datetime(2014, 11, 28, 21, 0, 9)], 19 | ['Fri, 28 Nov 2014 21:00:09', '%a, %d %b %Y %T', datetime(2014, 11, 28, 21, 0, 9)], 20 | ['Fri Nov 28 21:00:09 2014', '%a %b %e %T %Y', datetime(2014, 11, 28, 21, 0, 9)], 21 | ] 22 | ) 23 | def test_parses_correctly(str_datetime, fmt, expected): 24 | assert parse(str_datetime=str_datetime, fmt=fmt) == expected 25 | --------------------------------------------------------------------------------