├── tests ├── __init__.py └── onigurumacffi_test.py ├── MANIFEST.in ├── requirements-dev.txt ├── .gitignore ├── bin ├── clone-oniguruma ├── ctx └── build-manylinux-wheels ├── tox.ini ├── setup.py ├── .github ├── workflows │ └── main.yml └── actions │ └── pre-test │ └── action.yml ├── LICENSE ├── setup.cfg ├── .pre-commit-config.yaml ├── README.md ├── onigurumacffi_build.py └── onigurumacffi.py /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include onigurumacffi_build.py 2 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | covdefaults 2 | coverage 3 | pytest 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.egg-info 3 | *.o 4 | *.pyc 5 | *.so 6 | /.coverage 7 | /.eggs 8 | /.tox 9 | /build 10 | /dist 11 | -------------------------------------------------------------------------------- /bin/clone-oniguruma: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euxo pipefail 3 | ONIGURUMA_VERSION=v6.9.10 4 | git init "${ONIGURUMA_CLONE}" 5 | cd "${ONIGURUMA_CLONE}" 6 | git remote add origin https://github.com/kkos/oniguruma 7 | git -c protocol.version=2 fetch --depth=1 origin "${ONIGURUMA_VERSION}" 8 | git checkout FETCH_HEAD 9 | -------------------------------------------------------------------------------- /bin/ctx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | if [ "$#" -lt 2 ] || [ ! 
-d "$1" ]; then 4 | echo "usage $0 PREFIX EXE ..." 5 | exit 1 6 | fi 7 | 8 | PREFIX="$1" 9 | shift 10 | 11 | export CPATH="$PREFIX/include" 12 | export LD_LIBRARY_PATH="$PREFIX/lib" 13 | export LIBRARY_PATH="$PREFIX/lib" 14 | export PATH="$PREFIX/bin:$PATH" 15 | export PKG_CONFIG_PATH="$PREFIX/lib/pkgconfig" 16 | 17 | exec "$@" 18 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py,pre-commit 3 | 4 | [testenv] 5 | deps = -rrequirements-dev.txt 6 | passenv = ONIGURUMA_CLONE 7 | commands = 8 | coverage erase 9 | coverage run -m pytest {posargs:tests} 10 | coverage report 11 | 12 | [testenv:pre-commit] 13 | skip_install = true 14 | deps = pre-commit 15 | commands = pre-commit run --all-files --show-diff-on-failure 16 | 17 | [pep8] 18 | ignore = E265,E501,W504 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import platform 4 | import sys 5 | 6 | from setuptools import setup 7 | 8 | if platform.python_implementation() == 'CPython': 9 | try: 10 | import wheel.bdist_wheel 11 | except ImportError: 12 | cmdclass = {} 13 | else: 14 | class bdist_wheel(wheel.bdist_wheel.bdist_wheel): 15 | def finalize_options(self) -> None: 16 | self.py_limited_api = f'cp3{sys.version_info[1]}' 17 | super().finalize_options() 18 | 19 | cmdclass = {'bdist_wheel': bdist_wheel} 20 | else: 21 | cmdclass = {} 22 | 23 | setup(cffi_modules=['onigurumacffi_build.py:ffibuilder'], cmdclass=cmdclass) 24 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: main 2 | 3 | on: 4 | push: 5 | branches: [main, test-me-*] 6 | tags: '*' 7 | 
pull_request: 8 | 9 | jobs: 10 | main-windows: 11 | uses: asottile/workflows/.github/workflows/tox.yml@v1.8.1 12 | with: 13 | env: '["py310"]' 14 | os: windows-latest 15 | arch: '["x64", "x86"]' 16 | wheel-tags: true 17 | main-macos: 18 | uses: asottile/workflows/.github/workflows/tox.yml@v1.8.1 19 | with: 20 | env: '["py310"]' 21 | os: macos-latest 22 | wheel-tags: true 23 | main-macos-intel: 24 | uses: asottile/workflows/.github/workflows/tox.yml@v1.8.1 25 | with: 26 | env: '["py310"]' 27 | os: macos-15-intel 28 | wheel-tags: true 29 | main-linux: 30 | uses: asottile/workflows/.github/workflows/tox.yml@v1.8.1 31 | with: 32 | env: '["py310", "py311", "py312"]' 33 | os: ubuntu-latest 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020 Anthony Sottile 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = onigurumacffi 3 | version = 1.4.1 4 | description = python cffi bindings for the oniguruma regex engine 5 | long_description = file: README.md 6 | long_description_content_type = text/markdown 7 | url = https://github.com/asottile/onigurumacffi 8 | author = Anthony Sottile 9 | author_email = asottile@umich.edu 10 | license = MIT 11 | license_files = LICENSE 12 | classifiers = 13 | Programming Language :: Python :: 3 14 | Programming Language :: Python :: 3 :: Only 15 | Programming Language :: Python :: Implementation :: CPython 16 | Programming Language :: Python :: Implementation :: PyPy 17 | 18 | [options] 19 | py_modules = onigurumacffi 20 | install_requires = 21 | cffi>=1 22 | python_requires = >=3.10 23 | setup_requires = 24 | cffi>=1 25 | 26 | [coverage:run] 27 | plugins = covdefaults 28 | omit = onigurumacffi_build.py 29 | 30 | [mypy] 31 | check_untyped_defs = true 32 | disallow_any_generics = true 33 | disallow_incomplete_defs = true 34 | disallow_untyped_defs = true 35 | warn_redundant_casts = true 36 | warn_unused_ignores = true 37 | 38 | [mypy-testing.*] 39 | disallow_untyped_defs = false 40 | 41 | [mypy-tests.*] 42 | disallow_untyped_defs = false 43 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v6.0.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: 
end-of-file-fixer 7 | - id: check-yaml 8 | - id: debug-statements 9 | - id: double-quote-string-fixer 10 | - id: name-tests-test 11 | - id: requirements-txt-fixer 12 | - repo: https://github.com/asottile/setup-cfg-fmt 13 | rev: v3.2.0 14 | hooks: 15 | - id: setup-cfg-fmt 16 | - repo: https://github.com/asottile/reorder-python-imports 17 | rev: v3.16.0 18 | hooks: 19 | - id: reorder-python-imports 20 | args: [--py310-plus, --add-import, 'from __future__ import annotations'] 21 | - repo: https://github.com/asottile/add-trailing-comma 22 | rev: v4.0.0 23 | hooks: 24 | - id: add-trailing-comma 25 | - repo: https://github.com/asottile/pyupgrade 26 | rev: v3.21.2 27 | hooks: 28 | - id: pyupgrade 29 | args: [--py310-plus] 30 | - repo: https://github.com/hhatto/autopep8 31 | rev: v2.3.2 32 | hooks: 33 | - id: autopep8 34 | - repo: https://github.com/PyCQA/flake8 35 | rev: 7.3.0 36 | hooks: 37 | - id: flake8 38 | - repo: https://github.com/pre-commit/mirrors-mypy 39 | rev: v1.19.1 40 | hooks: 41 | - id: mypy 42 | -------------------------------------------------------------------------------- /bin/build-manylinux-wheels: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import annotations 3 | 4 | import argparse 5 | import os 6 | import shutil 7 | import subprocess 8 | 9 | DOCKERFILE_FMT = '''\ 10 | FROM {base} 11 | RUN : \ 12 | && ONIGURUMA_CLONE="$PWD/oniguruma" \ 13 | && {clone} \ 14 | && ./autogen.sh && ./configure && make -j4 install \ 15 | && rm -rf "$ONIGURUMA_CLONE" 16 | ''' 17 | PROG = '''\ 18 | {py_bin}/pip wheel --wheel-dir /work --no-deps onigurumacffi=={version} && 19 | auditwheel repair --wheel-dir /dist /work/*.whl && 20 | find /dist/ -name '*.whl' -print0 | xargs -n1 -0 --verbose unzip -l 21 | ''' 22 | 23 | 24 | def main() -> int: 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('version') 27 | args = parser.parse_args() 28 | 29 | img = 'onigurumacffi-build' 30 | 
base = 'quay.io/pypa/manylinux2014_x86_64' 31 | 32 | with open(os.path.join(os.path.dirname(__file__), 'clone-oniguruma')) as f: 33 | clone = ' && '.join(f.read().splitlines()[3:]) 34 | dockerfile = DOCKERFILE_FMT.format(base=base, clone=clone).encode() 35 | 36 | cmd = ('docker', 'build', '-t', img, '-') 37 | subprocess.run(cmd, input=dockerfile, check=True) 38 | 39 | shutil.rmtree('dist', ignore_errors=True) 40 | os.makedirs('dist', exist_ok=True) 41 | 42 | for py_bin in ( 43 | '/opt/python/cp39-cp39/bin', 44 | '/opt/python/pp310-pypy310_pp73/bin', 45 | ): 46 | prog = PROG.format(py_bin=py_bin, version=args.version) 47 | if subprocess.call(( 48 | 'docker', 'run', 49 | '--volume', f'{os.path.abspath("dist")}:/dist:rw', 50 | '--rm', '-ti', img, 'bash', '-euxc', prog, 51 | )): 52 | return 1 53 | else: 54 | return 0 55 | 56 | 57 | if __name__ == '__main__': 58 | raise SystemExit(main()) 59 | -------------------------------------------------------------------------------- /.github/actions/pre-test/action.yml: -------------------------------------------------------------------------------- 1 | inputs: 2 | arch: 3 | default: ${{ matrix.arch }} 4 | runs: 5 | using: composite 6 | steps: 7 | - name: setup vars 8 | shell: bash 9 | run: echo "ONIGURUMA_CLONE=${WS}${SLASH}oniguruma" >> "$GITHUB_ENV" 10 | env: 11 | WS: ${{ github.workspace }} 12 | SLASH: ${{ runner.os == 'Windows' && '\' || '/' }} 13 | - uses: actions/cache@v3 14 | id: oniguruma-cache 15 | with: 16 | key: ${{ runner.os }} | ${{ runner.arch }} | ${{ inputs.arch }} | ${{ hashFiles('bin/clone-oniguruma') }} 17 | path: ${{ env.ONIGURUMA_CLONE }} 18 | - name: build oniguruma (windows) 19 | shell: cmd 20 | run: | 21 | bash bin\clone-oniguruma 22 | call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ inputs.arch }} 23 | cd %ONIGURUMA_CLONE% 24 | make_win.bat 25 | if: runner.os == 'Windows' && steps.oniguruma-cache.outputs.cache-hit != 'true' 26 | - name: build oniguruma 
(macos) 27 | shell: bash 28 | run: | 29 | set -x 30 | brew install autoconf automake libtool 31 | bin/clone-oniguruma 32 | cd "$ONIGURUMA_CLONE" 33 | ./autogen.sh 34 | ./configure 35 | make -j4 36 | if: runner.os == 'macOS' && steps.oniguruma-cache.outputs.cache-hit != 'true' 37 | - name: build oniguruma (linux) 38 | shell: bash 39 | run: | 40 | set -x 41 | bin/clone-oniguruma 42 | cd "$ONIGURUMA_CLONE" 43 | ./autogen.sh 44 | ./configure 45 | make -j4 46 | if: runner.os == 'Linux' && steps.oniguruma-cache.outputs.cache-hit != 'true' 47 | - name: install oniguruma (macos) 48 | shell: bash 49 | run: sudo make -C "$ONIGURUMA_CLONE" install 50 | if: runner.os == 'macOS' 51 | - name: install oniguruma (linux) 52 | shell: bash 53 | run: | 54 | sudo make -C "$ONIGURUMA_CLONE" install 55 | sudo ldconfig 56 | if: runner.os == 'Linux' 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![build status](https://github.com/asottile/onigurumacffi/actions/workflows/main.yml/badge.svg)](https://github.com/asottile/onigurumacffi/actions/workflows/main.yml) 2 | [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/asottile/onigurumacffi/main.svg)](https://results.pre-commit.ci/latest/github/asottile/onigurumacffi/main) 3 | 4 | onigurumacffi 5 | ============= 6 | 7 | python cffi bindings for the oniguruma regex engine 8 | 9 | ### installation 10 | 11 | ```bash 12 | pip install onigurumacffi 13 | ``` 14 | 15 | - wheels should be available on pypi in most cases 16 | - to build from source, `libonig-dev` must be installed prior to installation 17 | 18 | ### api 19 | 20 | the api is currently *very limited* (basically just enough to support what I 21 | needed). 
22 | 23 | #### `compile(pattern: str) -> _Pattern` 24 | 25 | make a compiled pattern 26 | 27 | #### `compile_regset(*patterns: str) -> _RegSet` 28 | 29 | make a compiled RegSet 30 | 31 | #### `OnigSearchOption` 32 | 33 | an enum listing the search-time options for oniguruma 34 | 35 | the current set of options are: 36 | 37 | ```python 38 | class OnigSearchOption(enum.IntEnum): 39 | NONE = ... 40 | NOTBOL = ... 41 | NOTEOL = ... 42 | POSIX_REGION = ... 43 | CHECK_VALIDITY_OF_STRING = ... 44 | NOT_BEGIN_STRING = ... 45 | NOT_BEGIN_POSITION = ... 46 | ``` 47 | 48 | #### `_Pattern.match(s: str, start: int = 0, flags: OnigSearchOption = OnigSearchOption.NONE) -> Optional[_Match]` 49 | 50 | match a string using the pattern. optionally set `start` to adjust the offset 51 | which is searched from 52 | 53 | #### `_Pattern.search(s: str, start: int = 0, flags: OnigSearchOption = OnigSearchOption.NONE) -> Optional[_Match]` 54 | 55 | search a string using the pattern. optionally set `start` to adjust the offset 56 | which is searched from 57 | 58 | #### `_Pattern.number_of_captures() -> int` 59 | 60 | return the number of captures in the regex 61 | 62 | #### `_RegSet.search(s: str, start: int = 0, flags: OnigSearchOption = OnigSearchOption.NONE) -> Tuple[int, Optional[_Match]]` 63 | 64 | search a string using the RegSet. 
optionally set `start` to adjust the offset 65 | which is searched from 66 | 67 | the leftmost regex index and match is returned or `(-1, None)` if there is no 68 | match 69 | 70 | #### `_Match.group(n: int = 0) -> str` 71 | 72 | return the string of the matched group, defaults to 0 (the whole match) 73 | 74 | #### `_Match[n: int] -> str` 75 | 76 | a shorthand alias for `_Match.group(...)` 77 | 78 | #### `_Match.start(n: int = 0) -> int` 79 | 80 | return the character position of the start of the matched group, defaults to 0 81 | (the whole match) 82 | 83 | #### `_Match.end(n: int = 0) -> int` 84 | 85 | return the character position of the end of the matched group, defaults to 0 86 | (the whole match) 87 | 88 | #### `_Match.span(n: int = 0) -> Tuple[int, int]` 89 | 90 | return `(start, end)` character position of the matched group, defaults to 0 91 | (the whole match) 92 | 93 | #### `_Match.expand(s: str) -> str` 94 | 95 | expand numeric groups in `s` via the groups in the match 96 | -------------------------------------------------------------------------------- /onigurumacffi_build.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import os 4 | import sys 5 | 6 | from cffi import FFI 7 | 8 | CDEF = '''\ 9 | #define ONIG_MAX_ERROR_MESSAGE_LEN ... 10 | 11 | #define ONIG_MISMATCH ... 12 | 13 | typedef unsigned int OnigOptionType; 14 | 15 | #define ONIG_OPTION_NONE ... 16 | #define ONIG_OPTION_NOTBOL ... 17 | #define ONIG_OPTION_NOTEOL ... 18 | #define ONIG_OPTION_POSIX_REGION ... 19 | #define ONIG_OPTION_CHECK_VALIDITY_OF_STRING ... 20 | #define ONIG_OPTION_NOT_BEGIN_STRING ... 21 | #define ONIG_OPTION_NOT_BEGIN_POSITION ... 22 | #define ONIG_OPTION_NOT_END_STRING ... 
23 | 24 | typedef unsigned char OnigUChar; 25 | 26 | int onigcffi_initialize(void); 27 | 28 | const char* onig_version(void); 29 | 30 | typedef struct {...;} OnigErrorInfo; 31 | int onig_error_code_to_str(OnigUChar* s, int err_code, ...); 32 | 33 | struct re_registers { 34 | int allocated; 35 | int num_regs; 36 | int* beg; 37 | int* end; 38 | ...; 39 | }; 40 | typedef struct re_registers OnigRegion; 41 | 42 | OnigRegion* onig_region_new(void); 43 | void onigcffi_region_free(OnigRegion* region); 44 | 45 | typedef ... regex_t; 46 | int onigcffi_new( 47 | regex_t** reg, 48 | const OnigUChar* pattern, size_t len, 49 | OnigErrorInfo* err_info 50 | ); 51 | void onig_free(regex_t*); 52 | 53 | int onig_number_of_captures(regex_t* reg); 54 | 55 | int onigcffi_match( 56 | regex_t* reg, 57 | const OnigUChar* str, size_t len, size_t start, 58 | OnigRegion* region, 59 | OnigOptionType flags 60 | ); 61 | 62 | int onigcffi_search( 63 | regex_t* reg, 64 | const OnigUChar* str, size_t len, size_t start, 65 | OnigRegion* region, 66 | OnigOptionType flags 67 | ); 68 | 69 | typedef ... 
OnigRegSet; 70 | int onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[]); 71 | void onig_regset_free(OnigRegSet*); 72 | 73 | int onigcffi_regset_search( 74 | OnigRegSet* set, 75 | const OnigUChar* str, size_t len, size_t start, OnigRegion** region, 76 | OnigOptionType flags 77 | ); 78 | ''' 79 | SRC = '''\ 80 | #include 81 | 82 | int onigcffi_initialize(void) { 83 | OnigEncoding enc = ONIG_ENCODING_UTF8; 84 | return onig_initialize(&enc, 1); 85 | } 86 | 87 | void onigcffi_region_free(OnigRegion* region) { 88 | onig_region_free(region, 1); 89 | } 90 | 91 | int onigcffi_new( 92 | regex_t** reg, 93 | const OnigUChar* pattern, size_t len, 94 | OnigErrorInfo* err_info 95 | ) { 96 | return onig_new( 97 | reg, 98 | pattern, pattern + len, 99 | ONIG_OPTION_NONE, 100 | ONIG_ENCODING_UTF8, 101 | ONIG_SYNTAX_ONIGURUMA, 102 | err_info 103 | ); 104 | } 105 | 106 | int onigcffi_match( 107 | regex_t* reg, 108 | const OnigUChar* str, size_t len, size_t start, OnigRegion* region, 109 | OnigOptionType flags 110 | ) { 111 | return onig_match( 112 | reg, 113 | str, str + len, 114 | str + start, 115 | region, 116 | flags 117 | ); 118 | } 119 | 120 | int onigcffi_search( 121 | regex_t* reg, 122 | const OnigUChar* str, size_t len, size_t start, OnigRegion* region, 123 | OnigOptionType flags 124 | ) { 125 | return onig_search( 126 | reg, 127 | str, str + len, 128 | str + start, str + len, 129 | region, 130 | flags 131 | ); 132 | } 133 | 134 | int onigcffi_regset_search( 135 | OnigRegSet* set, 136 | const OnigUChar* str, size_t len, size_t start, OnigRegion** region, 137 | OnigOptionType flags 138 | ) { 139 | int _unused_match_pos; 140 | int idx = onig_regset_search( 141 | set, 142 | str, str + len, 143 | str + start, str + len, 144 | ONIG_REGSET_POSITION_LEAD, 145 | flags, 146 | &_unused_match_pos 147 | ); 148 | if (idx >= 0) { 149 | *region = onig_regset_get_region(set, idx); 150 | } 151 | return idx; 152 | } 153 | ''' 154 | 155 | ffibuilder = FFI() 156 | ffibuilder.cdef(CDEF) 157 
| 158 | if sys.platform == 'win32': 159 | here = os.path.dirname(__file__) 160 | clone = os.environ.get('ONIGURUMA_CLONE', os.path.join(here, 'oniguruma')) 161 | ffibuilder.set_source( 162 | '_onigurumacffi', SRC, 163 | libraries=['onig_s'], 164 | define_macros=[('ONIG_EXTERN', 'extern')], 165 | include_dirs=[os.path.join(clone, 'src')], 166 | library_dirs=[clone], 167 | ) 168 | else: 169 | ffibuilder.set_source('_onigurumacffi', SRC, libraries=['onig']) 170 | 171 | if __name__ == '__main__': 172 | ffibuilder.compile(verbose=True) 173 | -------------------------------------------------------------------------------- /onigurumacffi.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import enum 4 | import re 5 | from typing import Any 6 | 7 | import _onigurumacffi 8 | 9 | _ffi = _onigurumacffi.ffi 10 | _lib = _onigurumacffi.lib 11 | 12 | _BACKREF_RE = re.compile(r'((? str: 31 | buf = _ffi.new('OnigUChar[ONIG_MAX_ERROR_MESSAGE_LEN]') 32 | length = _lib.onig_error_code_to_str(buf, code, *args) 33 | return bytes(buf[0:length]).decode() 34 | 35 | 36 | def _check(code: int, *args: Any) -> None: 37 | if code < 0: 38 | raise OnigError(_err(code, *args)) 39 | 40 | 41 | _check(_lib.onigcffi_initialize()) 42 | __onig_version__ = _ffi.string(_lib.onig_version()).decode() 43 | 44 | 45 | class _Match: 46 | __slots__ = ('_s_b', '_begs', '_ends') 47 | 48 | def __init__( 49 | self, 50 | s_b: bytes, 51 | begs: tuple[int, ...], 52 | ends: tuple[int, ...], 53 | ) -> None: 54 | self._s_b = s_b 55 | self._begs = begs 56 | self._ends = ends 57 | 58 | def __repr__(self) -> str: 59 | return f'' 60 | 61 | def group(self, n: int = 0) -> str: 62 | return self._s_b[self._begs[n]:self._ends[n]].decode() 63 | 64 | __getitem__ = group 65 | 66 | def start(self, n: int = 0) -> int: 67 | return len(self._s_b[:self._begs[n]].decode()) 68 | 69 | def end(self, n: int = 0) -> int: 70 | return 
len(self._s_b[:self._ends[n]].decode()) 71 | 72 | def span(self, n: int = 0) -> tuple[int, int]: 73 | return self.start(n), self.end(n) 74 | 75 | def expand(self, s: str) -> str: 76 | return _BACKREF_RE.sub(lambda m: f'{m[1]}{self[int(m[2])]}', s) 77 | 78 | @property 79 | def string(self) -> str: 80 | return self._s_b.decode() 81 | 82 | 83 | def _start_params(s: str, start: int) -> tuple[bytes, int]: 84 | return s.encode(), len(s[:start].encode()) 85 | 86 | 87 | def _region() -> Any: 88 | return _ffi.gc(_lib.onig_region_new(), _lib.onigcffi_region_free) 89 | 90 | 91 | def _match_ret(ret: int, s_b: bytes, region: Any) -> _Match | None: 92 | if ret == _lib.ONIG_MISMATCH: 93 | return None 94 | else: 95 | _check(ret) 96 | 97 | begs = tuple(region[0].beg[0:region[0].num_regs]) 98 | ends = tuple(region[0].end[0:region[0].num_regs]) 99 | 100 | return _Match(s_b, begs, ends) 101 | 102 | 103 | class _Pattern: 104 | def __init__(self, pattern: str, regex_t: Any) -> None: 105 | self._pattern = pattern 106 | self._regex_t = _ffi.gc(regex_t, _lib.onig_free) 107 | 108 | def __repr__(self) -> str: 109 | return f'{__name__}.compile({self._pattern!r})' 110 | 111 | def number_of_captures(self) -> int: 112 | return _lib.onig_number_of_captures(self._regex_t) 113 | 114 | def match( 115 | self, 116 | s: str, 117 | start: int = 0, 118 | flags: OnigSearchOption = OnigSearchOption.NONE, 119 | ) -> _Match | None: 120 | s_b, start_b = _start_params(s, start) 121 | region = _region() 122 | 123 | ret = _lib.onigcffi_match( 124 | self._regex_t, s_b, len(s_b), start_b, region, flags, 125 | ) 126 | 127 | return _match_ret(ret, s_b, region) 128 | 129 | def search( 130 | self, 131 | s: str, 132 | start: int = 0, 133 | flags: OnigSearchOption = OnigSearchOption.NONE, 134 | ) -> _Match | None: 135 | s_b, start_b = _start_params(s, start) 136 | region = _region() 137 | 138 | ret = _lib.onigcffi_search( 139 | self._regex_t, s_b, len(s_b), start_b, region, flags, 140 | ) 141 | 142 | return 
_match_ret(ret, s_b, region) 143 | 144 | 145 | class _RegSet: 146 | def __init__(self, patterns: tuple[str, ...], regset_t: Any) -> None: 147 | self._patterns = patterns 148 | self._regset_t = _ffi.gc(regset_t, _lib.onig_regset_free) 149 | 150 | def __repr__(self) -> str: 151 | patterns = ', '.join(repr(pattern) for pattern in self._patterns) 152 | return f'{__name__}.compile_regset({patterns})' 153 | 154 | def search( 155 | self, 156 | s: str, 157 | start: int = 0, 158 | flags: OnigSearchOption = OnigSearchOption.NONE, 159 | ) -> tuple[int, _Match | None]: 160 | s_b, start_b = _start_params(s, start) 161 | region = _ffi.new('OnigRegion*[1]') 162 | 163 | idx = _lib.onigcffi_regset_search( 164 | self._regset_t, s_b, len(s_b), start_b, region, flags, 165 | ) 166 | return idx, _match_ret(idx, s_b, region[0]) 167 | 168 | 169 | def _compile_regex_t(pattern: str, dest: Any) -> None: 170 | pattern_b = pattern.encode() 171 | 172 | err_info = _ffi.new('OnigErrorInfo[1]') 173 | ret = _lib.onigcffi_new(dest, pattern_b, len(pattern_b), err_info) 174 | _check(ret, err_info) 175 | 176 | 177 | def compile(pattern: str) -> _Pattern: 178 | regex = _ffi.new('regex_t*[1]') 179 | _compile_regex_t(pattern, regex) 180 | return _Pattern(pattern, regex[0]) 181 | 182 | 183 | def compile_regset(*patterns: str) -> _RegSet: 184 | regexes = _ffi.new('regex_t*[]', len(patterns)) 185 | for i, pattern in enumerate(patterns): 186 | _compile_regex_t(pattern, regexes + i) 187 | 188 | regset = _ffi.new('OnigRegSet*[1]') 189 | _check(_lib.onig_regset_new(regset, len(patterns), regexes)) 190 | return _RegSet(patterns, regset[0]) 191 | -------------------------------------------------------------------------------- /tests/onigurumacffi_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | 5 | import onigurumacffi 6 | 7 | FOO_RE = onigurumacffi.compile('^foo') 8 | ABC_RE = 
onigurumacffi.compile('(a+)B+(c+)') 9 | UNICODE_RE = onigurumacffi.compile('.*?(🙃+)') 10 | REGSET = onigurumacffi.compile_regset('a+', 'b+', 'c+') 11 | 12 | 13 | def test_regex_compiles(): 14 | assert FOO_RE is not None 15 | 16 | 17 | def test_regex_repr(): 18 | assert repr(FOO_RE) == "onigurumacffi.compile('^foo')" 19 | 20 | 21 | def test_regex_compile_failure(): 22 | with pytest.raises(onigurumacffi.OnigError): 23 | onigurumacffi.compile('(') 24 | 25 | 26 | def test_regex_number_of_captures(): 27 | assert FOO_RE.number_of_captures() == 0 28 | assert ABC_RE.number_of_captures() == 2 29 | assert UNICODE_RE.number_of_captures() == 1 30 | 31 | 32 | def test_match_failure(): 33 | assert FOO_RE.match('bar') is None 34 | 35 | 36 | def test_match_success(): 37 | assert FOO_RE.match('food') is not None 38 | 39 | 40 | def test_match_repr(): 41 | match = FOO_RE.match('food') 42 | assert match is not None 43 | assert repr(match) == "" 44 | 45 | 46 | def test_match_groups(): 47 | match = ABC_RE.match('aaaaaBBBBBcccDDD') 48 | assert match is not None 49 | assert match[0] == 'aaaaaBBBBBccc' 50 | assert match.group(0) == 'aaaaaBBBBBccc' 51 | assert match[1] == 'aaaaa' 52 | assert match.group(1) == 'aaaaa' 53 | assert match[2] == 'ccc' 54 | assert match.group(2) == 'ccc' 55 | with pytest.raises(IndexError): 56 | match[3] 57 | 58 | 59 | def test_match_starts_ends_spans(): 60 | match = ABC_RE.match('aaaBBBcccddd') 61 | assert match is not None 62 | assert match.start() == 0 63 | assert match.end() == 9 64 | assert match.span() == (0, 9) 65 | assert match.start(1) == 0 66 | assert match.end(1) == 3 67 | assert match.span(1) == (0, 3) 68 | assert match.start(2) == 6 69 | assert match.end(2) == 9 70 | assert match.span(2) == (6, 9) 71 | 72 | 73 | def test_match_start(): 74 | match = ABC_RE.match('aaaBBBcccddd', start=1) 75 | assert match is not None 76 | assert match.start() == 1 77 | assert match.end() == 9 78 | assert match[1] == 'aa' 79 | 80 | 81 | def test_unicode_match(): 82 | 
match = UNICODE_RE.match('ohai☃🙃🙃🙃wat') 83 | assert match is not None 84 | assert match[0] == 'ohai☃🙃🙃🙃' 85 | assert match[1] == '🙃🙃🙃' 86 | assert match.start() == 0 87 | assert match.end() == 8 88 | 89 | 90 | def test_unicode_match_start(): 91 | match = UNICODE_RE.match('☃☃☃🙃🙃🙃', start=1) 92 | assert match is not None 93 | assert match[0] == '☃☃🙃🙃🙃' 94 | 95 | 96 | def test_re_compile_unicode_escape(): 97 | pattern = onigurumacffi.compile(r'"\u2603++"') 98 | assert pattern.match('"☃☃☃☃"') 99 | 100 | 101 | def test_search(): 102 | match = ABC_RE.search('zzzaaaBccczzz') 103 | assert match is not None 104 | assert match.group() == 'aaaBccc' 105 | assert match.start() == 3 106 | 107 | 108 | def test_search_start(): 109 | match = ABC_RE.search('zzzaaaBccczzz', start=4) 110 | assert match is not None 111 | assert match.group() == 'aaBccc' 112 | assert match.start() == 4 113 | 114 | 115 | def test_search_no_match(): 116 | match = ABC_RE.search('zzz') 117 | assert match is None 118 | 119 | 120 | def test_search_with_flags(): 121 | reg_A = onigurumacffi.compile(r'\Afoo') 122 | assert reg_A.search('foo') 123 | assert not reg_A.search( 124 | 'foo', 125 | flags=onigurumacffi.OnigSearchOption.NOT_BEGIN_STRING, 126 | ) 127 | 128 | reg_G = onigurumacffi.compile(r'\Gfoo') 129 | assert reg_G.search('afoo', start=1) 130 | assert not reg_G.search( 131 | 'afoo', 132 | start=1, 133 | flags=onigurumacffi.OnigSearchOption.NOT_BEGIN_POSITION, 134 | ) 135 | 136 | reg_z = onigurumacffi.compile(r'foo\z') 137 | assert reg_z.search('hello foo') 138 | assert not reg_z.search( 139 | 'hello foo', 140 | flags=onigurumacffi.OnigSearchOption.NOT_END_STRING, 141 | ) 142 | 143 | 144 | def test_match_expand(): 145 | match = ABC_RE.match('aaaBccccddd') 146 | assert match is not None 147 | assert match.expand(r'foo\1\1\1') == 'fooaaaaaaaaa' 148 | assert match.expand(r'foo\2\1') == 'fooccccaaa' 149 | 150 | 151 | def test_match_string(): 152 | match = FOO_RE.match('food') 153 | assert match is not None 154 
| assert match.string == 'food' 155 | 156 | 157 | def test_match_with_flags(): 158 | reg_A = onigurumacffi.compile(r'\Afoo') 159 | assert reg_A.match('foo') 160 | assert not reg_A.match( 161 | 'foo', 162 | flags=onigurumacffi.OnigSearchOption.NOT_BEGIN_STRING, 163 | ) 164 | 165 | reg_G = onigurumacffi.compile(r'\Gfoo') 166 | assert reg_G.match('foo') 167 | assert not reg_G.match( 168 | 'foo', 169 | flags=onigurumacffi.OnigSearchOption.NOT_BEGIN_POSITION, 170 | ) 171 | 172 | reg_not_G = onigurumacffi.compile(r'(?!\G)') 173 | assert not reg_not_G.match('foo') 174 | assert reg_not_G.match( 175 | 'foo', flags=onigurumacffi.OnigSearchOption.NOT_BEGIN_POSITION, 176 | ) 177 | 178 | 179 | def test_regset_repr(): 180 | ret = repr(onigurumacffi.compile_regset('abc', 'def')) 181 | assert ret == "onigurumacffi.compile_regset('abc', 'def')" 182 | 183 | 184 | def test_regset_search_not_matching(): 185 | idx, match = REGSET.search('zzzq') 186 | assert idx == -1 187 | assert match is None 188 | 189 | 190 | def test_regset_search_matches_first_match(): 191 | idx, match = REGSET.search('zzzabc') 192 | assert idx == 0 193 | assert match is not None 194 | assert match.group() == 'a' 195 | 196 | 197 | def test_regset_returns_first_regex_when_equal(): 198 | regset = onigurumacffi.compile_regset('a', '[^z]') 199 | idx, match = regset.search('zzza') 200 | assert idx == 0 201 | assert match is not None 202 | assert match.group() == 'a' 203 | 204 | 205 | def test_regset_empty_match_at_end_of_string(): 206 | regset = onigurumacffi.compile_regset('a', '.*') 207 | idx, match = regset.search('foo', start=3) 208 | assert idx == 1 209 | assert match is not None 210 | assert match.group() == '' 211 | 212 | 213 | def test_regset_empty_match_empty_string(): 214 | regset = onigurumacffi.compile_regset('a', '.*') 215 | idx, match = regset.search('') 216 | assert idx == 1 217 | assert match is not None 218 | assert match.group() == '' 219 | 220 | 221 | def test_regset_search_with_flags(): 222 | 
regset_A = onigurumacffi.compile_regset(r'\Afoo', 'foo') 223 | idx, match = regset_A.search('foo') 224 | assert idx == 0 225 | assert match is not None 226 | 227 | idx, match = regset_A.search( 228 | 'foo', 229 | flags=onigurumacffi.OnigSearchOption.NOT_BEGIN_STRING, 230 | ) 231 | assert idx == 1 232 | assert match is not None 233 | 234 | regset_G = onigurumacffi.compile_regset(r'\Gfoo', 'foo') 235 | idx, match = regset_G.search('afoo', start=1) 236 | assert idx == 0 237 | assert match is not None 238 | 239 | idx, match = regset_G.search( 240 | 'afoo', 241 | start=1, 242 | flags=onigurumacffi.OnigSearchOption.NOT_BEGIN_POSITION, 243 | ) 244 | assert idx == 1 245 | assert match is not None 246 | --------------------------------------------------------------------------------