├── .github └── workflows │ ├── release.yaml │ ├── rename-wheels.py │ ├── test.yaml │ └── upload-deno-assets.js ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── Makefile ├── README.md ├── VERSION ├── benchmarks ├── Makefile ├── README.md ├── dates.png ├── dates │ ├── .gitignore │ ├── README.md │ ├── bench-dates.sh │ ├── regexp.sh │ ├── results.json │ ├── sqlean-re.sh │ └── sqlite-regex.sh ├── regexp.sql ├── sqlean.sql ├── this-pointer.sql ├── this.sql ├── thisx.sql └── todo │ ├── bench-internal.sh │ ├── bench-sqlite.sh │ ├── bench.sh │ ├── build-duckdb.sql │ ├── build-sqlite.sql │ ├── build-words.sql │ ├── duckdb.sql │ ├── sqlite-regex-email.sh │ ├── sqlite-regex-ipv4.sh │ └── sqlite-regex-uri.sh ├── bindings ├── ruby │ ├── .gitignore │ ├── Gemfile │ ├── Rakefile │ ├── lib │ │ ├── sqlite_regex.rb │ │ ├── version.rb │ │ └── version.rb.tmpl │ └── sqlite_regex.gemspec └── sqlite-utils │ ├── .gitignore │ ├── README.md │ ├── pyproject.toml │ ├── pyproject.toml.tmpl │ └── sqlite_utils_sqlite_regex │ ├── __init__.py │ ├── version.py │ └── version.py.tmpl ├── build.rs ├── cbindgen.toml ├── deno ├── README.md ├── README.md.tmpl ├── deno.json ├── deno.json.tmpl ├── deno.lock ├── mod.ts └── test.ts ├── docs.md ├── npm ├── .gitignore ├── README.md ├── platform-package.README.md.tmpl ├── platform-package.package.json.tmpl ├── sqlite-regex-darwin-arm64 │ ├── README.md │ ├── lib │ │ └── .gitkeep │ └── package.json ├── sqlite-regex-darwin-x64 │ ├── README.md │ ├── lib │ │ └── .gitkeep │ └── package.json ├── sqlite-regex-linux-x64 │ ├── README.md │ ├── lib │ │ └── .gitkeep │ └── package.json ├── sqlite-regex-windows-x64 │ ├── README.md │ ├── lib │ │ └── .gitkeep │ └── package.json └── sqlite-regex │ ├── README.md │ ├── package.json │ ├── package.json.tmpl │ ├── src │ └── index.js │ └── test.js ├── python ├── .gitignore ├── README.md ├── datasette_sqlite_regex │ ├── README.md │ ├── datasette_sqlite_regex │ │ ├── __init__.py │ │ └── version.py │ ├── 
setup.py │ └── tests │ │ └── test_sqlite_regex.py └── sqlite_regex │ ├── README.md │ ├── noop.c │ ├── setup.py │ └── sqlite_regex │ ├── __init__.py │ └── version.py ├── scripts ├── deno_generate_package.sh ├── npm_generate_platform_packages.sh └── publish_release.sh ├── sqlite-regex.h ├── src ├── captures.rs ├── find_all.rs ├── lib.rs ├── meta.rs ├── regex.rs ├── regexset.rs ├── regexset_matches.rs ├── split.rs └── utils.rs └── tests ├── test-loadable.py └── test-python.py /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: "Release" 2 | on: 3 | release: 4 | types: [published] 5 | workflow_dispatch: 6 | permissions: 7 | contents: read 8 | jobs: 9 | build-ubuntu-extension: 10 | name: Build ubuntu 11 | runs-on: ubuntu-20.04 12 | steps: 13 | - uses: actions/checkout@v2 14 | - uses: actions/cache@v3 15 | with: 16 | path: | 17 | ~/.cargo/bin/ 18 | ~/.cargo/registry/index/ 19 | ~/.cargo/registry/cache/ 20 | ~/.cargo/git/db/ 21 | target/ 22 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 23 | - uses: actions-rs/toolchain@v1 24 | with: 25 | toolchain: stable 26 | - run: make loadable-release 27 | - name: Upload artifacts 28 | uses: actions/upload-artifact@v2 29 | with: 30 | name: sqlite-regex-ubuntu 31 | path: dist/release/regex0.so 32 | build-ubuntu-python: 33 | runs-on: ubuntu-20.04 34 | needs: [build-ubuntu-extension] 35 | steps: 36 | - uses: actions/checkout@v3 37 | - name: Download workflow artifacts 38 | uses: actions/download-artifact@v3 39 | with: 40 | name: sqlite-regex-ubuntu 41 | path: dist/release/ 42 | - uses: actions/setup-python@v3 43 | - run: pip install wheel 44 | - run: make python-release 45 | - uses: actions/upload-artifact@v3 46 | with: 47 | name: sqlite-regex-ubuntu-wheels 48 | path: dist/release/wheels/*.whl 49 | build-macos-extension: 50 | name: Build macos-latest 51 | runs-on: macos-latest 52 | steps: 53 | - uses: actions/checkout@v2 54 | - uses: 
actions/cache@v3 55 | with: 56 | path: | 57 | ~/.cargo/bin/ 58 | ~/.cargo/registry/index/ 59 | ~/.cargo/registry/cache/ 60 | ~/.cargo/git/db/ 61 | target/ 62 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 63 | - uses: actions-rs/toolchain@v1 64 | with: 65 | toolchain: stable 66 | - run: make loadable-release 67 | - name: Upload artifacts 68 | uses: actions/upload-artifact@v2 69 | with: 70 | name: sqlite-regex-macos 71 | path: dist/release/regex0.dylib 72 | build-macos-python: 73 | runs-on: macos-latest 74 | needs: [build-macos-extension] 75 | steps: 76 | - uses: actions/checkout@v3 77 | - name: Download workflow artifacts 78 | uses: actions/download-artifact@v3 79 | with: 80 | name: sqlite-regex-macos 81 | path: dist/release/ 82 | - uses: actions/setup-python@v3 83 | - run: pip install wheel 84 | - run: make python-release 85 | - uses: actions/upload-artifact@v3 86 | with: 87 | name: sqlite-regex-macos-wheels 88 | path: dist/release/wheels/*.whl 89 | build-macos-arm-extension: 90 | name: Build macos-latest with arm 91 | runs-on: macos-latest 92 | steps: 93 | - uses: actions/checkout@v3 94 | - uses: actions/cache@v3 95 | with: 96 | path: | 97 | ~/.cargo/bin/ 98 | ~/.cargo/registry/index/ 99 | ~/.cargo/registry/cache/ 100 | ~/.cargo/git/db/ 101 | target/ 102 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 103 | - uses: actions-rs/toolchain@v1 104 | with: 105 | toolchain: stable 106 | - run: rustup target add aarch64-apple-darwin 107 | - run: make loadable-release target=aarch64-apple-darwin 108 | - name: Upload artifacts 109 | uses: actions/upload-artifact@v3 110 | with: 111 | name: sqlite-regex-macos-arm 112 | path: dist/release/regex0.dylib 113 | build-macos-arm-python: 114 | runs-on: macos-latest 115 | needs: [build-macos-arm-extension] 116 | steps: 117 | - uses: actions/checkout@v3 118 | - name: Download workflow artifacts 119 | uses: actions/download-artifact@v3 120 | with: 121 | name: sqlite-regex-macos-arm 122 | path: 
dist/release/ 123 | - uses: actions/setup-python@v3 124 | - run: pip install wheel 125 | - run: make python-release IS_MACOS_ARM=1 126 | - uses: actions/upload-artifact@v3 127 | with: 128 | name: sqlite-regex-macos-arm-wheels 129 | path: dist/release/wheels/*.whl 130 | build-windows-extension: 131 | name: Build windows-latest 132 | runs-on: windows-latest 133 | steps: 134 | - uses: actions/checkout@v2 135 | - uses: actions/cache@v3 136 | with: 137 | path: | 138 | ~/.cargo/bin/ 139 | ~/.cargo/registry/index/ 140 | ~/.cargo/registry/cache/ 141 | ~/.cargo/git/db/ 142 | target/ 143 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 144 | - uses: actions-rs/toolchain@v1 145 | with: 146 | toolchain: stable 147 | - run: make loadable-release 148 | - name: Upload artifacts 149 | uses: actions/upload-artifact@v2 150 | with: 151 | name: sqlite-regex-windows 152 | path: dist/release/regex0.dll 153 | build-windows-python: 154 | runs-on: windows-latest 155 | needs: [build-windows-extension] 156 | steps: 157 | - uses: actions/checkout@v3 158 | - name: Download workflow artifacts 159 | uses: actions/download-artifact@v3 160 | with: 161 | name: sqlite-regex-windows 162 | path: dist/release/ 163 | - uses: actions/setup-python@v3 164 | - run: pip install wheel 165 | - run: make python-release 166 | - uses: actions/upload-artifact@v3 167 | with: 168 | name: sqlite-regex-windows-wheels 169 | path: dist/release/wheels/*.whl 170 | build-datasette-sqlite-utils: 171 | runs-on: ubuntu-20.04 172 | steps: 173 | - uses: actions/checkout@v3 174 | - uses: actions/setup-python@v3 175 | - run: pip install wheel build 176 | - run: make datasette-release sqlite-utils-release 177 | - uses: actions/upload-artifact@v3 178 | with: 179 | name: sqlite-regex-datasette-sqlite-utils-wheels 180 | path: dist/release/wheels/*.whl 181 | upload-crate: 182 | runs-on: ubuntu-latest 183 | steps: 184 | - uses: actions/checkout@v2 185 | - uses: actions-rs/toolchain@v1 186 | with: 187 | toolchain: stable 
188 | - run: cargo publish 189 | env: 190 | CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} 191 | upload-extensions: 192 | name: Upload release assets 193 | needs: 194 | [ 195 | build-macos-extension, 196 | build-macos-arm-extension, 197 | build-ubuntu-extension, 198 | build-windows-extension, 199 | ] 200 | permissions: 201 | contents: write 202 | runs-on: ubuntu-latest 203 | steps: 204 | - uses: actions/checkout@v2 205 | - uses: actions/download-artifact@v2 206 | - uses: asg017/upload-spm@main 207 | id: upload-spm 208 | with: 209 | name: sqlite-regex 210 | github-token: ${{ secrets.GITHUB_TOKEN }} 211 | platforms: | 212 | linux-x86_64: sqlite-regex-ubuntu/* 213 | macos-x86_64: sqlite-regex-macos/* 214 | macos-aarch64: sqlite-regex-macos-arm/* 215 | windows-x86_64: sqlite-regex-windows/* 216 | upload-deno: 217 | name: Upload Deno release assets 218 | needs: 219 | [ 220 | build-macos-extension, 221 | build-macos-arm-extension, 222 | build-ubuntu-extension, 223 | build-windows-extension, 224 | ] 225 | permissions: 226 | contents: write 227 | runs-on: ubuntu-latest 228 | steps: 229 | - uses: actions/checkout@v3 230 | - name: Download workflow artifacts 231 | uses: actions/download-artifact@v2 232 | - uses: actions/github-script@v6 233 | with: 234 | github-token: ${{ secrets.GITHUB_TOKEN }} 235 | script: | 236 | const script = require('.github/workflows/upload-deno-assets.js') 237 | await script({github, context}) 238 | upload-npm: 239 | needs: 240 | [ 241 | build-macos-extension, 242 | build-macos-arm-extension, 243 | build-ubuntu-extension, 244 | build-windows-extension, 245 | ] 246 | runs-on: ubuntu-latest 247 | steps: 248 | - uses: actions/checkout@v3 249 | - name: Download workflow artifacts 250 | uses: actions/download-artifact@v2 251 | - run: | 252 | cp sqlite-regex-ubuntu/regex0.so npm/sqlite-regex-linux-x64/lib/regex0.so 253 | cp sqlite-regex-macos/regex0.dylib npm/sqlite-regex-darwin-x64/lib/regex0.dylib 254 | cp sqlite-regex-macos-arm/regex0.dylib 
npm/sqlite-regex-darwin-arm64/lib/regex0.dylib 255 | cp sqlite-regex-windows/regex0.dll npm/sqlite-regex-windows-x64/lib/regex0.dll 256 | - name: Install node 257 | uses: actions/setup-node@v3 258 | with: 259 | node-version: "16" 260 | registry-url: "https://registry.npmjs.org" 261 | - name: Publish NPM sqlite-regex-linux-x64 262 | working-directory: npm/sqlite-regex-linux-x64 263 | run: npm publish --access public 264 | env: 265 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 266 | - name: Publish NPM sqlite-regex-darwin-x64 267 | working-directory: npm/sqlite-regex-darwin-x64 268 | run: npm publish --access public 269 | env: 270 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 271 | - name: Publish NPM sqlite-regex-darwin-arm64 272 | working-directory: npm/sqlite-regex-darwin-arm64 273 | run: npm publish --access public 274 | env: 275 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 276 | - name: Publish NPM sqlite-regex-windows-x64 277 | working-directory: npm/sqlite-regex-windows-x64 278 | run: npm publish --access public 279 | env: 280 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 281 | - name: Publish NPM sqlite-regex 282 | working-directory: npm/sqlite-regex 283 | run: npm publish --access public 284 | env: 285 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 286 | upload-gem: 287 | needs: 288 | [ 289 | build-macos-extension, 290 | build-macos-arm-extension, 291 | build-ubuntu-extension, 292 | build-windows-extension, 293 | ] 294 | permissions: 295 | contents: write 296 | runs-on: ubuntu-latest 297 | steps: 298 | - uses: actions/checkout@v2 299 | - uses: actions/download-artifact@v2 300 | - uses: ruby/setup-ruby@v1 301 | with: 302 | ruby-version: 3.2 303 | - run: | 304 | rm bindings/ruby/lib/*.{dylib,so,dll} || true 305 | cp sqlite-regex-macos/*.dylib bindings/ruby/lib 306 | gem -C bindings/ruby build -o x86_64-darwin.gem sqlite_regex.gemspec 307 | env: 308 | PLATFORM: x86_64-darwin 309 | - run: | 310 | rm bindings/ruby/lib/*.{dylib,so,dll} || true 311 | cp 
sqlite-regex-macos-arm/*.dylib bindings/ruby/lib 312 | gem -C bindings/ruby build -o arm64-darwin.gem sqlite_regex.gemspec 313 | env: 314 | PLATFORM: arm64-darwin 315 | - run: | 316 | rm bindings/ruby/lib/*.{dylib,so,dll} || true 317 | cp sqlite-regex-ubuntu/*.so bindings/ruby/lib 318 | gem -C bindings/ruby build -o x86_64-linux.gem sqlite_regex.gemspec 319 | env: 320 | PLATFORM: x86_64-linux 321 | - run: | 322 | rm bindings/ruby/lib/*.{dylib,so,dll} || true 323 | cp sqlite-regex-windows/*.dll bindings/ruby/lib 324 | gem -C bindings/ruby build -o ${{ env.PLATFORM }}.gem sqlite_regex.gemspec 325 | env: 326 | PLATFORM: x64-mingw32 327 | - run: | 328 | gem push bindings/ruby/x86_64-linux.gem 329 | gem push bindings/ruby/x86_64-darwin.gem 330 | gem push bindings/ruby/arm64-darwin.gem 331 | gem push bindings/ruby/x64-mingw32.gem 332 | env: 333 | GEM_HOST_API_KEY: ${{ secrets.GEM_HOST_API_KEY }} 334 | upload-pypi: 335 | needs: 336 | [ 337 | build-ubuntu-python, 338 | build-macos-python, 339 | build-macos-arm-python, 340 | build-windows-python, 341 | build-datasette-sqlite-utils, 342 | ] 343 | runs-on: ubuntu-latest 344 | steps: 345 | - uses: actions/download-artifact@v3 346 | with: 347 | name: sqlite-regex-windows-wheels 348 | path: dist 349 | - uses: actions/download-artifact@v3 350 | with: 351 | name: sqlite-regex-ubuntu-wheels 352 | path: dist 353 | - uses: actions/download-artifact@v3 354 | with: 355 | name: sqlite-regex-macos-wheels 356 | path: dist 357 | - uses: actions/download-artifact@v3 358 | with: 359 | name: sqlite-regex-macos-arm-wheels 360 | path: dist 361 | - uses: actions/download-artifact@v3 362 | with: 363 | name: sqlite-regex-datasette-sqlite-utils-wheels 364 | path: dist 365 | - uses: pypa/gh-action-pypi-publish@release/v1 366 | with: 367 | password: ${{ secrets.PYPI_API_TOKEN }} 368 | skip_existing: true 369 | -------------------------------------------------------------------------------- /.github/workflows/rename-wheels.py: 
-------------------------------------------------------------------------------- 1 | # This file is a small utility that renames all .whl files in a given directory 2 | # and "generalizes" them. The wheels made by python/sqlite_regex contain the 3 | # pre-compiled sqlite extension, but those aren't bound to a specific Python 4 | # runtime or version, as other wheels might be. So, this file will rename 5 | # those wheels to be "generalized", like replacing "cp37-cp37" with "py3-none". 6 | import sys 7 | import os 8 | from pathlib import Path 9 | 10 | wheel_dir = sys.argv[1] 11 | 12 | is_macos_arm_build = '--is-macos-arm' in sys.argv 13 | 14 | for filename in os.listdir(wheel_dir): 15 | filename = Path(wheel_dir, filename) 16 | if not filename.suffix == '.whl': 17 | continue 18 | new_filename = (filename.name 19 | .replace('cp37-cp37', 'py3-none') 20 | .replace('cp38-cp38', 'py3-none') 21 | .replace('cp39-cp39', 'py3-none') 22 | .replace('cp310-cp310', 'py3-none') 23 | .replace('cp311-cp311', 'py3-none') 24 | .replace('linux_x86_64', 'manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux1_x86_64') 25 | 26 | 27 | ) 28 | if is_macos_arm_build: 29 | new_filename = new_filename.replace('macosx_12_0_universal2', 'macosx_11_0_arm64') 30 | else: 31 | new_filename = (new_filename 32 | .replace('macosx_12_0_universal2', 'macosx_10_6_x86_64') 33 | .replace('macosx_12_0_x86_64', 'macosx_10_6_x86_64') 34 | ) 35 | 36 | os.rename(filename, Path(wheel_dir, new_filename)) -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: "build" 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - pip-install 7 | permissions: 8 | contents: read 9 | jobs: 10 | build-ubuntu-extension: 11 | name: Building ubuntu 12 | runs-on: ubuntu-20.04 13 | steps: 14 | - uses: actions/checkout@v3 15 | - uses: actions/cache@v3 16 | with: 17 | path: | 18 | ~/.cargo/bin/ 19 | 
~/.cargo/registry/index/ 20 | ~/.cargo/registry/cache/ 21 | ~/.cargo/git/db/ 22 | target/ 23 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 24 | - uses: actions-rs/toolchain@v1 25 | with: 26 | toolchain: stable 27 | - run: make loadable 28 | - name: Upload artifacts 29 | uses: actions/upload-artifact@v3 30 | with: 31 | name: sqlite-regex-ubuntu 32 | path: dist/debug/regex0.so 33 | build-ubuntu-python: 34 | runs-on: ubuntu-20.04 35 | needs: [build-ubuntu-extension] 36 | steps: 37 | - uses: actions/checkout@v3 38 | - name: Download workflow artifacts 39 | uses: actions/download-artifact@v3 40 | with: 41 | name: sqlite-regex-ubuntu 42 | path: dist/debug/ 43 | - uses: actions/setup-python@v3 44 | - run: pip install wheel 45 | - run: make python 46 | - run: make datasette 47 | - uses: actions/upload-artifact@v3 48 | with: 49 | name: sqlite-regex-ubuntu-wheels 50 | path: dist/debug/wheels/*.whl 51 | test-ubuntu: 52 | runs-on: ubuntu-20.04 53 | needs: [build-ubuntu-extension, build-ubuntu-python] 54 | env: 55 | DENO_DIR: deno_cache 56 | steps: 57 | - uses: actions/checkout@v3 58 | - uses: actions/download-artifact@v3 59 | with: 60 | name: sqlite-regex-ubuntu 61 | path: dist/debug/ 62 | - uses: actions/download-artifact@v3 63 | with: 64 | name: sqlite-regex-ubuntu 65 | path: npm/sqlite-regex-linux-x64/lib 66 | - uses: actions/download-artifact@v3 67 | with: 68 | name: sqlite-regex-ubuntu-wheels 69 | path: dist/debug/ 70 | - run: pip install --find-links dist/debug/ sqlite_regex 71 | - run: make test-loadable 72 | - run: make test-python 73 | # for test-npm 74 | - uses: actions/setup-node@v3 75 | with: 76 | cache: "npm" 77 | cache-dependency-path: npm/sqlite-regex/package.json 78 | - run: npm install 79 | working-directory: npm/sqlite-regex 80 | - run: make test-npm 81 | # for test-deno 82 | - uses: denoland/setup-deno@v1 83 | with: 84 | deno-version: v1.30 85 | - name: Cache Deno dependencies 86 | uses: actions/cache@v3 87 | with: 88 | path: ${{ 
env.DENO_DIR }} 89 | key: ${{ runner.os }}-${{ hashFiles('deno/deno.lock') }} 90 | - run: make test-deno 91 | env: 92 | DENO_SQLITE_REGEX_PATH: ${{ github.workspace }}/dist/debug/regex0 93 | build-macos-extension: 94 | name: Building macos-latest 95 | runs-on: macos-latest 96 | steps: 97 | - uses: actions/checkout@v3 98 | - uses: actions/cache@v3 99 | with: 100 | path: | 101 | ~/.cargo/bin/ 102 | ~/.cargo/registry/index/ 103 | ~/.cargo/registry/cache/ 104 | ~/.cargo/git/db/ 105 | target/ 106 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 107 | - uses: actions-rs/toolchain@v1 108 | with: 109 | toolchain: stable 110 | - run: make loadable 111 | - name: Upload artifacts 112 | uses: actions/upload-artifact@v3 113 | with: 114 | name: sqlite-regex-macos 115 | path: dist/debug/regex0.dylib 116 | build-macos-python: 117 | runs-on: macos-latest 118 | needs: [build-macos-extension] 119 | steps: 120 | - uses: actions/checkout@v3 121 | - name: Download workflow artifacts 122 | uses: actions/download-artifact@v3 123 | with: 124 | name: sqlite-regex-macos 125 | path: dist/debug/ 126 | - uses: actions/setup-python@v3 127 | - run: pip install wheel 128 | - run: make python 129 | - run: make datasette 130 | - uses: actions/upload-artifact@v3 131 | with: 132 | name: sqlite-regex-macos-wheels 133 | path: dist/debug/wheels/*.whl 134 | test-macos: 135 | runs-on: macos-latest 136 | needs: [build-macos-extension, build-macos-python] 137 | env: 138 | DENO_DIR: deno_cache 139 | steps: 140 | - uses: actions/checkout@v3 141 | - uses: actions/download-artifact@v3 142 | with: 143 | name: sqlite-regex-macos 144 | path: dist/debug/ 145 | - uses: actions/download-artifact@v3 146 | with: 147 | name: sqlite-regex-macos 148 | path: npm/sqlite-regex-darwin-x64/lib 149 | - uses: actions/download-artifact@v3 150 | with: 151 | name: sqlite-regex-macos-wheels 152 | path: dist/debug/ 153 | - run: brew install python 154 | - run: /usr/local/opt/python@3/libexec/bin/pip install --find-links 
dist/debug/ sqlite_regex 155 | - run: make test-loadable python=/usr/local/opt/python@3/libexec/bin/python 156 | - run: make test-python python=/usr/local/opt/python@3/libexec/bin/python 157 | # for test-npm 158 | - uses: actions/setup-node@v3 159 | with: 160 | cache: "npm" 161 | cache-dependency-path: npm/sqlite-regex/package.json 162 | - run: npm install 163 | working-directory: npm/sqlite-regex 164 | - run: make test-npm 165 | # for test-deno 166 | - uses: denoland/setup-deno@v1 167 | with: 168 | deno-version: v1.30 169 | - name: Cache Deno dependencies 170 | uses: actions/cache@v3 171 | with: 172 | path: ${{ env.DENO_DIR }} 173 | key: ${{ runner.os }}-${{ hashFiles('deno/deno.lock') }} 174 | - run: make test-deno 175 | env: 176 | DENO_SQLITE_REGEX_PATH: ${{ github.workspace }}/dist/debug/regex0 177 | build-macos-arm-extension: 178 | name: Building macos arm extension 179 | runs-on: macos-latest 180 | steps: 181 | - uses: actions/checkout@v3 182 | - uses: actions/cache@v3 183 | with: 184 | path: | 185 | ~/.cargo/bin/ 186 | ~/.cargo/registry/index/ 187 | ~/.cargo/registry/cache/ 188 | ~/.cargo/git/db/ 189 | target/ 190 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 191 | - uses: actions-rs/toolchain@v1 192 | with: 193 | toolchain: stable 194 | - run: rustup target add aarch64-apple-darwin 195 | - run: make loadable target=aarch64-apple-darwin 196 | - name: Upload artifacts 197 | uses: actions/upload-artifact@v3 198 | with: 199 | name: sqlite-regex-macos-arm 200 | path: dist/debug/regex0.dylib 201 | build-macos-arm-python: 202 | runs-on: macos-latest 203 | needs: [build-macos-arm-extension] 204 | steps: 205 | - uses: actions/checkout@v3 206 | - name: Download workflow artifacts 207 | uses: actions/download-artifact@v3 208 | with: 209 | name: sqlite-regex-macos-arm 210 | path: dist/debug/ 211 | - uses: actions/setup-python@v3 212 | - run: pip install wheel 213 | - run: make python IS_MACOS_ARM=1 214 | - run: make datasette 215 | - uses: 
actions/upload-artifact@v3 216 | with: 217 | name: sqlite-regex-macos-arm-wheels 218 | path: dist/debug/wheels/*.whl 219 | build-windows-extension: 220 | name: Building windows extension 221 | runs-on: windows-latest 222 | steps: 223 | - uses: actions/checkout@v3 224 | - uses: actions/cache@v3 225 | with: 226 | path: | 227 | ~/.cargo/bin/ 228 | ~/.cargo/registry/index/ 229 | ~/.cargo/registry/cache/ 230 | ~/.cargo/git/db/ 231 | target/ 232 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 233 | - uses: actions-rs/toolchain@v1 234 | with: 235 | toolchain: stable 236 | - run: make loadable 237 | - name: Upload artifacts 238 | uses: actions/upload-artifact@v3 239 | with: 240 | name: sqlite-regex-windows 241 | path: dist/debug/regex0.dll 242 | build-windows-python: 243 | runs-on: windows-latest 244 | needs: [build-windows-extension] 245 | steps: 246 | - uses: actions/checkout@v3 247 | - name: Download workflow artifacts 248 | uses: actions/download-artifact@v3 249 | with: 250 | name: sqlite-regex-windows 251 | path: dist/debug/ 252 | - uses: actions/setup-python@v3 253 | - run: pip install wheel 254 | - run: make python 255 | - run: make datasette 256 | - uses: actions/upload-artifact@v3 257 | with: 258 | name: sqlite-regex-windows-wheels 259 | path: dist/debug/wheels/*.whl 260 | test-windows: 261 | runs-on: windows-latest 262 | needs: [build-windows-extension, build-windows-python] 263 | env: 264 | DENO_DIR: deno_cache 265 | steps: 266 | - uses: actions/checkout@v3 267 | - uses: actions/download-artifact@v3 268 | with: 269 | name: sqlite-regex-windows 270 | path: dist/debug/ 271 | - uses: actions/download-artifact@v3 272 | with: 273 | name: sqlite-regex-windows 274 | path: npm/sqlite-regex-windows-x64/lib 275 | - uses: actions/download-artifact@v3 276 | with: 277 | name: sqlite-regex-windows-wheels 278 | path: dist/debug/ 279 | - run: pip install --find-links dist/debug/ sqlite_regex 280 | - run: make test-loadable 281 | - run: make test-python 282 | # 
for test-npm 283 | - uses: actions/setup-node@v3 284 | with: 285 | cache: "npm" 286 | cache-dependency-path: npm/sqlite-regex/package.json 287 | - run: npm install 288 | working-directory: npm/sqlite-regex 289 | - run: make test-npm 290 | # for test-deno 291 | - uses: denoland/setup-deno@v1 292 | with: 293 | deno-version: v1.30 294 | - name: Cache Deno dependencies 295 | uses: actions/cache@v3 296 | with: 297 | path: ${{ env.DENO_DIR }} 298 | key: ${{ runner.os }}-${{ hashFiles('deno/deno.lock') }} 299 | - run: make test-deno 300 | env: 301 | DENO_SQLITE_REGEX_PATH: ${{ github.workspace }}/dist/debug/regex0 302 | upload_test_pypi: 303 | if: ${{ contains(github.event.head_commit.message, '@test_pypi') }} 304 | needs: [test-ubuntu, test-macos, test-windows, build-macos-arm-python] 305 | runs-on: ubuntu-latest 306 | steps: 307 | - uses: actions/download-artifact@v3 308 | with: 309 | name: sqlite-regex-windows-wheels 310 | path: dist 311 | - uses: actions/download-artifact@v3 312 | with: 313 | name: sqlite-regex-ubuntu-wheels 314 | path: dist 315 | - uses: actions/download-artifact@v3 316 | with: 317 | name: sqlite-regex-macos-wheels 318 | path: dist 319 | - uses: actions/download-artifact@v3 320 | with: 321 | name: sqlite-regex-macos-arm-wheels 322 | path: dist 323 | - uses: pypa/gh-action-pypi-publish@release/v1 324 | with: 325 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 326 | repository_url: https://test.pypi.org/legacy/ 327 | skip_existing: true 328 | -------------------------------------------------------------------------------- /.github/workflows/upload-deno-assets.js: -------------------------------------------------------------------------------- 1 | const fs = require("fs").promises; 2 | 3 | module.exports = async ({ github, context }) => { 4 | const { 5 | repo: { owner, repo }, 6 | sha, 7 | } = context; 8 | console.log(process.env.GITHUB_REF); 9 | const release = await github.rest.repos.getReleaseByTag({ 10 | owner, 11 | repo, 12 | tag: 
process.env.GITHUB_REF.replace("refs/tags/", ""), 13 | }); 14 | console.log("release id: ", release.data.id); 15 | const release_id = release.data.id; 16 | 17 | const compiled_extensions = [ 18 | { 19 | path: "sqlite-regex-macos-arm/regex0.dylib", 20 | name: "deno-darwin-aarch64.regex0.dylib", 21 | }, 22 | { 23 | path: "sqlite-regex-macos/regex0.dylib", 24 | name: "deno-darwin-x86_64.regex0.dylib", 25 | }, 26 | { 27 | path: "sqlite-regex-ubuntu/regex0.so", 28 | name: "deno-linux-x86_64.regex0.so", 29 | }, 30 | { 31 | path: "sqlite-regex-windows/regex0.dll", 32 | name: "deno-windows-x86_64.regex0.dll", 33 | }, 34 | ]; 35 | await Promise.all( 36 | compiled_extensions.map(async ({ name, path }) => { 37 | return github.rest.repos.uploadReleaseAsset({ 38 | owner, 39 | repo, 40 | release_id, 41 | name, 42 | data: await fs.readFile(path), 43 | }); 44 | }) 45 | ); 46 | }; 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist/ 2 | target/ 3 | benchmarks/*.sqlite 4 | benchmarks/*.duckdb 5 | benchmarks/*.dylib 6 | benchmarks/regexp.c 7 | benchmarks/sqlite3-re.c 8 | benchmarks/re.c 9 | benchmarks/re.h 10 | *.db 11 | *.dylib 12 | *.so 13 | *.dll 14 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "0.7.19" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "atty" 16 | version = "0.2.14" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 19 | dependencies = [ 20 | "hermit-abi", 21 | "libc", 22 | "winapi", 23 | ] 24 | 25 | [[package]] 26 | name = "autocfg" 27 | version = "1.1.0" 28 | source = "registry+https://github.com/rust-lang/crates.io-index" 29 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 30 | 31 | [[package]] 32 | name = "bindgen" 33 | version = "0.60.1" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6" 36 | dependencies = [ 37 | "bitflags", 38 | "cexpr", 39 | "clang-sys", 40 | "clap", 41 | "env_logger", 42 | "lazy_static", 43 | "lazycell", 44 | "log", 45 | "peeking_take_while", 46 | "proc-macro2", 47 | "quote", 48 | "regex", 49 | "rustc-hash", 50 | "shlex", 51 | "which", 52 | ] 53 | 54 | [[package]] 55 | name = "bitflags" 56 | version = "1.3.2" 57 | source = "registry+https://github.com/rust-lang/crates.io-index" 58 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 59 | 60 | [[package]] 61 | name = "cc" 62 | version = "1.0.83" 63 | source = "registry+https://github.com/rust-lang/crates.io-index" 64 | checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" 65 | dependencies = [ 66 | "libc", 67 | ] 68 | 69 | [[package]] 70 | name = "cexpr" 71 | version = "0.6.0" 72 | source = "registry+https://github.com/rust-lang/crates.io-index" 73 | checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" 74 | 
dependencies = [ 75 | "nom", 76 | ] 77 | 78 | [[package]] 79 | name = "cfg-if" 80 | version = "1.0.0" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 83 | 84 | [[package]] 85 | name = "clang-sys" 86 | version = "1.4.0" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | checksum = "fa2e27ae6ab525c3d369ded447057bca5438d86dc3a68f6faafb8269ba82ebf3" 89 | dependencies = [ 90 | "glob", 91 | "libc", 92 | "libloading", 93 | ] 94 | 95 | [[package]] 96 | name = "clap" 97 | version = "3.2.22" 98 | source = "registry+https://github.com/rust-lang/crates.io-index" 99 | checksum = "86447ad904c7fb335a790c9d7fe3d0d971dc523b8ccd1561a520de9a85302750" 100 | dependencies = [ 101 | "atty", 102 | "bitflags", 103 | "clap_lex", 104 | "indexmap", 105 | "strsim", 106 | "termcolor", 107 | "textwrap", 108 | ] 109 | 110 | [[package]] 111 | name = "clap_lex" 112 | version = "0.2.4" 113 | source = "registry+https://github.com/rust-lang/crates.io-index" 114 | checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" 115 | dependencies = [ 116 | "os_str_bytes", 117 | ] 118 | 119 | [[package]] 120 | name = "either" 121 | version = "1.8.0" 122 | source = "registry+https://github.com/rust-lang/crates.io-index" 123 | checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" 124 | 125 | [[package]] 126 | name = "env_logger" 127 | version = "0.9.1" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "c90bf5f19754d10198ccb95b70664fc925bd1fc090a0fd9a6ebc54acc8cd6272" 130 | dependencies = [ 131 | "atty", 132 | "humantime", 133 | "log", 134 | "regex", 135 | "termcolor", 136 | ] 137 | 138 | [[package]] 139 | name = "glob" 140 | version = "0.3.0" 141 | source = "registry+https://github.com/rust-lang/crates.io-index" 142 | checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" 143 | 144 
| [[package]] 145 | name = "hashbrown" 146 | version = "0.12.3" 147 | source = "registry+https://github.com/rust-lang/crates.io-index" 148 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 149 | 150 | [[package]] 151 | name = "hermit-abi" 152 | version = "0.1.19" 153 | source = "registry+https://github.com/rust-lang/crates.io-index" 154 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 155 | dependencies = [ 156 | "libc", 157 | ] 158 | 159 | [[package]] 160 | name = "humantime" 161 | version = "2.1.0" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 164 | 165 | [[package]] 166 | name = "indexmap" 167 | version = "1.9.1" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" 170 | dependencies = [ 171 | "autocfg", 172 | "hashbrown", 173 | ] 174 | 175 | [[package]] 176 | name = "itoa" 177 | version = "1.0.4" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" 180 | 181 | [[package]] 182 | name = "lazy_static" 183 | version = "1.4.0" 184 | source = "registry+https://github.com/rust-lang/crates.io-index" 185 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 186 | 187 | [[package]] 188 | name = "lazycell" 189 | version = "1.3.0" 190 | source = "registry+https://github.com/rust-lang/crates.io-index" 191 | checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" 192 | 193 | [[package]] 194 | name = "libc" 195 | version = "0.2.134" 196 | source = "registry+https://github.com/rust-lang/crates.io-index" 197 | checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb" 198 | 199 | [[package]] 200 | name = "libloading" 201 | version = "0.7.3" 202 
| source = "registry+https://github.com/rust-lang/crates.io-index" 203 | checksum = "efbc0f03f9a775e9f6aed295c6a1ba2253c5757a9e03d55c6caa46a681abcddd" 204 | dependencies = [ 205 | "cfg-if", 206 | "winapi", 207 | ] 208 | 209 | [[package]] 210 | name = "log" 211 | version = "0.4.17" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" 214 | dependencies = [ 215 | "cfg-if", 216 | ] 217 | 218 | [[package]] 219 | name = "memchr" 220 | version = "2.5.0" 221 | source = "registry+https://github.com/rust-lang/crates.io-index" 222 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 223 | 224 | [[package]] 225 | name = "minimal-lexical" 226 | version = "0.2.1" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 229 | 230 | [[package]] 231 | name = "nom" 232 | version = "7.1.1" 233 | source = "registry+https://github.com/rust-lang/crates.io-index" 234 | checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" 235 | dependencies = [ 236 | "memchr", 237 | "minimal-lexical", 238 | ] 239 | 240 | [[package]] 241 | name = "once_cell" 242 | version = "1.15.0" 243 | source = "registry+https://github.com/rust-lang/crates.io-index" 244 | checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" 245 | 246 | [[package]] 247 | name = "os_str_bytes" 248 | version = "6.3.0" 249 | source = "registry+https://github.com/rust-lang/crates.io-index" 250 | checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff" 251 | 252 | [[package]] 253 | name = "peeking_take_while" 254 | version = "0.1.2" 255 | source = "registry+https://github.com/rust-lang/crates.io-index" 256 | checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" 257 | 258 | [[package]] 259 | name = "proc-macro2" 260 | version 
= "1.0.66" 261 | source = "registry+https://github.com/rust-lang/crates.io-index" 262 | checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" 263 | dependencies = [ 264 | "unicode-ident", 265 | ] 266 | 267 | [[package]] 268 | name = "quote" 269 | version = "1.0.21" 270 | source = "registry+https://github.com/rust-lang/crates.io-index" 271 | checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" 272 | dependencies = [ 273 | "proc-macro2", 274 | ] 275 | 276 | [[package]] 277 | name = "regex" 278 | version = "1.6.0" 279 | source = "registry+https://github.com/rust-lang/crates.io-index" 280 | checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" 281 | dependencies = [ 282 | "aho-corasick", 283 | "memchr", 284 | "regex-syntax", 285 | ] 286 | 287 | [[package]] 288 | name = "regex-syntax" 289 | version = "0.6.27" 290 | source = "registry+https://github.com/rust-lang/crates.io-index" 291 | checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" 292 | 293 | [[package]] 294 | name = "rustc-hash" 295 | version = "1.1.0" 296 | source = "registry+https://github.com/rust-lang/crates.io-index" 297 | checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" 298 | 299 | [[package]] 300 | name = "ryu" 301 | version = "1.0.11" 302 | source = "registry+https://github.com/rust-lang/crates.io-index" 303 | checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" 304 | 305 | [[package]] 306 | name = "serde" 307 | version = "1.0.147" 308 | source = "registry+https://github.com/rust-lang/crates.io-index" 309 | checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" 310 | dependencies = [ 311 | "serde_derive", 312 | ] 313 | 314 | [[package]] 315 | name = "serde_derive" 316 | version = "1.0.147" 317 | source = "registry+https://github.com/rust-lang/crates.io-index" 318 | checksum = 
"4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" 319 | dependencies = [ 320 | "proc-macro2", 321 | "quote", 322 | "syn", 323 | ] 324 | 325 | [[package]] 326 | name = "serde_json" 327 | version = "1.0.87" 328 | source = "registry+https://github.com/rust-lang/crates.io-index" 329 | checksum = "6ce777b7b150d76b9cf60d28b55f5847135a003f7d7350c6be7a773508ce7d45" 330 | dependencies = [ 331 | "itoa", 332 | "ryu", 333 | "serde", 334 | ] 335 | 336 | [[package]] 337 | name = "shlex" 338 | version = "1.1.0" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" 341 | 342 | [[package]] 343 | name = "sqlite-loadable" 344 | version = "0.0.6-alpha.6" 345 | source = "registry+https://github.com/rust-lang/crates.io-index" 346 | checksum = "daaaad0ad506b154a72bf01fde23235377c01256abd4bd25e17419dbfd4e28a0" 347 | dependencies = [ 348 | "bitflags", 349 | "serde", 350 | "serde_json", 351 | "sqlite-loadable-macros", 352 | "sqlite3ext-sys", 353 | ] 354 | 355 | [[package]] 356 | name = "sqlite-loadable-macros" 357 | version = "0.0.3" 358 | source = "registry+https://github.com/rust-lang/crates.io-index" 359 | checksum = "96037a396115a2675db783f700faad878b44c8ff56c8a29c3404649a517a5e8f" 360 | dependencies = [ 361 | "proc-macro2", 362 | "quote", 363 | "syn", 364 | ] 365 | 366 | [[package]] 367 | name = "sqlite-regex" 368 | version = "0.2.4-alpha.1" 369 | dependencies = [ 370 | "regex", 371 | "sqlite-loadable", 372 | ] 373 | 374 | [[package]] 375 | name = "sqlite3ext-sys" 376 | version = "0.0.1" 377 | source = "registry+https://github.com/rust-lang/crates.io-index" 378 | checksum = "3afdc2b3dc08f16d6eecf8aa07d19975a268603ab1cca67d3f9b4172c507cf16" 379 | dependencies = [ 380 | "bindgen", 381 | "cc", 382 | ] 383 | 384 | [[package]] 385 | name = "strsim" 386 | version = "0.10.0" 387 | source = "registry+https://github.com/rust-lang/crates.io-index" 388 | checksum = 
"73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 389 | 390 | [[package]] 391 | name = "syn" 392 | version = "1.0.102" 393 | source = "registry+https://github.com/rust-lang/crates.io-index" 394 | checksum = "3fcd952facd492f9be3ef0d0b7032a6e442ee9b361d4acc2b1d0c4aaa5f613a1" 395 | dependencies = [ 396 | "proc-macro2", 397 | "quote", 398 | "unicode-ident", 399 | ] 400 | 401 | [[package]] 402 | name = "termcolor" 403 | version = "1.1.3" 404 | source = "registry+https://github.com/rust-lang/crates.io-index" 405 | checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" 406 | dependencies = [ 407 | "winapi-util", 408 | ] 409 | 410 | [[package]] 411 | name = "textwrap" 412 | version = "0.15.1" 413 | source = "registry+https://github.com/rust-lang/crates.io-index" 414 | checksum = "949517c0cf1bf4ee812e2e07e08ab448e3ae0d23472aee8a06c985f0c8815b16" 415 | 416 | [[package]] 417 | name = "unicode-ident" 418 | version = "1.0.4" 419 | source = "registry+https://github.com/rust-lang/crates.io-index" 420 | checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" 421 | 422 | [[package]] 423 | name = "which" 424 | version = "4.3.0" 425 | source = "registry+https://github.com/rust-lang/crates.io-index" 426 | checksum = "1c831fbbee9e129a8cf93e7747a82da9d95ba8e16621cae60ec2cdc849bacb7b" 427 | dependencies = [ 428 | "either", 429 | "libc", 430 | "once_cell", 431 | ] 432 | 433 | [[package]] 434 | name = "winapi" 435 | version = "0.3.9" 436 | source = "registry+https://github.com/rust-lang/crates.io-index" 437 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 438 | dependencies = [ 439 | "winapi-i686-pc-windows-gnu", 440 | "winapi-x86_64-pc-windows-gnu", 441 | ] 442 | 443 | [[package]] 444 | name = "winapi-i686-pc-windows-gnu" 445 | version = "0.4.0" 446 | source = "registry+https://github.com/rust-lang/crates.io-index" 447 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 
448 | 449 | [[package]] 450 | name = "winapi-util" 451 | version = "0.1.5" 452 | source = "registry+https://github.com/rust-lang/crates.io-index" 453 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 454 | dependencies = [ 455 | "winapi", 456 | ] 457 | 458 | [[package]] 459 | name = "winapi-x86_64-pc-windows-gnu" 460 | version = "0.4.0" 461 | source = "registry+https://github.com/rust-lang/crates.io-index" 462 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 463 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sqlite-regex" 3 | version = "0.2.4-alpha.1" 4 | edition = "2021" 5 | authors = ["Alex Garcia "] 6 | description = "A SQLite extension for working with regular expressions" 7 | homepage = "https://github.com/asg017/sqlite-regex" 8 | repository = "https://github.com/asg017/sqlite-regex" 9 | keywords = ["sqlite", "sqlite-extension"] 10 | license = "MIT/Apache-2.0" 11 | 12 | [dependencies] 13 | sqlite-loadable = "0.0.6-alpha.6" 14 | regex = "1" 15 | 16 | [lib] 17 | crate-type=["lib", "cdylib", "staticlib"] 18 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Alex Garcia 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: --------------------------------------------------------------------------------
# Build, packaging, versioning and test entry points for sqlite-regex.
#
# Optional command-line variables:
#   target=<triple>   forwarded to cargo as --target=<triple>
#   python=<exe>      interpreter used by the test-* targets (default python3)
#   IS_MACOS_ARM=1    passes --is-macos-arm to the wheel-renaming script

# Version string shared by every generated package manifest.
VERSION=$(shell cat VERSION)

# Host platform detection: exactly one CONFIG_* flag ends up defined.
ifeq ($(shell uname -s),Darwin)
CONFIG_DARWIN=y
else ifeq ($(OS),Windows_NT)
CONFIG_WINDOWS=y
else
CONFIG_LINUX=y
endif

# Prefix and extension of the shared library cargo emits on this platform.
LIBRARY_PREFIX=lib
ifdef CONFIG_DARWIN
LOADABLE_EXTENSION=dylib
endif

ifdef CONFIG_LINUX
LOADABLE_EXTENSION=so
endif


ifdef CONFIG_WINDOWS
LOADABLE_EXTENSION=dll
LIBRARY_PREFIX=
endif

# Extension of the static archive referenced by BUILT_LOCATION_STATIC*.
# It was previously undefined, which left those paths ending in a bare ".".
# NOTE(review): "a" matches the regex0.a targets below; an MSVC toolchain
# names the archive differently, so confirm before building static on Windows.
STATIC_EXTENSION=a

# Everything this Makefile produces is collected under $(prefix).
prefix=dist
TARGET_LOADABLE=$(prefix)/debug/regex0.$(LOADABLE_EXTENSION)
TARGET_LOADABLE_RELEASE=$(prefix)/release/regex0.$(LOADABLE_EXTENSION)

TARGET_STATIC=$(prefix)/debug/regex0.a
TARGET_STATIC_RELEASE=$(prefix)/release/regex0.a

TARGET_WHEELS=$(prefix)/debug/wheels
TARGET_WHEELS_RELEASE=$(prefix)/release/wheels

# The loadable library is copied here so it is bundled into the Python wheel.
INTERMEDIATE_PYPACKAGE_EXTENSION=python/sqlite_regex/sqlite_regex/regex0.$(LOADABLE_EXTENSION)

# Where cargo leaves its artifacts; the directory gains a <target>/ component
# when cross-compiling with target=<triple>.
ifdef target
CARGO_TARGET=--target=$(target)
BUILT_LOCATION=target/$(target)/debug/$(LIBRARY_PREFIX)sqlite_regex.$(LOADABLE_EXTENSION)
BUILT_LOCATION_RELEASE=target/$(target)/release/$(LIBRARY_PREFIX)sqlite_regex.$(LOADABLE_EXTENSION)
BUILT_LOCATION_STATIC=target/$(target)/debug/libsqlite_regex.$(STATIC_EXTENSION)
BUILT_LOCATION_STATIC_RELEASE=target/$(target)/release/libsqlite_regex.$(STATIC_EXTENSION)
else
CARGO_TARGET=
BUILT_LOCATION=target/debug/$(LIBRARY_PREFIX)sqlite_regex.$(LOADABLE_EXTENSION)
BUILT_LOCATION_RELEASE=target/release/$(LIBRARY_PREFIX)sqlite_regex.$(LOADABLE_EXTENSION)
BUILT_LOCATION_STATIC=target/debug/libsqlite_regex.$(STATIC_EXTENSION)
BUILT_LOCATION_STATIC_RELEASE=target/release/libsqlite_regex.$(STATIC_EXTENSION)
endif

ifdef python
PYTHON=$(python)
else
PYTHON=python3
endif

ifdef IS_MACOS_ARM
RENAME_WHEELS_ARGS=--is-macos-arm
else
RENAME_WHEELS_ARGS=
endif

# --- Output directories -------------------------------------------------------

$(prefix):
	mkdir -p $(prefix)/debug
	mkdir -p $(prefix)/release

$(TARGET_WHEELS): $(prefix)
	mkdir -p $(TARGET_WHEELS)

$(TARGET_WHEELS_RELEASE): $(prefix)
	mkdir -p $(TARGET_WHEELS_RELEASE)

# --- Native libraries ---------------------------------------------------------
# Each rule rebuilds when any .rs file in the tree changes.

$(TARGET_LOADABLE): $(prefix) $(shell find . -type f -name '*.rs')
	cargo build $(CARGO_TARGET)
	cp $(BUILT_LOCATION) $@

$(TARGET_LOADABLE_RELEASE): $(prefix) $(shell find . -type f -name '*.rs')
	cargo build --release $(CARGO_TARGET)
	cp $(BUILT_LOCATION_RELEASE) $@

# The static targets must copy the static archive. They previously copied
# $(BUILT_LOCATION), i.e. the shared library, into regex0.a.
$(TARGET_STATIC): $(prefix) $(shell find . -type f -name '*.rs')
	cargo build $(CARGO_TARGET) --features=sqlite-loadable/static
	cp $(BUILT_LOCATION_STATIC) $@

$(TARGET_STATIC_RELEASE): $(prefix) $(shell find . -type f -name '*.rs')
	cargo build --release $(CARGO_TARGET) --features=sqlite-loadable/static
	cp $(BUILT_LOCATION_STATIC_RELEASE) $@

# --- Python wheels ------------------------------------------------------------
# "|| true" keeps the rm from failing when no earlier wheel exists.

python: $(TARGET_WHEELS) $(TARGET_LOADABLE) python/sqlite_regex/setup.py python/sqlite_regex/sqlite_regex/__init__.py .github/workflows/rename-wheels.py
	cp $(TARGET_LOADABLE) $(INTERMEDIATE_PYPACKAGE_EXTENSION)
	rm $(TARGET_WHEELS)/sqlite_regex* || true
	pip3 wheel python/sqlite_regex/ -w $(TARGET_WHEELS)
	python3 .github/workflows/rename-wheels.py $(TARGET_WHEELS) $(RENAME_WHEELS_ARGS)

python-release: $(TARGET_LOADABLE_RELEASE) $(TARGET_WHEELS_RELEASE) python/sqlite_regex/setup.py python/sqlite_regex/sqlite_regex/__init__.py .github/workflows/rename-wheels.py
	cp $(TARGET_LOADABLE_RELEASE) $(INTERMEDIATE_PYPACKAGE_EXTENSION)
	rm $(TARGET_WHEELS_RELEASE)/sqlite_regex* || true
	pip3 wheel python/sqlite_regex/ -w $(TARGET_WHEELS_RELEASE)
	python3 .github/workflows/rename-wheels.py $(TARGET_WHEELS_RELEASE) $(RENAME_WHEELS_ARGS)

datasette: $(TARGET_WHEELS) python/datasette_sqlite_regex/setup.py python/datasette_sqlite_regex/datasette_sqlite_regex/__init__.py
	rm $(TARGET_WHEELS)/datasette* || true
	pip3 wheel python/datasette_sqlite_regex/ --no-deps -w $(TARGET_WHEELS)

datasette-release: $(TARGET_WHEELS_RELEASE) python/datasette_sqlite_regex/setup.py python/datasette_sqlite_regex/datasette_sqlite_regex/__init__.py
	rm $(TARGET_WHEELS_RELEASE)/datasette* || true
	pip3 wheel python/datasette_sqlite_regex/ --no-deps -w $(TARGET_WHEELS_RELEASE)

# --- sqlite-utils plugin ------------------------------------------------------
# Manifests are rendered from their .tmpl files with the current VERSION.

bindings/sqlite-utils/pyproject.toml: bindings/sqlite-utils/pyproject.toml.tmpl VERSION
	VERSION=$(VERSION) envsubst < $< > $@
	echo "✅ generated $@"

bindings/sqlite-utils/sqlite_utils_sqlite_regex/version.py: bindings/sqlite-utils/sqlite_utils_sqlite_regex/version.py.tmpl VERSION
	VERSION=$(VERSION) envsubst < $< > $@
	echo "✅ generated $@"

sqlite-utils: $(TARGET_WHEELS) bindings/sqlite-utils/pyproject.toml bindings/sqlite-utils/sqlite_utils_sqlite_regex/version.py
	python3 -m build bindings/sqlite-utils -w -o $(TARGET_WHEELS)

# Depends on the release wheel directory because that is where it writes.
# It previously depended on the debug directory.
sqlite-utils-release: $(TARGET_WHEELS_RELEASE) bindings/sqlite-utils/pyproject.toml bindings/sqlite-utils/sqlite_utils_sqlite_regex/version.py
	python3 -m build bindings/sqlite-utils -w -o $(TARGET_WHEELS_RELEASE)

# --- Version-stamped package manifests ------------------------------------------

npm: VERSION npm/platform-package.README.md.tmpl npm/platform-package.package.json.tmpl npm/sqlite-regex/package.json.tmpl scripts/npm_generate_platform_packages.sh
	scripts/npm_generate_platform_packages.sh

deno: VERSION deno/deno.json.tmpl
	scripts/deno_generate_package.sh

Cargo.toml: VERSION
	cargo set-version `cat VERSION`

python/sqlite_regex/sqlite_regex/version.py: VERSION
	printf '__version__ = "%s"\n__version_info__ = tuple(__version__.split("."))\n' `cat VERSION` > $@

python/datasette_sqlite_regex/datasette_sqlite_regex/version.py: VERSION
	printf '__version__ = "%s"\n__version_info__ = tuple(__version__.split("."))\n' `cat VERSION` > $@

bindings/ruby/lib/version.rb: bindings/ruby/lib/version.rb.tmpl VERSION
	VERSION=$(VERSION) envsubst < $< > $@

ruby: bindings/ruby/lib/version.rb

# Regenerate every version-stamped file after VERSION changes.
# $(MAKE) rather than a literal "make" so flags such as -n and -j reach the
# sub-makes.
version:
	$(MAKE) Cargo.toml
	$(MAKE) python/sqlite_regex/sqlite_regex/version.py
	$(MAKE) python/datasette_sqlite_regex/datasette_sqlite_regex/version.py
	$(MAKE) bindings/sqlite-utils/pyproject.toml bindings/sqlite-utils/sqlite_utils_sqlite_regex/version.py
	$(MAKE) npm
	$(MAKE) deno
	$(MAKE) ruby


format:
	cargo fmt

sqlite-regex.h: cbindgen.toml
	rustup run nightly cbindgen --config $< -o $@

# --- Aggregate targets --------------------------------------------------------

loadable: $(TARGET_LOADABLE)
loadable-release: $(TARGET_LOADABLE_RELEASE)

static: $(TARGET_STATIC)
static-release: $(TARGET_STATIC_RELEASE)

debug: loadable static python datasette
# Single "release" rule. The earlier duplicate listed only the two library
# files, which loadable-release and static-release already build.
release: loadable-release static-release python-release datasette-release

# -rf because dist/ holds the debug/ and release/ directories; a plain rm
# fails on them and aborts before cargo clean runs.
clean:
	rm -rf dist/*
	cargo clean

# --- Tests --------------------------------------------------------------------

test-loadable:
	$(PYTHON) tests/test-loadable.py

test-python:
	$(PYTHON) tests/test-python.py

test-npm:
	node npm/sqlite-regex/test.js

test-deno:
	deno task --config deno/deno.json test

test:
	$(MAKE) test-loadable
	$(MAKE) test-python
	$(MAKE) test-npm
	$(MAKE) test-deno

publish-release:
	./scripts/publish_release.sh

.PHONY: clean \
	test test-loadable test-python test-npm test-deno \
	loadable loadable-release \
	python python-release \
	datasette datasette-release \
	sqlite-utils sqlite-utils-release \
	static static-release \
	debug release \
	format version publish-release \
	npm deno ruby
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sqlite-regex 2 | 3 | A fast and performant SQLite extension for regular expressions. Based on [`sqlite-loadable-rs`](https://github.com/asg017/sqlite-loadable-rs), and the [regex crate](https://crates.io/crates/regex). 4 | 5 | See [_Introducing sqlite-regex: The fastest Regular Expression Extension for SQLite_](https://observablehq.com/@asg017/introducing-sqlite-regex) (Jan 2023) for more details! 6 | 7 | If your company or organization finds this library useful, consider [supporting my work](#supporting)!
8 | 9 | ![](./benchmarks/dates.png) 10 | 11 | ## Usage 12 | 13 | ```sql 14 | .load ./regex0 15 | select 'foo' regexp 'f'; 16 | 17 | ``` 18 | 19 | **Find all occurrences of a pattern in a string** 20 | 21 | ```sql 22 | select regex_find( 23 | '[0-9]{3}-[0-9]{3}-[0-9]{4}', 24 | 'phone: 111-222-3333' 25 | ); 26 | -- '111-222-3333' 27 | 28 | select rowid, * 29 | from regex_find_all( 30 | '\b\w{13}\b', 31 | 'Retroactively relinquishing remunerations is reprehensible.' 32 | ); 33 | /* 34 | ┌───────┬───────┬─────┬───────────────┐ 35 | │ rowid │ start │ end │ match │ 36 | ├───────┼───────┼─────┼───────────────┤ 37 | │ 0 │ 0 │ 13 │ Retroactively │ 38 | │ 1 │ 14 │ 27 │ relinquishing │ 39 | │ 2 │ 28 │ 41 │ remunerations │ 40 | │ 3 │ 45 │ 58 │ reprehensible │ 41 | └───────┴───────┴─────┴───────────────┘ 42 | */ 43 | ``` 44 | 45 | **Extract capture group values by index or name** 46 | 47 | ```sql 48 | select 49 | regex_capture(captures, 0) as entire_match, 50 | regex_capture(captures, 'title') as title, 51 | regex_capture(captures, 'year') as year 52 | from regex_captures( 53 | regex("'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"), 54 | "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)." 
55 | ); 56 | /* 57 | ┌───────────────────────────┬──────────────────┬──────┐ 58 | │ entire_match │ title │ year │ 59 | ├───────────────────────────┼──────────────────┼──────┤ 60 | │ 'Citizen Kane' (1941) │ Citizen Kane │ 1941 │ 61 | │ 'The Wizard of Oz' (1939) │ The Wizard of Oz │ 1939 │ 62 | │ 'M' (1931) │ M │ 1931 │ 63 | └───────────────────────────┴──────────────────┴──────┘ 64 | */ 65 | ``` 66 | 67 | **Use RegexSets to match a string on multiple patterns in linear time** 68 | 69 | ```sql 70 | select regexset_is_match( 71 | regexset( 72 | "bar", 73 | "foo", 74 | "barfoo" 75 | ), 76 | 'foobar' 77 | ) 78 | ``` 79 | 80 | **Split the string on the given pattern delimiter** 81 | 82 | ```sql 83 | select rowid, * 84 | from regex_split('[ \t]+', 'a b c d e'); 85 | /* 86 | ┌───────┬──────┐ 87 | │ rowid │ item │ 88 | ├───────┼──────┤ 89 | │ 0 │ a │ 90 | │ 1 │ b │ 91 | │ 2 │ c │ 92 | │ 3 │ d │ 93 | │ 4 │ e │ 94 | └───────┴──────┘ 95 | */ 96 | ``` 97 | 98 | **Replace occurrences of a pattern with another string** 99 | 100 | ```sql 101 | select regex_replace( 102 | '(?P<last>[^,\s]+),\s+(?P<first>\S+)', 103 | 'Springsteen, Bruce', 104 | '$first $last' 105 | ); 106 | -- 'Bruce Springsteen' 107 | 108 | select regex_replace_all('a', 'abc abc', ''); 109 | -- 'bc bc' 110 | ``` 111 | 112 | ## Documentation 113 | 114 | See [`docs.md`](./docs.md) for a full API reference. 
115 | 116 | ## Installing 117 | 118 | | Language | Install | | 119 | | -------------- | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 120 | | Python | `pip install sqlite-regex` | [![PyPI](https://img.shields.io/pypi/v/sqlite-regex.svg?color=blue&logo=python&logoColor=white)](https://pypi.org/project/sqlite-regex/) | 121 | | Datasette | `datasette install datasette-sqlite-regex` | [![Datasette](https://img.shields.io/pypi/v/datasette-sqlite-regex.svg?color=B6B6D9&label=Datasette+plugin&logoColor=white&logo=python)](https://datasette.io/plugins/datasette-sqlite-regex) | 122 | | Node.js | `npm install sqlite-regex` | [![npm](https://img.shields.io/npm/v/sqlite-regex.svg?color=green&logo=nodedotjs&logoColor=white)](https://www.npmjs.com/package/sqlite-regex) | 123 | | Deno | [`deno.land/x/sqlite_regex`](https://deno.land/x/sqlite_regex) | [![deno.land/x release](https://img.shields.io/github/v/release/asg017/sqlite-regex?color=fef8d2&include_prereleases&label=deno.land%2Fx&logo=deno)](https://deno.land/x/sqlite_regex) | 124 | | Ruby | `gem install sqlite-regex` | ![Gem](https://img.shields.io/gem/v/sqlite-regex?color=red&logo=rubygems&logoColor=white) | 125 | | Github Release | | ![GitHub tag (latest SemVer pre-release)](https://img.shields.io/github/v/tag/asg017/sqlite-regex?color=lightgrey&include_prereleases&label=Github+release&logo=github) | 126 | | Rust | `cargo add sqlite-regex` | [![Crates.io](https://img.shields.io/crates/v/sqlite-regex?logo=rust)](https://crates.io/crates/sqlite-regex) | 127 | 128 | <!-- 129 | | Elixir | [`hex.pm/packages/sqlite_regex`](https://hex.pm/packages/sqlite_regex) | [![Hex.pm](https://img.shields.io/hexpm/v/sqlite_regex?color=purple&logo=elixir)](https://hex.pm/packages/sqlite_regex) | 130 | | Go | `go get -u 
github.com/asg017/sqlite-regex/bindings/go` | [![Go Reference](https://pkg.go.dev/badge/github.com/asg017/sqlite-regex/bindings/go.svg)](https://pkg.go.dev/github.com/asg017/sqlite-regex/bindings/go) | 131 | --> 132 | 133 | The [Releases page](https://github.com/asg017/sqlite-regex/releases) contains pre-built binaries for Linux x86_64, MacOS, and Windows. 134 | 135 | ### As a loadable extension 136 | 137 | If you want to use `sqlite-regex` as a [Runtime-loadable extension](https://www.sqlite.org/loadext.html), download the `regex0.dylib` (for MacOS), `regex0.so` (Linux), or `regex0.dll` (Windows) file from a release and load it into your SQLite environment. 138 | 139 | > **Note:** 140 | > The `0` in the filename (`regex0.dylib`/`regex0.so`/`regex0.dll`) denotes the major version of `sqlite-regex`. Currently `sqlite-regex` is pre v1, so expect breaking changes in future versions. 141 | 142 | For example, if you are using the [SQLite CLI](https://www.sqlite.org/cli.html), you can load the library like so: 143 | 144 | ```sql 145 | .load ./regex0 146 | select regex_version(); 147 | -- v0.1.0 148 | ``` 149 | 150 | Or in Python, using the builtin [sqlite3 module](https://docs.python.org/3/library/sqlite3.html): 151 | 152 | ```python 153 | import sqlite3 154 | con = sqlite3.connect(":memory:") 155 | con.enable_load_extension(True) 156 | con.load_extension("./regex0") 157 | print(con.execute("select regex_version()").fetchone()) 158 | # ('v0.1.0',) 159 | ``` 160 | 161 | Or in Node.js using [better-sqlite3](https://github.com/WiseLibs/better-sqlite3): 162 | 163 | ```javascript 164 | const Database = require("better-sqlite3"); 165 | const db = new Database(":memory:"); 166 | db.loadExtension("./regex0"); 167 | console.log(db.prepare("select regex_version()").get()); 168 | // { 'regex_version()': 'v0.1.0' } 169 | ``` 170 | 171 | Or with [Datasette](https://datasette.io/): 172 | 173 | ``` 174 | datasette data.db --load-extension ./regex0 175 | ``` 176 | 177 | ## Supporting
178 | 179 | I (Alex 👋🏼) spent a lot of time and energy on this project and [many other open source projects](https://github.com/asg017?tab=repositories&q=&type=&language=&sort=stargazers). If your company or organization uses this library (or you're feeling generous), then please [consider supporting my work](https://alexgarcia.regex/work.html), or share this project with a friend! 180 | 181 | ## See also 182 | 183 | - [sqlite-xsv](https://github.com/asg017/sqlite-xsv), A SQLite extension for working with CSVs 184 | - [sqlite-loadable](https://github.com/asg017/sqlite-loadable-rs), A framework for writing SQLite extensions in Rust 185 | - [sqlite-http](https://github.com/asg017/sqlite-http), A SQLite extension for making HTTP requests 186 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.2.4-alpha.1 -------------------------------------------------------------------------------- /benchmarks/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash 2 | 3 | dates.png: ./dates/results.json Makefile 4 | observable-prerender @asg017/hyperfine-benchmark-viz1 chart \ 5 | --redefine=width:900 \ 6 | --width 900 \ 7 | --redefine-file=data:json:$< \ 8 | --redefine-file=highlight:json:<(echo '["./sqlite-regex.sh"]') \ 9 | '--redefine=compareTo:./sqlite-regex.sh' \ 10 | '--redefine=title:Regex match "YYYY-MM-DD" dates in a 200k row, 8MB table.' 
\ 11 | '--redefine=footer:sqlite-regex version 0.0.1, ran on a 8GB RAM Macbook' \ 12 | -o $@ -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # `sqlite-regex` Benchmarks 2 | 3 | ## Caveat: Benchmarks are hard and easy to game 4 | 5 | This benchmark isn't exhaustive, and only benchmarks between other widely-used SQLite regex extensions. 6 | 7 | ## `REGEXP()` across all SQLite regex extensions 8 | 9 | ![](./dates.png) 10 | 11 | Explanation: Essentially running `select count(*) from corpus where line regexp "\d{4}-\d{2}-\d{2}"`, though `regexp` and `sqlean/re` don't support `\d` or `{4}` syntax. 12 | 13 | ``` 14 | gcc -O3 -shared -fPIC regexp.c -o regexp.dylib 15 | 16 | gcc -O3 -shared -fPIC -I./ re.c sqlite3-re.c -o re.dylib 17 | ``` 18 | -------------------------------------------------------------------------------- /benchmarks/dates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/sqlite-regex/5fe28e3c7a978c621082cfef0ae6dea9a71167ee/benchmarks/dates.png -------------------------------------------------------------------------------- /benchmarks/dates/.gitignore: -------------------------------------------------------------------------------- 1 | input-text.txt 2 | test.db 3 | init-db.sql -------------------------------------------------------------------------------- /benchmarks/dates/README.md: -------------------------------------------------------------------------------- 1 | ## Data source 2 | 3 | https://github.com/mariomka/regex-benchmark/blob/master/input-text.txt 4 | 5 | ``` 6 | .load lines0 7 | 8 | create table corpus as 9 | select line 10 | from lines_read('./input-text.txt'); 11 | ``` 12 | -------------------------------------------------------------------------------- /benchmarks/dates/bench-dates.sh:
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | hyperfine --warmup 10 --export-json=results.json \ 3 | './sqlite-regex.sh' \ 4 | './regexp.sh' \ 5 | './sqlean-re.sh' 6 | -------------------------------------------------------------------------------- /benchmarks/dates/regexp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sqlite3 test.db '.load ../regexp' \ 3 | 'select count(*) from corpus where line regexp "([0-9])([0-9])([0-9])([0-9])-([0-9])([0-9])-([0-9])([0-9])"' -------------------------------------------------------------------------------- /benchmarks/dates/results.json: -------------------------------------------------------------------------------- 1 | { 2 | "results": [ 3 | { 4 | "command": "./sqlite-regex.sh", 5 | "mean": 0.054372564830784335, 6 | "stddev": 0.0005315813126198082, 7 | "median": 0.054373242870000005, 8 | "user": 0.04359116519607843, 9 | "system": 0.005847851078431375, 10 | "min": 0.05331200287000001, 11 | "max": 0.055514036870000005, 12 | "times": [ 13 | 0.055139827870000005, 14 | 0.05409116287, 15 | 0.05445735787000001, 16 | 0.053842791870000004, 17 | 0.05448646387, 18 | 0.055514036870000005, 19 | 0.05400693587, 20 | 0.055154393870000006, 21 | 0.05416289187000001, 22 | 0.05395684287000001, 23 | 0.054107605870000006, 24 | 0.054620449870000004, 25 | 0.05519709887, 26 | 0.05398021787000001, 27 | 0.054373242870000005, 28 | 0.05487026887, 29 | 0.053872738870000006, 30 | 0.05497840687, 31 | 0.05343802787, 32 | 0.053459206870000005, 33 | 0.054260989870000005, 34 | 0.053631891870000004, 35 | 0.054111958870000004, 36 | 0.05395722287, 37 | 0.05451430587, 38 | 0.053698185870000005, 39 | 0.05438437687000001, 40 | 0.055285750870000004, 41 | 0.054758866870000006, 42 | 0.05426507387000001, 43 | 0.05491254887, 44 | 0.05534420587, 45 | 0.05383498087000001, 46 | 0.05445113287, 47 | 0.054479982870000006, 48 | 0.05331200287000001, 49 | 
0.05429368687, 50 | 0.054001591870000004, 51 | 0.05479302087000001, 52 | 0.05374784187000001, 53 | 0.054810163870000005, 54 | 0.054740056870000005, 55 | 0.05444112887, 56 | 0.054216403870000006, 57 | 0.05425796587, 58 | 0.055065487870000006, 59 | 0.054823834870000004, 60 | 0.05469258987, 61 | 0.053751407870000004, 62 | 0.05397611387, 63 | 0.05447606287 64 | ] 65 | }, 66 | { 67 | "command": "./regexp.sh", 68 | "mean": 0.12548822508739133, 69 | "stddev": 0.0011727640357036106, 70 | "median": 0.12519553887, 71 | "user": 0.11441889239130433, 72 | "system": 0.005927785434782608, 73 | "min": 0.12384031187, 74 | "max": 0.12927648487, 75 | "times": [ 76 | 0.12521587087, 77 | 0.12384031187, 78 | 0.12480195587000001, 79 | 0.12549902687, 80 | 0.12601430687, 81 | 0.12481248987000002, 82 | 0.12519553887, 83 | 0.12638769587, 84 | 0.12604039187000002, 85 | 0.12426679487, 86 | 0.12513222387, 87 | 0.12517647987, 88 | 0.12433811687000002, 89 | 0.12591836487000002, 90 | 0.12578433887, 91 | 0.12568511887, 92 | 0.12763933087, 93 | 0.12493951187000002, 94 | 0.12609870087, 95 | 0.12927648487, 96 | 0.12427449287, 97 | 0.12492496587000002, 98 | 0.12496666287 99 | ] 100 | }, 101 | { 102 | "command": "./sqlean-re.sh", 103 | "mean": 0.35747110417000005, 104 | "stddev": 0.019674148400044274, 105 | "median": 0.34542798687, 106 | "user": 0.3338626749999999, 107 | "system": 0.006953555, 108 | "min": 0.34252614387, 109 | "max": 0.39998933287, 110 | "times": [ 111 | 0.36573534187, 112 | 0.36690300387, 113 | 0.34263379087, 114 | 0.34604847087, 115 | 0.37801525287, 116 | 0.39998933287, 117 | 0.34252614387, 118 | 0.34459688187, 119 | 0.34480750287, 120 | 0.34345531987 121 | ] 122 | } 123 | ] 124 | } 125 | -------------------------------------------------------------------------------- /benchmarks/dates/sqlean-re.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sqlite3 test.db '.load ../re' \ 3 | 'select count(*) from corpus where line regexp 
"([0-9])([0-9])([0-9])([0-9])-([0-9])([0-9])-([0-9])([0-9])"' -------------------------------------------------------------------------------- /benchmarks/dates/sqlite-regex.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sqlite3 test.db '.load ../../dist/release/regex0' \ 3 | 'select count(*) from corpus where line regexp "\d{4}-\d{2}-\d{2}"' -------------------------------------------------------------------------------- /benchmarks/regexp.sql: -------------------------------------------------------------------------------- 1 | .load ./regexp 2 | 3 | select 4 | sum( 5 | regexp('^([0-9])([0-9])([0-9])([0-9])-([0-9])([0-9])-([0-9])([0-9])$', date) 6 | ) as total 7 | from dates; 8 | -------------------------------------------------------------------------------- /benchmarks/sqlean.sql: -------------------------------------------------------------------------------- 1 | .load ./re 2 | 3 | select 4 | sum( 5 | regexp('^([0-9])([0-9])([0-9])([0-9])-([0-9])([0-9])-([0-9])([0-9])$', date) 6 | ) as total 7 | from dates; 8 | -------------------------------------------------------------------------------- /benchmarks/this-pointer.sql: -------------------------------------------------------------------------------- 1 | .load ../dist/release/regex0 2 | 3 | select sum( 4 | regexp(regex('^\d{4}-\d{2}-\d{2}$'), date) 5 | ) 6 | from dates; 7 | -------------------------------------------------------------------------------- /benchmarks/this.sql: -------------------------------------------------------------------------------- 1 | .load ./target/release/libregex0 2 | 3 | select sum( 4 | regexp('^\d{4}-\d{2}-\d{2}$', date) 5 | ) 6 | from dates; -------------------------------------------------------------------------------- /benchmarks/thisx.sql: -------------------------------------------------------------------------------- 1 | .load ./target/release/libregex0 2 | 3 | select sum( 4 | regexpx('^\d{4}-\d{2}-\d{2}$', date) 5 | ) 
6 | from dates; -------------------------------------------------------------------------------- /benchmarks/todo/bench-internal.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | hyperfine --warmup 10 \ 3 | 'sqlite3x words.db ".load ../target/release/libregex0" "select count(*) from words where regexp( regex(\"^[aeiou].*[aeiou]$\"), word);"' \ 4 | 'sqlite3x words.db ".load ../target/release/libregex0" "select count(*) from words where regexp( \"^[aeiou].*[aeiou]$\", word);"' -------------------------------------------------------------------------------- /benchmarks/todo/bench-sqlite.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #'sqlite3x benchmarks/test.sqlite ".read benchmarks/this.sql"' \ 3 | 4 | hyperfine --warmup 10 \ 5 | 'sqlite3x test.sqlite ".read this-pointer.sql"' \ 6 | 'sqlite3x test.sqlite ".read regexp.sql"' \ 7 | 'sqlite3x test.sqlite ".read sqlean.sql"' 8 | #'duckdb.0.5.1 benchmarks/test.duckdb ".read benchmarks/duckdb.sql"' \ 9 | #'sqlite3x benchmarks/test.sqlite ".read benchmarks/thisx.sql"' 10 | 11 | -------------------------------------------------------------------------------- /benchmarks/todo/bench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #'sqlite3x benchmarks/test.sqlite ".read benchmarks/this.sql"' \ 3 | 4 | hyperfine --warmup 10 \ 5 | 'sqlite3x benchmarks/test.sqlite ".read benchmarks/this-pointer.sql"' \ 6 | 'sqlite3x benchmarks/test.sqlite ".read benchmarks/regexp.sql"' \ 7 | 'sqlite3x benchmarks/test.sqlite ".read benchmarks/sqlean.sql"' 8 | #'duckdb.0.5.1 benchmarks/test.duckdb ".read benchmarks/duckdb.sql"' \ 9 | #'sqlite3x benchmarks/test.sqlite ".read benchmarks/thisx.sql"' 10 | -------------------------------------------------------------------------------- /benchmarks/todo/build-duckdb.sql: 
-------------------------------------------------------------------------------- 1 | create table dates as 2 | select 3 | ((DATE '1992-03-22') + generate_series::int)::text as t 4 | from generate_series(1, 1000000) -------------------------------------------------------------------------------- /benchmarks/todo/build-sqlite.sql: -------------------------------------------------------------------------------- 1 | create table dates as 2 | select 3 | date('now', format('-%d days', value)) as date 4 | from generate_series(1, 1e6); -------------------------------------------------------------------------------- /benchmarks/todo/build-words.sql: -------------------------------------------------------------------------------- 1 | .load ../../sqlite-lines/dist/lines0 2 | create table words as select line as word from lines_read('/usr/share/dict/words'); -------------------------------------------------------------------------------- /benchmarks/todo/duckdb.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum( 3 | regexp_matches(t, '^([0-9])([0-9])([0-9])([0-9])-([0-9])([0-9])-([0-9])([0-9])$')::int 4 | ) as matches 5 | from dates; -------------------------------------------------------------------------------- /benchmarks/todo/sqlite-regex-email.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sqlite3 test.db '.load ../../dist/release/regex0' \ 3 | 'select count(*) from corpus where line regexp "[\w\.+-]+@[\w\.-]+\.[\w\.-]+"' -------------------------------------------------------------------------------- /benchmarks/todo/sqlite-regex-ipv4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sqlite3 test.db '.load ../../dist/release/regex0' \ 3 | 'select count(*) from corpus where line regexp "(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])"' 
-------------------------------------------------------------------------------- /benchmarks/todo/sqlite-regex-uri.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sqlite3 test.db '.load ../../dist/release/regex0' \ 3 | 'select count(*) from corpus where line regexp "[\w]+://[^/\s?#]+[^\s?#]+(?:\?[^\s#]*)?(?:#[^\s]*)?"' -------------------------------------------------------------------------------- /bindings/ruby/.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | -------------------------------------------------------------------------------- /bindings/ruby/Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | git_source(:github) {|repo_name| "https://github.com/#{repo_name}" } 4 | 5 | gemspec 6 | -------------------------------------------------------------------------------- /bindings/ruby/Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | task :default => :spec 3 | -------------------------------------------------------------------------------- /bindings/ruby/lib/sqlite_regex.rb: -------------------------------------------------------------------------------- 1 | require "version" 2 | 3 | module SqliteRegex 4 | class Error < StandardError; end 5 | def self.regex_loadable_path 6 | File.expand_path('../regex0', __FILE__) 7 | end 8 | def self.load(db) 9 | db.load_extension(self.regex_loadable_path) 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /bindings/ruby/lib/version.rb: -------------------------------------------------------------------------------- 1 | # automatically generated, do not edit by hand. 
2 | module SqliteRegex 3 | VERSION = "0.2.4-alpha.1" 4 | end 5 | -------------------------------------------------------------------------------- /bindings/ruby/lib/version.rb.tmpl: -------------------------------------------------------------------------------- 1 | # automatically generated, do not edit by hand. 2 | module SqliteRegex 3 | VERSION = "${VERSION}" 4 | end 5 | -------------------------------------------------------------------------------- /bindings/ruby/sqlite_regex.gemspec: -------------------------------------------------------------------------------- 1 | 2 | lib = File.expand_path("../lib", __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | require "version" 5 | 6 | Gem::Specification.new do |spec| 7 | spec.name = "sqlite-regex" 8 | spec.version = SqliteRegex::VERSION 9 | spec.authors = ["Alex Garcia"] 10 | spec.email = ["alexsebastian.garcia@gmail.com"] 11 | 12 | spec.summary = "a" 13 | spec.description = "b" 14 | spec.homepage = "https://github.com/asg017/sqlite-regex" 15 | spec.license = "MIT" 16 | 17 | # The --platform flag would work in most cases, but on a GH action 18 | # linux runner, it would set platform to "ruby" and not "x86-linux". 19 | # Setting this to Gem::Platform::CURRENT 20 | spec.platform = ENV['PLATFORM'] 21 | 22 | if spec.respond_to?(:metadata) 23 | 24 | spec.metadata["homepage_uri"] = spec.homepage 25 | spec.metadata["source_code_uri"] = spec.homepage 26 | spec.metadata["changelog_uri"] = spec.homepage 27 | else 28 | raise "RubyGems 2.0 or newer is required to protect against " \ 29 | "public gem pushes." 
30 | end 31 | 32 | spec.files = Dir["lib/*.rb"] + Dir.glob('lib/*.{so,dylib,dll}') 33 | 34 | spec.require_paths = ["lib"] 35 | 36 | spec.add_development_dependency "bundler", "~> 1.17" 37 | spec.add_development_dependency "rake", "~> 10.0" 38 | end 39 | -------------------------------------------------------------------------------- /bindings/sqlite-utils/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /bindings/sqlite-utils/README.md: -------------------------------------------------------------------------------- 1 | # `sqlite-utils-sqlite-regex` 2 | 3 | A `sqlite-utils` plugin that registers the `sqlite-regex` extension. 
4 | -------------------------------------------------------------------------------- /bindings/sqlite-utils/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "sqlite-utils-sqlite-regex" 3 | version = "0.2.4-alpha.1" 4 | description = "TODO" 5 | readme = "README.md" 6 | authors = [{name = "Alex Garcia"}] 7 | license = {text = "Apache-2.0"} 8 | classifiers = [] 9 | 10 | dependencies = [ 11 | "sqlite-utils", 12 | "sqlite-regex" 13 | ] 14 | 15 | [project.urls] 16 | Homepage = "https://github.com/asg017/sqlite-regex" 17 | Changelog = "https://github.com/asg017/sqlite-regex/releases" 18 | Issues = "https://github.com/asg017/sqlite-regex/issues" 19 | 20 | [project.entry-points.sqlite_utils] 21 | sqlite_regex = "sqlite_utils_sqlite_regex" 22 | -------------------------------------------------------------------------------- /bindings/sqlite-utils/pyproject.toml.tmpl: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "sqlite-utils-sqlite-regex" 3 | version = "${VERSION}" 4 | description = "TODO" 5 | readme = "README.md" 6 | authors = [{name = "Alex Garcia"}] 7 | license = {text = "Apache-2.0"} 8 | classifiers = [] 9 | 10 | dependencies = [ 11 | "sqlite-utils", 12 | "sqlite-regex" 13 | ] 14 | 15 | [project.urls] 16 | Homepage = "https://github.com/asg017/sqlite-regex" 17 | Changelog = "https://github.com/asg017/sqlite-regex/releases" 18 | Issues = "https://github.com/asg017/sqlite-regex/issues" 19 | 20 | [project.entry-points.sqlite_utils] 21 | sqlite_regex = "sqlite_utils_sqlite_regex" 22 | -------------------------------------------------------------------------------- /bindings/sqlite-utils/sqlite_utils_sqlite_regex/__init__.py: -------------------------------------------------------------------------------- 1 | from sqlite_utils import hookimpl 2 | import sqlite_regex 3 | 4 | from sqlite_utils_sqlite_regex.version import __version_info__, 
__version__ 5 | 6 | 7 | @hookimpl 8 | def prepare_connection(conn): 9 | conn.enable_load_extension(True) 10 | sqlite_regex.load(conn) 11 | conn.enable_load_extension(False) 12 | -------------------------------------------------------------------------------- /bindings/sqlite-utils/sqlite_utils_sqlite_regex/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.4-alpha.1" 2 | __version_info__ = tuple(__version__.split(".")) 3 | -------------------------------------------------------------------------------- /bindings/sqlite-utils/sqlite_utils_sqlite_regex/version.py.tmpl: -------------------------------------------------------------------------------- 1 | __version__ = "${VERSION}" 2 | __version_info__ = tuple(__version__.split(".")) 3 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | use std::process::Command; 2 | fn main() { 3 | let output = Command::new("git") 4 | .args(["rev-parse", "HEAD"]) 5 | .output() 6 | .unwrap(); 7 | let git_hash = String::from_utf8(output.stdout).unwrap(); 8 | println!("cargo:rustc-env=GIT_HASH={}", git_hash); 9 | } 10 | -------------------------------------------------------------------------------- /cbindgen.toml: -------------------------------------------------------------------------------- 1 | language = "C" 2 | includes = ["sqlite3ext.h"] 3 | 4 | [parse.expand] 5 | crates = ["sqlite-regex"] 6 | -------------------------------------------------------------------------------- /deno/README.md: -------------------------------------------------------------------------------- 1 | <!--- Generated with the deno_generate_package.sh script, don't edit by hand! 
--> 2 | 3 | # `x/sqlite_regex` Deno Module 4 | 5 | [![Tags](https://img.shields.io/github/release/asg017/sqlite-regex)](https://github.com/asg017/sqlite-regex/releases) 6 | [![Doc](https://doc.deno.land/badge.svg)](https://doc.deno.land/https/deno.land/x/sqlite-regex@0.2.4-alpha.1/mod.ts) 7 | 8 | The [`sqlite-regex`](https://github.com/asg017/sqlite-regex) SQLite extension is available to Deno developers with the [`x/sqlite_regex`](https://deno.land/x/sqlite_regex) Deno module. It works with [`x/sqlite3`](https://deno.land/x/sqlite3), the fastest and native Deno SQLite3 module. 9 | 10 | ```js 11 | import { Database } from "https://deno.land/x/sqlite3@0.8.0/mod.ts"; 12 | import * as sqlite_regex from "https://deno.land/x/sqlite_regex@v0.2.4-alpha.1/mod.ts"; 13 | 14 | const db = new Database(":memory:"); 15 | 16 | db.enableLoadExtension = true; 17 | db.loadExtension(sqlite_regex.getLoadablePath()); 18 | 19 | const [version] = db 20 | .prepare("select regex_version()") 21 | .value<[string]>()!; 22 | 23 | console.log(version); 24 | 25 | ``` 26 | 27 | Like `x/sqlite3`, `x/sqlite_regex` requires network and filesystem permissions to download and cache the pre-compiled SQLite extension for your machine. Though `x/sqlite3` already requires `--allow-ffi` and `--unstable`, so you might as well use `--allow-all`/`-A`. 28 | 29 | ```bash 30 | deno run -A --unstable <file> 31 | ``` 32 | 33 | `x/sqlite_regex` does not work with [`x/sqlite`](https://deno.land/x/sqlite@v3.7.0), which is a WASM-based Deno SQLite module that does not support loading extensions. 34 | -------------------------------------------------------------------------------- /deno/README.md.tmpl: -------------------------------------------------------------------------------- 1 | <!--- Generated with the deno_generate_package.sh script, don't edit by hand! 
--> 2 | 3 | # `x/sqlite_regex` Deno Module 4 | 5 | [![Tags](https://img.shields.io/github/release/asg017/sqlite-regex)](https://github.com/asg017/sqlite-regex/releases) 6 | [![Doc](https://doc.deno.land/badge.svg)](https://doc.deno.land/https/deno.land/x/sqlite-regex@${VERSION}/mod.ts) 7 | 8 | The [`sqlite-regex`](https://github.com/asg017/sqlite-regex) SQLite extension is available to Deno developers with the [`x/sqlite_regex`](https://deno.land/x/sqlite_regex) Deno module. It works with [`x/sqlite3`](https://deno.land/x/sqlite3), the fastest and native Deno SQLite3 module. 9 | 10 | ```js 11 | import { Database } from "https://deno.land/x/sqlite3@0.8.0/mod.ts"; 12 | import * as sqlite_regex from "https://deno.land/x/sqlite_regex@v${VERSION}/mod.ts"; 13 | 14 | const db = new Database(":memory:"); 15 | 16 | db.enableLoadExtension = true; 17 | db.loadExtension(sqlite_regex.getLoadablePath()); 18 | 19 | const [version] = db 20 | .prepare("select regex_version()") 21 | .value<[string]>()!; 22 | 23 | console.log(version); 24 | 25 | ``` 26 | 27 | Like `x/sqlite3`, `x/sqlite_regex` requires network and filesystem permissions to download and cache the pre-compiled SQLite extension for your machine. Though `x/sqlite3` already requires `--allow-ffi` and `--unstable`, so you might as well use `--allow-all`/`-A`. 28 | 29 | ```bash 30 | deno run -A --unstable <file> 31 | ``` 32 | 33 | `x/sqlite_regex` does not work with [`x/sqlite`](https://deno.land/x/sqlite@v3.7.0), which is a WASM-based Deno SQLite module that does not support loading extensions. 
34 | -------------------------------------------------------------------------------- /deno/deno.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sqlite-regex", 3 | "version": "0.2.4-alpha.1", 4 | "github": "https://github.com/asg017/sqlite-regex", 5 | "tasks": { 6 | "test": "deno test --unstable -A test.ts" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /deno/deno.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sqlite-regex", 3 | "version": "${VERSION}", 4 | "github": "https://github.com/asg017/${PACKAGE_NAME}", 5 | "tasks": { 6 | "test": "deno test --unstable -A test.ts" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /deno/deno.lock: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2", 3 | "remote": { 4 | "https://deno.land/std@0.152.0/_util/assert.ts": "e94f2eb37cebd7f199952e242c77654e43333c1ac4c5c700e929ea3aa5489f74", 5 | "https://deno.land/std@0.152.0/_util/os.ts": "3b4c6e27febd119d36a416d7a97bd3b0251b77c88942c8f16ee5953ea13e2e49", 6 | "https://deno.land/std@0.152.0/path/_constants.ts": "df1db3ffa6dd6d1252cc9617e5d72165cd2483df90e93833e13580687b6083c3", 7 | "https://deno.land/std@0.152.0/path/_interface.ts": "ee3b431a336b80cf445441109d089b70d87d5e248f4f90ff906820889ecf8d09", 8 | "https://deno.land/std@0.152.0/path/_util.ts": "c1e9686d0164e29f7d880b2158971d805b6e0efc3110d0b3e24e4b8af2190d2b", 9 | "https://deno.land/std@0.152.0/path/common.ts": "bee563630abd2d97f99d83c96c2fa0cca7cee103e8cb4e7699ec4d5db7bd2633", 10 | "https://deno.land/std@0.152.0/path/glob.ts": "cb5255638de1048973c3e69e420c77dc04f75755524cb3b2e160fe9277d939ee", 11 | "https://deno.land/std@0.152.0/path/mod.ts": "56fec03ad0ebd61b6ab39ddb9b0ddb4c4a5c9f2f4f632e09dd37ec9ebfd722ac", 12 | 
"https://deno.land/std@0.152.0/path/posix.ts": "c1f7afe274290ea0b51da07ee205653b2964bd74909a82deb07b69a6cc383aaa", 13 | "https://deno.land/std@0.152.0/path/separator.ts": "fe1816cb765a8068afb3e8f13ad272351c85cbc739af56dacfc7d93d710fe0f9", 14 | "https://deno.land/std@0.152.0/path/win32.ts": "bd7549042e37879c68ff2f8576a25950abbfca1d696d41d82c7bca0b7e6f452c", 15 | "https://deno.land/std@0.176.0/_util/asserts.ts": "178dfc49a464aee693a7e285567b3d0b555dc805ff490505a8aae34f9cfb1462", 16 | "https://deno.land/std@0.176.0/_util/os.ts": "d932f56d41e4f6a6093d56044e29ce637f8dcc43c5a90af43504a889cf1775e3", 17 | "https://deno.land/std@0.176.0/encoding/hex.ts": "50f8c95b52eae24395d3dfcb5ec1ced37c5fe7610ef6fffdcc8b0fdc38e3b32f", 18 | "https://deno.land/std@0.176.0/fmt/colors.ts": "938c5d44d889fb82eff6c358bea8baa7e85950a16c9f6dae3ec3a7a729164471", 19 | "https://deno.land/std@0.176.0/fs/_util.ts": "65381f341af1ff7f40198cee15c20f59951ac26e51ddc651c5293e24f9ce6f32", 20 | "https://deno.land/std@0.176.0/fs/copy.ts": "14214efd94fc3aa6db1e4af2b4b9578e50f7362b7f3725d5a14ad259a5df26c8", 21 | "https://deno.land/std@0.176.0/fs/empty_dir.ts": "c3d2da4c7352fab1cf144a1ecfef58090769e8af633678e0f3fabaef98594688", 22 | "https://deno.land/std@0.176.0/fs/ensure_dir.ts": "724209875497a6b4628dfb256116e5651c4f7816741368d6c44aab2531a1e603", 23 | "https://deno.land/std@0.176.0/fs/ensure_file.ts": "c38602670bfaf259d86ca824a94e6cb9e5eb73757fefa4ebf43a90dd017d53d9", 24 | "https://deno.land/std@0.176.0/fs/ensure_link.ts": "c0f5b2f0ec094ed52b9128eccb1ee23362a617457aa0f699b145d4883f5b2fb4", 25 | "https://deno.land/std@0.176.0/fs/ensure_symlink.ts": "2955cc8332aeca9bdfefd05d8d3976b94e282b0f353392a71684808ed2ffdd41", 26 | "https://deno.land/std@0.176.0/fs/eol.ts": "f1f2eb348a750c34500741987b21d65607f352cf7205f48f4319d417fff42842", 27 | "https://deno.land/std@0.176.0/fs/exists.ts": "b8c8a457b71e9d7f29b9d2f87aad8dba2739cbe637e8926d6ba6e92567875f8e", 28 | "https://deno.land/std@0.176.0/fs/expand_glob.ts": 
"45d17e89796a24bd6002e4354eda67b4301bb8ba67d2cac8453cdabccf1d9ab0", 29 | "https://deno.land/std@0.176.0/fs/mod.ts": "bc3d0acd488cc7b42627044caf47d72019846d459279544e1934418955ba4898", 30 | "https://deno.land/std@0.176.0/fs/move.ts": "4cb47f880e3f0582c55e71c9f8b1e5e8cfaacb5e84f7390781dd563b7298ec19", 31 | "https://deno.land/std@0.176.0/fs/walk.ts": "ea95ffa6500c1eda6b365be488c056edc7c883a1db41ef46ec3bf057b1c0fe32", 32 | "https://deno.land/std@0.176.0/path/_constants.ts": "e49961f6f4f48039c0dfed3c3f93e963ca3d92791c9d478ac5b43183413136e0", 33 | "https://deno.land/std@0.176.0/path/_interface.ts": "6471159dfbbc357e03882c2266d21ef9afdb1e4aa771b0545e90db58a0ba314b", 34 | "https://deno.land/std@0.176.0/path/_util.ts": "d7abb1e0dea065f427b89156e28cdeb32b045870acdf865833ba808a73b576d0", 35 | "https://deno.land/std@0.176.0/path/common.ts": "ee7505ab01fd22de3963b64e46cff31f40de34f9f8de1fff6a1bd2fe79380000", 36 | "https://deno.land/std@0.176.0/path/glob.ts": "d479e0a695621c94d3fd7fe7abd4f9499caf32a8de13f25073451c6ef420a4e1", 37 | "https://deno.land/std@0.176.0/path/mod.ts": "4b83694ac500d7d31b0cdafc927080a53dc0c3027eb2895790fb155082b0d232", 38 | "https://deno.land/std@0.176.0/path/posix.ts": "8b7c67ac338714b30c816079303d0285dd24af6b284f7ad63da5b27372a2c94d", 39 | "https://deno.land/std@0.176.0/path/separator.ts": "0fb679739d0d1d7bf45b68dacfb4ec7563597a902edbaf3c59b50d5bcadd93b1", 40 | "https://deno.land/std@0.176.0/path/win32.ts": "d186344e5583bcbf8b18af416d13d82b35a317116e6460a5a3953508c3de5bba", 41 | "https://deno.land/std@0.177.0/fmt/colors.ts": "938c5d44d889fb82eff6c358bea8baa7e85950a16c9f6dae3ec3a7a729164471", 42 | "https://deno.land/std@0.177.0/testing/_diff.ts": "1a3c044aedf77647d6cac86b798c6417603361b66b54c53331b312caeb447aea", 43 | "https://deno.land/std@0.177.0/testing/_format.ts": "a69126e8a469009adf4cf2a50af889aca364c349797e63174884a52ff75cf4c7", 44 | "https://deno.land/std@0.177.0/testing/asserts.ts": 
"984ab0bfb3faeed92ffaa3a6b06536c66811185328c5dd146257c702c41b01ab", 45 | "https://deno.land/x/plug@1.0.1/deps.ts": "35ea2acd5e3e11846817a429b7ef4bec47b80f2d988f5d63797147134cbd35c2", 46 | "https://deno.land/x/plug@1.0.1/download.ts": "8d6a023ade0806a0653b48cd5f6f8b15fcfaa1dbf2aa1f4bc90fc5732d27b144", 47 | "https://deno.land/x/plug@1.0.1/mod.ts": "5dec80ee7a3a325be45c03439558531bce7707ac118f4376cebbd6740ff24bfb", 48 | "https://deno.land/x/plug@1.0.1/types.ts": "d8eb738fc6ed883e6abf77093442c2f0b71af9090f15c7613621d4039e410ee1", 49 | "https://deno.land/x/plug@1.0.1/util.ts": "5ba8127b9adc36e070b9e22971fb8106869eea1741f452a87b4861e574f13481", 50 | "https://deno.land/x/sqlite3@0.8.0/deno.json": "61fbd0665a1b48f5e0f1773371d49b776f559cd6c3747a4e674adc3eb423686c", 51 | "https://deno.land/x/sqlite3@0.8.0/deps.ts": "722c865b9cef27b4cde0bb1ac9ebb08e94c43ad090a7313cea576658ff1e3bb0", 52 | "https://deno.land/x/sqlite3@0.8.0/mod.ts": "d41b8b30e1b20b537ef4d78cae98d90f6bd65c727b64aa1a18bffbb28f7d6ec3", 53 | "https://deno.land/x/sqlite3@0.8.0/src/blob.ts": "a956fc0cf4a8c7a21dc3fcb71a07ef773bcd08b5fd72e8ace89b1bfbd031bf06", 54 | "https://deno.land/x/sqlite3@0.8.0/src/constants.ts": "85fd27aa6e199093f25f5f437052e16fd0e0870b96ca9b24a98e04ddc8b7d006", 55 | "https://deno.land/x/sqlite3@0.8.0/src/database.ts": "c68c7fdfa7548000ea7e194360cdce86b81b667aab6b0778ea7ed9b74a37b7cb", 56 | "https://deno.land/x/sqlite3@0.8.0/src/ffi.ts": "d5d5e3b4524cf0b980d23e57b08f75dc0debc7af2e93ff8b00cdc5af52637b31", 57 | "https://deno.land/x/sqlite3@0.8.0/src/statement.ts": "0b2b3c8b5564ad3c35f2c7d57607e5755c38b23b835e9a46aa1002e68e6ec3a2", 58 | "https://deno.land/x/sqlite3@0.8.0/src/util.ts": "9627ebecc7a5eb250d2df9386a456a9a9ed7842a20fe32be1ee6b7c663c77bd3" 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /deno/mod.ts: -------------------------------------------------------------------------------- 1 | import { download } from 
"https://deno.land/x/plug@1.0.1/mod.ts"; 2 | import meta from "./deno.json" assert { type: "json" }; 3 | 4 | const BASE = `${meta.github}/releases/download/v${meta.version}`; 5 | 6 | // Similar to https://github.com/denodrivers/sqlite3/blob/f7529897720631c2341b713f0d78d4d668593ea9/src/ffi.ts#L561 7 | let path: string; 8 | try { 9 | const customPath = Deno.env.get("DENO_SQLITE_REGEX_PATH"); 10 | if (customPath) path = customPath; 11 | else { 12 | path = await download({ 13 | url: { 14 | darwin: { 15 | aarch64: `${BASE}/deno-darwin-aarch64.regex0.dylib`, 16 | x86_64: `${BASE}/deno-darwin-x86_64.regex0.dylib`, 17 | }, 18 | windows: { 19 | x86_64: `${BASE}/deno-windows-x86_64.regex0.dll`, 20 | }, 21 | linux: { 22 | x86_64: `${BASE}/deno-linux-x86_64.regex0.so`, 23 | }, 24 | }, 25 | suffixes: { 26 | darwin: "", 27 | linux: "", 28 | windows: "", 29 | }, 30 | }); 31 | } 32 | } catch (e) { 33 | if (e instanceof Deno.errors.PermissionDenied) { 34 | throw e; 35 | } 36 | 37 | const error = new Error("Failed to load sqlite-regex extension"); 38 | error.cause = e; 39 | 40 | throw error; 41 | } 42 | 43 | /** 44 | * Returns the full path to the compiled sqlite-regex extension. 45 | * Caution: this will not be named "regex0.dylib|so|dll", since plug will 46 | * replace the name with a hash. 47 | */ 48 | export function getLoadablePath(): string { 49 | return path; 50 | } 51 | 52 | /** 53 | * Entrypoint name for the sqlite-regex extension. 54 | */ 55 | export const entrypoint = "sqlite3_regex_init"; 56 | 57 | interface Db { 58 | // after https://deno.land/x/sqlite3@0.8.0/mod.ts?s=Database#method_loadExtension_0 59 | loadExtension(file: string, entrypoint?: string | undefined): void; 60 | } 61 | /** 62 | * Loads the sqlite-regex extension on the given sqlite3 database. 
63 | */ 64 | export function load(db: Db): void { 65 | db.loadExtension(path, entrypoint); 66 | } 67 | -------------------------------------------------------------------------------- /deno/test.ts: -------------------------------------------------------------------------------- 1 | import * as sqlite_regex from "./mod.ts"; 2 | import meta from "./deno.json" assert { type: "json" }; 3 | 4 | import { assertEquals } from "https://deno.land/std@0.177.0/testing/asserts.ts"; 5 | import { Database } from "https://deno.land/x/sqlite3@0.8.0/mod.ts"; 6 | 7 | Deno.test("x/sqlite3", (t) => { 8 | const db = new Database(":memory:"); 9 | 10 | db.enableLoadExtension = true; 11 | sqlite_regex.load(db); 12 | 13 | const [version] = db.prepare("select regex_version()").value<[string]>()!; 14 | 15 | assertEquals(version[0], "v"); 16 | assertEquals(version.substring(1), meta.version); 17 | 18 | db.close(); 19 | }); 20 | -------------------------------------------------------------------------------- /docs.md: -------------------------------------------------------------------------------- 1 | # sqlite-regex Documentation 2 | 3 | A full reference to every function and module that sqlite-regex offers. 4 | 5 | As a reminder, sqlite-regex follows semver and is pre v1, so breaking changes are to be expected. 6 | 7 | ## API Reference 8 | 9 | <h3 name="regexp"><code>regexp()</code></h3> 10 | 11 | An implementation of the `REGEXP()` operator for SQLite, described here: 12 | 13 | > _The REGEXP operator is a special syntax for the regexp() user function. No regexp() user function is defined by default and so use of the REGEXP operator will normally result in an error message. 
If an application-defined SQL function named "regexp" is added at run-time, then the "X REGEXP Y" operator will be implemented as a call to "regexp(Y,X)"._ > 14 | 15 | <small><i><a href="https://www.sqlite.org/lang_expr.html">SQL Language Expressions</a></i>, on sqlite.org</small> 16 | 17 | This can be used with the `text regexp pattern` or `regexp(pattern, text)` syntax. See the [regex crate documentation](https://docs.rs/regex/latest/regex/struct.Regex.html) for allowed syntax/features in the regex pattern string. 18 | 19 | ```sql 20 | select regexp('[abc]', 'a'); -- 1 21 | select regexp('[abc]', 'x'); -- 0 22 | 23 | select 'a' regexp '[abc]'; -- 1 24 | select 'x' regexp '[abc]'; -- 0 25 | 26 | 27 | -- 28 | ``` 29 | 30 | <h3 name="regex"><code>regex(pattern)</code></h3> 31 | 32 | Creates a regex "object" with the given pattern, using [SQLite's pointer passing interface](https://www.sqlite.org/bindptr.html). Useful when caching regex patterns in heavy queries that use `sqlite-regex` table functions, like [`regex_split()`](#regex_split) or [`regex_find_all()`](#regex_find_all). 33 | 34 | Note that the return value will appear to be `NULL` because of SQLite pointer passing interface. To debug, use [`regex_print()`](#regex_print) to print the pattern string of a regex object. 35 | 36 | ```sql 37 | select regex('[abc]'); -- NULL, but is still a regex "object" 38 | select regex("[abc"); -- Errors with 'Error parsing pattern as regex: ...' 39 | 40 | select regex_print(regex('[abc]')); -- '[abc]' 41 | ``` 42 | 43 | <h3 name="regex_print"><code>regex_print(regex)</code></h3> 44 | 45 | Prints the pattern of a regex object created with [`regex()`](#regex). 46 | 47 | ```sql 48 | select regex_print(regex('[abc]')); -- '[abc]' 49 | -- 50 | ``` 51 | 52 | <h3 name="regex_valid"><code>regex_valid(pattern)</code></h3> 53 | 54 | Returns 1 if the given pattern is a valid regular expression, 0 otherwise. 
55 | 56 | ```sql 57 | select regex_valid('abc'); -- 1 58 | select regex_valid('[abc]'); -- 1 59 | select regex_valid('[abc'); -- 0 60 | select regex_valid(''); -- 1 61 | -- 62 | ``` 63 | 64 | <h3 name="regex_find"><code>regex_find(pattern, text)</code></h3> 65 | 66 | Find and return the text of the given pattern in the string, or NULL otherwise. Errors if `pattern` is not legal regex. Based on [`Regex.find()`](https://docs.rs/regex/latest/regex/struct.Regex.html#method.find). 67 | 68 | ```sql 69 | select regex_find( 70 | '[0-9]{3}-[0-9]{3}-[0-9]{4}', 71 | 'phone: 111-222-3333' 72 | ); 73 | -- '111-222-3333' 74 | ``` 75 | 76 | <h3 name="regex_find_all"><code>select * from regex_find_all(pattern, text)</code></h3> 77 | 78 | Find all instances of a pattern in the given text. Based on [`Regex.find_iter()`](https://docs.rs/regex/latest/regex/struct.Regex.html#method.find_iter). 79 | 80 | The returned columns: 81 | 82 | - `rowid`: The 0-based index of the match. 83 | - `start`: The 0-based index of the starting character of the match inside the text. 84 | - `end`: The 0-based index of the ending character of the match inside the text. 85 | - `match`: The full string match. 86 | 87 | For faster results, wrap the pattern with the [`regex()`](#regex) function for caching. 88 | 89 | ```sql 90 | select rowid, * 91 | from regex_find_all( 92 | regex('\b\w{13}\b'), 93 | 'Retroactively relinquishing remunerations is reprehensible.' 94 | ); 95 | /* 96 | ┌───────┬───────┬─────┬───────────────┐ 97 | │ rowid │ start │ end │ match │ 98 | ├───────┼───────┼─────┼───────────────┤ 99 | │ 0 │ 0 │ 13 │ Retroactively │ 100 | │ 1 │ 14 │ 27 │ relinquishing │ 101 | │ 2 │ 28 │ 41 │ remunerations │ 102 | │ 3 │ 45 │ 58 │ reprehensible │ 103 | └───────┴───────┴─────┴───────────────┘ 104 | ``` 105 | 106 | <h3 name="regex_capture"><code>regex_capture(pattern, text, group)</code></h3> 107 | 108 | Returns the text of the capture group with the specific `group` index or name, or NULL otherwise. 
Errors if `pattern` is not legal regex. Based on [`Regex.captures()`](https://docs.rs/regex/latest/regex/struct.Regex.html#method.captures). 109 | 110 | If `group` is a number, then the N-th capture group is returned, where `0` refers to the entire match, `1` refers to the first left-most capture group in the match, `2` the second, and so on. If the provided group number "overflows", then NULL is returned. 111 | 112 | ```sql 113 | select regex_capture( 114 | "'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)", 115 | "Not my favorite movie: 'Citizen Kane' (1941).", 116 | 0 117 | ); 118 | -- "'Citizen Kane' (1941)" 119 | 120 | select regex_capture( 121 | "'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)", 122 | "Not my favorite movie: 'Citizen Kane' (1941).", 123 | 1 124 | ); 125 | -- "Citizen Kane" 126 | 127 | 128 | select regex_capture( 129 | "'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)", 130 | "Not my favorite movie: 'Citizen Kane' (1941).", 131 | 2 132 | ); 133 | -- "1941" 134 | 135 | select regex_capture( 136 | "'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)", 137 | "Not my favorite movie: 'Citizen Kane' (1941).", 138 | 3 139 | ); 140 | -- NULL 141 | ``` 142 | 143 | If group is a string, then the value of the capture group with the same name is returned. If there is no matching capture group with the name, or the group was not captured, then NULL is returned. 
144 | 145 | ```sql 146 | select regex_capture( 147 | "'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)", 148 | "Not my favorite movie: 'Citizen Kane' (1941).", 149 | 'title' 150 | ); 151 | -- "Citizen Kane" 152 | 153 | 154 | select regex_capture( 155 | "'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)", 156 | "Not my favorite movie: 'Citizen Kane' (1941).", 157 | 'year' 158 | ); 159 | -- "1941" 160 | 161 | select regex_capture( 162 | "'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)", 163 | "Not my favorite movie: 'Citizen Kane' (1941).", 164 | 'not_exist' 165 | ); 166 | -- NULL 167 | ``` 168 | 169 | Note that there is a version of `regex_capture()` that only has two parameters: `captures` and `group`. This can only be used with the [`regex_captures`](#regex_captures) table function, with the special `captures` column like so: 170 | 171 | ```sql 172 | select 173 | regex_capture(captures, 'title') as title, 174 | regex_capture(captures, 'year') as year, 175 | regex_capture(captures, 'not_exist') as not_exist 176 | from regex_captures( 177 | regex("'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"), 178 | "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)." 179 | ); 180 | /* 181 | ┌──────────────────┬──────┬───────────┐ 182 | │ title │ year │ not_exist │ 183 | ├──────────────────┼──────┼───────────┤ 184 | │ Citizen Kane │ 1941 │ │ 185 | │ The Wizard of Oz │ 1939 │ │ 186 | │ M │ 1931 │ │ 187 | └──────────────────┴──────┴───────────┘ 188 | */ 189 | ``` 190 | 191 | <h3 name="regex_captures"><code>select * from regex_captures(pattern, text)</code></h3> 192 | 193 | Returns all non-overlapping capture groups in the given text. Similar to [`regex_find_all`](#regex_find_all), but allows for extracting capture information. Must be used with the [`regex_capture`](#regex_capture) function to extract capture group values. Based on [`Regex.captures_iter()`](https://docs.rs/regex/latest/regex/struct.Regex.html#method.captures_iter). 
194 | 195 | The returned columns: 196 | 197 | - `rowid`: The 0-based index of the match. `0` is the entire match, `1` the first matching capture group, `2` the second, etc. 198 | - `captures`: A special value that's meant to be passed into [`regex_capture()`](#regex_capture). Will appear NULL through direct access. 199 | 200 | For faster results, wrap the pattern with the [`regex()`](#regex) function for caching. 201 | 202 | ```sql 203 | select 204 | rowid, 205 | captures, 206 | regex_capture(captures, 0) as "0", 207 | regex_capture(captures, 1) as "1", 208 | regex_capture(captures, 2) as "2", 209 | regex_capture(captures, 3) as "3" 210 | from regex_captures( 211 | regex("'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"), 212 | "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)." 213 | ); 214 | /* 215 | ┌───────┬──────────┬───────────────────────────┬──────────────────┬──────┬───┐ 216 | │ rowid │ captures │ 0 │ 1 │ 2 │ 3 │ 217 | ├───────┼──────────┼───────────────────────────┼──────────────────┼──────┼───┤ 218 | │ 0 │ │ 'Citizen Kane' (1941) │ Citizen Kane │ 1941 │ │ 219 | │ 1 │ │ 'The Wizard of Oz' (1939) │ The Wizard of Oz │ 1939 │ │ 220 | │ 2 │ │ 'M' (1931) │ M │ 1931 │ │ 221 | └───────┴──────────┴───────────────────────────┴──────────────────┴──────┴───┘ 222 | */ 223 | ``` 224 | 225 | ```sql 226 | select 227 | rowid, 228 | captures, 229 | regex_capture(captures, 'title') as title, 230 | regex_capture(captures, 'year') as year, 231 | regex_capture(captures, 'blah') as blah 232 | from regex_captures( 233 | regex("'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"), 234 | "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)." 
235 | ); 236 | /* 237 | ┌───────┬──────────┬──────────────────┬──────┬──────┐ 238 | │ rowid │ captures │ title │ year │ blah │ 239 | ├───────┼──────────┼──────────────────┼──────┼──────┤ 240 | │ 0 │ │ Citizen Kane │ 1941 │ │ 241 | │ 1 │ │ The Wizard of Oz │ 1939 │ │ 242 | │ 2 │ │ M │ 1931 │ │ 243 | └───────┴──────────┴──────────────────┴──────┴──────┘ 244 | */ 245 | ``` 246 | 247 | <h3 name="regex_replace"><code>regex_replace(pattern, text, replacement)</code></h3> 248 | 249 | Replace the **first** instance of `pattern` inside `text` with the given `replacement` text. Supports the [replacement string syntax](https://docs.rs/regex/latest/regex/struct.Regex.html#replacement-string-syntax). Based on [`Regex.replace()`](https://docs.rs/regex/latest/regex/struct.Regex.html#method.replace). 250 | 251 | ```sql 252 | 253 | select regex_replace( 254 | '[^01]+', 255 | '1078910', 256 | '' 257 | ); 258 | -- '1010' 259 | 260 | select regex_replace( 261 | '(?P<last>[^,\s]+),\s+(?P<first>\S+)', 262 | 'Springsteen, Bruce', 263 | '$first $last' 264 | ); 265 | -- 'Bruce Springsteen' 266 | ``` 267 | 268 | <h3 name="regex_replace_all"><code>regex_replace_all(pattern, text, replacement)</code></h3> 269 | 270 | Replace **all** instances of `pattern` inside `text` with the given `replacement` text. Supports the [replacement string syntax](https://docs.rs/regex/latest/regex/struct.Regex.html#replacement-string-syntax). Based on [`Regex.replace_all()`](https://docs.rs/regex/latest/regex/struct.Regex.html#method.replace_all). 271 | 272 | ```sql 273 | 274 | select regex_replace_all( 275 | 'dog', 276 | 'cat dog mouse dog', 277 | 'monkey' 278 | ) 279 | -- 'cat monkey mouse monkey' 280 | ``` 281 | 282 | <h3 name="regex_split"><code>select * from regex_split(pattern, text)</code></h3> 283 | 284 | Split the given text on each instance of the given pattern. Based on [`Regex.split()`](https://docs.rs/regex/latest/regex/struct.Regex.html#method.split). 
285 | 286 | The returned columns: 287 | 288 | - `rowid`: The 0-based index of the split item. 289 | - `item`: The individual split item, as text. 290 | 291 | For faster results, wrap the pattern with the [`regex()`](#regex) function for caching. 292 | 293 | ```sql 294 | select rowid, * 295 | from regex_split( 296 | regex('[ \\t]+'), 297 | 'a b \t c\td e' 298 | ); 299 | /* 300 | ┌───────┬──────┐ 301 | │ rowid │ item │ 302 | ├───────┼──────┤ 303 | │ 0 │ a │ 304 | │ 1 │ b │ 305 | │ 2 │ c │ 306 | │ 3 │ d │ 307 | │ 4 │ e │ 308 | └───────┴──────┘ 309 | */ 310 | ``` 311 | 312 | <h3 name="regexset"><code>regexset(pattern1, pattern2, ...)</code></h3> 313 | 314 | Creates a regexset "object" with the given patterns, using [SQLite's pointer passing interface](https://www.sqlite.org/bindptr.html). Required when using `regexset_is_match` and `regexset_matches`. Based on [`RegexSet`](https://docs.rs/regex/latest/regex/struct.RegexSet.html). 315 | 316 | Note that the return value will appear to be `NULL` because of SQLite's pointer passing interface. To debug, use [`regexset_print()`](#regexset_print) to print the patterns of a regexset object. 317 | 318 | ```sql 319 | select regexset( 320 | "bar", 321 | "foo", 322 | "barfoo" 323 | ); 324 | -- NULL, but is still a regexset "object" 325 | 326 | select regexset("[abc"); --errors 327 | 328 | select regexset_print(regexset('abc', 'xyz')); -- '["abc","xyz"]' 329 | ``` 330 | 331 | <h3 name="regexset_print"><code>regexset_print(regexset)</code></h3> 332 | 333 | Prints the patterns of a regexset object created with [`regexset()`](#regexset). 334 | 335 | ```sql 336 | select regexset_print(regexset('abc', 'xyz')); -- '["abc","xyz"]' 337 | ``` 338 | 339 | <h3 name="regexset_is_match"><code>regexset_is_match(regexset, text)</code></h3> 340 | 341 | Returns 1 if any of the patterns in `regexset` matches `text`, 0 otherwise. Based on [`RegexSet.is_match()`](https://docs.rs/regex/latest/regex/struct.RegexSet.html#method.is_match). 
342 | 343 | ```sql 344 | select regexset_is_match( 345 | regexset( 346 | "bar", 347 | "foo", 348 | "barfoo" 349 | ), 350 | 'foobar' 351 | ); -- 1 352 | 353 | select regexset_is_match( 354 | regexset( 355 | "bar", 356 | "foo", 357 | "barfoo" 358 | ), 359 | 'xxx' 360 | ); -- 0 361 | 362 | ``` 363 | 364 | <h3 name="regexset_matches"><code>select * from regexset_matches(regexset, text)</code></h3> 365 | 366 | Returns all the matching patterns inside `regexset` found inside `text`. Note that this returns one row for each pattern that matched at least once, not one row for each individual match. Based on [`RegexSet.matches()`](https://docs.rs/regex/latest/regex/struct.RegexSet.html#method.matches). 367 | 368 | ```sql 369 | select 370 | key, 371 | pattern 372 | from regexset_matches( 373 | regexset( 374 | '\w+', 375 | '\d+', 376 | '\pL+', 377 | 'foo', 378 | 'bar', 379 | 'barfoo', 380 | 'foobar' 381 | ), 382 | 'foobar' 383 | ); 384 | /* 385 | ┌─────┬─────────┐ 386 | │ key │ pattern │ 387 | ├─────┼─────────┤ 388 | │ 0 │ \w+ │ 389 | │ 2 │ \pL+ │ 390 | │ 3 │ foo │ 391 | │ 4 │ bar │ 392 | │ 6 │ foobar │ 393 | └─────┴─────────┘ 394 | */ 395 | ``` 396 | 397 | <h3 name="regex_version"><code>regex_version()</code></h3> 398 | 399 | Returns the semver version string of the current version of sqlite-regex. 400 | 401 | ```sql 402 | select regex_version(); 403 | -- "v0.1.0" 404 | ``` 405 | 406 | <h3 name="regex_debug"><code>regex_debug()</code></h3> 407 | 408 | Returns a debug string of various info about sqlite-regex, including 409 | the version string, build date, and commit hash. 
410 | 411 | ```sql 412 | select regex_debug(); 413 | /* 414 | Version: v0.0.0-alpha.4 415 | Source: 85fd18bea80c42782f35975351ea3760d4396eb6 416 | */ 417 | ``` 418 | -------------------------------------------------------------------------------- /npm/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Optional stylelint cache 58 | .stylelintcache 59 | 60 | # Microbundle cache 61 | .rpt2_cache/ 62 | .rts2_cache_cjs/ 63 | .rts2_cache_es/ 64 | .rts2_cache_umd/ 65 | 66 | # Optional REPL history 67 | .node_repl_history 68 | 69 | # Output of 'npm pack' 70 | *.tgz 71 | 72 | # Yarn Integrity file 73 | .yarn-integrity 74 | 75 | # dotenv environment variable files 76 | .env 77 | .env.development.local 78 | .env.test.local 79 | .env.production.local 80 | 
.env.local 81 | 82 | # parcel-bundler cache (https://parceljs.org/) 83 | .cache 84 | .parcel-cache 85 | 86 | # Next.js build output 87 | .next 88 | out 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and not Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # vuepress v2.x temp and cache directory 104 | .temp 105 | .cache 106 | 107 | # Docusaurus cache and generated files 108 | .docusaurus 109 | 110 | # Serverless directories 111 | .serverless/ 112 | 113 | # FuseBox cache 114 | .fusebox/ 115 | 116 | # DynamoDB Local files 117 | .dynamodb/ 118 | 119 | # TernJS port file 120 | .tern-port 121 | 122 | # Stores VSCode versions used for testing VSCode extensions 123 | .vscode-test 124 | 125 | # yarn v2 126 | .yarn/cache 127 | .yarn/unplugged 128 | .yarn/build-state.yml 129 | .yarn/install-state.gz 130 | .pnp.* 131 | 132 | # not needed for libraries, right? running into some issues in ci with this, so gonna yeet 133 | package-lock.json -------------------------------------------------------------------------------- /npm/README.md: -------------------------------------------------------------------------------- 1 | # sqlite-regex on npm 2 | 3 | `sqlite-regex` is also available for download through [`npm`](https://www.npmjs.com/) for Node.js developers. See the [`sqlite-regex` NPM package README](./sqlite-regex/README.md) for details. 4 | 5 | The other NPM packages in this folder (`sqlite-regex-darwin-x64`, `sqlite-regex-windows-x64` etc.) are autogenerated platform-specific packages. See [Supported Platforms](./sqlite-regex/README.md#supported-platforms) for details. 
6 | -------------------------------------------------------------------------------- /npm/platform-package.README.md.tmpl: -------------------------------------------------------------------------------- 1 | <!--- Generated with the npm_generate_platform_packages.sh script, don't edit by hand --> 2 | 3 | # ${PACKAGE_NAME} 4 | 5 | A `${PACKAGE_NAME_BASE}` platform-specific package for `${PLATFORM_OS}-${PLATFORM_ARCH}`. 6 | 7 | When `${PACKAGE_NAME_BASE}` is installed and the host computer has a `${PLATFORM_OS}` operating system with `${PLATFORM_ARCH}` architecture, then this package is downloaded with the pre-compiled SQLite extension bundled under `lib/${EXTENSION_NAME}.${EXTENSION_SUFFIX}`. At runtime, the `${PACKAGE_NAME_BASE}` package will resolve to this platform-specific package for use with [`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3) or [`node-sqlite3`](https://github.com/TryGhost/node-sqlite3). 8 | 9 | See the `${PACKAGE_NAME_BASE}` package for more details. -------------------------------------------------------------------------------- /npm/platform-package.package.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "//": "Autogenerated by the npm_generate_platform_packages.sh script, do not edit by hand", 3 | "name": "${PACKAGE_NAME}", 4 | "version": "${VERSION}", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/asg017/${PACKAGE_NAME_BASE}.git", 8 | "directory": "npm/${PACKAGE_NAME}" 9 | }, 10 | "author": "Alex Garcia <alexsebastian.garcia@gmail.com>", 11 | "os": [ 12 | "${PLATFORM_OS}" 13 | ], 14 | "cpu": [ 15 | "${PLATFORM_ARCH}" 16 | ] 17 | } -------------------------------------------------------------------------------- /npm/sqlite-regex-darwin-arm64/README.md: -------------------------------------------------------------------------------- 1 | <!--- Generated with the npm_generate_platform_packages.sh script, don't edit by hand --> 2 | 3 | # 
sqlite-regex-darwin-arm64 4 | 5 | A `sqlite-regex` platform-specific package for `darwin-arm64`. 6 | 7 | When `sqlite-regex` is installed and the host computer has a `darwin` operating system with `arm64` architecture, then this package is downloaded with the pre-compiled SQLite extension bundled under `lib/regex0.dylib`. At runtime, the `sqlite-regex` package will resolve to this platform-specific package for use with [`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3)' or [`node-sqlite3`](https://github.com/TryGhost/node-sqlite3). 8 | 9 | See the `sqlite-regex` package for more details. -------------------------------------------------------------------------------- /npm/sqlite-regex-darwin-arm64/lib/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/sqlite-regex/5fe28e3c7a978c621082cfef0ae6dea9a71167ee/npm/sqlite-regex-darwin-arm64/lib/.gitkeep -------------------------------------------------------------------------------- /npm/sqlite-regex-darwin-arm64/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "//": "Autogenerated by the npm_generate_platform_packages.sh script, do not edit by hand", 3 | "name": "sqlite-regex-darwin-arm64", 4 | "version": "0.2.4-alpha.1", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/asg017/sqlite-regex.git", 8 | "directory": "npm/sqlite-regex-darwin-arm64" 9 | }, 10 | "author": "Alex Garcia <alexsebastian.garcia@gmail.com>", 11 | "os": [ 12 | "darwin" 13 | ], 14 | "cpu": [ 15 | "arm64" 16 | ] 17 | } -------------------------------------------------------------------------------- /npm/sqlite-regex-darwin-x64/README.md: -------------------------------------------------------------------------------- 1 | <!--- Generated with the npm_generate_platform_packages.sh script, don't edit by hand --> 2 | 3 | # sqlite-regex-darwin-x64 4 | 5 | A `sqlite-regex` platform-specific 
package for `darwin-x64`. 6 | 7 | When `sqlite-regex` is installed and the host computer has a `darwin` operating system with `x64` architecture, then this package is downloaded with the pre-compiled SQLite extension bundled under `lib/regex0.dylib`. At runtime, the `sqlite-regex` package will resolve to this platform-specific package for use with [`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3)' or [`node-sqlite3`](https://github.com/TryGhost/node-sqlite3). 8 | 9 | See the `sqlite-regex` package for more details. -------------------------------------------------------------------------------- /npm/sqlite-regex-darwin-x64/lib/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/sqlite-regex/5fe28e3c7a978c621082cfef0ae6dea9a71167ee/npm/sqlite-regex-darwin-x64/lib/.gitkeep -------------------------------------------------------------------------------- /npm/sqlite-regex-darwin-x64/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "//": "Autogenerated by the npm_generate_platform_packages.sh script, do not edit by hand", 3 | "name": "sqlite-regex-darwin-x64", 4 | "version": "0.2.4-alpha.1", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/asg017/sqlite-regex.git", 8 | "directory": "npm/sqlite-regex-darwin-x64" 9 | }, 10 | "author": "Alex Garcia <alexsebastian.garcia@gmail.com>", 11 | "os": [ 12 | "darwin" 13 | ], 14 | "cpu": [ 15 | "x64" 16 | ] 17 | } -------------------------------------------------------------------------------- /npm/sqlite-regex-linux-x64/README.md: -------------------------------------------------------------------------------- 1 | <!--- Generated with the npm_generate_platform_packages.sh script, don't edit by hand --> 2 | 3 | # sqlite-regex-linux-x64 4 | 5 | A `sqlite-regex` platform-specific package for `linux-x64`. 
6 | 7 | When `sqlite-regex` is installed and the host computer has a `linux` operating system with `x64` architecture, then this package is downloaded with the pre-compiled SQLite extension bundled under `lib/regex0.so`. At runtime, the `sqlite-regex` package will resolve to this platform-specific package for use with [`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3)' or [`node-sqlite3`](https://github.com/TryGhost/node-sqlite3). 8 | 9 | See the `sqlite-regex` package for more details. -------------------------------------------------------------------------------- /npm/sqlite-regex-linux-x64/lib/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/sqlite-regex/5fe28e3c7a978c621082cfef0ae6dea9a71167ee/npm/sqlite-regex-linux-x64/lib/.gitkeep -------------------------------------------------------------------------------- /npm/sqlite-regex-linux-x64/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "//": "Autogenerated by the npm_generate_platform_packages.sh script, do not edit by hand", 3 | "name": "sqlite-regex-linux-x64", 4 | "version": "0.2.4-alpha.1", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/asg017/sqlite-regex.git", 8 | "directory": "npm/sqlite-regex-linux-x64" 9 | }, 10 | "author": "Alex Garcia <alexsebastian.garcia@gmail.com>", 11 | "os": [ 12 | "linux" 13 | ], 14 | "cpu": [ 15 | "x64" 16 | ] 17 | } -------------------------------------------------------------------------------- /npm/sqlite-regex-windows-x64/README.md: -------------------------------------------------------------------------------- 1 | <!--- Generated with the npm_generate_platform_packages.sh script, don't edit by hand --> 2 | 3 | # sqlite-regex-windows-x64 4 | 5 | A `sqlite-regex` platform-specific package for `windows-x64`. 
6 | 7 | When `sqlite-regex` is installed and the host computer has a `windows` operating system with `x64` architecture, then this package is downloaded with the pre-compiled SQLite extension bundled under `lib/regex0.dll`. At runtime, the `sqlite-regex` package will resolve to this platform-specific package for use with [`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3)' or [`node-sqlite3`](https://github.com/TryGhost/node-sqlite3). 8 | 9 | See the `sqlite-regex` package for more details. -------------------------------------------------------------------------------- /npm/sqlite-regex-windows-x64/lib/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/sqlite-regex/5fe28e3c7a978c621082cfef0ae6dea9a71167ee/npm/sqlite-regex-windows-x64/lib/.gitkeep -------------------------------------------------------------------------------- /npm/sqlite-regex-windows-x64/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "//": "Autogenerated by the npm_generate_platform_packages.sh script, do not edit by hand", 3 | "name": "sqlite-regex-windows-x64", 4 | "version": "0.2.4-alpha.1", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/asg017/sqlite-regex.git", 8 | "directory": "npm/sqlite-regex-windows-x64" 9 | }, 10 | "author": "Alex Garcia <alexsebastian.garcia@gmail.com>", 11 | "os": [ 12 | "windows" 13 | ], 14 | "cpu": [ 15 | "x64" 16 | ] 17 | } -------------------------------------------------------------------------------- /npm/sqlite-regex/README.md: -------------------------------------------------------------------------------- 1 | # `sqlite-regex` NPM Package 2 | 3 | `sqlite-regex` is distributed on `npm` for Node.js developers. 
To install on [supported platforms](#supported-platforms), simply run: 4 | 5 | ``` 6 | npm install sqlite-regex 7 | ``` 8 | 9 | The `sqlite-regex` package is meant to be used with Node SQLite clients like [`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3) and [`node-sqlite3`](https://github.com/TryGhost/node-sqlite3). For `better-sqlite3`, call [`.loadExtension()`](https://github.com/WiseLibs/better-sqlite3/blob/master/docs/api.md#loadextensionpath-entrypoint---this) on your database object, passing in [`getLoadablePath()`](#getLoadablePath). 10 | 11 | ```js 12 | import Database from "better-sqlite3"; 13 | import * as sqlite_regex from "sqlite-regex"; 14 | 15 | const db = new Database(":memory:"); 16 | 17 | db.loadExtension(sqlite_regex.getLoadablePath()); 18 | 19 | const version = db.prepare("select regex_version()").pluck().get(); 20 | console.log(version); // "v0.2.0" 21 | ``` 22 | 23 | For `node-sqlite3`, call the similarly named [`.loadExtension()`](https://github.com/TryGhost/node-sqlite3/wiki/API#loadextensionpath--callback) method on your database object, and pass in [`getLoadablePath()`](#getLoadablePath). 24 | 25 | ```js 26 | import sqlite3 from "sqlite3"; 27 | import * as sqlite_regex from "sqlite-regex"; 28 | 29 | const db = new sqlite3.Database(":memory:"); 30 | 31 | db.loadExtension(sqlite_regex.getLoadablePath()); 32 | 33 | db.get("select regex_version()", (err, row) => { 34 | console.log(row); // {"regex_version()": "v0.2.0"} 35 | }); 36 | ``` 37 | 38 | See [the full API Reference](#api-reference) for the Node API, and [`docs.md`](../../docs.md) for documentation on the `sqlite-regex` SQL API. 39 | 40 | ## Supported Platforms 41 | 42 | Since the underlying `regex0` SQLite extension is pre-compiled, the `sqlite-regex` NPM package only works on a few "platforms" (operating systems + CPU architectures). 
These platforms include: 43 | 44 | - `darwin-x64` (MacOS x86_64) 45 | - `darwin-arm64` (MacOS M1 and M2 chips) 46 | - `win32-x64` (Windows x86_64) 47 | - `linux-x64` (Linux x86_64) 48 | 49 | To see which platform your machine uses, check the [`process.arch`](https://nodejs.org/api/process.html#processarch) and [`process.platform`](https://nodejs.org/api/process.html#processplatform) values like so: 50 | 51 | ```bash 52 | $ node -e 'console.log([process.platform, process.arch])' 53 | [ 'darwin', 'x64' ] 54 | ``` 55 | 56 | When the `sqlite-regex` NPM package is installed, the correct pre-compiled extension for your operating system and CPU architecture will be downloaded from the [optional dependencies](https://docs.npmjs.com/cli/v9/configuring-npm/package-json#optionaldependencies), with platform-specific packages like `sqlite-regex-darwin-x64`. This happens automatically; there's no need to directly install those packages. 57 | 58 | More platforms may be supported in the future. Consider [supporting my work](https://github.com/sponsors/asg017/) if you'd like to see more operating systems and CPU architectures supported in `sqlite-regex`. 59 | 60 | ## API Reference 61 | 62 | <a href="#getLoadablePath" name="getLoadablePath">#</a> <b>getLoadablePath</b> [<>](https://github.com/asg017/sqlite-regex/blob/main/npm/sqlite-regex/src/index.js "Source") 63 | 64 | Returns the full path to where the `sqlite-regex` extension _should_ be installed, based on `sqlite-regex`'s `package.json` optional dependencies and the host's operating system and architecture. 65 | 66 | This path can be directly passed into [`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3)'s [`.loadExtension()`](https://github.com/WiseLibs/better-sqlite3/blob/master/docs/api.md#loadextensionpath-entrypoint---this). 
67 | 68 | ```js 69 | import Database from "better-sqlite3"; 70 | import * as sqlite_regex from "sqlite-regex"; 71 | 72 | const db = new Database(":memory:"); 73 | db.loadExtension(sqlite_regex.getLoadablePath()); 74 | ``` 75 | 76 | It can also be used in [`node-sqlite3`](https://github.com/TryGhost/node-sqlite3)'s [`.loadExtension()`](https://github.com/TryGhost/node-sqlite3/wiki/API#loadextensionpath--callback). 77 | 78 | ```js 79 | import sqlite3 from "sqlite3"; 80 | import * as sqlite_regex from "sqlite-regex"; 81 | 82 | const db = new sqlite3.Database(":memory:"); 83 | db.loadExtension(sqlite_regex.getLoadablePath()); 84 | ``` 85 | 86 | This function throws an `Error` in two different cases. The first case is when `sqlite-regex` is installed and run on an [unsupported platform](#supported-platforms). The second case is when the platform-specific optional dependency is not installed. If you reach this, ensure you aren't using `--no-optional` flag, and [file an issue](https://github.com/asg017/sqlite-regex/issues/new) if you are stuck. 87 | 88 | The `db.loadExtension()` function may also throw an Error if the compiled extension is incompatible with your SQLite connection for any reason, including missing system packages, outdated glib versions, or other misconfigurations. If you reach this, please [file an issue](https://github.com/asg017/sqlite-regex/issues/new). 
89 | -------------------------------------------------------------------------------- /npm/sqlite-regex/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "//": "Autogenerated by the npm_generate_platform_packages.sh script, do not edit by hand", 3 | "name": "sqlite-regex", 4 | "version": "0.2.4-alpha.1", 5 | "description": "", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/asg017/sqlite-regex.git", 9 | "directory": "npm/sqlite-regex" 10 | }, 11 | "author": "Alex Garcia <alexsebastian.garcia@gmail.com>", 12 | "license": "(MIT OR Apache-2.0)", 13 | "main": "src/index.js", 14 | "type": "module", 15 | "scripts": { 16 | "test": "node test.js" 17 | }, 18 | "files": [ 19 | "*.dylib", 20 | "*.so", 21 | "*.dll" 22 | ], 23 | "optionalDependencies": { 24 | "sqlite-regex-darwin-arm64": "0.2.4-alpha.1", 25 | "sqlite-regex-darwin-x64": "0.2.4-alpha.1", 26 | "sqlite-regex-linux-x64": "0.2.4-alpha.1", 27 | "sqlite-regex-windows-x64": "0.2.4-alpha.1" 28 | }, 29 | "devDependencies": { 30 | "better-sqlite3": "^8.1.0", 31 | "sqlite3": "^5.1.4" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /npm/sqlite-regex/package.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "//": "Autogenerated by the npm_generate_platform_packages.sh script, do not edit by hand", 3 | "name": "${PACKAGE_NAME_BASE}", 4 | "version": "${VERSION}", 5 | "description": "", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/asg017/${PACKAGE_NAME_BASE}.git", 9 | "directory": "npm/${PACKAGE_NAME_BASE}" 10 | }, 11 | "author": "Alex Garcia <alexsebastian.garcia@gmail.com>", 12 | "license": "(MIT OR Apache-2.0)", 13 | "main": "src/index.js", 14 | "type": "module", 15 | "scripts": { 16 | "test": "node test.js" 17 | }, 18 | "files": [ 19 | "*.dylib", 20 | "*.so", 21 | "*.dll" 22 | ], 23 | "optionalDependencies": { 24 | 
/**
 * Platform/architecture pairs, as reported by `process.platform` and
 * `process.arch`, for which a pre-compiled extension package is listed in
 * this package's optional dependencies.
 */
const supportedPlatforms = [
  ["darwin", "x64"],
  ["darwin", "arm64"],
  ["win32", "x64"],
  ["linux", "x64"],
];

/**
 * Report whether the given platform/architecture pair is supported.
 *
 * Uses `some()` rather than `find() !== null`: `find()` yields `undefined`
 * on a miss, so the previous comparison was always true and unsupported
 * platforms were never rejected.
 *
 * @param {string} platform value of `process.platform`
 * @param {string} arch value of `process.arch`
 * @returns {boolean} true when the pair appears in `supportedPlatforms`
 */
function validPlatform(platform, arch) {
  return supportedPlatforms.some(([p, a]) => platform === p && arch === a);
}

/**
 * Map a Node platform name to the file suffix of a loadable SQLite extension.
 *
 * @param {string} platform value of `process.platform`
 * @returns {"dll" | "dylib" | "so"}
 */
function extensionSuffix(platform) {
  if (platform === "win32") return "dll";
  if (platform === "darwin") return "dylib";
  return "so";
}

/**
 * Build the name of the platform-specific npm package. The packages are
 * published with the OS segment "windows", not Node's "win32".
 *
 * @param {string} platform value of `process.platform`
 * @param {string} arch value of `process.arch`
 * @returns {string} e.g. `sqlite-regex-linux-x64`
 */
function platformPackageName(platform, arch) {
  const os = platform === "win32" ? "windows" : platform;
  return `sqlite-regex-${os}-${arch}`;
}

/**
 * Resolve the absolute path of the pre-compiled `regex0` extension for the
 * host platform, suitable for passing to a SQLite client's `loadExtension()`.
 *
 * @returns {string} path to `regex0.{dll,dylib,so}` inside the sibling
 *   platform package
 * @throws {Error} when the host platform is unsupported, or when the
 *   platform package's library file does not exist on disk
 */
export function getLoadablePath() {
  if (!validPlatform(platform, arch)) {
    throw new Error(
      `Unsupported platform for sqlite-regex, on a ${platform}-${arch} machine, but not in supported platforms (${supportedPlatforms
        .map(([p, a]) => `${p}-${a}`)
        .join(",")}). Consult the sqlite-regex NPM package README for details.
`
    );
  }
  const packageName = platformPackageName(platform, arch);
  // This file lives in <pkg>/src/, so two levels up is the directory that
  // holds sibling packages (normally node_modules/).
  const loadablePath = join(
    fileURLToPath(new URL(".", import.meta.url)),
    "..",
    "..",
    packageName,
    "lib",
    `regex0.${extensionSuffix(platform)}`
  );
  // throwIfNoEntry: false makes statSync return undefined instead of throwing,
  // so a missing file gets the actionable message below.
  if (!statSync(loadablePath, { throwIfNoEntry: false })) {
    throw new Error(
      `Loadable extension for sqlite-regex not found. Was the ${packageName} package installed? Avoid using the --no-optional flag, as the optional dependencies for sqlite-regex are required.`
    );
  }

  return loadablePath;
}
-------------------------------------------------------------------------------- /python/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into 
this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # `sqlite-regex` Python Packages 2 | 3 | The `sqlite-regex` project offers two Python packages for easy distribution. They are: 4 | 5 | 1. The [`sqlite-regex` Python package](https://pypi.org/project/sqlite-regex/), source in [`sqlite_regex/`](./sqlite_regex/README.md) 6 | 2. The [`datasette-sqlite-regex` Python package](https://pypi.org/project/datasette-sqlite-regex/), a [Datasette](https://datasette.io/) plugin, which is a light wrapper around the `sqlite-regex` package, source in [`datasette_sqlite_regex/`](./datasette_sqlite_regex/README.md) 7 | -------------------------------------------------------------------------------- /python/datasette_sqlite_regex/README.md: -------------------------------------------------------------------------------- 1 | # The `datasette-sqlite-regex` Datasette Plugin 2 | 3 | `datasette-sqlite-regex` is a [Datasette plugin](https://docs.datasette.io/en/stable/plugins.html) that loads the [`sqlite-regex`](https://github.com/asg017/sqlite-regex) extension in Datasette instances, allowing you to work with [regular expressions](https://docs.rs/regex/latest/regex/#syntax) in SQL. 4 | 5 | ``` 6 | datasette install datasette-sqlite-regex 7 | ``` 8 | 9 | See [`docs.md`](../../docs.md) for a full API reference for the `sqlite-regex` SQL functions. 10 | 11 | Alternatively, when publishing Datasette instances, you can use the `--install` option to install the plugin. 
@hookimpl
def prepare_connection(conn):
    """Load the ``sqlite-regex`` extension into a Datasette connection.

    Registered through Datasette's ``prepare_connection`` plugin hook.
    Extension loading is switched on only for the duration of the load.

    Args:
        conn: the ``sqlite3`` connection handed over by Datasette.

    Raises:
        Whatever ``sqlite_regex.load`` raises if the extension cannot be
        loaded; extension loading is disabled again before it propagates.
    """
    conn.enable_load_extension(True)
    try:
        sqlite_regex.load(conn)
    finally:
        # Always switch loading back off, even if the load failed, so the
        # connection is never left able to load arbitrary extensions from SQL.
        conn.enable_load_extension(False)
@pytest.mark.asyncio
async def test_plugin_is_installed():
    """The plugin must be listed by Datasette's ``/-/plugins.json`` endpoint."""
    datasette = Datasette(memory=True)
    response = await datasette.client.get("/-/plugins.json")
    assert response.status_code == 200
    installed_plugins = {p["name"] for p in response.json()}
    assert "datasette-sqlite-regex" in installed_plugins


@pytest.mark.asyncio
async def test_sqlite_regex_functions():
    """SQL functions from the extension must be callable through Datasette.

    The previous query called ``regex()`` with no arguments and expected a
    26-character string, which describes a ULID generator rather than this
    extension. A real match is checked instead.
    """
    # NOTE(review): regex_find(pattern, text) is assumed from the project's
    # SQL API docs -- confirm the name and argument order.
    datasette = Datasette(memory=True)
    response = await datasette.client.get(
        "/_memory.json?sql=select+regex_version(),regex_find('b','abc')"
    )
    assert response.status_code == 200
    regex_version, match = response.json()["rows"][0]
    assert regex_version[0] == "v"
    assert match == "b"
12 | 13 | ```python 14 | import sqlite_regex 15 | print(sqlite_regex.loadable_path()) 16 | # '/.../venv/lib/python3.9/site-packages/sqlite_regex/regex0' 17 | 18 | import sqlite3 19 | conn = sqlite3.connect(':memory:') 20 | sqlite_regex.load(conn) 21 | conn.execute('select regex_version()').fetchone() 22 | # ('v0.2.0',) 23 | ``` 24 | 25 | See [the full API Reference](#api-reference) for the Python API, and [`docs.md`](../../docs.md) for documentation on the `sqlite-regex` SQL API. 26 | 27 | See [`datasette-sqlite-regex`](../datasette_sqlite_regex/) for a Datasette plugin that is a light wrapper around the `sqlite-regex` Python package. 28 | 29 | ## Compatibility 30 | 31 | Currently the `sqlite-regex` Python package is only distributed on PyPI as pre-built wheels; it's not possible to install from the source distribution. This is because the underlying `sqlite-regex` extension requires a lot of build dependencies like `make`, `cc`, and `cargo`. 32 | 33 | If you get an `unsupported platform` error when pip installing `sqlite-regex`, you'll have to build the `sqlite-regex` extension yourself and load the dynamic library manually. 34 | 35 | ## API Reference 36 | 37 | <h3 name="loadable_path"><code>loadable_path()</code></h3> 38 | 39 | Returns the full path to the locally installed `sqlite-regex` extension, without the file extension. 40 | 41 | This can be directly passed to [`sqlite3.Connection.load_extension()`](https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.load_extension), but the [`sqlite_regex.load()`](#load) function is preferred. 42 | 43 | ```python 44 | import sqlite_regex 45 | print(sqlite_regex.loadable_path()) 46 | # '/.../venv/lib/python3.9/site-packages/sqlite_regex/regex0' 47 | ``` 48 | 49 | > Note: this extension path doesn't include the file extension (`.dylib`, `.so`, `.dll`). This is because [SQLite will infer the correct extension](https://www.sqlite.org/loadext.html#loading_an_extension). 
50 | 51 | <h3 name="load"><code>load(connection)</code></h3> 52 | 53 | Loads the `sqlite-regex` extension on the given [`sqlite3.Connection`](https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection) object, calling [`Connection.load_extension()`](https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.load_extension). 54 | 55 | ```python 56 | import sqlite_regex 57 | import sqlite3 58 | conn = sqlite3.connect(':memory:') 59 | 60 | conn.enable_load_extension(True) 61 | sqlite_regex.load(conn) 62 | conn.enable_load_extension(False) 63 | 64 | conn.execute('select regex_version(), regex()').fetchone() 65 | # ('v0.1.0', '01gr7gwc5aq22ycea6j8kxq4s9') 66 | TODO 67 | ``` 68 | -------------------------------------------------------------------------------- /python/sqlite_regex/noop.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asg017/sqlite-regex/5fe28e3c7a978c621082cfef0ae6dea9a71167ee/python/sqlite_regex/noop.c -------------------------------------------------------------------------------- /python/sqlite_regex/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | import os 3 | import platform 4 | 5 | version = {} 6 | with open("sqlite_regex/version.py") as fp: 7 | exec(fp.read(), version) 8 | 9 | VERSION = version['__version__'] 10 | 11 | 12 | 13 | system = platform.system() 14 | machine = platform.machine() 15 | 16 | print(system, machine) 17 | 18 | if system == 'Darwin': 19 | if machine not in ['x86_64', 'arm64']: 20 | raise Exception("unsupported platform") 21 | elif system == 'Linux': 22 | if machine not in ['x86_64']: 23 | raise Exception("unsupported platform") 24 | elif system == 'Windows': 25 | # TODO only 64 bit I think 26 | pass 27 | else: 28 | raise Exception("unsupported platform") 29 | 30 | setup( 31 | name="sqlite-regex", 32 | description="", 33 | long_description="", 34 | 
def loadable_path():
    """Return the normalized path of the bundled ``regex0`` extension.

    The path is built relative to this module's directory and carries no
    platform-specific file suffix.
    """
    package_dir = os.path.dirname(__file__)
    return os.path.normpath(os.path.join(package_dir, "regex0"))


def load(conn: sqlite3.Connection) -> None:
    """Load the ``regex0`` extension into *conn*.

    Calls ``conn.load_extension`` with the path from :func:`loadable_path`.
    This function does not enable extension loading on the connection itself.
    """
    extension = loadable_path()
    conn.load_extension(extension)
#!/bin/bash
#
# Publish a release for the version stored in the VERSION file:
#   1. refuse to run if anything other than VERSION has uncommitted changes,
#   2. regenerate version-stamped files (`make version`), commit and tag,
#   3. push the branch and tag, then create the GitHub release
#      (marked as a pre-release when VERSION contains "alpha" or "beta").

# The script previously ran `set -euo pipefail xtrace`. The bare word
# `xtrace` there is not an option: `set` assigns it to $1, so tracing was
# never enabled. `-x` is the short form of `-o xtrace`.
set -euxo pipefail

# `git status --porcelain` prints one line per changed path; dropping the
# VERSION line leaves output only when other files are dirty.
if [[ -n $(git status --porcelain | grep -v VERSION) ]]; then
  echo "❌ There are other un-staged changes to the repository besides VERSION"
  exit 1
fi

VERSION="$(cat VERSION)"

echo "Publishing version v$VERSION..."

make version
git add --all
git commit -m "v$VERSION"
# Quote every expansion so an unexpected character in VERSION cannot be
# word-split or glob-expanded into extra arguments.
git tag "v$VERSION"
git push origin main "v$VERSION"

if grep -qE "alpha|beta" VERSION; then
  gh release create "v$VERSION" --title="v$VERSION" --prerelease --notes=""
else
  gh release create "v$VERSION" --title="v$VERSION"
fi


echo "✅ Published! version v$VERSION"
None, 26 | } 27 | } 28 | 29 | #[repr(C)] 30 | pub struct RegexCapturesTable { 31 | /// must be first 32 | base: sqlite3_vtab, 33 | } 34 | 35 | impl<'vtab> VTab<'vtab> for RegexCapturesTable { 36 | type Aux = (); 37 | type Cursor = RegexCapturesCursor<'vtab>; 38 | 39 | fn connect( 40 | _db: *mut sqlite3, 41 | _aux: Option<&Self::Aux>, 42 | _args: VTabArguments, 43 | ) -> Result<(String, RegexCapturesTable)> { 44 | let base: sqlite3_vtab = unsafe { mem::zeroed() }; 45 | let vtab = RegexCapturesTable { base }; 46 | // TODO db.config(VTabConfig::Innocuous)?; 47 | Ok((CREATE_SQL.to_owned(), vtab)) 48 | } 49 | fn destroy(&self) -> Result<()> { 50 | Ok(()) 51 | } 52 | 53 | fn best_index(&self, mut info: IndexInfo) -> core::result::Result<(), BestIndexError> { 54 | let mut has_pattern = false; 55 | let mut has_contents = false; 56 | for mut constraint in info.constraints() { 57 | match column(constraint.column_idx()) { 58 | Some(Columns::Pattern) => { 59 | if constraint.usable() && constraint.op() == Some(ConstraintOperator::EQ) { 60 | constraint.set_omit(true); 61 | constraint.set_argv_index(1); 62 | has_pattern = true; 63 | } else { 64 | return Err(BestIndexError::Constraint); 65 | } 66 | } 67 | Some(Columns::Contents) => { 68 | if constraint.usable() && constraint.op() == Some(ConstraintOperator::EQ) { 69 | constraint.set_omit(true); 70 | constraint.set_argv_index(2); 71 | has_contents = true; 72 | } else { 73 | return Err(BestIndexError::Constraint); 74 | } 75 | } 76 | _ => (), 77 | } 78 | } 79 | if !has_pattern || !has_contents { 80 | return Err(BestIndexError::Error); 81 | } 82 | info.set_estimated_cost(100000.0); 83 | info.set_estimated_rows(100000); 84 | info.set_idxnum(2); 85 | 86 | Ok(()) 87 | } 88 | 89 | fn open(&mut self) -> Result<RegexCapturesCursor<'_>> { 90 | Ok(RegexCapturesCursor::new()) 91 | } 92 | } 93 | 94 | impl<'vtab> VTabFind<'vtab> for RegexCapturesTable { 95 | fn find_function(&mut self, argc: i32, name: &str) -> Option<FindResult> { 96 | if name 
== "->>" && argc == 2 { 97 | return Some((scalar_function_raw(crate::regex_capture2), None, None)); 98 | } 99 | None 100 | } 101 | } 102 | 103 | #[repr(C)] 104 | pub struct RegexCapturesCursor<'vtab> { 105 | /// Base class. Must be first 106 | base: sqlite3_vtab_cursor, 107 | r_clone: Option<Regex>, 108 | all_captures: Option<Vec<Captures<'vtab>>>, 109 | curr: usize, 110 | } 111 | impl RegexCapturesCursor<'_> { 112 | fn new<'vtab>() -> RegexCapturesCursor<'vtab> { 113 | let base: sqlite3_vtab_cursor = unsafe { mem::zeroed() }; 114 | RegexCapturesCursor { 115 | base, 116 | r_clone: None, 117 | all_captures: None, 118 | curr: 0, 119 | } 120 | } 121 | } 122 | 123 | impl VTabCursor for RegexCapturesCursor<'_> { 124 | fn filter( 125 | &mut self, 126 | _idx_num: c_int, 127 | _idx_str: Option<&str>, 128 | values: &[*mut sqlite3_value], 129 | ) -> Result<()> { 130 | let r = value_regex( 131 | values 132 | .get(0) 133 | .ok_or_else(|| Error::new_message("expected 1st argument as regex"))?, 134 | )?; 135 | let r = unsafe { &*r }; 136 | let contents = api::value_text_notnull( 137 | values 138 | .get(1) 139 | .ok_or_else(|| Error::new_message("expected 2nd argument as contents"))?, 140 | )?; 141 | 142 | let mut res = vec![]; 143 | for captures in r.captures_iter(contents) { 144 | res.push(captures) 145 | } 146 | self.r_clone = Some((r).clone()); 147 | self.all_captures = Some(res); 148 | self.curr = 0; 149 | Ok(()) 150 | } 151 | 152 | fn next(&mut self) -> Result<()> { 153 | self.curr += 1; 154 | Ok(()) 155 | } 156 | 157 | fn eof(&self) -> bool { 158 | self.all_captures 159 | .as_ref() 160 | .map_or(true, |m| self.curr >= m.len()) 161 | } 162 | 163 | fn column(&self, context: *mut sqlite3_context, i: c_int) -> Result<()> { 164 | let captures = self 165 | .all_captures 166 | .as_ref() 167 | .ok_or_else(|| { 168 | Error::new_message("sqlite-regex internal error: self.all_captures is not defined") 169 | })? 
170 | .get(self.curr) 171 | .ok_or_else(|| { 172 | Error::new_message( 173 | "sqlite-regex internal error: self.curr greater than all_captures result", 174 | ) 175 | })?; 176 | match column(i) { 177 | Some(Columns::Captures) => { 178 | result_regex_captures(context, self.r_clone.as_ref().unwrap(), captures); 179 | } 180 | Some(Columns::Pattern) => (), 181 | Some(Columns::Contents) => (), 182 | None => (), 183 | } 184 | Ok(()) 185 | } 186 | 187 | fn rowid(&self) -> Result<i64> { 188 | Ok(self.curr as i64) 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /src/find_all.rs: -------------------------------------------------------------------------------- 1 | use sqlite_loadable::{ 2 | api, 3 | table::{ConstraintOperator, IndexInfo, VTab, VTabArguments, VTabCursor}, 4 | BestIndexError, Result, 5 | }; 6 | use sqlite_loadable::{prelude::*, Error}; 7 | 8 | use std::{marker::PhantomData, mem, os::raw::c_int}; 9 | 10 | use crate::utils::value_regex; 11 | 12 | static CREATE_SQL: &str = 13 | "CREATE TABLE x(start int, end int, match text, pattern hidden, contents text hidden)"; 14 | enum Columns { 15 | Start, 16 | End, 17 | Match, 18 | Pattern, 19 | Contents, 20 | } 21 | fn column(index: i32) -> Option<Columns> { 22 | match index { 23 | 0 => Some(Columns::Start), 24 | 1 => Some(Columns::End), 25 | 2 => Some(Columns::Match), 26 | 3 => Some(Columns::Pattern), 27 | 4 => Some(Columns::Contents), 28 | _ => None, 29 | } 30 | } 31 | 32 | #[repr(C)] 33 | pub struct RegexFindAllTable { 34 | /// must be first 35 | base: sqlite3_vtab, 36 | } 37 | 38 | impl<'vtab> VTab<'vtab> for RegexFindAllTable { 39 | type Aux = (); 40 | type Cursor = RegexFindAllCursor<'vtab>; 41 | 42 | fn connect( 43 | _db: *mut sqlite3, 44 | _aux: Option<&Self::Aux>, 45 | _args: VTabArguments, 46 | ) -> Result<(String, RegexFindAllTable)> { 47 | let base: sqlite3_vtab = unsafe { mem::zeroed() }; 48 | let vtab = RegexFindAllTable { base }; 49 | // TODO 
db.config(VTabConfig::Innocuous)?; 50 | Ok((CREATE_SQL.to_owned(), vtab)) 51 | } 52 | fn destroy(&self) -> Result<()> { 53 | Ok(()) 54 | } 55 | 56 | fn best_index(&self, mut info: IndexInfo) -> core::result::Result<(), BestIndexError> { 57 | let mut has_pattern = false; 58 | let mut has_contents = false; 59 | for mut constraint in info.constraints() { 60 | match column(constraint.column_idx()) { 61 | Some(Columns::Pattern) => { 62 | if constraint.usable() && constraint.op() == Some(ConstraintOperator::EQ) { 63 | constraint.set_omit(true); 64 | constraint.set_argv_index(1); 65 | has_pattern = true; 66 | } else { 67 | return Err(BestIndexError::Constraint); 68 | } 69 | } 70 | Some(Columns::Contents) => { 71 | if constraint.usable() && constraint.op() == Some(ConstraintOperator::EQ) { 72 | constraint.set_omit(true); 73 | constraint.set_argv_index(2); 74 | has_contents = true; 75 | } else { 76 | return Err(BestIndexError::Constraint); 77 | } 78 | } 79 | _ => (), 80 | } 81 | } 82 | if !has_pattern || !has_contents { 83 | return Err(BestIndexError::Error); 84 | } 85 | info.set_estimated_cost(100000.0); 86 | info.set_estimated_rows(100000); 87 | info.set_idxnum(2); 88 | 89 | Ok(()) 90 | } 91 | 92 | fn open(&mut self) -> Result<RegexFindAllCursor<'_>> { 93 | Ok(RegexFindAllCursor::new()) 94 | } 95 | } 96 | 97 | type MMatch = (usize, usize, String); 98 | #[repr(C)] 99 | pub struct RegexFindAllCursor<'vtab> { 100 | /// Base class. 
Must be first 101 | base: sqlite3_vtab_cursor, 102 | matches: Option<Vec<MMatch>>, 103 | curr: usize, 104 | phantom: PhantomData<&'vtab RegexFindAllTable>, 105 | } 106 | impl RegexFindAllCursor<'_> { 107 | fn new<'vtab>() -> RegexFindAllCursor<'vtab> { 108 | let base: sqlite3_vtab_cursor = unsafe { mem::zeroed() }; 109 | RegexFindAllCursor { 110 | base, 111 | matches: None, 112 | curr: 0, 113 | phantom: PhantomData, 114 | } 115 | } 116 | } 117 | 118 | impl VTabCursor for RegexFindAllCursor<'_> { 119 | fn filter( 120 | &mut self, 121 | _idx_num: c_int, 122 | _idx_str: Option<&str>, 123 | values: &[*mut sqlite3_value], 124 | ) -> Result<()> { 125 | let r = value_regex( 126 | values 127 | .get(0) 128 | .ok_or_else(|| Error::new_message("expected 1st argument as regex"))?, 129 | )?; 130 | let r = unsafe { &*r }; 131 | let contents = api::value_text_notnull( 132 | values 133 | .get(1) 134 | .ok_or_else(|| Error::new_message("expected 2nd argument as contents"))?, 135 | )?; 136 | 137 | let mut res = vec![]; 138 | for m in r.find_iter(contents) { 139 | res.push((m.start(), m.end(), m.as_str().to_string())) 140 | } 141 | self.matches = Some(res); 142 | self.curr = 0; 143 | Ok(()) 144 | } 145 | 146 | fn next(&mut self) -> Result<()> { 147 | self.curr += 1; 148 | Ok(()) 149 | } 150 | 151 | fn eof(&self) -> bool { 152 | self.matches.as_ref().map_or(true, |m| self.curr >= m.len()) 153 | } 154 | 155 | fn column(&self, context: *mut sqlite3_context, i: c_int) -> Result<()> { 156 | let m = self 157 | .matches 158 | .as_ref() 159 | .ok_or_else(|| { 160 | Error::new_message("sqlite-regex internal error: self.match is not defined") 161 | })? 
162 | .get(self.curr) 163 | .ok_or_else(|| { 164 | Error::new_message( 165 | "sqlite-regex internal error: self.curr greater than matches result", 166 | ) 167 | })?; 168 | 169 | match column(i) { 170 | Some(Columns::Start) => { 171 | api::result_int(context, m.0 as i32); 172 | } 173 | Some(Columns::End) => { 174 | api::result_int(context, m.1 as i32); 175 | } 176 | Some(Columns::Match) => { 177 | api::result_text(context, &m.2)?; 178 | } 179 | _ => (), 180 | } 181 | Ok(()) 182 | } 183 | 184 | fn rowid(&self) -> Result<i64> { 185 | Ok(self.curr as i64) 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | mod captures; 2 | mod find_all; 3 | mod meta; 4 | mod regex; 5 | mod regexset; 6 | mod regexset_matches; 7 | mod split; 8 | mod utils; 9 | 10 | use regexset_matches::RegexSetMatchesTable; 11 | use sqlite_loadable::prelude::*; 12 | use sqlite_loadable::{ 13 | define_scalar_function, define_table_function, errors::Result, 14 | table::define_table_function_with_find, FunctionFlags, 15 | }; 16 | 17 | use crate::{ 18 | captures::RegexCapturesTable, find_all::RegexFindAllTable, meta::*, regex::*, regexset::*, 19 | split::RegexSplitTable, 20 | }; 21 | 22 | #[sqlite_entrypoint] 23 | pub fn sqlite3_regex_init(db: *mut sqlite3) -> Result<()> { 24 | let flags = FunctionFlags::UTF8 | FunctionFlags::DETERMINISTIC; 25 | 26 | define_scalar_function(db, "regex_version", 0, regex_version, flags)?; 27 | define_scalar_function(db, "regex_debug", 0, regex_debug, flags)?; 28 | 29 | define_scalar_function(db, "regex", 1, regex, flags)?; 30 | define_scalar_function(db, "regex_print", 1, regex_print, flags)?; 31 | 32 | define_scalar_function(db, "regexp", 2, regexp, flags)?; 33 | 34 | define_scalar_function(db, "regex_valid", 1, regex_valid, flags)?; 35 | 36 | define_scalar_function(db, "regex_find", 2, regex_find, flags)?; 37 | 
define_scalar_function(db, "regex_find_at", 3, regex_find_at, flags)?; 38 | 39 | define_scalar_function(db, "regex_replace", 3, regex_replace, flags)?; 40 | define_scalar_function(db, "regex_replace_all", 3, regex_replace_all, flags)?; 41 | 42 | define_scalar_function(db, "regex_capture", 3, regex_capture, flags)?; 43 | define_scalar_function(db, "regex_capture", 2, regex_capture2, flags)?; 44 | 45 | define_table_function::<RegexFindAllTable>(db, "regex_find_all", None)?; 46 | define_table_function::<RegexSplitTable>(db, "regex_split", None)?; 47 | define_table_function_with_find::<RegexCapturesTable>(db, "regex_captures", None)?; 48 | 49 | define_scalar_function(db, "regexset", -1, regexset, flags)?; 50 | define_scalar_function(db, "regexset_print", 1, regexset_print, flags)?; 51 | define_scalar_function(db, "regexset_is_match", 2, regexset_is_match, flags)?; 52 | 53 | define_table_function::<RegexSetMatchesTable>(db, "regexset_matches", None)?; 54 | Ok(()) 55 | } 56 | -------------------------------------------------------------------------------- /src/meta.rs: -------------------------------------------------------------------------------- 1 | use sqlite_loadable::prelude::*; 2 | use sqlite_loadable::{api, Result}; 3 | 4 | pub fn regex_version(context: *mut sqlite3_context, _values: &[*mut sqlite3_value]) -> Result<()> { 5 | api::result_text(context, &format!("v{}", env!("CARGO_PKG_VERSION")))?; 6 | Ok(()) 7 | } 8 | 9 | pub fn regex_debug(context: *mut sqlite3_context, _values: &[*mut sqlite3_value]) -> Result<()> { 10 | api::result_text( 11 | context, 12 | &format!( 13 | "Version: v{} 14 | Source: {} 15 | ", 16 | env!("CARGO_PKG_VERSION"), 17 | env!("GIT_HASH") 18 | ), 19 | )?; 20 | Ok(()) 21 | } 22 | -------------------------------------------------------------------------------- /src/regex.rs: -------------------------------------------------------------------------------- 1 | use regex::Regex; 2 | 3 | use crate::utils::{ 4 | cleanup_regex_value_cached, 
regex_from_value_or_cache, result_regex, value_regex, 5 | value_regex_captures, CaptureGroupKey, 6 | }; 7 | use sqlite_loadable::prelude::*; 8 | use sqlite_loadable::{api, Error, Result}; 9 | 10 | /// regex_print(regex): returns the pattern source of a regex pointer or text pattern 11 | pub fn regex_print(context: *mut sqlite3_context, values: &[*mut sqlite3_value]) -> Result<()> { 12 | let regex = value_regex(values.get(0).ok_or("expected 1st argument as regex")?)?; 13 | let regex = unsafe { &mut *regex }; 14 | api::result_text(context, regex.as_str())?; 15 | Ok(()) 16 | } 17 | 18 | /// regex(pattern): compiles the pattern and returns it as a regex pointer value 19 | pub fn regex(context: *mut sqlite3_context, values: &[*mut sqlite3_value]) -> Result<()> { 20 | let pattern = api::value_text_notnull(values.get(0).ok_or("expected 1st argument as pattern")?)?; 21 | let regex = Regex::new(pattern).map_err(|err| { 22 | Error::new_message(format!("Error parsing pattern as regex: {}", err).as_str()) 23 | })?; 24 | result_regex(context, regex); 25 | Ok(()) 26 | } 27 | 28 | /// regex_matches(regex, text) 29 | pub fn regex_matches(context: *mut sqlite3_context, values: &[*mut sqlite3_value]) -> Result<()> { 30 | let (regex, input_type) = regex_from_value_or_cache(context, values, 0)?; 31 | let regex = unsafe { &mut *regex }; 32 | let content = 33 | api::value_text_notnull(values.get(1).ok_or("expected 2nd argument as contents")?)?; 34 | 35 | api::result_bool(context, regex.is_match(content)); 36 | cleanup_regex_value_cached(context, regex, input_type); 37 | Ok(()) 38 | } 39 | 40 | /// regexp(pattern, text) or text REGEXP pattern 41 | // Alias of regex_matches 42 | pub fn regexp(context: *mut sqlite3_context, values: &[*mut sqlite3_value]) -> Result<()> { 43 | regex_matches(context, values) 44 | } 45 | 46 | /// regex_valid(pattern) 47 | pub fn regex_valid(context: *mut sqlite3_context, values: &[*mut sqlite3_value]) -> Result<()> { 48 | let pattern = api::value_text_notnull( 49 | values 50 | .get(0) 51 | .ok_or_else(|| Error::new_message("expected 1st argument as pattern"))?, 52 | )?; 53 | api::result_bool(context, Regex::new(pattern).is_ok()); 54 |
Ok(()) 55 | } 56 | 57 | /// regex_find(regex, contents) 58 | pub fn regex_find(context: *mut sqlite3_context, values: &[*mut sqlite3_value]) -> Result<()> { 59 | let (regex, input_type) = regex_from_value_or_cache(context, values, 0)?; 60 | let regex = unsafe { &mut *regex }; 61 | let arg_content = values 62 | .get(1) 63 | .ok_or_else(|| Error::new_message("expected 2nd argument as contents"))?; 64 | 65 | let content = api::value_text_notnull(arg_content)?; 66 | match regex.find(content) { 67 | Some(m) => { 68 | api::result_text(context, m.as_str())?; 69 | } 70 | None => { 71 | api::result_null(context); 72 | } 73 | }; 74 | 75 | cleanup_regex_value_cached(context, regex, input_type); 76 | Ok(()) 77 | } 78 | 79 | /// regex_find_at(regex, contents, offset): first match starting the search at byte `offset`; NULL when there is no match or the offset is past the end of contents 80 | pub fn regex_find_at(context: *mut sqlite3_context, values: &[*mut sqlite3_value]) -> Result<()> { 81 | let (regex, input_type) = regex_from_value_or_cache(context, values, 0)?; 82 | let regex = unsafe { &mut *regex }; 83 | let arg_content = values 84 | .get(1) 85 | .ok_or_else(|| Error::new_message("expected 2nd argument as contents"))?; 86 | let arg_offset = values 87 | .get(2) 88 | .ok_or_else(|| Error::new_message("expected 3rd argument as offset"))?; 89 | 90 | let content = api::value_text_notnull(arg_content)?; 91 | let offset = api::value_int(arg_offset) as usize; 92 | match if offset <= content.len() { regex.find_at(content, offset) } else { None } { 93 | Some(m) => { 94 | api::result_text(context, m.as_str())?; 95 | } 96 | None => { 97 | api::result_null(context); 98 | } 99 | }; 100 | 101 | cleanup_regex_value_cached(context, regex, input_type); 102 | 103 | Ok(()) 104 | } 105 | 106 | /// regex_replace(regex, contents, replacement) 107 | pub fn regex_replace(context: *mut sqlite3_context, values: &[*mut sqlite3_value]) -> Result<()> { 108 | let (regex, input_type) = regex_from_value_or_cache(context, values, 0)?; 109 | let regex = unsafe { &mut *regex }; 110 | let content = api::value_text_notnull( 111 | values 112 | .get(1) 113 | .ok_or_else(||
Error::new_message("expected 2nd argument as contents"))?, 114 | )?; 115 | let replacement = api::value_text_notnull( 116 | values 117 | .get(2) 118 | .ok_or_else(|| Error::new_message("expected 3rd argument as replacement"))?, 119 | )?; 120 | 121 | let result = regex.replace(content, replacement); 122 | 123 | api::result_text(context, result)?; 124 | cleanup_regex_value_cached(context, regex, input_type); 125 | 126 | Ok(()) 127 | } 128 | 129 | /// regex_replace_all(regex, contents, replacement) 130 | pub fn regex_replace_all( 131 | context: *mut sqlite3_context, 132 | values: &[*mut sqlite3_value], 133 | ) -> Result<()> { 134 | let (regex, input_type) = regex_from_value_or_cache(context, values, 0)?; 135 | let regex = unsafe { &mut *regex }; 136 | let content = api::value_text_notnull( 137 | values 138 | .get(1) 139 | .ok_or_else(|| Error::new_message("expected 2nd argument as contents"))?, 140 | )?; 141 | let replacement = api::value_text_notnull( 142 | values 143 | .get(2) 144 | .ok_or_else(|| Error::new_message("expected 3rd argument as replacement"))?, 145 | )?; 146 | let result = regex.replace_all(content, replacement); 147 | api::result_text(context, result)?; 148 | 149 | cleanup_regex_value_cached(context, regex, input_type); 150 | Ok(()) 151 | } 152 | 153 | /// regex_capture(regex, contents, group) 154 | pub fn regex_capture(context: *mut sqlite3_context, values: &[*mut sqlite3_value]) -> Result<()> { 155 | let (regex, input_type) = regex_from_value_or_cache(context, values, 0)?; 156 | let regex = unsafe { &mut *regex }; 157 | let content = api::value_text_notnull( 158 | values 159 | .get(1) 160 | .ok_or_else(|| Error::new_message("expected 2nd argument as contents"))?, 161 | )?; 162 | let group_arg = values 163 | .get(2) 164 | .ok_or_else(|| Error::new_message("expected 3rd argument as group index or name"))?; 165 | 166 | let result = regex.captures(content); 167 | match result { 168 | None => api::result_null(context), 169 | Some(captures) => { 170 | let 
matched_capture = match api::value_type(group_arg) { 171 | api::ValueType::Integer => captures.get(api::value_int64(group_arg) as usize), 172 | _ => { 173 | let name = api::value_text(group_arg)?; 174 | captures.name(name) 175 | } 176 | }; 177 | match matched_capture { 178 | None => api::result_null(context), 179 | Some(matched_group) => { 180 | api::result_text(context, matched_group.as_str())?; 181 | } 182 | } 183 | } 184 | } 185 | cleanup_regex_value_cached(context, regex, input_type); 186 | Ok(()) 187 | } 188 | 189 | /// regex_capture(captures, group) 190 | pub fn regex_capture2(context: *mut sqlite3_context, values: &[*mut sqlite3_value]) -> Result<()> { 191 | let captures = value_regex_captures( 192 | values 193 | .get(0) 194 | .ok_or_else(|| Error::new_message("expected 1st argument as capture group"))?, 195 | )?; 196 | let captures = unsafe { &*captures }; 197 | let group_arg = values 198 | .get(1) 199 | .ok_or_else(|| Error::new_message("expected 2nd argument as group index or name"))?; 200 | 201 | let matched_capture = match api::value_type(group_arg) { 202 | api::ValueType::Integer => { 203 | let lookup = api::value_int64(group_arg) as usize; 204 | captures.iter().find(|c| { 205 | if let CaptureGroupKey::Index(idx) = c.key { 206 | idx == lookup 207 | } else { 208 | false 209 | } 210 | }) 211 | } 212 | _ => { 213 | let name = api::value_text(group_arg)?; 214 | captures.iter().find(|c| { 215 | if let CaptureGroupKey::Name(n) = &c.key { 216 | name == n 217 | } else { 218 | false 219 | } 220 | }) 221 | } 222 | }; 223 | match matched_capture { 224 | None => api::result_null(context), 225 | Some(m) => match &m.value { 226 | Some(v) => api::result_text(context, v.as_str())?, 227 | None => api::result_null(context), 228 | }, 229 | } 230 | Ok(()) 231 | } 232 | -------------------------------------------------------------------------------- /src/regexset.rs: -------------------------------------------------------------------------------- 1 | use regex::RegexSet; 2
| 3 | use crate::utils::{result_regexset, value_regexset}; 4 | use sqlite_loadable::prelude::*; 5 | use sqlite_loadable::{api, Error, Result}; 6 | 7 | /// regexset(pattern1, ...) 8 | pub fn regexset(context: *mut sqlite3_context, values: &[*mut sqlite3_value]) -> Result<()> { 9 | let mut patterns = Vec::with_capacity(values.len()); 10 | for value in values { 11 | let pattern = api::value_text_notnull(value)?; 12 | patterns.push(pattern); 13 | } 14 | let set = RegexSet::new(patterns).map_err(|err| Error::new_message(format!("Error parsing patterns as regexset: {}", err).as_str()))?; 15 | result_regexset(context, set); 16 | Ok(()) 17 | } 18 | 19 | /// regexset_print(regexset) 20 | pub fn regexset_print(context: *mut sqlite3_context, values: &[*mut sqlite3_value]) -> Result<()> { 21 | let regexset = value_regexset(values.get(0).ok_or_else(|| Error::new_message("expected 1st argument as regexset"))?)?; 22 | let regexset = unsafe { &*regexset }; 23 | api::result_json(context, regexset.patterns().into())?; 24 | Ok(()) 25 | } 26 | 27 | /// regexset_is_match(regexset, contents) 28 | pub fn regexset_is_match( 29 | context: *mut sqlite3_context, 30 | values: &[*mut sqlite3_value], 31 | ) -> Result<()> { 32 | let regexset = value_regexset(values.get(0).ok_or_else(|| Error::new_message("expected 1st argument as regexset"))?)?; 33 | let regexset = unsafe { &*regexset }; 34 | let text = api::value_text_notnull(values.get(1).ok_or_else(|| Error::new_message("expected 2nd argument as contents"))?)?; 35 | api::result_bool(context, regexset.is_match(text)); 36 | Ok(()) 37 | } 38 | -------------------------------------------------------------------------------- /src/regexset_matches.rs: -------------------------------------------------------------------------------- 1 | use regex::RegexSet; 2 | use sqlite_loadable::{ 3 | api, 4 | table::{ConstraintOperator, IndexInfo, VTab, VTabArguments, VTabCursor}, 5 | BestIndexError, Result, 6 | }; 7 | use sqlite_loadable::{prelude::*, Error}; 8 | 9 | use std::{mem, os::raw::c_int}; 10 | 11 | use crate::utils::value_regexset; 12 | 13 | static CREATE_SQL: &str = "CREATE TABLE x(key, pattern, regexset
hidden, contents hidden)"; 14 | enum Columns { 15 | Key, 16 | RegexPattern, 17 | Regexset, 18 | Contents, 19 | } 20 | fn column(index: i32) -> Option<Columns> { 21 | match index { 22 | 0 => Some(Columns::Key), 23 | 1 => Some(Columns::RegexPattern), 24 | 2 => Some(Columns::Regexset), 25 | 3 => Some(Columns::Contents), 26 | _ => None, 27 | } 28 | } 29 | 30 | #[repr(C)] 31 | pub struct RegexSetMatchesTable { 32 | /// must be first 33 | base: sqlite3_vtab, 34 | } 35 | 36 | impl<'vtab> VTab<'vtab> for RegexSetMatchesTable { 37 | type Aux = (); 38 | type Cursor = RegexSetMatchesCursor; 39 | 40 | fn connect( 41 | _db: *mut sqlite3, 42 | _aux: Option<&Self::Aux>, 43 | _args: VTabArguments, 44 | ) -> Result<(String, RegexSetMatchesTable)> { 45 | let vtab = RegexSetMatchesTable { 46 | base: unsafe { mem::zeroed() }, 47 | }; 48 | // TODO db.config(VTabConfig::Innocuous)?; 49 | Ok((CREATE_SQL.to_owned(), vtab)) 50 | } 51 | fn destroy(&self) -> Result<()> { 52 | Ok(()) 53 | } 54 | 55 | fn best_index(&self, mut info: IndexInfo) -> core::result::Result<(), BestIndexError> { 56 | let mut has_pattern = false; 57 | let mut has_contents = false; 58 | for mut constraint in info.constraints() { 59 | match column(constraint.column_idx()) { 60 | Some(Columns::Regexset) => { 61 | if constraint.usable() && constraint.op() == Some(ConstraintOperator::EQ) { 62 | constraint.set_omit(true); 63 | constraint.set_argv_index(1); 64 | has_pattern = true; 65 | } else { 66 | return Err(BestIndexError::Constraint); 67 | } 68 | } 69 | Some(Columns::Contents) => { 70 | if constraint.usable() && constraint.op() == Some(ConstraintOperator::EQ) { 71 | constraint.set_omit(true); 72 | constraint.set_argv_index(2); 73 | has_contents = true; 74 | } else { 75 | return Err(BestIndexError::Constraint); 76 | } 77 | } 78 | _ => (), 79 | } 80 | } 81 | if !has_pattern || !has_contents { 82 | return Err(BestIndexError::Error); 83 | } 84 | info.set_estimated_cost(100000.0); 85 | info.set_estimated_rows(100000); 86 | 
info.set_idxnum(2); 87 | 88 | Ok(()) 89 | } 90 | 91 | fn open(&mut self) -> Result<RegexSetMatchesCursor> { 92 | Ok(RegexSetMatchesCursor::new()) 93 | } 94 | } 95 | 96 | #[repr(C)] 97 | pub struct RegexSetMatchesCursor { 98 | /// Base class. Must be first 99 | base: sqlite3_vtab_cursor, 100 | regex_set: Option<RegexSet>, 101 | matches: Option<Vec<usize>>, 102 | rowid: usize, 103 | } 104 | impl RegexSetMatchesCursor { 105 | fn new() -> RegexSetMatchesCursor { 106 | let base: sqlite3_vtab_cursor = unsafe { mem::zeroed() }; 107 | RegexSetMatchesCursor { 108 | base, 109 | regex_set: None, 110 | matches: None, 111 | rowid: 0, 112 | } 113 | } 114 | } 115 | 116 | impl VTabCursor for RegexSetMatchesCursor { 117 | fn filter( 118 | &mut self, 119 | _idx_num: c_int, 120 | _idx_str: Option<&str>, 121 | values: &[*mut sqlite3_value], 122 | ) -> Result<()> { 123 | let r = value_regexset(values.get(0).ok_or_else(|| { 124 | Error::new_message("internal error: pattern not passed into xFilter") 125 | })?)?; 126 | let r = unsafe { &mut *r }; 127 | let contents = api::value_text_notnull(values.get(1).ok_or_else(|| { 128 | Error::new_message("internal error: contents not passed into xFilter") 129 | })?)?; 130 | 131 | self.regex_set = Some(r.clone()); 132 | self.matches = Some(r.matches(contents).into_iter().collect()); 133 | self.rowid = 0; 134 | Ok(()) 135 | } 136 | 137 | fn next(&mut self) -> Result<()> { 138 | self.rowid += 1; 139 | Ok(()) 140 | } 141 | 142 | fn eof(&self) -> bool { 143 | self.matches 144 | .as_ref() 145 | .map_or(true, |m| self.rowid >= m.len()) 146 | } 147 | 148 | fn column(&self, context: *mut sqlite3_context, i: c_int) -> Result<()> { 149 | let match_idx = self 150 | .matches 151 | .as_ref() 152 | .ok_or_else(|| { 153 | Error::new_message("sqlite-regex internal error: self.matches is not defined") 154 | })? 
155 | .get(self.rowid) 156 | .ok_or_else(|| { 157 | Error::new_message( 158 | "sqlite-regex internal error: self.rowid greater than matches result", 159 | ) 160 | })?; 161 | 162 | match column(i) { 163 | Some(Columns::Key) => { 164 | api::result_int(context, (*match_idx) as i32); 165 | } 166 | Some(Columns::RegexPattern) => { 167 | let pattern = self 168 | .regex_set 169 | .as_ref() 170 | .ok_or_else(|| { 171 | Error::new_message( 172 | "sqlite-regex internal error: self.regex_set is not defined", 173 | ) 174 | })? 175 | .patterns() 176 | .get(*match_idx) 177 | .ok_or_else(|| { 178 | Error::new_message( 179 | "sqlite-regex internal error: match_idx greater than matches result", 180 | ) 181 | })?; 182 | api::result_text(context, pattern)?; 183 | } 184 | Some(Columns::Regexset) => { 185 | api::result_json( 186 | context, 187 | self.regex_set 188 | .as_ref() 189 | .ok_or_else(|| { 190 | Error::new_message( 191 | "sqlite-regex internal error: self.regex_set is not defined", 192 | ) 193 | })? 
194 | .patterns() 195 | .into(), 196 | )?; 197 | } 198 | Some(Columns::Contents) => {} 199 | None => (), 200 | } 201 | Ok(()) 202 | } 203 | 204 | fn rowid(&self) -> Result<i64> { 205 | Ok(self.rowid as i64) 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /src/split.rs: -------------------------------------------------------------------------------- 1 | use sqlite_loadable::{ 2 | api, 3 | table::{ConstraintOperator, IndexInfo, VTab, VTabArguments, VTabCursor}, 4 | BestIndexError, Result, 5 | }; 6 | use sqlite_loadable::{prelude::*, Error}; 7 | 8 | use std::{marker::PhantomData, mem, os::raw::c_int}; 9 | 10 | use crate::utils::value_regex; 11 | 12 | static CREATE_SQL: &str = "CREATE TABLE x(item text, pattern hidden, contents text hidden)"; 13 | enum Columns { 14 | Item, 15 | Pattern, 16 | Contents, 17 | } 18 | fn column(index: i32) -> Option<Columns> { 19 | match index { 20 | 0 => Some(Columns::Item), 21 | 1 => Some(Columns::Pattern), 22 | 2 => Some(Columns::Contents), 23 | _ => None, 24 | } 25 | } 26 | 27 | #[repr(C)] 28 | pub struct RegexSplitTable { 29 | /// must be first 30 | base: sqlite3_vtab, 31 | } 32 | 33 | impl<'vtab> VTab<'vtab> for RegexSplitTable { 34 | type Aux = (); 35 | type Cursor = RegexSplitCursor<'vtab>; 36 | 37 | fn connect( 38 | _db: *mut sqlite3, 39 | _aux: Option<&()>, 40 | _args: VTabArguments, 41 | ) -> Result<(String, RegexSplitTable)> { 42 | let base: sqlite3_vtab = unsafe { mem::zeroed() }; 43 | let vtab = RegexSplitTable { base }; 44 | // TODO db.config(VTabConfig::Innocuous)?; 45 | Ok((CREATE_SQL.to_owned(), vtab)) 46 | } 47 | fn destroy(&self) -> Result<()> { 48 | Ok(()) 49 | } 50 | 51 | fn best_index(&self, mut info: IndexInfo) -> core::result::Result<(), BestIndexError> { 52 | let mut has_pattern = false; 53 | let mut has_contents = false; 54 | for mut constraint in info.constraints() { 55 | match column(constraint.column_idx()) { 56 | Some(Columns::Pattern) => { 57 | if 
constraint.usable() && constraint.op() == Some(ConstraintOperator::EQ) { 58 | constraint.set_omit(true); 59 | constraint.set_argv_index(1); 60 | has_pattern = true; 61 | } else { 62 | return Err(BestIndexError::Constraint); 63 | } 64 | } 65 | Some(Columns::Contents) => { 66 | if constraint.usable() && constraint.op() == Some(ConstraintOperator::EQ) { 67 | constraint.set_omit(true); 68 | constraint.set_argv_index(2); 69 | has_contents = true; 70 | } else { 71 | return Err(BestIndexError::Constraint); 72 | } 73 | } 74 | _ => (), 75 | } 76 | } 77 | if !has_pattern || !has_contents { 78 | return Err(BestIndexError::Error); 79 | } 80 | info.set_estimated_cost(100000.0); 81 | info.set_estimated_rows(100000); 82 | info.set_idxnum(2); 83 | 84 | Ok(()) 85 | } 86 | 87 | fn open(&mut self) -> Result<RegexSplitCursor<'_>> { 88 | Ok(RegexSplitCursor::new()) 89 | } 90 | } 91 | 92 | #[repr(C)] 93 | pub struct RegexSplitCursor<'vtab> { 94 | /// Base class. Must be first 95 | base: sqlite3_vtab_cursor, 96 | contents: Option<String>, 97 | split: Option<Vec<String>>, 98 | rowid: usize, 99 | phantom: PhantomData<&'vtab RegexSplitTable>, 100 | } 101 | impl RegexSplitCursor<'_> { 102 | fn new<'vtab>() -> RegexSplitCursor<'vtab> { 103 | let base: sqlite3_vtab_cursor = unsafe { mem::zeroed() }; 104 | RegexSplitCursor { 105 | base, 106 | contents: None, 107 | split: None, 108 | rowid: 0, 109 | phantom: PhantomData, 110 | } 111 | } 112 | } 113 | 114 | impl VTabCursor for RegexSplitCursor<'_> { 115 | fn filter( 116 | &mut self, 117 | _idx_num: c_int, 118 | _idx_str: Option<&str>, 119 | values: &[*mut sqlite3_value], 120 | ) -> Result<()> { 121 | let r = value_regex( 122 | values 123 | .get(0) 124 | .ok_or_else(|| Error::new_message("expected 1st argument as regex"))?, 125 | )?; 126 | let r = unsafe { &*r }; 127 | let contents = api::value_text_notnull( 128 | values 129 | .get(1) 130 | .ok_or_else(|| Error::new_message("expected 2nd argument as contents"))?, 131 | )?; 132 | 133 | let split = 
r.split(contents); 134 | self.split = Some(split.map(|i| i.to_string()).collect()); 135 | self.rowid = 0; 136 | self.contents = Some(contents.to_owned()); 137 | Ok(()) 138 | } 139 | 140 | fn next(&mut self) -> Result<()> { 141 | self.rowid += 1; 142 | Ok(()) 143 | } 144 | 145 | fn eof(&self) -> bool { 146 | self.split.as_ref().map_or(true, |m| self.rowid >= m.len()) 147 | } 148 | 149 | fn column(&self, context: *mut sqlite3_context, i: c_int) -> Result<()> { 150 | match column(i) { 151 | Some(Columns::Item) => { 152 | api::result_text( 153 | context, 154 | self.split 155 | .as_ref() 156 | .ok_or_else(|| { 157 | Error::new_message( 158 | "sqlite-regex internal error: self.split is not defined", 159 | ) 160 | })? 161 | .get(self.rowid) 162 | .ok_or_else(|| { 163 | Error::new_message( 164 | "sqlite-regex internal error: self.rowid greater than matches result", 165 | ) 166 | })?, 167 | )?; 168 | } 169 | // TODO return contents as text 170 | Some(Columns::Contents) => { 171 | if let Some(contents) = &self.contents { 172 | api::result_text(context, contents)?; 173 | } 174 | } 175 | _ => (), 176 | } 177 | Ok(()) 178 | } 179 | 180 | fn rowid(&self) -> Result<i64> { 181 | Ok(self.rowid as i64) 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | use regex::{Captures, Regex, RegexSet}; 2 | use sqlite_loadable::prelude::*; 3 | use sqlite_loadable::{api, Error, Result}; 4 | use std::os::raw::c_void; 5 | 6 | // Raw bytes as performance. 
the string MUST end in the null byte '\0' 7 | const REGEX_POINTER_NAME: &[u8] = b"regex0\0"; 8 | 9 | pub fn value_regex(value: &*mut sqlite3_value) -> Result<*mut Regex> { 10 | unsafe { 11 | if let Some(regex) = api::value_pointer(value, REGEX_POINTER_NAME) { 12 | return Ok(regex); 13 | } 14 | } 15 | let pattern = api::value_text_notnull(value)?; 16 | let x = Box::new( 17 | Regex::new(pattern) 18 | .map_err(|err| Error::new_message(format!("Error parsing regex: {}", err).as_str()))?, 19 | ); 20 | Ok(Box::into_raw(x)) 21 | } 22 | 23 | pub fn result_regex(context: *mut sqlite3_context, regex: Regex) { 24 | api::result_pointer(context, REGEX_POINTER_NAME, regex) 25 | } 26 | 27 | pub(crate) enum CaptureGroupKey { 28 | Index(usize), 29 | Name(String), 30 | } 31 | 32 | pub(crate) struct CaptureGroup { 33 | pub key: CaptureGroupKey, 34 | pub value: Option<String>, 35 | } 36 | const REGEX_CAPTURES_NAME: &[u8] = b"regex_captures0\0"; 37 | 38 | pub(crate) fn value_regex_captures(value: &*mut sqlite3_value) -> Result<*mut Vec<CaptureGroup>> { 39 | unsafe { 40 | if let Some(capture) = api::value_pointer(value, REGEX_CAPTURES_NAME) { 41 | return Ok(capture); 42 | } 43 | } 44 | Err(Error::new_message("value is not a regex captures object")) 45 | } 46 | 47 | pub fn result_regex_captures(context: *mut sqlite3_context, regex: &Regex, captures: &Captures) { 48 | let mut caps: Vec<CaptureGroup> = vec![]; 49 | for name in regex.capture_names().flatten() { 50 | caps.push(CaptureGroup { 51 | key: CaptureGroupKey::Name(name.to_string()), 52 | value: captures.name(name).map(|v| v.as_str().to_string()), 53 | }) 54 | } 55 | for (i, m) in captures.iter().enumerate() { 56 | caps.push(CaptureGroup { 57 | key: CaptureGroupKey::Index(i), 58 | value: m.map(|v| v.as_str().to_string()), 59 | }) 60 | } 61 | api::result_pointer(context, REGEX_CAPTURES_NAME, caps) 62 | } 63 | 64 | pub enum RegexInputType { 65 | Pointer, 66 | TextInitial(usize), 67 | GetAuxdata, 68 | } 69 | pub fn 
regex_from_value_or_cache( 70 | context: *mut sqlite3_context, 71 | values: &[*mut sqlite3_value], 72 | at: usize, 73 | ) -> Result<(*mut Regex, RegexInputType)> { 74 | let value = values 75 | .get(at) 76 | .ok_or_else(|| Error::new_message("expected 1st argument as pattern"))?; 77 | 78 | // Step 1: If the value is a pointer result of regex(), 79 | // just use that. 80 | unsafe { 81 | if let Some(regex) = api::value_pointer(value, REGEX_POINTER_NAME) { 82 | return Ok((regex, RegexInputType::Pointer)); 83 | } 84 | } 85 | 86 | // Step 2: If sqlite3_get_auxdata returns a pointer, 87 | // then use that. 88 | 89 | let auxdata = api::auxdata_get(context, at as i32); 90 | if !auxdata.is_null() { 91 | Ok((auxdata.cast::<Regex>(), RegexInputType::GetAuxdata)) 92 | } else { 93 | // Step 3: if a string is passed in, then try to make 94 | // a regex from that, and return a flag to call sqlite3_set_auxdata 95 | 96 | let pattern = api::value_text_notnull(value)?; 97 | let boxed = Box::new( 98 | Regex::new(pattern).map_err(|_| Error::new_message("pattern not valid regex"))?, 99 | ); 100 | Ok((Box::into_raw(boxed), RegexInputType::TextInitial(at))) 101 | } 102 | } 103 | 104 | unsafe extern "C" fn cleanup_regex(arg1: *mut c_void) { 105 | drop(Box::from_raw(arg1.cast::<Regex>())) 106 | } 107 | 108 | pub fn cleanup_regex_value_cached( 109 | context: *mut sqlite3_context, 110 | regex: *mut Regex, 111 | input_type: RegexInputType, 112 | ) { 113 | match input_type { 114 | RegexInputType::Pointer => (), 115 | RegexInputType::GetAuxdata => {} 116 | RegexInputType::TextInitial(at) => api::auxdata_set( 117 | context, 118 | at as i32, 119 | regex.cast::<c_void>(), 120 | Some(cleanup_regex), 121 | ), 122 | } 123 | } 124 | 125 | // Raw bytes as performance. 
the string MUST end in the null byte '\0' 126 | const REGEX_SET_POINTER_NAME: &[u8] = b"regexset0\0"; 127 | 128 | pub fn value_regexset(value: &*mut sqlite3_value) -> Result<*mut RegexSet> { 129 | unsafe { 130 | if let Some(regex) = api::value_pointer(value, REGEX_SET_POINTER_NAME) { 131 | return Ok(regex); 132 | } 133 | } 134 | Err(Error::new_message("asdf")) 135 | } 136 | 137 | pub fn result_regexset(context: *mut sqlite3_context, set: RegexSet) { 138 | api::result_pointer(context, REGEX_SET_POINTER_NAME, set) 139 | } 140 | -------------------------------------------------------------------------------- /tests/test-loadable.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import unittest 3 | import time 4 | import os 5 | 6 | EXT_PATH="./dist/debug/regex0" 7 | 8 | def connect(ext): 9 | db = sqlite3.connect(":memory:") 10 | 11 | db.execute("create table base_functions as select name from pragma_function_list") 12 | db.execute("create table base_modules as select name from pragma_module_list") 13 | 14 | db.enable_load_extension(True) 15 | db.load_extension(ext) 16 | 17 | db.execute("create temp table loaded_functions as select name from pragma_function_list where name not in (select name from base_functions) order by name") 18 | db.execute("create temp table loaded_modules as select name from pragma_module_list where name not in (select name from base_modules) order by name") 19 | 20 | db.row_factory = sqlite3.Row 21 | return db 22 | 23 | 24 | db = connect(EXT_PATH) 25 | 26 | def explain_query_plan(sql): 27 | return db.execute("explain query plan " + sql).fetchone()["detail"] 28 | 29 | def execute_all(sql, args=None): 30 | if args is None: args = [] 31 | results = db.execute(sql, args).fetchall() 32 | return list(map(lambda x: dict(x), results)) 33 | 34 | FUNCTIONS = [ 35 | "regex", 36 | "regex_capture", 37 | "regex_capture", 38 | "regex_debug", 39 | "regex_find", 40 | "regex_find_at", 41 | "regex_print", 42 
| "regex_replace", 43 | "regex_replace_all", 44 | "regex_valid", 45 | "regex_version", 46 | "regexp", 47 | "regexset", 48 | "regexset_is_match", 49 | "regexset_print", 50 | ] 51 | 52 | MODULES = [ 53 | "regex_captures", 54 | "regex_find_all", 55 | "regex_split", 56 | "regexset_matches", 57 | ] 58 | def spread_args(args): 59 | return ",".join(['?'] * len(args)) 60 | 61 | class TestRegex(unittest.TestCase): 62 | def test_funcs(self): 63 | funcs = list(map(lambda a: a[0], db.execute("select name from loaded_functions").fetchall())) 64 | self.assertEqual(funcs, FUNCTIONS) 65 | 66 | def test_modules(self): 67 | modules = list(map(lambda a: a[0], db.execute("select name from loaded_modules").fetchall())) 68 | self.assertEqual(modules, MODULES) 69 | 70 | def test_regex_version(self): 71 | self.assertEqual(db.execute("select regex_version()").fetchone()[0][0], "v") 72 | 73 | def test_regex_debug(self): 74 | debug = db.execute("select regex_debug()").fetchone()[0] 75 | self.assertEqual(len(debug.splitlines()), 2) 76 | 77 | def test_regex(self): 78 | regex = lambda pattern: db.execute("select regex(?)", [pattern]).fetchone()[0] 79 | self.assertEqual(regex('^\d{4}-\d{2}-\d{2}$'), None) 80 | 81 | with self.assertRaisesRegex(sqlite3.OperationalError, "Error parsing pattern as regex: regex parse error:.*"): 82 | regex("[nope") 83 | 84 | 85 | def test_regex_print(self): 86 | regex_print = lambda pattern: db.execute("select regex_print(regex(?))", [pattern]).fetchone()[0] 87 | self.assertEqual(regex_print('^\d{4}-\d{2}-\d{2}$'), '^\d{4}-\d{2}-\d{2}$') 88 | 89 | def test_regexset(self): 90 | regexset = lambda *patterns: db.execute("select regexset({args})".format(args=spread_args(patterns)), patterns).fetchone()[0] 91 | self.assertEqual(regexset('a'), None) 92 | self.assertEqual(regexset('a', 'b'), None) 93 | 94 | def test_regexset_print(self): 95 | regexset_print = lambda *patterns: db.execute("select regexset_print(regexset({args}))".format(args=spread_args(patterns)), 
patterns).fetchone()[0] 96 | self.assertEqual(regexset_print('a', 'b', 'c'), '["a","b","c"]') 97 | 98 | def test_regexset_is_match(self): 99 | regexset_is_match = lambda *patterns, text: db.execute("select regexset_is_match(regexset({args}), ?)".format(args=spread_args(patterns)), [*patterns, text]).fetchone()[0] 100 | self.assertEqual(regexset_is_match('a', text='bbb'), 0) 101 | self.assertEqual(regexset_is_match('a', 'b', text='ccc'), 0) 102 | self.assertEqual(regexset_is_match('a', 'b', text='ccca'), 1) 103 | self.assertEqual(regexset_is_match('a', 'b', text='cccb'), 1) 104 | 105 | def test_regexset_matches(self): 106 | regexset_matches = lambda *patterns, text: execute_all("select rowid, * from regexset_matches(regexset({args}), ?)".format(args=spread_args(patterns)), [*patterns, text]) 107 | self.assertEqual( 108 | regexset_matches('x', 'y', 'z', 'a', 'b', text='cab'), 109 | [ 110 | {'rowid': 0, 'key': 3, 'pattern': 'a'}, 111 | {'rowid': 1, 'key': 4, 'pattern': 'b'} 112 | ] 113 | ) 114 | 115 | def test_regexp(self): 116 | regexp = lambda pattern, content: db.execute("select regexp(?, ?)", [pattern, content]).fetchone()[0] 117 | self.assertEqual(regexp('^\d{4}-\d{2}-\d{2}$', '2022-01-01'), 1) 118 | 119 | def test_regex_valid(self): 120 | regex_valid = lambda pattern: db.execute("select regex_valid(?)", [pattern]).fetchone()[0] 121 | self.assertEqual( 122 | regex_valid("[0-9]{3}-[0-9]{3}-[0-9]{4}"), 123 | 1 124 | ) 125 | self.assertEqual( 126 | regex_valid("no("), 127 | 0 128 | ) 129 | 130 | def test_regex_find(self): 131 | regex_find = lambda pattern, content: db.execute("select regex_find(?, ?)", [pattern, content]).fetchone()[0] 132 | self.assertEqual( 133 | regex_find("[0-9]{3}-[0-9]{3}-[0-9]{4}", "phone: 111-222-3333"), 134 | '111-222-3333' 135 | ) 136 | self.assertEqual( 137 | regex_find("[0-9]{3}-[0-9]{3}-[0-9]{4}", "phone: 111-222-333"), 138 | None 139 | ) 140 | 141 | with self.assertRaisesRegex(sqlite3.OperationalError, "pattern not valid regex"): 142 | 
regex_find("[invalidregex", "abc") 143 | 144 | 145 | def test_regex_find_at(self): 146 | regex_find_at = lambda pattern, content, offset: db.execute("select regex_find_at(?, ?, ?)", [pattern, content, offset]).fetchone()[0] 147 | self.assertEqual( 148 | regex_find_at("[0-9]{3}-[0-9]{3}-[0-9]{4}", "phone: 111-222-3333", 0), 149 | '111-222-3333' 150 | ) 151 | with self.assertRaisesRegex(sqlite3.OperationalError, "pattern not valid regex"): 152 | regex_find_at("[invalidregex", "abc", 0) 153 | 154 | def test_regex_capture(self): 155 | regex_capture = lambda pattern, content, group: db.execute("select regex_capture(?, ?, ?)", [pattern, content, group]).fetchone()[0] 156 | MOVIE_PATTERN = "'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" 157 | EXAMPLE1 = "Not my favorite movie: 'Citizen Kane' (1941)." 158 | self.assertEqual( 159 | regex_capture(MOVIE_PATTERN, EXAMPLE1, 0), 160 | "'Citizen Kane' (1941)" 161 | ) 162 | self.assertEqual( 163 | regex_capture(MOVIE_PATTERN, EXAMPLE1, 1), 164 | "Citizen Kane" 165 | ) 166 | self.assertEqual( 167 | regex_capture(MOVIE_PATTERN, EXAMPLE1, 2), 168 | "1941" 169 | ) 170 | self.assertEqual( 171 | regex_capture(MOVIE_PATTERN, EXAMPLE1, "title"), 172 | "Citizen Kane" 173 | ) 174 | self.assertEqual( 175 | regex_capture(MOVIE_PATTERN, EXAMPLE1, "year"), 176 | "1941" 177 | ) 178 | self.assertEqual( 179 | regex_capture(MOVIE_PATTERN, EXAMPLE1, "not exist"), 180 | None 181 | ) 182 | self.assertEqual( 183 | regex_capture(MOVIE_PATTERN, EXAMPLE1, 3), 184 | None 185 | ) 186 | self.assertEqual( 187 | regex_capture(MOVIE_PATTERN, EXAMPLE1, 1.1), 188 | None 189 | ) 190 | self.assertEqual( 191 | regex_capture(MOVIE_PATTERN, EXAMPLE1, None), 192 | None 193 | ) 194 | 195 | def test_regex_replace(self): 196 | regex_replace = lambda pattern, content, replacement: db.execute("select regex_replace(?, ?, ?)", [pattern, content, replacement]).fetchone()[0] 197 | 198 | self.assertEqual( 199 | regex_replace( 200 | '(?P<last>[^,\s]+),\s+(?P<first>\S+)', 201 | 
'Springsteen, Bruce', 202 | '$first $last' 203 | ), 204 | 'Bruce Springsteen' 205 | ) 206 | 207 | self.assertEqual( 208 | regex_replace( 209 | '(?P<first>\w+)\s+(?P<second>\w+)', 210 | 'deep fried', 211 | '${first}_$second' 212 | ), 213 | 'deep_fried' 214 | ) 215 | self.assertEqual( 216 | regex_replace('a', 'abc abc', ''), 217 | 'bc abc' 218 | ) 219 | 220 | #with self.assertRaisesRegex(sqlite3.OperationalError, "pattern not valid regex"): 221 | # regex_find("[invalidregex", "abc") 222 | 223 | def test_regex_replace_all(self): 224 | regex_replace_all = lambda pattern, content, replacement: db.execute("select regex_replace_all(?, ?, ?)", [pattern, content, replacement]).fetchone()[0] 225 | 226 | self.assertEqual( 227 | regex_replace_all('a', 'abc abc', ''), 228 | 'bc bc' 229 | ) 230 | 231 | def test_regex_captures(self): 232 | MOVIE_PATTERN = "'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" 233 | EXAMPLE1 = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)." 234 | self.assertEqual( 235 | execute_all( 236 | "select rowid, * from regex_captures(?, ?)", 237 | [MOVIE_PATTERN, EXAMPLE1] 238 | ), 239 | [ 240 | {'rowid': 0, 'captures': None}, 241 | {'rowid': 1, 'captures': None}, 242 | {'rowid': 2, 'captures': None}, 243 | ] 244 | ) 245 | self.assertEqual( 246 | execute_all( 247 | """select 248 | rowid, 249 | regex_capture(captures, 0) as c0, 250 | regex_capture(captures, 1) as c1, 251 | regex_capture(captures, 2) as c2, 252 | regex_capture(captures, 3) as c3, 253 | regex_capture(captures, 'title') as title, 254 | regex_capture(captures, 'year') as year, 255 | regex_capture(captures, 'not_exist') as not_exist 256 | from regex_captures(?, ?) 
257 | """, 258 | [MOVIE_PATTERN, EXAMPLE1] 259 | ), 260 | [ 261 | {'rowid': 0, 'c0': '\'Citizen Kane\' (1941)', 'c1': 'Citizen Kane', 'c2': '1941', 'c3': None, 'title': 'Citizen Kane', 'year': '1941', 'not_exist': None, }, 262 | {'rowid': 1, 'c0': '\'The Wizard of Oz\' (1939)', 'c1': 'The Wizard of Oz', 'c2': '1939', 'c3': None, 'title': 'The Wizard of Oz', 'year': '1939', 'not_exist': None, }, 263 | {'rowid': 2, 'c0': '\'M\' (1931)', 'c1': 'M', 'c2': '1931', 'c3': None, 'title': 'M', 'year': '1931', 'not_exist': None, }, 264 | ] 265 | ) 266 | 267 | execute_all(""" 268 | create temp table comments as 269 | select 270 | key as rowid, 271 | value as comment 272 | from json_each(?) 273 | """, 274 | ['["\'Citizen Kane\' (1941), \'The Wizard of Oz\' (1939), \'M\' (1931)", "\'Moonlight\' (2016), \'Arrival\' (2016)", "\'Parasite\' (2020), \'Joker\' (2019), and \'Marriage Story\' (2019)."]'] 275 | ) 276 | self.assertEqual( 277 | execute_all( 278 | """ 279 | select 280 | comments.rowid as comment, 281 | captures.rowid as capture_idx, 282 | regex_capture(captures, 'title') as title, 283 | regex_capture(captures, 'year') as year 284 | from comments 285 | join regex_captures( 286 | regex(?), 287 | comments.comment 288 | )as captures; 289 | """, [MOVIE_PATTERN] 290 | ), 291 | [ 292 | {'comment': 0, 'capture_idx': 0, 'title': 'Citizen Kane', 'year': '1941'}, 293 | {'comment': 0, 'capture_idx': 1, 'title': 'The Wizard of Oz', 'year': '1939'}, 294 | {'comment': 0, 'capture_idx': 2, 'title': 'M', 'year': '1931'}, 295 | {'comment': 1, 'capture_idx': 0, 'title': 'Moonlight', 'year': '2016'}, 296 | {'comment': 1, 'capture_idx': 1, 'title': 'Arrival', 'year': '2016'}, 297 | {'comment': 2, 'capture_idx': 0, 'title': 'Parasite', 'year': '2020'}, 298 | {'comment': 2, 'capture_idx': 1, 'title': 'Joker', 'year': '2019'}, 299 | {'comment': 2, 'capture_idx': 2, 'title': 'Marriage Story', 'year': '2019'} 300 | ] 301 | ) 302 | # with ->> syntax 303 | if sqlite3.sqlite_version_info[1] >= 38: 304 
| self.assertEqual( 305 | execute_all( 306 | """ 307 | select 308 | comments.rowid as comment, 309 | captures.rowid as capture_idx, 310 | captures ->> 'title' as title2, 311 | captures ->> 'year' as year2 312 | from comments 313 | join regex_captures( 314 | regex(?), 315 | comments.comment 316 | )as captures; 317 | """, [MOVIE_PATTERN] 318 | ), 319 | [ 320 | {'comment': 0, 'capture_idx': 0, 'title2': 'Citizen Kane', 'year2': '1941'}, 321 | {'comment': 0, 'capture_idx': 1, 'title2': 'The Wizard of Oz', 'year2': '1939'}, 322 | {'comment': 0, 'capture_idx': 2, 'title2': 'M', 'year2': '1931'}, 323 | {'comment': 1, 'capture_idx': 0, 'title2': 'Moonlight', 'year2': '2016'}, 324 | {'comment': 1, 'capture_idx': 1, 'title2': 'Arrival', 'year2': '2016'}, 325 | {'comment': 2, 'capture_idx': 0, 'title2': 'Parasite', 'year2': '2020'}, 326 | {'comment': 2, 'capture_idx': 1, 'title2': 'Joker', 'year2': '2019'}, 327 | {'comment': 2, 'capture_idx': 2, 'title2': 'Marriage Story', 'year2': '2019'} 328 | ] 329 | ) 330 | 331 | def test_regex_find_all(self): 332 | regex_find_all = lambda pattern, content: execute_all("select rowid, * from regex_find_all(?, ?)", [pattern, content]) 333 | self.assertEqual( 334 | regex_find_all('\\b\w{13}\\b', 'Retroactively relinquishing remunerations is reprehensible.'), 335 | [ 336 | {'rowid': 0, 'start': 0, 'end': 13, 'match': 'Retroactively',}, 337 | {'rowid': 1, 'start': 14, 'end': 27, 'match': 'relinquishing',}, 338 | {'rowid': 2, 'start': 28, 'end': 41, 'match': 'remunerations',}, 339 | {'rowid': 3, 'start': 45, 'end': 58, 'match': 'reprehensible',} 340 | ] 341 | ) 342 | self.assertEqual( 343 | execute_all(""" 344 | with inputs as ( 345 | select value as text 346 | from json_each(?) 
347 | ) 348 | select matches.rowid, matches.* 349 | from inputs 350 | join regex_find_all(regex(?), inputs.text) as matches 351 | """, ['["Retroactively relinquishing remunerations is reprehensible.", "embezzlements objectivizing"]', '\\b\w{13}\\b']), 352 | [ 353 | {'rowid': 0, 'start': 0, 'end': 13, 'match': 'Retroactively',}, 354 | {'rowid': 1, 'start': 14, 'end': 27, 'match': 'relinquishing',}, 355 | {'rowid': 2, 'start': 28, 'end': 41, 'match': 'remunerations',}, 356 | {'rowid': 3, 'start': 45, 'end': 58, 'match': 'reprehensible',}, 357 | {'rowid': 0, 'start': 0, 'end': 13, 'match': 'embezzlements',}, 358 | {'rowid': 1, 'start': 14, 'end': 27, 'match': 'objectivizing',}, 359 | ] 360 | ) 361 | 362 | 363 | 364 | def test_regex_split(self): 365 | regex_split = lambda pattern, content: execute_all("select rowid, * from regex_split(?, ?)", [pattern, content]) 366 | self.assertEqual( 367 | regex_split('[ \t]+', 'a b \t c\td e'), 368 | [ 369 | {'rowid': 0, 'item': 'a'}, 370 | {'rowid': 1, 'item': 'b'}, 371 | {'rowid': 2, 'item': 'c'}, 372 | {'rowid': 3, 'item': 'd'}, 373 | {'rowid': 4, 'item': 'e'} 374 | ] 375 | ) 376 | self.assertEqual( 377 | execute_all("select rowid, * from regex_split(regex(?), ?)", ['[ \t]+', 'a b \t c\td e']), 378 | [ 379 | {'rowid': 0, 'item': 'a'}, 380 | {'rowid': 1, 'item': 'b'}, 381 | {'rowid': 2, 'item': 'c'}, 382 | {'rowid': 3, 'item': 'd'}, 383 | {'rowid': 4, 'item': 'e'} 384 | ] 385 | ) 386 | 387 | 388 | class TestCoverage(unittest.TestCase): 389 | def test_coverage(self): 390 | test_methods = [method for method in dir(TestRegex) if method.startswith('test_')] 391 | funcs_with_tests = set([x.replace("test_", "") for x in test_methods]) 392 | 393 | for func in FUNCTIONS: 394 | self.assertTrue(func in funcs_with_tests, f"{func} does not have corresponding test in {funcs_with_tests}") 395 | 396 | for module in MODULES: 397 | self.assertTrue(module in funcs_with_tests, f"{module} does not have corresponding test in {funcs_with_tests}") 398 | 
399 | if __name__ == '__main__': 400 | unittest.main() 401 | -------------------------------------------------------------------------------- /tests/test-python.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sqlite3 3 | import sqlite_regex 4 | 5 | class TestSqliteregexPython(unittest.TestCase): 6 | def test_path(self): 7 | db = sqlite3.connect(':memory:') 8 | db.enable_load_extension(True) 9 | 10 | self.assertEqual(type(sqlite_regex.loadable_path()), str) 11 | 12 | sqlite_regex.load(db) 13 | version, result = db.execute('select regex_version(), regexp("[abc]", "c")').fetchone() 14 | self.assertEqual(version[0], "v") 15 | self.assertEqual(result, 1) 16 | 17 | if __name__ == '__main__': 18 | unittest.main() --------------------------------------------------------------------------------