├── .circleci └── config.yml ├── .clang-format ├── .github └── workflows │ ├── ci.yml │ └── codeql.yml ├── .gitignore ├── .gitmodules ├── .npmignore ├── LICENSE ├── README.md ├── appveyor.yml ├── benchmark ├── large-text-buffer.benchmark.js ├── marker-index.benchmark.js ├── native │ └── marker-index-benchmark.cc └── text-buffer.benchmark.js ├── binding.gyp ├── index.js ├── package.json ├── script ├── build-browser-version.sh ├── install-emscripten.sh ├── test-native.js └── test-with-debug-graph.sh ├── src ├── bindings │ ├── bindings.cc │ ├── em │ │ ├── auto-wrap.h │ │ ├── epilogue.js │ │ ├── marker-index.cc │ │ ├── patch.cc │ │ ├── point.cc │ │ ├── prologue.js │ │ ├── range.cc │ │ └── text-buffer.cc │ ├── marker-index-wrapper.cc │ ├── marker-index-wrapper.h │ ├── noop.h │ ├── number-conversion.h │ ├── patch-wrapper.cc │ ├── patch-wrapper.h │ ├── point-wrapper.cc │ ├── point-wrapper.h │ ├── range-wrapper.cc │ ├── range-wrapper.h │ ├── string-conversion.cc │ ├── string-conversion.h │ ├── text-buffer-snapshot-wrapper.cc │ ├── text-buffer-snapshot-wrapper.h │ ├── text-buffer-wrapper.cc │ ├── text-buffer-wrapper.h │ ├── text-reader.cc │ ├── text-reader.h │ ├── text-writer.cc │ └── text-writer.h └── core │ ├── encoding-conversion.cc │ ├── encoding-conversion.h │ ├── flat_set.h │ ├── libmba-diff.cc │ ├── libmba-diff.h │ ├── marker-index.cc │ ├── marker-index.h │ ├── optional.h │ ├── patch.cc │ ├── patch.h │ ├── point.cc │ ├── point.h │ ├── range.cc │ ├── range.h │ ├── regex.cc │ ├── regex.h │ ├── serializer.h │ ├── text-buffer.cc │ ├── text-buffer.h │ ├── text-diff.cc │ ├── text-diff.h │ ├── text-slice.cc │ ├── text-slice.h │ ├── text.cc │ └── text.h ├── test ├── js │ ├── helpers │ │ ├── point-helpers.js │ │ ├── test-document.js │ │ ├── text-helpers.js │ │ └── words.js │ ├── marker-index.test.js │ ├── patch.test.js │ └── text-buffer.test.js └── native │ ├── encoding-conversion-test.cc │ ├── patch-test.cc │ ├── test-helpers.cc │ ├── test-helpers.h │ ├── tests.cc │ ├── 
text-buffer-test.cc │ ├── text-diff-test.cc │ └── text-test.cc └── vendor ├── catch.hpp ├── libcxx ├── LICENSE.txt └── utf8-conversions.h └── pcre ├── 10.23 ├── AUTHORS ├── COPYING ├── LICENCE ├── NON-AUTOTOOLS-BUILD ├── README └── src │ ├── config.h.generic │ ├── config.h.in │ ├── pcre2.h.generic │ ├── pcre2.h.in │ ├── pcre2_auto_possess.c │ ├── pcre2_chartables.c.dist │ ├── pcre2_compile.c │ ├── pcre2_config.c │ ├── pcre2_context.c │ ├── pcre2_dfa_match.c │ ├── pcre2_error.c │ ├── pcre2_find_bracket.c │ ├── pcre2_internal.h │ ├── pcre2_intmodedep.h │ ├── pcre2_jit_compile.c │ ├── pcre2_jit_match.c │ ├── pcre2_jit_misc.c │ ├── pcre2_maketables.c │ ├── pcre2_match.c │ ├── pcre2_match_data.c │ ├── pcre2_newline.c │ ├── pcre2_ord2utf.c │ ├── pcre2_pattern_info.c │ ├── pcre2_printint.c │ ├── pcre2_serialize.c │ ├── pcre2_string_utils.c │ ├── pcre2_study.c │ ├── pcre2_substitute.c │ ├── pcre2_substring.c │ ├── pcre2_tables.c │ ├── pcre2_ucd.c │ ├── pcre2_ucp.h │ ├── pcre2_valid_utf.c │ ├── pcre2_xclass.c │ └── sljit │ ├── sljitConfig.h │ ├── sljitConfigInternal.h │ ├── sljitExecAllocator.c │ ├── sljitLir.c │ ├── sljitLir.h │ ├── sljitNativeARM_32.c │ ├── sljitNativeARM_64.c │ ├── sljitNativeARM_T2_32.c │ ├── sljitNativeMIPS_32.c │ ├── sljitNativeMIPS_64.c │ ├── sljitNativeMIPS_common.c │ ├── sljitNativePPC_32.c │ ├── sljitNativePPC_64.c │ ├── sljitNativePPC_common.c │ ├── sljitNativeSPARC_32.c │ ├── sljitNativeSPARC_common.c │ ├── sljitNativeTILEGX-encoder.c │ ├── sljitNativeTILEGX_64.c │ ├── sljitNativeX86_32.c │ ├── sljitNativeX86_64.c │ ├── sljitNativeX86_common.c │ └── sljitUtils.c ├── README.md ├── include ├── config.h └── pcre2.h ├── pcre.gyp └── pcre2_chartables.c /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | jobs: 4 | build: 5 | environment: 6 | XCODE_SCHEME: test 7 | XCODE_WORKSPACE: test 8 | XCODE_PROJECT: test 9 | NODE_VERSION: '10.2.1' 10 | macos: 11 | xcode: 8.3.3 12 | steps: 
13 | - checkout 14 | - run: 15 | name: Update submodules 16 | command: git submodule update --init 17 | - restore_cache: 18 | key: node-{{ .Environment.NODE_VERSION }} 19 | - restore_cache: 20 | key: emsdk-{{ checksum "script/install-emscripten.sh" }} 21 | - run: 22 | name: Install Node.js with nvm 23 | command: | 24 | export NVM_DIR=${HOME}/.nvm 25 | curl -o- https://raw.githubusercontent.com/creationix/nvm/v0.33.8/install.sh | bash 26 | [ -s "${NVM_DIR}/nvm.sh" ] && \. "${NVM_DIR}/nvm.sh" 27 | nvm install ${NODE_VERSION} 28 | nvm alias default ${NODE_VERSION} 29 | echo "[ -s \"${NVM_DIR}/nvm.sh\" ] && . \"${NVM_DIR}/nvm.sh\"" >> $BASH_ENV 30 | - run: 31 | name: Install node dependencies 32 | command: npm install 33 | - run: 34 | name: Build with emscripten 35 | command: script/install-emscripten.sh 36 | - save_cache: 37 | key: node-{{ .Environment.NODE_VERSION }} 38 | paths: 39 | - .nvm 40 | - save_cache: 41 | key: emsdk-{{ checksum "script/install-emscripten.sh" }} 42 | paths: 43 | - .emscripten_cache 44 | - emsdk-portable 45 | - run: 46 | name: Build emscripten 47 | command: npm run build:browser 48 | - run: 49 | name: Lint JavaScript 50 | command: npm run standard 51 | - run: 52 | name: Test Browser 53 | command: npm run test:browser 54 | - run: 55 | name: Test Node.js 56 | command: npm run test:node 57 | - run: 58 | name: Test native 59 | command: npm run test:native 60 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | BasedOnStyle: Google 4 | ColumnLimit: 120 5 | ... 
6 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | - pull_request 4 | - push 5 | 6 | jobs: 7 | Test: 8 | if: "!contains(github.event.head_commit.message, '[skip ci]')" 9 | runs-on: ${{ matrix.os }} 10 | strategy: 11 | fail-fast: false 12 | matrix: 13 | os: 14 | - ubuntu-latest 15 | - macos-latest 16 | - windows-latest 17 | name: Node ${{ matrix.node_version }} on ${{ matrix.os }} 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | with: 22 | submodules: true 23 | - name: Cache 24 | uses: actions/cache@v2 25 | with: 26 | path: | 27 | 'node_modules' 28 | key: ${{ runner.os }}-${{ matrix.node_version }}-${{ hashFiles('package.json') }} 29 | 30 | - name: Setup node 31 | uses: actions/setup-node@v2-beta 32 | with: 33 | node-version: 14 34 | 35 | - name: Install dependencies 36 | run: npm install 37 | 38 | - name: Lint 39 | run: npm run standard 40 | 41 | - name: Run tests 42 | run: | 43 | npm run test:node 44 | npm run test:native 45 | 46 | Skip: 47 | if: contains(github.event.head_commit.message, '[skip ci]') 48 | runs-on: ubuntu-latest 49 | steps: 50 | - name: Skip CI 🚫 51 | run: echo skip ci 52 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "Code Scanning - Action" 2 | 3 | on: 4 | push: 5 | schedule: 6 | - cron: '0 0 * * 0' 7 | 8 | jobs: 9 | CodeQL-Build: 10 | 11 | strategy: 12 | fail-fast: false 13 | 14 | 15 | # CodeQL runs on ubuntu-latest, windows-latest, and macos-latest 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - name: Checkout repository 20 | uses: actions/checkout@v2 21 | 22 | # Initializes the CodeQL tools for scanning. 
23 | - name: Initialize CodeQL 24 | uses: github/codeql-action/init@v1 25 | # Override language selection by uncommenting this and choosing your languages 26 | with: 27 | languages: javascript, cpp 28 | 29 | - run: | 30 | npm install 31 | npm run standard 32 | 33 | - name: Perform CodeQL Analysis 34 | uses: github/codeql-action/analyze@v1 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | build 3 | .DS_Store 4 | .clang_complete 5 | 6 | /browser.js 7 | emsdk-portable 8 | package-lock.json 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "vendor/win-iconv"] 2 | path = vendor/win-iconv 3 | url = https://github.com/win-iconv/win-iconv 4 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | * 2 | 3 | !README.md 4 | !LICENSE 5 | !index.js 6 | !browser.js 7 | !src/core/* 8 | !src/bindings/*.h 9 | !src/bindings/*.cc 10 | 11 | !vendor/libcxx/* 12 | # PCRE build files, sources and licence texts (the vendored 10.23 tree names its licence file LICENCE) 13 | !vendor/pcre/pcre.gyp 14 | !vendor/pcre/pcre2_chartables.c 15 | !vendor/pcre/include/*.h 16 | !vendor/pcre/10.23/src/*.h 17 | !vendor/pcre/10.23/src/*.c 18 | !vendor/pcre/10.23/src/sljit/* 19 | !vendor/pcre/10.23/COPYING 20 | !vendor/pcre/10.23/LICENCE 21 | 22 | !vendor/win-iconv/iconv.h 23 | !vendor/win-iconv/win_iconv.c 24 | !vendor/win-iconv/readme.txt 25 | 26 | !package.json 27 | !binding.gyp 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 GitHub 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a
copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ##### Atom and all repositories under Atom will be archived on December 15, 2022. Learn more in our [official announcement](https://github.blog/2022-06-08-sunsetting-atom/) 2 | # Superstring 3 | [![ci](https://github.com/atom/superstring/actions/workflows/ci.yml/badge.svg)](https://github.com/atom/superstring/actions/workflows/ci.yml) 4 | 5 | Native library at the core of Atom's text editor. 6 | 7 | ## Components: 8 | 9 | ### Patch 10 | 11 | This data structure represents a transformation from input to output text, and it's useful for combining changes that occur at different points in time and space. 
12 | 13 | Example: 14 | ```js 15 | const patch = new Patch 16 | 17 | // At column 5, replace the string 'abc' with '1234': 18 | patch.splice({row: 0, column: 5}, {row: 0, column: 3}, {row: 0, column: 4}, 'abc', '1234') 19 | 20 | // Then at column 7, replace 3 characters with 4 characters: 21 | patch.splice({row: 0, column: 7}, {row: 0, column: 3}, {row: 0, column: 4}, '34d', '5678') 22 | 23 | // Retrieve the consolidated changes: 24 | assert.deepEqual(patch.getChanges(), [ 25 | { 26 | oldStart: {row: 0, column: 5}, 27 | oldEnd: {row: 0, column: 9}, 28 | oldText: 'abcd', 29 | newStart: {row: 0, column: 5}, 30 | newEnd: {row: 0, column: 11}, 31 | newText: '125678' 32 | } 33 | ]) 34 | ``` 35 | 36 | ### MarkerIndex 37 | 38 | This data structure is used to track logical locations in a text buffer as the contents of the buffer are changed. 39 | 40 | Example: 41 | 42 | ```js 43 | const index = new MarkerIndex 44 | 45 | // Associate a marker id with two ordered start and end points 46 | index.insert(1, {row: 2, column: 5}, {row: 4, column: 10}) 47 | 48 | // Splice represents a change to the text file 49 | // you pass it a starting point, then points representing the old and new extent 50 | index.splice({row: 3, column: 5}, {row: 0, column: 0}, {row: 1, column: 0}) 51 | 52 | // The marker's end point was updated by the splice 53 | assert.deepEqual(index.getEnd(1), {row: 5, column: 10}) 54 | ``` 55 | 56 | #### API 57 | 58 | ##### `insert (id, start, end)` 59 | 60 | Associates the given non-negative integer with a range represented by two `{row: number, column: number}` objects. 61 | 62 | ##### `splice (start, oldExtent, newExtent)` 63 | 64 | Updates the locations of all markers based on the description of a change to the text. The range of the replaced text is described by *traversing* from `start` by `oldExtent`. The range of the new text is described by *traversing* from `start` by `newExtent`.
65 | 66 | *Traversal* means that, beginning with the `start` location, we arrive at a new location by performing X line feeds and carriage returns and then walking forward Y columns, where X is the `row` of the given traversal extent and Y is its `column`. In other words, `start`, `oldExtent`, and `newExtent` describe two ranges in the file: the region occupied by the text before the change and the region it occupies after the change. 67 | 68 | This method returns an object that describes what markers were *invalidated* by the change based on various invalidation strategies. If a marker is in a set for a given strategy, it was invalidated according to that strategy. The strategies are as follows: 69 | 70 | * `touch` Contains markers that the change touched in any way. 71 | * `inside` Contains markers that the change touched, but not markers with endpoints immediately adjacent to the change. 72 | * `overlap` Contains markers that had one or both of their endpoints surrounded by the change. 73 | * `surround` Contains markers that had both endpoints surrounded by the change. 74 | 75 | ##### `setExclusive (markerId, boolean)` 76 | 77 | This method controls the behavior of a marker when splices start and/or end at the marker's endpoints. 78 | 79 | By default, we consider markers to be *inclusive*: that is, splices exactly at the beginning of the marked range will be considered to begin inside the marker (meaning that the marker's start position **will not** move), and splices exactly at the end of the marked range will be considered to end inside the marker (meaning that the marker's end position **will** move).
80 | 81 | *Exclusive* markers, on the other hand, exhibit a slightly different behavior: splices exactly at the beginning of the marked range will be considered to begin outside the marker (meaning that the marker's start position **will** move), and splices exactly at the end of the marked range will be considered to end outside the marker (meaning that the marker's end position **will not** move). 82 | 83 | Please note that, independently of whether a marker is inclusive or exclusive, its end **will always** be moved when its start gets moved as a result of a splice. 84 | 85 | ##### `isExclusive (markerId)` 86 | 87 | Returns whether the given marker id has been set to behave exclusively via `setExclusive`. 88 | 89 | ##### `delete (markerId)` 90 | 91 | Removes the specified marker from the index. 92 | 93 | ##### `getRange (markerId)` 94 | 95 | Returns the range for the given marker id, in the form of an object with `start` and `end` points. 96 | 97 | ##### `getStart (markerId)` 98 | 99 | Returns a `{row: number, column: number}` object representing the start of the specified marker. 100 | 101 | ##### `getEnd (markerId)` 102 | 103 | Returns a `{row: number, column: number}` object representing the end of the specified marker. 104 | 105 | ##### `dump ()` 106 | 107 | Returns the current location of every marker in the index, represented as an object mapping marker ids to range objects. For example: 108 | 109 | ```js 110 | { 111 | '1': {start: {row: 2, column: 5}, end: {row: 5, column: 10}}, 112 | '2': {start: {row: 4, column: 10}, end: {row: 6, column: 3}} 113 | } 114 | ``` 115 | 116 | ##### `findIntersecting (start, end = start)` 117 | 118 | Returns a set with the ids of all markers intersecting the specified point range. 119 | 120 | ##### `findContaining (start, end = start)` 121 | 122 | Returns a set with the ids of all markers containing the specified point range.
123 | 124 | ##### `findContainedIn (start, end)` 125 | 126 | Returns a set with the ids of all markers contained in the specified point range. 127 | 128 | ##### `findStartingIn (start, end)` 129 | 130 | Returns a set with the ids of all markers starting in the specified point range. 131 | 132 | ##### `findEndingIn (start, end)` 133 | 134 | Returns a set with the ids of all markers ending in the specified point range. 135 | 136 | ##### `findStartingAt (position)` 137 | 138 | Returns a set with the ids of all markers starting at the specified point. 139 | 140 | ##### `findEndingAt (position)` 141 | 142 | Returns a set with the ids of all markers ending at the specified point. 143 | 144 | ##### `findBoundariesIn (start, end)` 145 | 146 | A boundary is a position in the index where a marker starts or ends. Multiple markers starting and/or ending at the same position describe only one boundary. This method returns an object containing all the boundaries in the specified point range, and an array of marker ids that overlap the specified start position. 
For example: 147 | 148 | ```js 149 | { 150 | containingStart: [1, 2, 3, 4], 151 | boundaries: [ 152 | {position: {row: 0, column: 1}, starting: new Set([5, 6]), ending: new Set()}, 153 | {position: {row: 1, column: 0}, starting: new Set(), ending: new Set([5])}, 154 | {position: {row: 2, column: 0}, starting: new Set(), ending: new Set([6])} 155 | ] 156 | } 157 | ``` 158 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # empty appveyor 2 | build: off 3 | 4 | branches: 5 | only: 6 | - non-existing 7 | -------------------------------------------------------------------------------- /benchmark/large-text-buffer.benchmark.js: -------------------------------------------------------------------------------- 1 | const http = require('http') 2 | const fs = require('fs') 3 | const unzip = require('unzip') 4 | const { TextBuffer } = require('..') 5 | 6 | const unzipper = unzip.Parse() 7 | 8 | const getText = () => { 9 | return new Promise(resolve => { 10 | console.log('fetching text file...') 11 | const req = http.get({ 12 | hostname: 'www.acleddata.com', 13 | port: 80, 14 | // 51 MB text file 15 | path: '/wp-content/uploads/2017/01/ACLED-Version-7-All-Africa-1997-2016_csv_dyadic-file.zip', 16 | agent: false 17 | }, res => { 18 | res 19 | .pipe(unzipper) 20 | .on('entry', entry => { 21 | let data = ''; 22 | entry.on('data', chunk => data += chunk); 23 | entry.on('end', () => { 24 | resolve(data) 25 | }); 26 | }) 27 | }) 28 | 29 | req.end() 30 | }) 31 | } 32 | 33 | const timer = size => `Time to find "cat" in ${size} file` 34 | 35 | getText().then(txt => { 36 | const buffer = new TextBuffer() 37 | 38 | console.log('running findWordsWithSubsequence tests...') 39 | 40 | const sizes = [['10b', 10], ['100b', 100], ['1kb', 1000], ['1MB', 1000000], ['51MB', 100000000]] 41 | 42 | const test = size => { 43 | const _timer = timer(size[0]) 44 |
buffer.setText(txt.slice(0, size[1])) 45 | console.time(_timer) 46 | return buffer.findWordsWithSubsequence('cat', '', 100).then(sugs => { 47 | console.timeEnd(_timer) 48 | }) 49 | } 50 | 51 | return sizes.reduce((promise, size) => { 52 | return promise.then(() => test(size)) 53 | }, Promise.resolve()) 54 | }).then(() => { 55 | console.log('finished') 56 | }) 57 | -------------------------------------------------------------------------------- /benchmark/marker-index.benchmark.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const Random = require('random-seed') 4 | const {MarkerIndex} = require('..') 5 | const {traverse, traversalDistance, compare} = require('../test/js/helpers/point-helpers') 6 | 7 | let random = new Random(1) 8 | let markerIds = [] 9 | let idCounter = 1 10 | let lastInsertionEnd = {row: 0, column: 0} 11 | let markerIndex = null 12 | let sequentialInsertOperations = [] 13 | let insertOperations = [] 14 | let spliceOperations = [] 15 | let deleteOperations = [] 16 | let rangeQueryOperations = [] 17 | 18 | function runBenchmark () { 19 | for (let i = 0; i < 40000; i++) { 20 | enqueueSequentialInsert() 21 | } 22 | 23 | for (let i = 0; i < 40000; i++) { 24 | enqueueInsert() 25 | enqueueSplice() 26 | enqueueDelete() 27 | } 28 | 29 | for (let i = 0; i < 500; i++) { 30 | enqueueRangeQuery() 31 | } 32 | 33 | markerIndex = new MarkerIndex() 34 | profileOperations('sequential inserts', sequentialInsertOperations) 35 | 36 | markerIndex = new MarkerIndex() 37 | profileOperations('inserts', insertOperations) 38 | profileOperations('range queries', rangeQueryOperations) 39 | profileOperations('splices', spliceOperations) 40 | profileOperations('deletes', deleteOperations) 41 | } 42 | 43 | function profileOperations (name, operations) { 44 | console.time(name) 45 | for (let i = 0, n = operations.length; i < n; i++) { 46 | const operation = operations[i] 47 | 
markerIndex[operation[0]].apply(markerIndex, operation[1]) 48 | } 49 | console.timeEnd(name) 50 | } 51 | 52 | function enqueueSequentialInsert () { 53 | let id = (idCounter++).toString() 54 | let row, startColumn, endColumn 55 | if (random(10) < 3) { 56 | row = lastInsertionEnd.row + 1 + random(3) 57 | startColumn = random(100) 58 | endColumn = startColumn + random(20) 59 | } else { 60 | row = lastInsertionEnd.row 61 | startColumn = lastInsertionEnd.column + 1 + random(20) 62 | endColumn = startColumn + random(20) 63 | } 64 | lastInsertionEnd = {row, column: endColumn} 65 | sequentialInsertOperations.push(['insert', [id, {row, column: startColumn}, lastInsertionEnd]]) 66 | } 67 | 68 | function enqueueInsert () { 69 | let id = (idCounter++).toString() 70 | let range = getRange() 71 | let start = range[0] 72 | let end = range[1] 73 | let exclusive = Boolean(random(2)) 74 | markerIds.push(id) 75 | insertOperations.push(['insert', [id, start, end]]) 76 | insertOperations.push(['setExclusive', [id, exclusive]]) 77 | } 78 | 79 | function enqueueSplice () { 80 | spliceOperations.push(['splice', getSplice()]) 81 | } 82 | 83 | function enqueueRangeQuery() { 84 | rangeQueryOperations.push(['findIntersecting', getRange()]) 85 | } 86 | 87 | function enqueueDelete () { 88 | let id = markerIds.splice(random(markerIds.length), 1) 89 | deleteOperations.push(['delete', [id]]) 90 | } 91 | 92 | function getRange () { 93 | let start = {row: random(100), column: random(100)} 94 | let end = start 95 | while (random(3) > 0) { 96 | end = traverse(end, {row: random.intBetween(-10, 10), column: random.intBetween(-10, 10)}) 97 | } 98 | end.row = Math.max(end.row, 0) 99 | end.column = Math.max(end.column, 0) 100 | 101 | if (compare(start, end) <= 0) { 102 | return [start, end] 103 | } else { 104 | return [end, start] 105 | } 106 | } 107 | 108 | function getSplice () { 109 | let range = getRange() 110 | let start = range[0] 111 | let oldEnd = range[1] 112 | let oldExtent = 
traversalDistance(oldEnd, start) 113 | let newExtent = {row: 0, column: 0} 114 | while (random(2)) { 115 | newExtent = traverse(newExtent, {row: random(10), column: random(10)}) 116 | } 117 | return [start, oldExtent, newExtent] 118 | } 119 | 120 | runBenchmark() 121 | -------------------------------------------------------------------------------- /benchmark/native/marker-index-benchmark.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "catch.hpp" 6 | #include "point.h" 7 | #include "range.h" 8 | #include "marker-index.h" 9 | 10 | using namespace std::chrono; 11 | using std::vector; 12 | 13 | Range get_random_range() { 14 | Point start(rand() % 100, rand() % 100); 15 | Point end = start; 16 | if (rand() % 10 < 5) { 17 | end = end.traverse(Point(rand() % 10, rand() % 10)); 18 | } 19 | return Range{start, end}; 20 | } 21 | 22 | 23 | TEST_CASE("MarkerIndex::insert") { 24 | srand(0); 25 | MarkerIndex marker_index; 26 | vector ranges; 27 | uint count = 20000; 28 | 29 | for (uint i = 0; i < count; i++) { 30 | ranges.push_back(get_random_range()); 31 | } 32 | 33 | milliseconds start = duration_cast(system_clock::now().time_since_epoch()); 34 | for (uint i = 0; i < count; i++) { 35 | marker_index.insert(i, ranges[i].start, ranges[i].end); 36 | } 37 | milliseconds end = duration_cast(system_clock::now().time_since_epoch()); 38 | std::cout << "Inserting " << (end - start).count(); 39 | } 40 | -------------------------------------------------------------------------------- /benchmark/text-buffer.benchmark.js: -------------------------------------------------------------------------------- 1 | const assert = require('assert') 2 | const {TextBuffer} = require('..') 3 | 4 | const text = 'abc def ghi jkl\n'.repeat(1024 * 1024) 5 | const lines = text.split('\n') 6 | const buffer = new TextBuffer(text) 7 | const trialCount = 10 8 | 9 | function benchmarkSearch(description, pattern, 
expectedPosition) { 10 | let name = `Search for ${description} - TextBuffer` 11 | console.time(name) 12 | for (let i = 0; i < trialCount; i++) { 13 | assert.deepEqual(buffer.searchSync(pattern), expectedPosition) 14 | } 15 | console.timeEnd(name) 16 | 17 | name = `Search for ${description} - lines array` 18 | console.time(name) 19 | const regex = new RegExp(pattern) 20 | for (let i = 0; i < trialCount; i++) { 21 | for (let row = 0, rowCount = lines.length; row < rowCount; row++) { 22 | let match = regex.exec(lines[row]) 23 | if (match) { 24 | assert.deepEqual( 25 | { 26 | start: {row, column: match.index}, 27 | end: {row, column: match.index + match[0].length} 28 | }, 29 | expectedPosition 30 | ) 31 | break 32 | } 33 | } 34 | } 35 | console.timeEnd(name) 36 | console.log() 37 | } 38 | 39 | benchmarkSearch('simple non-existent pattern', '\t', null) 40 | benchmarkSearch('complex non-existent pattern', '123|456|789', null) 41 | benchmarkSearch('simple existing pattern', 'jkl', {start: {row: 0, column: 12}, end: {row: 0, column: 15}}) 42 | benchmarkSearch('complex existing pattern', 'j\\w+', {start: {row: 0, column: 12}, end: {row: 0, column: 15}}) -------------------------------------------------------------------------------- /binding.gyp: -------------------------------------------------------------------------------- 1 | { 2 | "targets": [ 3 | { 4 | "target_name": "superstring", 5 | "dependencies": [ 6 | "superstring_core" 7 | ], 8 | "sources": [ 9 | "src/bindings/bindings.cc", 10 | "src/bindings/marker-index-wrapper.cc", 11 | "src/bindings/patch-wrapper.cc", 12 | "src/bindings/point-wrapper.cc", 13 | "src/bindings/range-wrapper.cc", 14 | "src/bindings/text-buffer-wrapper.cc", 15 | "src/bindings/text-buffer-snapshot-wrapper.cc", 16 | "src/bindings/text-reader.cc", 17 | "src/bindings/string-conversion.cc", 18 | "src/bindings/text-writer.cc", 19 | ], 20 | "include_dirs": [ 21 | "src/core", 22 | "", 28 | "license": "MIT", 29 | "bugs": { 30 | "url": 
"https://github.com/atom/superstring/issues" 31 | }, 32 | "homepage": "https://github.com/atom/superstring", 33 | "dependencies": { 34 | "nan": "^2.14.2" 35 | }, 36 | "devDependencies": { 37 | "chai": "^2.0.0", 38 | "mocha": "^2.3.4", 39 | "random-seed": "^0.2.0", 40 | "standard": "^4.5.4", 41 | "temp": "^0.8.3", 42 | "unzip": "^0.1.11" 43 | }, 44 | "standard": { 45 | "global": [ 46 | "describe", 47 | "it", 48 | "expect" 49 | ] 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /script/build-browser-version.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | source emsdk-portable/emsdk_env.sh 4 | 5 | mkdir -p build 6 | 7 | emcc \ 8 | -o build/pcre.o \ 9 | -O3 \ 10 | -I vendor/pcre/10.23/src \ 11 | -I vendor/pcre/include \ 12 | -D HAVE_CONFIG_H \ 13 | -D PCRE2_CODE_UNIT_WIDTH=16 \ 14 | vendor/pcre/pcre2_chartables.c \ 15 | vendor/pcre/10.23/src/pcre2_auto_possess.c \ 16 | vendor/pcre/10.23/src/pcre2_compile.c \ 17 | vendor/pcre/10.23/src/pcre2_config.c \ 18 | vendor/pcre/10.23/src/pcre2_context.c \ 19 | vendor/pcre/10.23/src/pcre2_dfa_match.c \ 20 | vendor/pcre/10.23/src/pcre2_error.c \ 21 | vendor/pcre/10.23/src/pcre2_find_bracket.c \ 22 | vendor/pcre/10.23/src/pcre2_jit_compile.c \ 23 | vendor/pcre/10.23/src/pcre2_maketables.c \ 24 | vendor/pcre/10.23/src/pcre2_match.c \ 25 | vendor/pcre/10.23/src/pcre2_match_data.c \ 26 | vendor/pcre/10.23/src/pcre2_newline.c \ 27 | vendor/pcre/10.23/src/pcre2_ord2utf.c \ 28 | vendor/pcre/10.23/src/pcre2_pattern_info.c \ 29 | vendor/pcre/10.23/src/pcre2_serialize.c \ 30 | vendor/pcre/10.23/src/pcre2_string_utils.c \ 31 | vendor/pcre/10.23/src/pcre2_study.c \ 32 | vendor/pcre/10.23/src/pcre2_substitute.c \ 33 | vendor/pcre/10.23/src/pcre2_substring.c \ 34 | vendor/pcre/10.23/src/pcre2_tables.c \ 35 | vendor/pcre/10.23/src/pcre2_ucd.c \ 36 | vendor/pcre/10.23/src/pcre2_valid_utf.c \ 37 | 
vendor/pcre/10.23/src/pcre2_xclass.c 38 | 39 | em++ \ 40 | --bind \ 41 | -o browser.js \ 42 | -O3 \ 43 | -I src/bindings/em \ 44 | -I src/core \ 45 | -I vendor/libcxx \ 46 | -I vendor/pcre/include \ 47 | -D PCRE2_CODE_UNIT_WIDTH=16 \ 48 | -xc++ \ 49 | --pre-js src/bindings/em/prologue.js \ 50 | --post-js src/bindings/em/epilogue.js \ 51 | src/core/*.cc \ 52 | src/bindings/em/*.cc \ 53 | build/pcre.o \ 54 | -s TOTAL_MEMORY=134217728 \ 55 | --memory-init-file 0 \ 56 | "$@" 57 | -------------------------------------------------------------------------------- /script/install-emscripten.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | EMSCRIPTEN_DOWNLOAD_URL='https://s3.amazonaws.com/mozilla-games/emscripten/releases/emsdk-portable.tar.gz' 6 | EMSDK_PATH="./emsdk-portable/emsdk" 7 | 8 | if [ ! -f $EMSDK_PATH ]; then 9 | echo 'Downloading emscripten SDK installer...' 10 | curl $EMSCRIPTEN_DOWNLOAD_URL | tar xz 11 | fi 12 | 13 | echo 'Installing emscripten SDK...' 
14 | 15 | # Workaround https://github.com/juj/emsdk/pull/74 16 | sed -i{} "s_/kripken/emscripten/'_/kripken/emscripten'_" $EMSDK_PATH 17 | sed -i{} "s_/WebAssembly/binaryen/'_/WebAssembly/binaryen'_" $EMSDK_PATH 18 | 19 | $EMSDK_PATH update 20 | $EMSDK_PATH list 21 | $EMSDK_PATH install sdk-1.37.9-64bit 22 | $EMSDK_PATH activate sdk-1.37.9-64bit 23 | -------------------------------------------------------------------------------- /script/test-native.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const fs = require('fs') 4 | const path = require('path') 5 | const {spawnSync} = require('child_process') 6 | 7 | const testsPath = path.resolve(__dirname, '..', 'build', 'Debug', 'tests') 8 | const dotPath = path.resolve(__dirname, '..', 'build', 'debug.dot') 9 | const htmlPath = path.join(__dirname, '..', 'build', 'debug.html') 10 | 11 | if (fs.existsSync(testsPath)) { 12 | run('node-gyp', ['build']) 13 | } else { 14 | run('node-gyp', ['rebuild', '--debug', '--tests']) 15 | } 16 | 17 | const args = process.argv.slice(2) 18 | 19 | switch (args[0]) { 20 | case '-d': 21 | case '--debug': 22 | args.shift() 23 | run('lldb', [testsPath, '--', ...args]) 24 | break 25 | 26 | case '-v': 27 | case '--valgrind': 28 | args.shift() 29 | run('valgrind', ['--leak-check=full', testsPath, args[0]]) 30 | break 31 | 32 | case '-s': 33 | case '--svg': 34 | args.shift() 35 | 36 | let dotFile = fs.openSync(dotPath, 'w') 37 | const {status} = spawnSync(testsPath, args, {stdio: ['ignore', 1, dotFile]}) 38 | fs.closeSync(dotFile) 39 | 40 | dotFile = fs.openSync(dotPath, 'r') 41 | let htmlFile = fs.openSync(htmlPath, 'w') 42 | fs.writeSync(htmlFile, '\n\n') 43 | spawnSync('dot', ['-Tsvg'], {stdio: [dotFile, htmlFile, 2]}) 44 | spawnSync('open', [htmlPath]) 45 | 46 | process.exit(status) 47 | break 48 | 49 | default: 50 | run(testsPath, args) 51 | break 52 | } 53 | 54 | function run(command, args = [], options = {stdio: 
'inherit'}) { 55 | const {status} = spawnSync(command, args, options) 56 | if (status !== 0) process.exit(status) 57 | } -------------------------------------------------------------------------------- /script/test-with-debug-graph.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo '' > build/debug.html 4 | echo '' >> build/debug.html 5 | node_modules/.bin/mocha test/js/*.js 2> >(dot -Tsvg >> build/debug.html) 6 | open build/debug.html 7 | -------------------------------------------------------------------------------- /src/bindings/bindings.cc: -------------------------------------------------------------------------------- 1 | #include "marker-index-wrapper.h" 2 | #include "nan.h" 3 | #include "patch-wrapper.h" 4 | #include "range-wrapper.h" 5 | #include "point-wrapper.h" 6 | #include "text-writer.h" 7 | #include "text-reader.h" 8 | #include "text-buffer-wrapper.h" 9 | #include "text-buffer-snapshot-wrapper.h" 10 | 11 | using namespace v8; 12 | 13 | void Init(Local exports) { 14 | PointWrapper::init(); 15 | RangeWrapper::init(); 16 | PatchWrapper::init(exports); 17 | MarkerIndexWrapper::init(exports); 18 | TextBufferWrapper::init(exports); 19 | TextWriter::init(exports); 20 | TextReader::init(exports); 21 | TextBufferSnapshotWrapper::init(); 22 | } 23 | 24 | NODE_MODULE(superstring, Init) 25 | -------------------------------------------------------------------------------- /src/bindings/em/epilogue.js: -------------------------------------------------------------------------------- 1 | return Module; 2 | })); 3 | -------------------------------------------------------------------------------- /src/bindings/em/marker-index.cc: -------------------------------------------------------------------------------- 1 | #include "auto-wrap.h" 2 | #include "marker-index.h" 3 | #include 4 | 5 | EMSCRIPTEN_BINDINGS(MarkerIndex) { 6 | emscripten::class_("MarkerIndex") 7 | .constructor<>() 8 | .constructor() 9 | 
.function("generateRandomNumber", WRAP(&MarkerIndex::generate_random_number)) 10 | .function("insert", WRAP(&MarkerIndex::insert)) 11 | .function("setExclusive", WRAP(&MarkerIndex::set_exclusive)) 12 | .function("remove", WRAP(&MarkerIndex::remove)) 13 | .function("splice", WRAP(&MarkerIndex::splice)) 14 | .function("has", WRAP(&MarkerIndex::has)) 15 | .function("getStart", WRAP(&MarkerIndex::get_start)) 16 | .function("getEnd", WRAP(&MarkerIndex::get_end)) 17 | .function("getRange", WRAP(&MarkerIndex::get_range)) 18 | .function("compare", WRAP(&MarkerIndex::compare)) 19 | .function("findIntersecting", WRAP(&MarkerIndex::find_intersecting)) 20 | .function("findContaining", WRAP(&MarkerIndex::find_containing)) 21 | .function("findContainedIn", WRAP(&MarkerIndex::find_contained_in)) 22 | .function("findStartingIn", WRAP(&MarkerIndex::find_starting_in)) 23 | .function("findStartingAt", WRAP(&MarkerIndex::find_starting_at)) 24 | .function("findEndingIn", WRAP(&MarkerIndex::find_ending_in)) 25 | .function("findEndingAt", WRAP(&MarkerIndex::find_ending_at)) 26 | .function("findBoundariesAfter", WRAP(&MarkerIndex::find_boundaries_after)) 27 | .function("dump", WRAP(&MarkerIndex::dump)); 28 | 29 | emscripten::value_object("SpliceResult") 30 | .field("touch", WRAP_FIELD(MarkerIndex::SpliceResult, touch)) 31 | .field("inside", WRAP_FIELD(MarkerIndex::SpliceResult, inside)) 32 | .field("overlap", WRAP_FIELD(MarkerIndex::SpliceResult, overlap)) 33 | .field("surround", WRAP_FIELD(MarkerIndex::SpliceResult, surround)); 34 | 35 | emscripten::value_object("BoundaryQueryResult") 36 | .field("containing_start", &MarkerIndex::BoundaryQueryResult::containing_start) 37 | .field("boundaries", &MarkerIndex::BoundaryQueryResult::boundaries); 38 | 39 | emscripten::value_object("Boundary") 40 | .field("position", &MarkerIndex::Boundary::position) 41 | .field("starting", &MarkerIndex::Boundary::starting) 42 | .field("ending", &MarkerIndex::Boundary::ending); 43 | } 44 | 
-------------------------------------------------------------------------------- /src/bindings/em/patch.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "auto-wrap.h" 4 | #include "patch.h" 5 | #include 6 | #include 7 | 8 | using std::runtime_error; 9 | using std::string; 10 | using std::vector; 11 | 12 | template <> 13 | inline Patch const *emscripten::val::as(void) const { 14 | using namespace emscripten; 15 | using namespace internal; 16 | 17 | EM_DESTRUCTORS destructors; 18 | EM_GENERIC_WIRE_TYPE result = _emval_as( 19 | handle, 20 | TypeID>::get(), 21 | &destructors 22 | ); 23 | DestructorsRunner destructors_runner(destructors); 24 | 25 | return fromGenericWireType(result); 26 | } 27 | 28 | Patch *constructor(emscripten::val value) { 29 | bool merge_adjacent_changes = false; 30 | if (value.as() && value["mergeAdjacentChanges"].as()) { 31 | merge_adjacent_changes = true; 32 | } 33 | return new Patch(merge_adjacent_changes); 34 | } 35 | 36 | vector serialize(Patch &patch) { 37 | vector output; 38 | Serializer serializer(output); 39 | patch.serialize(serializer); 40 | return output; 41 | } 42 | 43 | Patch *compose(vector const &patches) { 44 | auto result = new Patch(); 45 | bool left_to_right = true; 46 | for (const Patch *patch : patches) { 47 | if (!result->combine(*patch, left_to_right)) { 48 | delete result; 49 | return nullptr; 50 | } 51 | left_to_right = !left_to_right; 52 | } 53 | return result; 54 | } 55 | 56 | Patch *deserialize(const vector &bytes) { 57 | Deserializer deserializer(bytes); 58 | return new Patch(deserializer); 59 | } 60 | 61 | bool splice(Patch &patch, Point start, Point deleted_extent, Point inserted_extent) { 62 | return patch.splice( 63 | start, 64 | deleted_extent, 65 | inserted_extent 66 | ); 67 | } 68 | 69 | bool splice_with_text(Patch &patch, Point start, Point deleted_extent, Point inserted_extent, 70 | const string &deleted_text, const string 
&inserted_text) { 71 | return patch.splice( 72 | start, 73 | deleted_extent, 74 | inserted_extent, 75 | Text(deleted_text.begin(), deleted_text.end()), 76 | Text(inserted_text.begin(), inserted_text.end()) 77 | ); 78 | } 79 | 80 | template 81 | void change_set_noop(Patch::Change &change, T const &) {} 82 | 83 | EMSCRIPTEN_BINDINGS(Patch) { 84 | emscripten::class_("Patch") 85 | .constructor<>() 86 | .constructor(WRAP_STATIC(&constructor), emscripten::allow_raw_pointers()) 87 | .function("splice", splice) 88 | .function("splice", splice_with_text) 89 | .function("spliceOld", WRAP(&Patch::splice_old)) 90 | .function("copy", WRAP(&Patch::copy)) 91 | .function("invert", WRAP(&Patch::invert)) 92 | .function("getChanges", WRAP(&Patch::get_changes)) 93 | .function("getChangesInNewRange", WRAP(&Patch::grab_changes_in_new_range)) 94 | .function("getChangesInOldRange", WRAP(&Patch::grab_changes_in_old_range)) 95 | .function("getChangeCount", WRAP(&Patch::get_change_count)) 96 | .function("changeForOldPosition", WRAP(&Patch::grab_change_starting_before_old_position)) 97 | .function("changeForNewPosition", WRAP(&Patch::grab_change_starting_before_new_position)) 98 | .function("getBounds", WRAP(&Patch::get_bounds)) 99 | .function("rebalance", WRAP(&Patch::rebalance)) 100 | .function("serialize", WRAP(&serialize)) 101 | .class_function("compose", WRAP_STATIC(&compose), emscripten::allow_raw_pointers()) 102 | .class_function("deserialize", WRAP_STATIC(&deserialize), emscripten::allow_raw_pointers()); 103 | 104 | emscripten::value_object("Change") 105 | .field("oldStart", WRAP_FIELD(Patch::Change, old_start)) 106 | .field("oldEnd", WRAP_FIELD(Patch::Change, old_end)) 107 | .field("newStart", WRAP_FIELD(Patch::Change, new_start)) 108 | .field("newEnd", WRAP_FIELD(Patch::Change, new_end)) 109 | .field("oldText", WRAP_FIELD(Patch::Change, old_text)) 110 | .field("newText", WRAP_FIELD(Patch::Change, new_text)); 111 | } 112 | 
-------------------------------------------------------------------------------- /src/bindings/em/point.cc: -------------------------------------------------------------------------------- 1 | #include "point.h" 2 | #include 3 | #include 4 | #include 5 | 6 | double get_row(const Point &point) { 7 | return point.row; 8 | } 9 | 10 | void set_row(Point &point, double row) { 11 | if (row < 0) { 12 | point.row = 0; 13 | } else { 14 | point.row = std::min( 15 | row, 16 | static_cast(std::numeric_limits::max()) 17 | ); 18 | } 19 | } 20 | 21 | double get_column(const Point &point) { 22 | return point.column; 23 | } 24 | 25 | void set_column(Point &point, double column) { 26 | if (column < 0) { 27 | point.column = 0; 28 | } else { 29 | point.column = std::min( 30 | column, 31 | static_cast(std::numeric_limits::max()) 32 | ); 33 | } 34 | } 35 | 36 | EMSCRIPTEN_BINDINGS(Point) { 37 | emscripten::value_object("Point") 38 | .field("row", &get_row, &set_row) 39 | .field("column", &get_column, &set_column); 40 | } 41 | -------------------------------------------------------------------------------- /src/bindings/em/prologue.js: -------------------------------------------------------------------------------- 1 | (function (root, factory) { 2 | if (typeof define === 'function' && define.amd) { 3 | define([], factory); 4 | } else if (typeof exports === 'object') { 5 | module.exports = factory(); 6 | } else { 7 | window.Superstring = factory(); 8 | } 9 | }(this, function () { 10 | -------------------------------------------------------------------------------- /src/bindings/em/range.cc: -------------------------------------------------------------------------------- 1 | #include "auto-wrap.h" 2 | #include "range.h" 3 | #include 4 | 5 | EMSCRIPTEN_BINDINGS(Range) { 6 | emscripten::value_object("Range") 7 | .field("start", WRAP_FIELD(Range, start)) 8 | .field("end", WRAP_FIELD(Range, end)); 9 | } 10 | -------------------------------------------------------------------------------- 
/src/bindings/em/text-buffer.cc: -------------------------------------------------------------------------------- 1 | #include "auto-wrap.h" 2 | #include "text-buffer.h" 3 | #include "marker-index.h" 4 | #include 5 | 6 | using std::string; 7 | using std::u16string; 8 | 9 | static TextBuffer *construct(const std::wstring &text) { 10 | return new TextBuffer(u16string(text.begin(), text.end())); 11 | } 12 | 13 | static emscripten::val find_sync(TextBuffer &buffer, std::wstring js_pattern, bool ignore_case, bool unicode, Range range) { 14 | u16string pattern(js_pattern.begin(), js_pattern.end()); 15 | u16string error_message; 16 | Regex regex(pattern, &error_message, ignore_case, unicode); 17 | if (!error_message.empty()) { 18 | return emscripten::val(string(error_message.begin(), error_message.end())); 19 | } 20 | 21 | auto result = buffer.find(regex, range); 22 | if (result) { 23 | return emscripten::val(*result); 24 | } 25 | 26 | return emscripten::val::null(); 27 | } 28 | 29 | static emscripten::val find_all_sync(TextBuffer &buffer, std::wstring js_pattern, bool ignore_case, bool unicode, Range range) { 30 | u16string pattern(js_pattern.begin(), js_pattern.end()); 31 | u16string error_message; 32 | Regex regex(pattern, &error_message, ignore_case, unicode); 33 | if (!error_message.empty()) { 34 | return emscripten::val(string(error_message.begin(), error_message.end())); 35 | } 36 | 37 | return em_transmit(buffer.find_all(regex, range)); 38 | } 39 | 40 | static emscripten::val find_and_mark_all_sync(TextBuffer &buffer, MarkerIndex &index, unsigned next_id, 41 | bool exclusive, std::wstring js_pattern, bool ignore_case, bool unicode, 42 | Range range) { 43 | u16string pattern(js_pattern.begin(), js_pattern.end()); 44 | u16string error_message; 45 | Regex regex(pattern, &error_message, ignore_case, unicode); 46 | if (!error_message.empty()) { 47 | return emscripten::val(string(error_message.begin(), error_message.end())); 48 | } 49 | 50 | return 
emscripten::val(buffer.find_and_mark_all(index, next_id, exclusive, regex, range)); 51 | } 52 | 53 | static emscripten::val line_ending_for_row(TextBuffer &buffer, uint32_t row) { 54 | auto line_ending = buffer.line_ending_for_row(row); 55 | if (line_ending) { 56 | string result; 57 | for (const uint16_t *character = line_ending; *character != 0; character++) { 58 | result += (char)*character; 59 | } 60 | return emscripten::val(result); 61 | } 62 | return emscripten::val::undefined(); 63 | } 64 | 65 | static uint32_t character_index_for_position(TextBuffer &buffer, Point position) { 66 | return buffer.clip_position(position).offset; 67 | } 68 | 69 | static uint32_t get_line_count(TextBuffer &buffer) { 70 | return buffer.extent().row + 1; 71 | } 72 | 73 | static Point position_for_character_index(TextBuffer &buffer, long index) { 74 | return index < 0 ? 75 | Point{0, 0} : 76 | buffer.position_for_offset(static_cast(index)); 77 | } 78 | 79 | EMSCRIPTEN_BINDINGS(TextBuffer) { 80 | emscripten::class_("TextBuffer") 81 | .constructor<>() 82 | .constructor(construct, emscripten::allow_raw_pointers()) 83 | .function("getText", WRAP(&TextBuffer::text)) 84 | .function("setText", WRAP_OVERLOAD(&TextBuffer::set_text, void (TextBuffer::*)(u16string &&))) 85 | .function("getCharacterAtPosition", WRAP(&TextBuffer::character_at)) 86 | .function("getTextInRange", WRAP(&TextBuffer::text_in_range)) 87 | .function("setTextInRange", WRAP_OVERLOAD(&TextBuffer::set_text_in_range, void (TextBuffer::*)(Range, u16string &&))) 88 | .function("getLength", &TextBuffer::size) 89 | .function("getExtent", &TextBuffer::extent) 90 | .function("getLineCount", get_line_count) 91 | .function("hasAstral", &TextBuffer::has_astral) 92 | .function("reset", WRAP(&TextBuffer::reset)) 93 | .function("lineLengthForRow", WRAP(&TextBuffer::line_length_for_row)) 94 | .function("lineEndingForRow", line_ending_for_row) 95 | .function("lineForRow", WRAP(&TextBuffer::line_for_row)) 96 | 
.function("characterIndexForPosition", character_index_for_position) 97 | .function("positionForCharacterIndex", position_for_character_index) 98 | .function("isModified", WRAP_OVERLOAD(&TextBuffer::is_modified, bool (TextBuffer::*)() const)) 99 | .function("findSync", find_sync) 100 | .function("findAllSync", find_all_sync) 101 | .function("findAndMarkAllSync", find_and_mark_all_sync) 102 | .function("findWordsWithSubsequenceInRange", WRAP(&TextBuffer::find_words_with_subsequence_in_range)); 103 | 104 | emscripten::value_object("SubsequenceMatch") 105 | .field("word", WRAP_FIELD(TextBuffer::SubsequenceMatch, word)) 106 | .field("positions", WRAP_FIELD(TextBuffer::SubsequenceMatch, positions)) 107 | .field("matchIndices", WRAP_FIELD(TextBuffer::SubsequenceMatch, match_indices)) 108 | .field("score", WRAP_FIELD(TextBuffer::SubsequenceMatch, score)); 109 | } 110 | -------------------------------------------------------------------------------- /src/bindings/marker-index-wrapper.h: -------------------------------------------------------------------------------- 1 | #include "nan.h" 2 | #include "marker-index.h" 3 | #include "optional.h" 4 | #include "range.h" 5 | 6 | class MarkerIndexWrapper : public Nan::ObjectWrap { 7 | public: 8 | static void init(v8::Local exports); 9 | static MarkerIndex *from_js(v8::Local); 10 | 11 | private: 12 | static void construct(const Nan::FunctionCallbackInfo &info); 13 | static void generate_random_number(const Nan::FunctionCallbackInfo &info); 14 | static bool is_finite(v8::Local number); 15 | static v8::Local marker_ids_set_to_js(const MarkerIndex::MarkerIdSet &marker_ids); 16 | static v8::Local marker_ids_vector_to_js(const std::vector &marker_ids); 17 | static v8::Local snapshot_to_js(const std::unordered_map &snapshot); 18 | static optional marker_id_from_js(v8::Local value); 19 | static optional unsigned_from_js(v8::Local value); 20 | static optional bool_from_js(v8::Local value); 21 | static void insert(const 
Nan::FunctionCallbackInfo &info); 22 | static void set_exclusive(const Nan::FunctionCallbackInfo &info); 23 | static void remove(const Nan::FunctionCallbackInfo &info); 24 | static void has(const Nan::FunctionCallbackInfo &info); 25 | static void splice(const Nan::FunctionCallbackInfo &info); 26 | static void get_start(const Nan::FunctionCallbackInfo &info); 27 | static void get_end(const Nan::FunctionCallbackInfo &info); 28 | static void get_range(const Nan::FunctionCallbackInfo &info); 29 | static void compare(const Nan::FunctionCallbackInfo &info); 30 | static void find_intersecting(const Nan::FunctionCallbackInfo &info); 31 | static void find_containing(const Nan::FunctionCallbackInfo &info); 32 | static void find_contained_in(const Nan::FunctionCallbackInfo &info); 33 | static void find_starting_in(const Nan::FunctionCallbackInfo &info); 34 | static void find_starting_at(const Nan::FunctionCallbackInfo &info); 35 | static void find_ending_in(const Nan::FunctionCallbackInfo &info); 36 | static void find_ending_at(const Nan::FunctionCallbackInfo &info); 37 | static void find_boundaries_after(const Nan::FunctionCallbackInfo &info); 38 | static void dump(const Nan::FunctionCallbackInfo &info); 39 | MarkerIndexWrapper(unsigned seed); 40 | MarkerIndex marker_index; 41 | }; 42 | -------------------------------------------------------------------------------- /src/bindings/noop.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "nan.h" 4 | 5 | static void noop(const Nan::FunctionCallbackInfo&) {} 6 | -------------------------------------------------------------------------------- /src/bindings/number-conversion.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_NUMBER_CONVERSION_H 2 | #define SUPERSTRING_NUMBER_CONVERSION_H 3 | 4 | #include "nan.h" 5 | #include "optional.h" 6 | 7 | namespace number_conversion { 8 | template 9 | optional 
number_from_js(v8::Local js_value) { 10 | v8::Local js_number; 11 | if (Nan::To(js_value).ToLocal(&js_number)) { 12 | auto maybe_number = Nan::To(js_number); 13 | if (maybe_number.IsJust()) { 14 | return maybe_number.FromJust(); 15 | } 16 | } 17 | return optional{}; 18 | } 19 | } 20 | 21 | #endif // SUPERSTRING_NUMBER_CONVERSION_H 22 | -------------------------------------------------------------------------------- /src/bindings/patch-wrapper.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "patch.h" 3 | 4 | class PatchWrapper : public Nan::ObjectWrap { 5 | public: 6 | static void init(v8::Local exports); 7 | static v8::Local from_patch(Patch &&); 8 | 9 | private: 10 | PatchWrapper(Patch &&patch); 11 | static void construct(const Nan::FunctionCallbackInfo &info); 12 | static void splice(const Nan::FunctionCallbackInfo &info); 13 | static void splice_old(const Nan::FunctionCallbackInfo &info); 14 | static void copy(const Nan::FunctionCallbackInfo &info); 15 | static void invert(const Nan::FunctionCallbackInfo &info); 16 | static void get_changes(const Nan::FunctionCallbackInfo &info); 17 | static void get_changes_in_old_range(const Nan::FunctionCallbackInfo &info); 18 | static void get_changes_in_new_range(const Nan::FunctionCallbackInfo &info); 19 | static void change_for_old_position(const Nan::FunctionCallbackInfo &info); 20 | static void change_for_new_position(const Nan::FunctionCallbackInfo &info); 21 | static void serialize(const Nan::FunctionCallbackInfo &info); 22 | static void deserialize(const Nan::FunctionCallbackInfo &info); 23 | static void compose(const Nan::FunctionCallbackInfo &info); 24 | static void get_dot_graph(const Nan::FunctionCallbackInfo &info); 25 | static void get_json(const Nan::FunctionCallbackInfo &info); 26 | static void get_change_count(const Nan::FunctionCallbackInfo &info); 27 | static void get_bounds(const Nan::FunctionCallbackInfo &info); 28 | static void 
rebalance(const Nan::FunctionCallbackInfo &info); 29 | 30 | Patch patch; 31 | }; 32 | -------------------------------------------------------------------------------- /src/bindings/point-wrapper.cc: -------------------------------------------------------------------------------- 1 | #include "point-wrapper.h" 2 | #include 3 | #include "nan.h" 4 | 5 | using namespace v8; 6 | 7 | static Nan::Persistent row_string; 8 | static Nan::Persistent column_string; 9 | static Nan::Persistent constructor; 10 | 11 | static uint32_t number_from_js(Local js_number) { 12 | double number = Nan::To(js_number).FromMaybe(0); 13 | if (number > 0 && !std::isfinite(number)) { 14 | return UINT32_MAX; 15 | } else { 16 | return std::max(0.0, number); 17 | } 18 | } 19 | 20 | optional PointWrapper::point_from_js(Local value) { 21 | Nan::MaybeLocal maybe_object = Nan::To(value); 22 | Local object; 23 | if (!maybe_object.ToLocal(&object)) { 24 | Nan::ThrowTypeError("Expected an object with 'row' and 'column' properties."); 25 | return optional{}; 26 | } 27 | 28 | Nan::MaybeLocal maybe_row = Nan::To(Nan::Get(object, Nan::New(row_string)).ToLocalChecked()); 29 | Local js_row; 30 | if (!maybe_row.ToLocal(&js_row)) { 31 | Nan::ThrowTypeError("Expected an object with 'row' and 'column' properties."); 32 | return optional{}; 33 | } 34 | 35 | Nan::MaybeLocal maybe_column = Nan::To(Nan::Get(object, Nan::New(column_string)).ToLocalChecked()); 36 | Local js_column; 37 | if (!maybe_column.ToLocal(&js_column)) { 38 | Nan::ThrowTypeError("Expected an object with 'row' and 'column' properties."); 39 | return optional{}; 40 | } 41 | 42 | return Point(number_from_js(js_row), number_from_js(js_column)); 43 | } 44 | 45 | void PointWrapper::init() { 46 | row_string.Reset(Nan::Persistent(Nan::New("row").ToLocalChecked())); 47 | column_string.Reset(Nan::Persistent(Nan::New("column").ToLocalChecked())); 48 | 49 | Local constructor_template = Nan::New(construct); 50 | 
constructor_template->SetClassName(Nan::New("Point").ToLocalChecked()); 51 | constructor_template->InstanceTemplate()->SetInternalFieldCount(1); 52 | Nan::SetAccessor(constructor_template->InstanceTemplate(), Nan::New(row_string), get_row); 53 | Nan::SetAccessor(constructor_template->InstanceTemplate(), Nan::New(column_string), get_column); 54 | constructor.Reset(Nan::GetFunction(constructor_template).ToLocalChecked()); 55 | } 56 | 57 | Local PointWrapper::from_point(Point point) { 58 | Local result; 59 | if (Nan::New(constructor)->NewInstance(Nan::GetCurrentContext()).ToLocal(&result)) { 60 | (new PointWrapper(point))->Wrap(result); 61 | return result; 62 | } else { 63 | return Nan::Null(); 64 | } 65 | } 66 | 67 | PointWrapper::PointWrapper(Point point) : point(point) {} 68 | 69 | void PointWrapper::construct(const Nan::FunctionCallbackInfo &info) {} 70 | 71 | void PointWrapper::get_row(v8::Local property, const Nan::PropertyCallbackInfo &info) { 72 | PointWrapper *wrapper = Nan::ObjectWrap::Unwrap(info.This()); 73 | Point &point = wrapper->point; 74 | info.GetReturnValue().Set(Nan::New(point.row)); 75 | } 76 | 77 | void PointWrapper::get_column(v8::Local property, const Nan::PropertyCallbackInfo &info) { 78 | PointWrapper *wrapper = Nan::ObjectWrap::Unwrap(info.This()); 79 | Point &point = wrapper->point; 80 | info.GetReturnValue().Set(Nan::New(point.column)); 81 | } 82 | -------------------------------------------------------------------------------- /src/bindings/point-wrapper.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_POINT_WRAPPER_H 2 | #define SUPERSTRING_POINT_WRAPPER_H 3 | 4 | #include "nan.h" 5 | #include "optional.h" 6 | #include "point.h" 7 | 8 | class PointWrapper : public Nan::ObjectWrap { 9 | public: 10 | static void init(); 11 | static v8::Local from_point(Point point); 12 | static optional point_from_js(v8::Local); 13 | 14 | private: 15 | PointWrapper(Point point); 16 | 17 | static 
void construct(const Nan::FunctionCallbackInfo &info); 18 | 19 | static void get_row(v8::Local property, 20 | const Nan::PropertyCallbackInfo &info); 21 | 22 | static void get_column(v8::Local property, 23 | const Nan::PropertyCallbackInfo &info); 24 | 25 | Point point; 26 | }; 27 | 28 | #endif // SUPERSTRING_POINT_WRAPPER_H 29 | -------------------------------------------------------------------------------- /src/bindings/range-wrapper.cc: -------------------------------------------------------------------------------- 1 | #include "range-wrapper.h" 2 | #include "point-wrapper.h" 3 | #include "nan.h" 4 | 5 | using namespace v8; 6 | 7 | static Nan::Persistent start_string; 8 | static Nan::Persistent end_string; 9 | static Nan::Persistent constructor; 10 | 11 | optional RangeWrapper::range_from_js(Local value) { 12 | Local object; 13 | if (!Nan::To(value).ToLocal(&object)) { 14 | Nan::ThrowTypeError("Expected an object with 'start' and 'end' properties."); 15 | return optional{}; 16 | } 17 | 18 | auto start = PointWrapper::point_from_js(Nan::Get(object, Nan::New(start_string)).ToLocalChecked()); 19 | auto end = PointWrapper::point_from_js(Nan::Get(object, Nan::New(end_string)).ToLocalChecked()); 20 | if (start && end) { 21 | return Range{*start, *end}; 22 | } else { 23 | Nan::ThrowTypeError("Expected an object with 'start' and 'end' properties."); 24 | return optional{}; 25 | } 26 | } 27 | 28 | void RangeWrapper::init() { 29 | start_string.Reset(Nan::Persistent(Nan::New("start").ToLocalChecked())); 30 | end_string.Reset(Nan::Persistent(Nan::New("end").ToLocalChecked())); 31 | 32 | Local constructor_template = Nan::New(construct); 33 | constructor_template->SetClassName(Nan::New("Range").ToLocalChecked()); 34 | constructor_template->InstanceTemplate()->SetInternalFieldCount(1); 35 | Nan::SetAccessor(constructor_template->InstanceTemplate(), Nan::New(start_string), get_start); 36 | Nan::SetAccessor(constructor_template->InstanceTemplate(), Nan::New(end_string), 
get_end); 37 | constructor.Reset(Nan::GetFunction(constructor_template).ToLocalChecked()); 38 | } 39 | 40 | Local RangeWrapper::from_range(Range range) { 41 | Local result; 42 | if (Nan::New(constructor)->NewInstance(Nan::GetCurrentContext()).ToLocal(&result)) { 43 | (new RangeWrapper(range))->Wrap(result); 44 | return result; 45 | } else { 46 | return Nan::Null(); 47 | } 48 | } 49 | 50 | RangeWrapper::RangeWrapper(Range range) : range(range) {} 51 | 52 | void RangeWrapper::construct(const Nan::FunctionCallbackInfo &info) {} 53 | 54 | void RangeWrapper::get_start(v8::Local property, const Nan::PropertyCallbackInfo &info) { 55 | RangeWrapper *wrapper = Nan::ObjectWrap::Unwrap(info.This()); 56 | Range &range = wrapper->range; 57 | info.GetReturnValue().Set(PointWrapper::from_point(range.start)); 58 | } 59 | 60 | void RangeWrapper::get_end(v8::Local property, const Nan::PropertyCallbackInfo &info) { 61 | RangeWrapper *wrapper = Nan::ObjectWrap::Unwrap(info.This()); 62 | Range &range = wrapper->range; 63 | info.GetReturnValue().Set(PointWrapper::from_point(range.end)); 64 | } 65 | -------------------------------------------------------------------------------- /src/bindings/range-wrapper.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_RANGE_WRAPPER_H 2 | #define SUPERSTRING_RANGE_WRAPPER_H 3 | 4 | #include "nan.h" 5 | #include "optional.h" 6 | #include "point.h" 7 | #include "range.h" 8 | 9 | class RangeWrapper : public Nan::ObjectWrap { 10 | public: 11 | static void init(); 12 | static v8::Local from_range(Range); 13 | static optional range_from_js(v8::Local); 14 | 15 | private: 16 | RangeWrapper(Range); 17 | 18 | static void construct(const Nan::FunctionCallbackInfo &); 19 | static void get_start(v8::Local, const Nan::PropertyCallbackInfo &); 20 | static void get_end(v8::Local, const Nan::PropertyCallbackInfo &); 21 | 22 | Range range; 23 | }; 24 | 25 | #endif // SUPERSTRING_RANGE_WRAPPER_H 26 | 
-------------------------------------------------------------------------------- /src/bindings/string-conversion.cc: -------------------------------------------------------------------------------- 1 | #include "string-conversion.h" 2 | #include "text.h" 3 | 4 | using namespace v8; 5 | using std::u16string; 6 | 7 | optional string_conversion::string_from_js(Local value) { 8 | Local string; 9 | if (!Nan::To(value).ToLocal(&string)) { 10 | Nan::ThrowTypeError("Expected a string."); 11 | return optional{}; 12 | } 13 | 14 | u16string result; 15 | result.resize(string->Length()); 16 | string->Write( 17 | 18 | // Nan doesn't wrap this functionality 19 | #if NODE_MAJOR_VERSION >= 12 20 | Isolate::GetCurrent(), 21 | #endif 22 | 23 | reinterpret_cast(&result[0]), 24 | 0, 25 | -1, 26 | String::WriteOptions::NO_NULL_TERMINATION 27 | ); 28 | return result; 29 | } 30 | 31 | Local string_conversion::string_to_js(const u16string &text, const char *failure_message) { 32 | Local result; 33 | if (Nan::New( 34 | reinterpret_cast(text.data()), 35 | text.size() 36 | ).ToLocal(&result)) { 37 | return result; 38 | } else { 39 | if (!failure_message) failure_message = "Couldn't convert text to a String"; 40 | Nan::ThrowError(failure_message); 41 | return Nan::New("").ToLocalChecked(); 42 | } 43 | } 44 | 45 | Local string_conversion::char_to_js(const uint16_t c, const char *failure_message) { 46 | Local result; 47 | if (Nan::New(&c, 1).ToLocal(&result)) { 48 | return result; 49 | } else { 50 | if (!failure_message) failure_message = "Couldn't convert character to a String"; 51 | Nan::ThrowError(failure_message); 52 | return Nan::New("").ToLocalChecked(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/bindings/string-conversion.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_STRING_CONVERSION_H 2 | #define SUPERSTRING_STRING_CONVERSION_H 3 | 4 | #include 5 | #include "nan.h" 
6 | #include "optional.h" 7 | #include "text.h" 8 | 9 | namespace string_conversion { 10 | v8::Local string_to_js( 11 | const std::u16string &, 12 | const char *failure_message = nullptr 13 | ); 14 | v8::Local char_to_js( 15 | const std::uint16_t, 16 | const char *failure_message = nullptr 17 | ); 18 | optional string_from_js(v8::Local); 19 | }; 20 | 21 | #endif // SUPERSTRING_STRING_CONVERSION_H 22 | -------------------------------------------------------------------------------- /src/bindings/text-buffer-snapshot-wrapper.cc: -------------------------------------------------------------------------------- 1 | #include "text-buffer.h" 2 | #include "text-buffer-wrapper.h" 3 | #include "text-buffer-snapshot-wrapper.h" 4 | 5 | using namespace v8; 6 | 7 | static Nan::Persistent snapshot_wrapper_constructor; 8 | 9 | void TextBufferSnapshotWrapper::init() { 10 | auto class_name = Nan::New("Snapshot").ToLocalChecked(); 11 | 12 | auto constructor_template = Nan::New(construct); 13 | constructor_template->SetClassName(class_name); 14 | constructor_template->InstanceTemplate()->SetInternalFieldCount(1); 15 | 16 | const auto &prototype_template = constructor_template->PrototypeTemplate(); 17 | Nan::SetTemplate(prototype_template, Nan::New("destroy").ToLocalChecked(), Nan::New(destroy), None); 18 | 19 | snapshot_wrapper_constructor.Reset(Nan::GetFunction(constructor_template).ToLocalChecked()); 20 | } 21 | 22 | TextBufferSnapshotWrapper::TextBufferSnapshotWrapper(Local js_buffer, void *snapshot) : 23 | snapshot{snapshot} { 24 | slices_ = reinterpret_cast(snapshot)->primitive_chunks(); 25 | js_text_buffer.Reset(Isolate::GetCurrent(), js_buffer); 26 | } 27 | 28 | TextBufferSnapshotWrapper::~TextBufferSnapshotWrapper() { 29 | if (snapshot) { 30 | delete reinterpret_cast(snapshot); 31 | } 32 | } 33 | 34 | Local TextBufferSnapshotWrapper::new_instance(Local js_buffer, void *snapshot) { 35 | Local result; 36 | if 
(Nan::NewInstance(Nan::New(snapshot_wrapper_constructor)).ToLocal(&result)) { 37 | (new TextBufferSnapshotWrapper(js_buffer, snapshot))->Wrap(result); 38 | return result; 39 | } else { 40 | return Nan::Null(); 41 | } 42 | } 43 | 44 | void TextBufferSnapshotWrapper::construct(const Nan::FunctionCallbackInfo &info) { 45 | info.GetReturnValue().Set(Nan::Null()); 46 | } 47 | 48 | void TextBufferSnapshotWrapper::destroy(const Nan::FunctionCallbackInfo &info) { 49 | auto reader = Nan::ObjectWrap::Unwrap(Nan::To(info.This()).ToLocalChecked()); 50 | if (reader->snapshot) { 51 | delete reinterpret_cast(reader->snapshot); 52 | reader->snapshot = nullptr; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/bindings/text-buffer-snapshot-wrapper.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_TEXT_BUFFER_SNAPSHOT_WRAPPER_H 2 | #define SUPERSTRING_TEXT_BUFFER_SNAPSHOT_WRAPPER_H 3 | 4 | #include "nan.h" 5 | #include 6 | 7 | // This header can be included by other native node modules, allowing them 8 | // to access the content of a TextBuffer::Snapshot without having to call 9 | // any superstring APIs. 
10 | 11 | /* Nan::ObjectWrap that exposes a snapshot's chunk list to native callers through slices(). */ class TextBufferSnapshotWrapper : public Nan::ObjectWrap { 12 | public: 13 | static void init(); 14 | 15 | static v8::Local new_instance(v8::Local, void *); 16 | 17 | /* Returns a pointer to the chunk list held by this wrapper. */ inline const std::vector> *slices() { 18 | return &slices_; 19 | } 20 | 21 | private: 22 | TextBufferSnapshotWrapper(v8::Local js_buffer, void *snapshot); 23 | ~TextBufferSnapshotWrapper(); 24 | 25 | static void construct(const Nan::FunctionCallbackInfo &info); 26 | static void destroy(const Nan::FunctionCallbackInfo &info); 27 | 28 | v8::Persistent js_text_buffer; 29 | /* Type-erased snapshot pointer; NOTE(review): presumably a TextBuffer::Snapshot, per the comment at the top of this header -- confirm. */ void *snapshot; 30 | std::vector> slices_; 31 | }; 32 | 33 | #endif // SUPERSTRING_TEXT_BUFFER_SNAPSHOT_WRAPPER_H 34 | -------------------------------------------------------------------------------- /src/bindings/text-buffer-wrapper.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_TEXT_BUFFER_WRAPPER_H 2 | #define SUPERSTRING_TEXT_BUFFER_WRAPPER_H 3 | 4 | #include "nan.h" 5 | #include "text-buffer.h" 6 | #include 7 | 8 | /* Abstract interface with a single pure-virtual CancelIfQueued() hook. */ class CancellableWorker { 9 | public: 10 | virtual void CancelIfQueued() = 0; 11 | }; 12 | 13 | /* Nan::ObjectWrap that owns a TextBuffer; the private static methods are its Nan function callbacks. */ class TextBufferWrapper : public Nan::ObjectWrap { 14 | public: 15 | static void init(v8::Local exports); 16 | TextBuffer text_buffer; 17 | std::unordered_set outstanding_workers; 18 | 19 | private: 20 | static void construct(const Nan::FunctionCallbackInfo &info); 21 | static void get_length(const Nan::FunctionCallbackInfo &info); 22 | static void get_extent(const Nan::FunctionCallbackInfo &info); 23 | static void get_line_count(const Nan::FunctionCallbackInfo &info); 24 | static void has_astral(const Nan::FunctionCallbackInfo &info); 25 | static void get_text(const Nan::FunctionCallbackInfo &info); 26 | static void get_character_at_position(const Nan::FunctionCallbackInfo &info); 27 | static void get_text_in_range(const Nan::FunctionCallbackInfo &info); 28 | static void set_text(const Nan::FunctionCallbackInfo &info); 29 | static void
set_text_in_range(const Nan::FunctionCallbackInfo &info); 30 | static void line_for_row(const Nan::FunctionCallbackInfo &info); 31 | static void line_length_for_row(const Nan::FunctionCallbackInfo &info); 32 | static void line_ending_for_row(const Nan::FunctionCallbackInfo &info); 33 | static void get_lines(const Nan::FunctionCallbackInfo &info); 34 | static void character_index_for_position(const Nan::FunctionCallbackInfo &info); 35 | static void position_for_character_index(const Nan::FunctionCallbackInfo &info); 36 | static void find(const Nan::FunctionCallbackInfo &info); 37 | static void find_sync(const Nan::FunctionCallbackInfo &info); 38 | static void find_all(const Nan::FunctionCallbackInfo &info); 39 | static void find_all_sync(const Nan::FunctionCallbackInfo &info); 40 | static void find_and_mark_all_sync(const Nan::FunctionCallbackInfo &info); 41 | static void find_words_with_subsequence_in_range(const Nan::FunctionCallbackInfo &info); 42 | static void is_modified(const Nan::FunctionCallbackInfo &info); 43 | static void load(const Nan::FunctionCallbackInfo &info); 44 | static void base_text_matches_file(const Nan::FunctionCallbackInfo &info); 45 | static void save(const Nan::FunctionCallbackInfo &info); 46 | static void load_sync(const Nan::FunctionCallbackInfo &info); 47 | static void save_sync(const Nan::FunctionCallbackInfo &info); 48 | static void serialize_changes(const Nan::FunctionCallbackInfo &info); 49 | static void deserialize_changes(const Nan::FunctionCallbackInfo &info); 50 | static void reset(const Nan::FunctionCallbackInfo &info); 51 | static void base_text_digest(const Nan::FunctionCallbackInfo &info); 52 | static void get_snapshot(const Nan::FunctionCallbackInfo &info); 53 | static void dot_graph(const Nan::FunctionCallbackInfo &info); 54 | 55 | /* NOTE(review): presumably calls CancelIfQueued() on each entry of outstanding_workers; the definition is not visible here -- confirm. */ void cancel_queued_workers(); 56 | }; 57 | 58 | #endif // SUPERSTRING_TEXT_BUFFER_WRAPPER_H 59 | --------------------------------------------------------------------------------
/src/bindings/text-reader.cc: -------------------------------------------------------------------------------- 1 | #include "text-slice.h" 2 | #include "text-reader.h" 3 | #include "encoding-conversion.h" 4 | #include "text-buffer-wrapper.h" 5 | 6 | using std::move; 7 | using std::string; 8 | using namespace v8; 9 | 10 | /* Registers the TextReader class on exports with read, end and destroy prototype methods. */ void TextReader::init(Local exports) { 11 | Local constructor_template = Nan::New(construct); 12 | constructor_template->SetClassName(Nan::New("TextReader").ToLocalChecked()); 13 | constructor_template->InstanceTemplate()->SetInternalFieldCount(1); 14 | const auto &prototype_template = constructor_template->PrototypeTemplate(); 15 | Nan::SetTemplate(prototype_template, Nan::New("read").ToLocalChecked(), Nan::New(read), None); 16 | Nan::SetTemplate(prototype_template, Nan::New("end").ToLocalChecked(), Nan::New(end), None); 17 | Nan::SetTemplate(prototype_template, Nan::New("destroy").ToLocalChecked(), Nan::New(destroy), None); 18 | Nan::Set(exports, Nan::New("TextReader").ToLocalChecked(), Nan::GetFunction(constructor_template).ToLocalChecked()); 19 | } 20 | 21 | /* Takes ownership of snapshot (deleted by the destructor, end() or destroy()), copies its chunks() and starts reading at the first chunk's start offset. NOTE(review): slices[0] assumes chunks() never returns an empty vector -- confirm. */ TextReader::TextReader(Local js_buffer, 22 | TextBuffer::Snapshot *snapshot, 23 | EncodingConversion &&conversion) : 24 | snapshot{snapshot}, 25 | slices{snapshot->chunks()}, 26 | slice_index{0}, 27 | text_offset{slices[0].start_offset()}, 28 | conversion{move(conversion)} { 29 | js_text_buffer.Reset(Isolate::GetCurrent(), js_buffer); 30 | } 31 | 32 | /* Deletes the snapshot if end() or destroy() has not already done so. */ TextReader::~TextReader() { 33 | if (snapshot) delete snapshot; 34 | } 35 | 36 | /* JS constructor (buffer, encodingName). Both arguments are validated before the snapshot is created, so an unconvertible or invalid encoding name returns without leaking a snapshot. Throws "Invalid encoding name: ..." when transcoding_to() yields no conversion. */ void TextReader::construct(const Nan::FunctionCallbackInfo &info) { 37 | Local js_text_buffer; 38 | if (!Nan::To(info[0]).ToLocal(&js_text_buffer)) return; 39 | 40 | Local js_encoding_name; 41 | if (!Nan::To(info[1]).ToLocal(&js_encoding_name)) return; 42 | Nan::Utf8String encoding_name(js_encoding_name); 43 | auto conversion =
transcoding_to(*encoding_name); 44 | if (!conversion) { 45 | Nan::ThrowError((string("Invalid encoding name: ") + *encoding_name).c_str()); 46 | return; 47 | } 48 | 49 | /* Create the snapshot last: from here on every path hands it to the TextReader, which owns and deletes it. */ auto &text_buffer = Nan::ObjectWrap::Unwrap(js_text_buffer)->text_buffer; 50 | auto snapshot = text_buffer.create_snapshot(); 51 | TextReader *reader = new TextReader(js_text_buffer, snapshot, move(*conversion)); 52 | reader->Wrap(info.This()); 53 | } 54 | 55 | /* JS read(buffer): encodes as much of the remaining text as fits into the given Uint8Array and returns the number of bytes written; throws "Expected a buffer" for any other argument. */ void TextReader::read(const Nan::FunctionCallbackInfo &info) { 56 | TextReader *reader = Nan::ObjectWrap::Unwrap(Nan::To(info.This()).ToLocalChecked()); 57 | 58 | if (!info[0]->IsUint8Array()) { 59 | Nan::ThrowError("Expected a buffer"); 60 | return; 61 | } 62 | 63 | char *buffer = node::Buffer::Data(info[0]); 64 | size_t buffer_length = node::Buffer::Length(info[0]); 65 | size_t total_bytes_written = 0; 66 | 67 | for (;;) { 68 | if (reader->slice_index == reader->slices.size()) break; 69 | TextSlice &slice = reader->slices[reader->slice_index]; 70 | size_t end_offset = slice.end_offset(); 71 | size_t bytes_written = reader->conversion.encode( 72 | slice.text->content, 73 | &reader->text_offset, 74 | end_offset, 75 | buffer + total_bytes_written, 76 | buffer_length - total_bytes_written 77 | ); 78 | /* Nothing was written: stop and report what has been produced so far. */ if (bytes_written == 0) break; 79 | total_bytes_written += bytes_written; 80 | /* The current slice is exhausted: advance to the next one, if any. */ if (reader->text_offset == end_offset) { 81 | reader->slice_index++; 82 | if (reader->slice_index == reader->slices.size()) break; 83 | reader->text_offset = reader->slices[reader->slice_index].start_offset(); 84 | } 85 | } 86 | 87 | info.GetReturnValue().Set(Nan::New(total_bytes_written)); 88 | } 89 | 90 | /* JS end(): calls flush_preceding_changes() on the snapshot, then deletes it and nulls the pointer. */ void TextReader::end(const Nan::FunctionCallbackInfo &info) { 91 | TextReader *reader = Nan::ObjectWrap::Unwrap(Nan::To(info.This()).ToLocalChecked()); 92 | if (reader->snapshot) { 93 | reader->snapshot->flush_preceding_changes(); 94 | delete reader->snapshot; 95 | reader->snapshot = nullptr; 96 | } 97 | } 98 | 99 | /* JS destroy(): deletes the snapshot without flushing and nulls the pointer. */ void TextReader::destroy(const Nan::FunctionCallbackInfo &info) { 100 | TextReader *reader = Nan::ObjectWrap::Unwrap(Nan::To(info.This()).ToLocalChecked()); 101 | if
(reader->snapshot) { 102 | delete reader->snapshot; 103 | reader->snapshot = nullptr; 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/bindings/text-reader.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_TEXT_READER_H 2 | #define SUPERSTRING_TEXT_READER_H 3 | 4 | #include "nan.h" 5 | #include "text.h" 6 | #include "text-buffer.h" 7 | #include "encoding-conversion.h" 8 | 9 | /* Nan::ObjectWrap that reads the text of a TextBuffer snapshot through an EncodingConversion; exposed to JS via init(). */ class TextReader : public Nan::ObjectWrap { 10 | public: 11 | static void init(v8::Local exports); 12 | 13 | private: 14 | TextReader(v8::Local js_buffer, TextBuffer::Snapshot *snapshot, 15 | EncodingConversion &&conversion); 16 | ~TextReader(); 17 | 18 | static void construct(const Nan::FunctionCallbackInfo &info); 19 | static void read(const Nan::FunctionCallbackInfo &info); 20 | static void end(const Nan::FunctionCallbackInfo &info); 21 | static void destroy(const Nan::FunctionCallbackInfo &info); 22 | 23 | v8::Persistent js_text_buffer; 24 | /* Owned snapshot; null once end() or destroy() has deleted it. */ TextBuffer::Snapshot *snapshot; 25 | std::vector slices; 26 | /* Index into slices of the slice currently being read. */ size_t slice_index; 27 | /* Offset of the next unread character within the current slice's text. */ size_t text_offset; 28 | EncodingConversion conversion; 29 | }; 30 | 31 | #endif // SUPERSTRING_TEXT_READER_H 32 | -------------------------------------------------------------------------------- /src/bindings/text-writer.cc: -------------------------------------------------------------------------------- 1 | #include "text-writer.h" 2 | 3 | using std::string; 4 | using std::move; 5 | using std::u16string; 6 | using namespace v8; 7 | 8 | /* Registers the TextWriter class on exports with write and end prototype methods. */ void TextWriter::init(Local exports) { 9 | Local constructor_template = Nan::New(construct); 10 | constructor_template->SetClassName(Nan::New("TextWriter").ToLocalChecked()); 11 | constructor_template->InstanceTemplate()->SetInternalFieldCount(1); 12 | const auto &prototype_template = constructor_template->PrototypeTemplate(); 13 | Nan::SetTemplate(prototype_template, Nan::New("write").ToLocalChecked(),
Nan::New(write), None); 14 | Nan::SetTemplate(prototype_template, Nan::New("end").ToLocalChecked(), Nan::New(end), None); 15 | Nan::Set(exports, Nan::New("TextWriter").ToLocalChecked(), Nan::GetFunction(constructor_template).ToLocalChecked()); 16 | } 17 | 18 | TextWriter::TextWriter(EncodingConversion &&conversion) : conversion{move(conversion)} {} 19 | 20 | void TextWriter::construct(const Nan::FunctionCallbackInfo &info) { 21 | Local js_encoding_name; 22 | if (!Nan::To(info[0]).ToLocal(&js_encoding_name)) return; 23 | Nan::Utf8String encoding_name(js_encoding_name); 24 | auto conversion = transcoding_from(*encoding_name); 25 | if (!conversion) { 26 | Nan::ThrowError((string("Invalid encoding name: ") + *encoding_name).c_str()); 27 | return; 28 | } 29 | 30 | TextWriter *wrapper = new TextWriter(move(*conversion)); 31 | wrapper->Wrap(info.This()); 32 | } 33 | 34 | void TextWriter::write(const Nan::FunctionCallbackInfo &info) { 35 | auto writer = Nan::ObjectWrap::Unwrap(info.This()); 36 | 37 | Local js_chunk; 38 | if (Nan::To(info[0]).ToLocal(&js_chunk)) { 39 | size_t size = writer->content.size(); 40 | writer->content.resize(size + js_chunk->Length()); 41 | js_chunk->Write( 42 | 43 | // Nan doesn't wrap this functionality 44 | #if NODE_MAJOR_VERSION >= 12 45 | Isolate::GetCurrent(), 46 | #endif 47 | 48 | reinterpret_cast(&writer->content[0]) + size, 49 | 0, 50 | -1, 51 | String::WriteOptions::NO_NULL_TERMINATION 52 | ); 53 | } else if (info[0]->IsUint8Array()) { 54 | auto *data = node::Buffer::Data(info[0]); 55 | size_t length = node::Buffer::Length(info[0]); 56 | if (!writer->leftover_bytes.empty()) { 57 | writer->leftover_bytes.insert( 58 | writer->leftover_bytes.end(), 59 | data, 60 | data + length 61 | ); 62 | data = writer->leftover_bytes.data(); 63 | length = writer->leftover_bytes.size(); 64 | } 65 | size_t bytes_written = writer->conversion.decode( 66 | writer->content, 67 | data, 68 | length 69 | ); 70 | if (bytes_written < length) { 71 | 
writer->leftover_bytes.assign(data + bytes_written, data + length); 72 | } else { 73 | writer->leftover_bytes.clear(); 74 | } 75 | } 76 | } 77 | 78 | void TextWriter::end(const Nan::FunctionCallbackInfo &info) { 79 | auto writer = Nan::ObjectWrap::Unwrap(info.This()); 80 | if (!writer->leftover_bytes.empty()) { 81 | writer->conversion.decode( 82 | writer->content, 83 | writer->leftover_bytes.data(), 84 | writer->leftover_bytes.size(), 85 | true 86 | ); 87 | } 88 | } 89 | 90 | u16string TextWriter::get_text() { 91 | return move(content); 92 | } 93 | -------------------------------------------------------------------------------- /src/bindings/text-writer.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_TEXT_WRITER_H 2 | #define SUPERSTRING_TEXT_WRITER_H 3 | 4 | #include "nan.h" 5 | #include "text.h" 6 | #include "encoding-conversion.h" 7 | 8 | class TextWriter : public Nan::ObjectWrap { 9 | public: 10 | static void init(v8::Local exports); 11 | TextWriter(EncodingConversion &&conversion); 12 | std::u16string get_text(); 13 | 14 | private: 15 | static void construct(const Nan::FunctionCallbackInfo &info); 16 | static void write(const Nan::FunctionCallbackInfo &info); 17 | static void end(const Nan::FunctionCallbackInfo &info); 18 | 19 | EncodingConversion conversion; 20 | std::vector leftover_bytes; 21 | std::u16string content; 22 | }; 23 | 24 | #endif // SUPERSTRING_TEXT_WRITER_H 25 | -------------------------------------------------------------------------------- /src/core/encoding-conversion.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_ENCODING_CONVERSION_H_ 2 | #define SUPERSTRING_ENCODING_CONVERSION_H_ 3 | 4 | #include "optional.h" 5 | #include "text.h" 6 | #include 7 | 8 | class EncodingConversion { 9 | void *data; 10 | int mode; 11 | 12 | EncodingConversion(int, void *); 13 | int convert(const char **, const char *, char **, char *) const; 
14 | 15 | public: 16 | EncodingConversion(EncodingConversion &&); 17 | EncodingConversion(); 18 | ~EncodingConversion(); 19 | 20 | /* Stream and in-memory overloads; NOTE(review): the in-memory overloads presumably return the number of bytes produced or consumed -- the definitions are not visible here, confirm in encoding-conversion.cc. */ bool encode(const std::u16string &, size_t start_offset, size_t end_offset, 21 | FILE *stream, std::vector &buffer); 22 | size_t encode(const std::u16string &, size_t *start_offset, size_t end_offset, 23 | char *buffer, size_t buffer_size, bool is_last = false); 24 | bool decode(std::u16string &, FILE *stream, std::vector &buffer, 25 | std::function progress_callback); 26 | size_t decode(std::u16string &, const char *buffer, size_t buffer_size, 27 | bool is_last = false); 28 | 29 | friend optional transcoding_to(const char *); 30 | friend optional transcoding_from(const char *); 31 | }; 32 | 33 | /* Factory functions taking an encoding name; callers in the bindings treat an empty optional as an invalid name. */ optional transcoding_to(const char *); 34 | optional transcoding_from(const char *); 35 | 36 | #endif // SUPERSTRING_ENCODING_CONVERSION_H_ 37 | -------------------------------------------------------------------------------- /src/core/flat_set.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_FLAT_SET_H 2 | #define SUPERSTRING_FLAT_SET_H 3 | 4 | #include 5 | #include 6 | 7 | /* Set stored as a sorted std::vector without duplicates; lookups use binary search. */ template class flat_set { 8 | typedef std::vector contents_type; 9 | contents_type contents; 10 | 11 | public: 12 | typedef typename contents_type::iterator iterator; 13 | typedef typename contents_type::const_iterator const_iterator; 14 | 15 | /* Inserts value at its sorted position unless an equal element is already present. */ void insert(T value) { 16 | auto iter = std::lower_bound(contents.begin(), contents.end(), value); 17 | if (iter == contents.end() || *iter != value) { 18 | contents.insert(iter, value); 19 | } 20 | } 21 | 22 | /* Inserts every element of [start, end) one at a time through insert(T). */ void insert(const_iterator start, const_iterator end) { 23 | for (auto i = start; i != end; i++) { 24 | insert(*i); 25 | } 26 | } 27 | 28 | /* Erases the element at iter and returns the iterator that vector::erase() returns. */ iterator erase(const iterator &iter) { 29 | return contents.erase(iter); 30 | } 31 | 32 | /* Erases value if present; does nothing otherwise. */ void erase(T value) { 33 | auto end = this->end(); 34 | auto iter = std::lower_bound(begin(), end, value); 35 | if (iter != end && *iter ==
value) { 36 | erase(iter); 37 | } 38 | } 39 | 40 | iterator begin() { 41 | return contents.begin(); 42 | } 43 | 44 | const_iterator begin() const { 45 | return contents.begin(); 46 | } 47 | 48 | iterator end() { 49 | return contents.end(); 50 | } 51 | 52 | const_iterator end() const { 53 | return contents.end(); 54 | } 55 | 56 | /* Returns 1 when value is present and 0 otherwise. */ size_t count(T value) const { 57 | return std::binary_search(contents.begin(), contents.end(), value) ? 1 : 0; 58 | } 59 | 60 | size_t size() const { 61 | return contents.size(); 62 | } 63 | }; 64 | 65 | #endif // SUPERSTRING_FLAT_SET_H 66 | -------------------------------------------------------------------------------- /src/core/libmba-diff.h: -------------------------------------------------------------------------------- 1 | #ifndef MBA_DIFF_H_ 2 | #define MBA_DIFF_H_ 3 | 4 | #include 5 | #include 6 | 7 | /* Kind of one edit-script entry. */ typedef enum { 8 | DIFF_MATCH = 1, 9 | DIFF_DELETE, 10 | DIFF_INSERT 11 | } diff_op; 12 | 13 | struct diff_edit { 14 | diff_op op; 15 | uint32_t off; /* off into s1 if MATCH or DELETE but s2 if INSERT */ 16 | uint32_t len; 17 | }; 18 | 19 | /* Diffs two UTF-16 buffers and fills *ses with the edit script. NOTE(review): the meaning of dmax and of the return value is not visible in this header -- confirm in libmba-diff.cc. */ int diff( 20 | const char16_t *old_text, uint32_t old_length, 21 | const char16_t *new_text, uint32_t new_length, 22 | int dmax, std::vector *ses 23 | ); 24 | 25 | #endif // MBA_DIFF_H_ 26 | -------------------------------------------------------------------------------- /src/core/marker-index.h: -------------------------------------------------------------------------------- 1 | #ifndef MARKER_INDEX_H_ 2 | #define MARKER_INDEX_H_ 3 | 4 | #include 5 | #include 6 | #include "flat_set.h" 7 | #include "point.h" 8 | #include "range.h" 9 | 10 | /* Index of markers, each identified by a MarkerId and spanning a start and an end Point. */ class MarkerIndex { 11 | public: 12 | using MarkerId = unsigned; 13 | using MarkerIdSet = flat_set; 14 | 15 | /* Marker id sets reported by splice(), grouped by their relation to the spliced range. */ struct SpliceResult { 16 | flat_set touch; 17 | flat_set inside; 18 | flat_set overlap; 19 | flat_set surround; 20 | }; 21 | 22 | /* A position together with the markers starting and ending there. */ struct Boundary { 23 | Point position; 24 | flat_set starting; 25 | flat_set ending; 26 | }; 27 | 28 | struct
BoundaryQueryResult { 29 | std::vector containing_start; 30 | std::vector boundaries; 31 | }; 32 | 33 | MarkerIndex(unsigned seed = 0u); 34 | ~MarkerIndex(); 35 | int generate_random_number(); 36 | void insert(MarkerId id, Point start, Point end); 37 | void set_exclusive(MarkerId id, bool exclusive); 38 | void remove(MarkerId id); 39 | bool has(MarkerId id); 40 | /* Replaces old_extent at start with new_extent and reports the affected markers in a SpliceResult. */ SpliceResult splice(Point start, Point old_extent, Point new_extent); 41 | Point get_start(MarkerId id) const; 42 | Point get_end(MarkerId id) const; 43 | Range get_range(MarkerId id) const; 44 | 45 | int compare(MarkerId id1, MarkerId id2) const; 46 | /* Range and position queries; each returns a set of marker ids. */ flat_set find_intersecting(Point start, Point end); 47 | flat_set find_containing(Point start, Point end); 48 | flat_set find_contained_in(Point start, Point end); 49 | flat_set find_starting_in(Point start, Point end); 50 | flat_set find_starting_at(Point position); 51 | flat_set find_ending_in(Point start, Point end); 52 | flat_set find_ending_at(Point position); 53 | BoundaryQueryResult find_boundaries_after(Point start, size_t max_count); 54 | 55 | std::unordered_map dump(); 56 | 57 | private: 58 | friend class Iterator; 59 | 60 | /* Tree node. NOTE(review): the priority field together with the bubble_node_up/down and rotate helpers and the random engine looks like a treap -- confirm in marker-index.cc. */ struct Node { 61 | Node *parent; 62 | Node *left; 63 | Node *right; 64 | Point left_extent; 65 | flat_set left_marker_ids; 66 | flat_set right_marker_ids; 67 | flat_set start_marker_ids; 68 | flat_set end_marker_ids; 69 | int priority; 70 | 71 | Node(Node *parent, Point left_extent); 72 | bool is_marker_endpoint(); 73 | }; 74 | 75 | /* Cursor over the node tree that tracks the position of the current node while moving. */ class Iterator { 76 | public: 77 | Iterator(MarkerIndex *marker_index); 78 | void reset(); 79 | Node* insert_marker_start(const MarkerId &id, const Point &start_position, const Point &end_position); 80 | Node* insert_marker_end(const MarkerId &id, const Point &start_position, const Point &end_position); 81 | Node* insert_splice_boundary(const Point &position, bool is_insertion_end); 82 | void find_intersecting(const Point &start, const Point &end, flat_set *result); 83 | void find_contained_in(const
Point &start, const Point &end, flat_set *result); 84 | void find_starting_in(const Point &start, const Point &end, flat_set *result); 85 | void find_ending_in(const Point &start, const Point &end, flat_set *result); 86 | void find_boundaries_after(Point start, size_t max_count, BoundaryQueryResult *result); 87 | std::unordered_map dump(); 88 | 89 | private: 90 | /* Tree navigation primitives. */ void ascend(); 91 | void descend_left(); 92 | void descend_right(); 93 | void move_to_successor(); 94 | void seek_to_first_node_greater_than_or_equal_to(const Point &position); 95 | void mark_right(const MarkerId &id, const Point &start_position, const Point &end_position); 96 | void mark_left(const MarkerId &id, const Point &start_position, const Point &end_position); 97 | Node* insert_left_child(const Point &position); 98 | Node* insert_right_child(const Point &position); 99 | void check_intersection(const Point &start, const Point &end, flat_set *results); 100 | void cache_node_position() const; 101 | 102 | MarkerIndex *marker_index; 103 | Node *current_node; 104 | Point current_node_position; 105 | Point left_ancestor_position; 106 | Point right_ancestor_position; 107 | std::vector left_ancestor_position_stack; 108 | std::vector right_ancestor_position_stack; 109 | }; 110 | 111 | Point get_node_position(const Node *node) const; 112 | void delete_node(Node *node); 113 | void delete_subtree(Node *node); 114 | void bubble_node_up(Node *node); 115 | void bubble_node_down(Node *node); 116 | void rotate_node_left(Node *pivot); 117 | void rotate_node_right(Node *pivot); 118 | void get_starting_and_ending_markers_within_subtree(const Node *node, flat_set *starting, flat_set *ending); 119 | void populate_splice_invalidation_sets(SpliceResult *invalidated, const Node *start_node, const Node *end_node, const flat_set &starting_inside_splice, const flat_set &ending_inside_splice); 120 | 121 | std::default_random_engine random_engine; 122 | std::uniform_int_distribution random_distribution; 123 | /* NOTE(review): raw pointer member alongside a user-declared destructor, with no copy operations declared in this header -- confirm instances are never copied. */ Node *root; 124 |
std::unordered_map start_nodes_by_id; 125 | std::unordered_map end_nodes_by_id; 126 | Iterator iterator; 127 | flat_set exclusive_marker_ids; 128 | mutable std::unordered_map node_position_cache; 129 | }; 130 | 131 | #endif // MARKER_INDEX_H_ 132 | -------------------------------------------------------------------------------- /src/core/optional.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_OPTIONAL_H 2 | #define SUPERSTRING_OPTIONAL_H 3 | 4 | #include 5 | 6 | /* Minimal optional: holds a T (default-constructed when empty) plus an is_some flag. */ template class optional { 7 | T value; 8 | bool is_some; 9 | 10 | public: 11 | optional(T &&value) : value(std::move(value)), is_some(true) {} 12 | optional(const T &value) : value(value), is_some(true) {} 13 | /* Empty state; requires T to be default-constructible. */ optional() : value(T()), is_some(false) {} 14 | 15 | /* Accessors do not check is_some; callers test the optional with operator bool first. */ T &operator*() { return value; } 16 | const T &operator*() const { return value; } 17 | const T *operator->() const { return &value; } 18 | T *operator->() { return &value; } 19 | operator bool() const { return is_some; } 20 | /* Two optionals are equal when both are empty, or both are set and hold equal values. const-qualified so that a const left operand uses this overload instead of silently comparing the implicit bool conversions, which would ignore the stored values. */ bool operator==(const optional &other) const { 21 | if (is_some) { 22 | return other.is_some && value == other.value; 23 | } else { 24 | return !other.is_some; 25 | } 26 | } 27 | }; 28 | 29 | #endif // SUPERSTRING_OPTIONAL_H 30 | -------------------------------------------------------------------------------- /src/core/patch.h: -------------------------------------------------------------------------------- 1 | #ifndef PATCH_H_ 2 | #define PATCH_H_ 3 | 4 | #include "optional.h" 5 | #include "point.h" 6 | #include "serializer.h" 7 | #include "text.h" 8 | #include 9 | #include 10 | #include 11 | 12 | class Patch { 13 | struct Node; 14 | struct OldCoordinates; 15 | struct NewCoordinates; 16 | struct PositionStackEntry; 17 | 18 | Node *root; 19 | std::vector node_stack; 20 | std::vector left_ancestor_stack; 21 | uint32_t change_count; 22 | bool merges_adjacent_changes; 23 | 24 | public: 25 | struct Change { 26 | Point old_start; 27 | Point old_end; 28 | Point new_start; 29 |
Point new_end; 30 | Text *old_text; 31 | Text *new_text; 32 | uint32_t preceding_old_text_size; 33 | uint32_t preceding_new_text_size; 34 | uint32_t old_text_size; 35 | }; 36 | 37 | // Construction and destruction 38 | Patch(bool merges_adjacent_changes = true); 39 | Patch(Patch &&); 40 | Patch(Deserializer &input); 41 | Patch &operator=(Patch &&); 42 | ~Patch(); 43 | void serialize(Serializer &serializer); 44 | 45 | /* Only move construction and move assignment are declared; copy() and invert() return new Patch values. */ Patch copy(); 46 | Patch invert(); 47 | 48 | // Mutations 49 | bool splice(Point new_splice_start, 50 | Point new_deletion_extent, Point new_insertion_extent, 51 | optional &&deleted_text = optional{}, 52 | optional &&inserted_text = optional{}, 53 | uint32_t deleted_text_size = 0); 54 | void splice_old(Point start, Point deletion_extent, Point insertion_extent); 55 | bool combine(const Patch &other, bool left_to_right = true); 56 | void clear(); 57 | void rebalance(); 58 | 59 | // Non-splaying reads 60 | std::vector get_changes() const; 61 | size_t get_change_count() const; 62 | std::vector get_changes_in_old_range(Point start, Point end) const; 63 | std::vector get_changes_in_new_range(Point start, Point end) const; 64 | optional get_change_starting_before_old_position(Point position) const; 65 | optional get_change_starting_before_new_position(Point position) const; 66 | optional get_change_ending_after_new_position(Point position) const; 67 | optional get_bounds() const; 68 | Point new_position_for_new_offset(uint32_t new_offset, 69 | std::function old_offset_for_old_position, 70 | std::function old_position_for_old_offset) const; 71 | 72 | // Splaying reads 73 | /* These are non-const, unlike the get_* counterparts above. NOTE(review): presumably they restructure the tree while reading -- confirm in patch.cc. */ std::vector grab_changes_in_old_range(Point start, Point end); 74 | std::vector grab_changes_in_new_range(Point start, Point end); 75 | optional grab_change_starting_before_old_position(Point position); 76 | optional grab_change_starting_before_new_position(Point position); 77 | optional grab_change_ending_after_new_position(Point position, bool exclusive = false); 78 | 79 | // Debugging
80 | std::string get_dot_graph() const; 81 | std::string get_json() const; 82 | 83 | private: 84 | Patch(Node *root, uint32_t change_count, bool merges_adjacent_changes); 85 | 86 | /* The templates below have lost their parameter lists in this copy. NOTE(review): presumably they are parameterized on the OldCoordinates / NewCoordinates types declared at the top of the class -- confirm. */ template 87 | std::vector get_changes_in_range(Point, Point, bool inclusive) const; 88 | 89 | template 90 | optional get_change_starting_before_position(Point target) const; 91 | 92 | template 93 | optional get_change_ending_after_position(Point target) const; 94 | 95 | template 96 | std::vector grab_changes_in_range(Point, Point, bool inclusive = false); 97 | 98 | template 99 | optional grab_change_starting_before_position(Point position); 100 | 101 | template 102 | Node *splay_node_starting_before(Point target); 103 | 104 | template 105 | Node *splay_node_starting_after(Point target, optional exclusive_lower_bound); 106 | 107 | template 108 | Node *splay_node_ending_before(Point target); 109 | 110 | template 111 | Node *splay_node_ending_after(Point target, optional exclusive_lower_bound); 112 | 113 | Change change_for_root_node(); 114 | 115 | std::pair, bool> compute_old_text(optional &&, Point, Point); 116 | uint32_t compute_old_text_size(uint32_t, Point, Point); 117 | 118 | /* Tree maintenance helpers. */ void splay_node(Node *); 119 | void rotate_node_right(Node *, Node *, Node *); 120 | void rotate_node_left(Node *, Node *, Node *); 121 | void delete_root(); 122 | void perform_rebalancing_rotations(uint32_t); 123 | Node *build_node(Node *, Node *, Point, Point, Point, Point, 124 | optional &&, optional &&, uint32_t old_text_size); 125 | void delete_node(Node **); 126 | void remove_noop_change(); 127 | }; 128 | 129 | std::ostream &operator<<(std::ostream &, const Patch::Change &); 130 | 131 | #endif // PATCH_H_ 132 | -------------------------------------------------------------------------------- /src/core/point.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "point.h" 4 | 5 | /* Returns the earlier of the two points (left when they are equal). */ Point Point::min(const Point &left, const Point &right) { 6 |
return left <= right ? left : right; 7 | } 8 | 9 | /* Returns the later of the two points (left when they are equal). */ Point Point::max(const Point &left, const Point &right) { 10 | return left >= right ? left : right; 11 | } 12 | 13 | /* Largest representable point: both fields set to UINT32_MAX. */ Point Point::max() { 14 | return Point(UINT32_MAX, UINT32_MAX); 15 | } 16 | 17 | Point::Point() : Point(0, 0) {} 18 | 19 | Point::Point(unsigned row, unsigned column) : row{row}, column{column} {} 20 | 21 | /* Reads row, then column, from the deserializer (the same order serialize() writes them). */ Point::Point(Deserializer &input) : 22 | row{input.read()}, 23 | column{input.read()} {} 24 | 25 | /* Orders points by row first, then by column; returns -1, 0 or 1. */ int Point::compare(const Point &other) const { 26 | if (row < other.row) return -1; 27 | if (row > other.row) return 1; 28 | if (column < other.column) return -1; 29 | if (column > other.column) return 1; 30 | return 0; 31 | } 32 | 33 | bool Point::is_zero() const { 34 | return row == 0 && column == 0; 35 | } 36 | 37 | /* Adds two values and caps the result at UINT32_MAX via std::min. NOTE(review): the cast target types are missing in this copy; presumably both operands are widened to 64 bits before adding -- confirm. */ static uint32_t checked_add(uint32_t a, uint32_t b) { 38 | return std::min( 39 | UINT32_MAX, 40 | static_cast(a) + static_cast(b) 41 | ); 42 | } 43 | 44 | /* Advances this point by traversal: a traversal with row 0 adds columns on the same row; otherwise rows are added and the column is taken from traversal. */ Point Point::traverse(const Point &traversal) const { 45 | if (traversal.row == 0) { 46 | return Point(row, checked_add(column, traversal.column)); 47 | } else { 48 | return Point(checked_add(row, traversal.row), traversal.column); 49 | } 50 | } 51 | 52 | /* Returns the offset from start to this point. The subtractions are on unsigned fields, so they wrap around when start lies after this point. */ Point Point::traversal(const Point &start) const { 53 | if (row == start.row) { 54 | return Point(0, column - start.column); 55 | } else { 56 | return Point(row - start.row, column); 57 | } 58 | } 59 | 60 | void Point::serialize(Serializer &output) const { 61 | output.append(row); 62 | output.append(column); 63 | } 64 | 65 | /* All comparison operators delegate to compare(). */ bool Point::operator==(const Point &other) const { 66 | return compare(other) == 0; 67 | } 68 | 69 | bool Point::operator!=(const Point &other) const { 70 | return compare(other) != 0; 71 | } 72 | 73 | bool Point::operator<(const Point &other) const { 74 | return compare(other) < 0; 75 | } 76 | 77 | bool Point::operator<=(const Point &other) const { 78 | return compare(other) <= 0; 79 | } 80 | 81 | bool Point::operator>(const Point &other) const { 82 | return
compare(other) > 0; 83 | } 84 | 85 | bool Point::operator>=(const Point &other) const { 86 | return compare(other) >= 0; 87 | } 88 | -------------------------------------------------------------------------------- /src/core/point.h: -------------------------------------------------------------------------------- 1 | #ifndef POINT_H_ 2 | #define POINT_H_ 3 | 4 | #include 5 | #include "serializer.h" 6 | 7 | /* A (row, column) position, ordered by row and then column. */ struct Point { 8 | unsigned row; 9 | unsigned column; 10 | 11 | static Point min(const Point &left, const Point &right); 12 | static Point max(const Point &left, const Point &right); 13 | static Point max(); 14 | 15 | Point(); 16 | Point(unsigned row, unsigned column); 17 | Point(Deserializer &input); 18 | 19 | int compare(const Point &other) const; 20 | bool is_zero() const; 21 | Point traverse(const Point &other) const; 22 | Point traversal(const Point &other) const; 23 | void serialize(Serializer &output) const; 24 | 25 | bool operator!=(const Point &other) const; 26 | bool operator==(const Point &other) const; 27 | bool operator<(const Point &other) const; 28 | bool operator<=(const Point &other) const; 29 | bool operator>(const Point &other) const; 30 | bool operator>=(const Point &other) const; 31 | }; 32 | 33 | /* Prints the point as "(row, column)". */ inline std::ostream &operator<<(std::ostream &stream, const Point &point) { 34 | return stream << "(" << point.row << ", " << point.column << ")"; 35 | } 36 | 37 | #endif // POINT_H_ 38 | -------------------------------------------------------------------------------- /src/core/range.cc: -------------------------------------------------------------------------------- 1 | #include "range.h" 2 | 3 | /* Range from the zero point to Point::max(). */ Range Range::all_inclusive() { 4 | return Range{Point(), Point::max()}; 5 | } 6 | 7 | /* Offset from start to end, computed with Point::traversal(). */ Point Range::extent() const { 8 | return end.traversal(start); 9 | } 10 | -------------------------------------------------------------------------------- /src/core/range.h: -------------------------------------------------------------------------------- 1 | #ifndef
RANGE_H_ 2 | #define RANGE_H_ 3 | 4 | #include 5 | #include "point.h" 6 | 7 | /* A pair of points, start and end. */ struct Range { 8 | Point start; 9 | Point end; 10 | 11 | static Range all_inclusive(); 12 | 13 | Point extent() const; 14 | 15 | bool operator==(const Range &other) const { 16 | return start == other.start && end == other.end; 17 | } 18 | }; 19 | 20 | /* Prints the range as "(start, end)" using the Point stream operator. */ inline std::ostream &operator<<(std::ostream &stream, const Range &range) { 21 | return stream << "(" << range.start << ", " << range.end << ")"; 22 | } 23 | 24 | #endif // RANGE_H_ 25 | -------------------------------------------------------------------------------- /src/core/regex.cc: -------------------------------------------------------------------------------- 1 | #include "regex.h" 2 | #include 3 | #include "pcre2.h" 4 | 5 | using std::u16string; 6 | using MatchResult = Regex::MatchResult; 7 | 8 | /* Pattern substituted for an empty pattern by the Regex constructor. */ const char16_t EMPTY_PATTERN[] = u".{0}"; 9 | 10 | Regex::Regex() : code{nullptr} {} 11 | 12 | /* Rewrites the pattern before compilation: a backslash-u escape followed by four characters that parse to a nonzero hex value becomes that single UTF-16 unit; any other backslash-u sequence becomes an escaped backslash followed by u; an escaped backslash is copied through as a pair so a following u is not treated as an escape. Note that a value of zero is treated as invalid, and strtol() accepts a leading hex prefix shorter than four digits. */ static u16string preprocess_pattern(const char16_t *pattern, uint32_t length) { 13 | u16string result; 14 | for (unsigned i = 0; i < length;) { 15 | char16_t c = pattern[i]; 16 | 17 | // Replace escape sequences like '\u00cf' with their literal UTF16 value 18 | if (c == '\\' && i + 1 < length) { 19 | if (pattern[i + 1] == 'u') { 20 | if (i + 6 <= length) { 21 | std::string char_code_string(&pattern[i + 2], &pattern[i + 6]); 22 | char16_t char_code_value = strtol(char_code_string.data(), nullptr, 16); 23 | if (char_code_value != 0) { 24 | result += char_code_value; 25 | i += 6; 26 | continue; 27 | } 28 | } 29 | 30 | // Replace invalid '\u' escape sequences with the literal characters '\' and 'u' 31 | result += u"\\\\u"; 32 | i += 2; 33 | continue; 34 | } else if (pattern[i + 1] == '\\') { 35 | // Prevent '\\u' from UTF16 replacement 36 | result += u"\\\\"; 37 | i += 2; 38 | continue; 39 | } 40 | } 41 | 42 | result += c; 43 | i++; 44 | } 45 | 46 | return result; 47 | } 48 | 49 | 50 | Regex::Regex(const char16_t *pattern, uint32_t pattern_length,
u16string *error_message, bool ignore_case, bool unicode) { 51 | if (pattern_length == 0) { 52 | pattern = EMPTY_PATTERN; 53 | pattern_length = 4; 54 | } 55 | 56 | u16string final_pattern = preprocess_pattern(pattern, pattern_length); 57 | 58 | int error_number = 0; 59 | size_t error_offset = 0; 60 | uint32_t options = PCRE2_MULTILINE; 61 | if (ignore_case) options |= PCRE2_CASELESS; 62 | if (unicode) options |= PCRE2_UTF; 63 | code = pcre2_compile( 64 | reinterpret_cast(final_pattern.data()), 65 | final_pattern.size(), 66 | options, 67 | &error_number, 68 | &error_offset, 69 | nullptr 70 | ); 71 | 72 | if (!code) { 73 | uint16_t message_buffer[256]; 74 | size_t length = pcre2_get_error_message(error_number, message_buffer, 256); 75 | error_message->assign(message_buffer, message_buffer + length); 76 | return; 77 | } 78 | 79 | pcre2_jit_compile( 80 | code, 81 | PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_PARTIAL_SOFT 82 | ); 83 | } 84 | 85 | Regex::Regex(const u16string &pattern, u16string *error_message, bool ignore_case, bool unicode) 86 | : Regex(pattern.data(), pattern.size(), error_message, ignore_case, unicode) {} 87 | 88 | Regex::Regex(Regex &&other) : code{other.code} { 89 | other.code = nullptr; 90 | } 91 | 92 | Regex::~Regex() { 93 | if (code) pcre2_code_free(code); 94 | } 95 | 96 | Regex::MatchData::MatchData(const Regex ®ex) 97 | : data{pcre2_match_data_create_from_pattern(regex.code, nullptr)} {} 98 | 99 | Regex::MatchData::~MatchData() { 100 | pcre2_match_data_free(data); 101 | } 102 | 103 | MatchResult Regex::match(const char16_t *string, size_t length, 104 | MatchData &match_data, unsigned options) const { 105 | MatchResult result{MatchResult::None, 0, 0}; 106 | 107 | unsigned int pcre_options = 0; 108 | if (!(options & MatchOptions::IsEndSearch)) pcre_options |= PCRE2_PARTIAL_HARD; 109 | if (!(options & MatchOptions::IsBeginningOfLine)) pcre_options |= PCRE2_NOTBOL; 110 | if (!(options & MatchOptions::IsEndOfLine)) pcre_options |= 
PCRE2_NOTEOL; 111 | 112 | int status = pcre2_match( 113 | code, 114 | reinterpret_cast(string), 115 | length, 116 | 0, 117 | pcre_options, 118 | match_data.data, 119 | nullptr 120 | ); 121 | 122 | if (status < 0) { 123 | switch (status) { 124 | case PCRE2_ERROR_PARTIAL: 125 | result.type = MatchResult::Partial; 126 | result.start_offset = pcre2_get_ovector_pointer(match_data.data)[0]; 127 | result.end_offset = pcre2_get_ovector_pointer(match_data.data)[1]; 128 | break; 129 | case PCRE2_ERROR_NOMATCH: 130 | result.type = MatchResult::None; 131 | break; 132 | default: 133 | result.type = MatchResult::Error; 134 | break; 135 | } 136 | } else { 137 | result.type = MatchResult::Full; 138 | result.start_offset = pcre2_get_ovector_pointer(match_data.data)[0]; 139 | result.end_offset = pcre2_get_ovector_pointer(match_data.data)[1]; 140 | } 141 | 142 | return result; 143 | } 144 | -------------------------------------------------------------------------------- /src/core/regex.h: -------------------------------------------------------------------------------- 1 | #ifndef REGEX_H_ 2 | #define REGEX_H_ 3 | 4 | #include "optional.h" 5 | #include 6 | 7 | struct pcre2_real_code_16; 8 | struct pcre2_real_match_data_16; 9 | struct BuildRegexResult; 10 | 11 | class Regex { 12 | pcre2_real_code_16 *code; 13 | Regex(pcre2_real_code_16 *); 14 | 15 | public: 16 | Regex(); 17 | Regex(const char16_t *, uint32_t, std::u16string *error_message, bool ignore_case = false, bool unicode = false); 18 | Regex(const std::u16string &, std::u16string *error_message, bool ignore_case = false, bool unicode = false); 19 | Regex(Regex &&); 20 | ~Regex(); 21 | 22 | class MatchData { 23 | pcre2_real_match_data_16 *data; 24 | friend class Regex; 25 | 26 | public: 27 | MatchData(const Regex &); 28 | ~MatchData(); 29 | }; 30 | 31 | struct MatchResult { 32 | enum { 33 | None, 34 | Error, 35 | Partial, 36 | Full, 37 | } type; 38 | 39 | size_t start_offset; 40 | size_t end_offset; 41 | }; 42 | 43 | enum 
// Reads fixed-width integers out of a byte vector, least-significant byte
// first (the layout Serializer::append produces).
//
// The Deserializer does not own the bytes: it keeps pointers into the vector
// passed to its constructor, so that vector must outlive it and must not be
// resized while it is in use.
//
// Reading past the end never touches memory outside the input: the result is
// zero and the stream stays exhausted, so every later read is zero as well.
class Deserializer {
  const uint8_t *read_ptr;
  const uint8_t *end_ptr;

  // True when at least sizeof(T) unread bytes remain. `read_ptr` never moves
  // past `end_ptr`, so the difference is never negative.
  template <typename T>
  bool can_read() const {
    return static_cast<size_t>(end_ptr - read_ptr) >= sizeof(T);
  }

public:
  inline Deserializer(const std::vector<uint8_t> &input) :
    read_ptr(input.data()),
    end_ptr(input.data() + input.size()) {};

  // Returns the next value of type T without consuming it, or zero when fewer
  // than sizeof(T) bytes remain.
  template <typename T>
  T peek() const {
    T value = 0;
    if (can_read<T>()) {
      for (auto i = 0u; i < sizeof(T); i++) {
        value |= static_cast<T>(read_ptr[i]) << static_cast<T>(8 * i);
      }
    }
    return value;
  }

  // Returns the next value of type T and consumes it. When fewer than
  // sizeof(T) bytes remain, returns zero and discards the leftover bytes
  // instead of stepping past the end of the input.
  template <typename T>
  T read() {
    T value = peek<T>();
    if (can_read<T>()) {
      read_ptr += sizeof(T);
    } else {
      read_ptr = end_ptr;
    }
    return value;
  }
};
| #include "range.h" 10 | #include "regex.h" 11 | #include "marker-index.h" 12 | 13 | class TextBuffer { 14 | struct Layer; 15 | Layer *base_layer; 16 | Layer *top_layer; 17 | void squash_layers(const std::vector &); 18 | void consolidate_layers(); 19 | 20 | public: 21 | static uint32_t MAX_CHUNK_SIZE_TO_COPY; 22 | 23 | TextBuffer(); 24 | TextBuffer(std::u16string &&); 25 | TextBuffer(const std::u16string &text); 26 | ~TextBuffer(); 27 | 28 | uint32_t size() const; 29 | Point extent() const; 30 | optional line_for_row(uint32_t row); 31 | void with_line_for_row(uint32_t row, const std::function &); 32 | 33 | optional line_length_for_row(uint32_t row); 34 | const uint16_t *line_ending_for_row(uint32_t row); 35 | ClipResult clip_position(Point); 36 | Point position_for_offset(uint32_t offset); 37 | std::u16string text(); 38 | uint16_t character_at(Point position) const; 39 | std::u16string text_in_range(Range range); 40 | void set_text(std::u16string &&); 41 | void set_text(const std::u16string &); 42 | void set_text_in_range(Range old_range, std::u16string &&); 43 | void set_text_in_range(Range old_range, const std::u16string &); 44 | bool is_modified() const; 45 | bool has_astral(); 46 | std::vector chunks() const; 47 | 48 | void reset(Text &&); 49 | void flush_changes(); 50 | void serialize_changes(Serializer &); 51 | bool deserialize_changes(Deserializer &); 52 | const Text &base_text() const; 53 | 54 | optional find(const Regex &, Range range = Range::all_inclusive()) const; 55 | std::vector find_all(const Regex &, Range range = Range::all_inclusive()) const; 56 | unsigned find_and_mark_all(MarkerIndex &, MarkerIndex::MarkerId, bool exclusive, 57 | const Regex &, Range range = Range::all_inclusive()) const; 58 | 59 | struct SubsequenceMatch { 60 | std::u16string word; 61 | std::vector positions; 62 | std::vector match_indices; 63 | int32_t score; 64 | bool operator==(const SubsequenceMatch &) const; 65 | }; 66 | 67 | std::vector 
find_words_with_subsequence_in_range(const std::u16string &, const std::u16string &, Range) const; 68 | 69 | class Snapshot { 70 | friend class TextBuffer; 71 | TextBuffer &buffer; 72 | Layer &layer; 73 | Layer &base_layer; 74 | 75 | Snapshot(TextBuffer &, Layer &, Layer &); 76 | 77 | public: 78 | ~Snapshot(); 79 | void flush_preceding_changes(); 80 | 81 | uint32_t size() const; 82 | Point extent() const; 83 | uint32_t line_length_for_row(uint32_t) const; 84 | std::vector chunks() const; 85 | std::vector chunks_in_range(Range) const; 86 | std::vector> primitive_chunks() const; 87 | std::u16string text() const; 88 | std::u16string text_in_range(Range) const; 89 | const Text &base_text() const; 90 | optional find(const Regex &, Range range = Range::all_inclusive()) const; 91 | std::vector find_all(const Regex &, Range range = Range::all_inclusive()) const; 92 | std::vector find_words_with_subsequence_in_range(std::u16string query, const std::u16string &extra_word_characters, Range range) const; 93 | }; 94 | 95 | friend class Snapshot; 96 | Snapshot *create_snapshot(); 97 | 98 | bool is_modified(const Snapshot *) const; 99 | Patch get_inverted_changes(const Snapshot *) const; 100 | 101 | size_t layer_count() const; 102 | std::string get_dot_graph() const; 103 | }; 104 | 105 | #endif // SUPERSTRING_TEXT_BUFFER_H_ 106 | -------------------------------------------------------------------------------- /src/core/text-diff.cc: -------------------------------------------------------------------------------- 1 | #include "text-diff.h" 2 | #include "libmba-diff.h" 3 | #include "text-slice.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using std::move; 10 | using std::ostream; 11 | using std::vector; 12 | 13 | static Point previous_column(Point position) { 14 | assert(position.column > 0); 15 | position.column--; 16 | return position; 17 | } 18 | 19 | static int MAX_EDIT_DISTANCE = 4 * 1024; 20 | 21 | Patch text_diff(const Text &old_text, const Text &new_text) { 
22 | Patch result; 23 | Text empty; 24 | Text cr{u"\r"}; 25 | Text lf{u"\n"}; 26 | 27 | vector edit_script; 28 | 29 | int edit_distance = diff( 30 | old_text.content.data(), 31 | old_text.content.size(), 32 | new_text.content.data(), 33 | new_text.content.size(), 34 | MAX_EDIT_DISTANCE, 35 | &edit_script 36 | ); 37 | 38 | if (edit_distance == -1 || edit_distance >= MAX_EDIT_DISTANCE) { 39 | result.splice(Point(), old_text.extent(), new_text.extent(), old_text, new_text); 40 | return result; 41 | } 42 | 43 | size_t old_offset = 0; 44 | size_t new_offset = 0; 45 | Point old_position; 46 | Point new_position; 47 | 48 | for (struct diff_edit &edit : edit_script) { 49 | switch (edit.op) { 50 | case DIFF_MATCH: 51 | if (edit.len == 0) continue; 52 | 53 | // If the previous change ended between a CR and an LF, then expand 54 | // that change downward to include the LF. 55 | if (new_text.at(new_offset) == '\n' && 56 | ((old_offset > 0 && old_text.at(old_offset - 1) == '\r') || 57 | (new_offset > 0 && new_text.at(new_offset - 1) == '\r'))) { 58 | result.splice(new_position, Point(1, 0), Point(1, 0), lf, lf); 59 | old_position.row++; 60 | old_position.column = 0; 61 | new_position.row++; 62 | new_position.column = 0; 63 | } 64 | 65 | old_offset += edit.len; 66 | new_offset += edit.len; 67 | old_position = old_text.position_for_offset(old_offset, 0, false); 68 | new_position = new_text.position_for_offset(new_offset, 0, false); 69 | 70 | // If the next change starts between a CR and an LF, then expand that 71 | // change leftward to include the CR. 
72 | if (new_text.at(new_offset - 1) == '\r' && 73 | ((old_offset < old_text.size() && old_text.at(old_offset) == '\n') || 74 | (new_offset < new_text.size() && new_text.at(new_offset) == '\n'))) { 75 | result.splice(previous_column(new_position), Point(0, 1), Point(0, 1), cr, cr); 76 | } 77 | break; 78 | 79 | case DIFF_DELETE: { 80 | uint32_t deletion_end = old_offset + edit.len; 81 | Text deleted_text{old_text.begin() + old_offset, old_text.begin() + deletion_end}; 82 | old_offset = deletion_end; 83 | Point next_old_position = old_text.position_for_offset(old_offset, 0, false); 84 | result.splice(new_position, next_old_position.traversal(old_position), Point(), deleted_text, empty); 85 | old_position = next_old_position; 86 | break; 87 | } 88 | 89 | case DIFF_INSERT: { 90 | uint32_t insertion_end = new_offset + edit.len; 91 | Text inserted_text{new_text.begin() + new_offset, new_text.begin() + insertion_end}; 92 | new_offset = insertion_end; 93 | Point next_new_position = new_text.position_for_offset(new_offset, 0, false); 94 | result.splice(new_position, Point(), next_new_position.traversal(new_position), empty, inserted_text); 95 | new_position = next_new_position; 96 | break; 97 | } 98 | } 99 | } 100 | 101 | return result; 102 | } 103 | -------------------------------------------------------------------------------- /src/core/text-diff.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_TEXT_DIFF_H 2 | #define SUPERSTRING_TEXT_DIFF_H 3 | 4 | #include "patch.h" 5 | #include "text.h" 6 | 7 | Patch text_diff(const Text &old_text, const Text &new_text); 8 | 9 | #endif // SUPERSTRING_TEXT_DIFF_H -------------------------------------------------------------------------------- /src/core/text-slice.cc: -------------------------------------------------------------------------------- 1 | #include "text-slice.h" 2 | #include "text.h" 3 | #include 4 | 5 | TextSlice::TextSlice() : 6 | text{nullptr} {} 7 | 8 | 
TextSlice::TextSlice(const Text *text, Point start_position, Point end_position) : 9 | text{text}, start_position{start_position}, end_position{end_position} {} 10 | 11 | TextSlice::TextSlice(const Text &text) : 12 | text{&text}, start_position{Point()}, end_position{text.extent()} {} 13 | 14 | size_t TextSlice::start_offset() const { 15 | if (start_position.is_zero()) return 0; 16 | assert(start_position.row < text->line_offsets.size()); 17 | return text->line_offsets[start_position.row] + start_position.column; 18 | } 19 | 20 | size_t TextSlice::end_offset() const { 21 | if (end_position.is_zero()) return 0; 22 | return text->line_offsets[end_position.row] + end_position.column; 23 | } 24 | 25 | bool TextSlice::is_valid() const { 26 | uint32_t start_offset = this->start_offset(); 27 | uint32_t end_offset = this->end_offset(); 28 | 29 | if (start_offset > end_offset) { 30 | return false; 31 | } 32 | 33 | if (start_position.row + 1 < text->line_offsets.size()) { 34 | if (start_offset >= text->line_offsets[start_position.row + 1]) { 35 | return false; 36 | } 37 | } 38 | 39 | if (end_position.row + 1 < text->line_offsets.size()) { 40 | if (end_offset >= text->line_offsets[end_position.row + 1]) { 41 | return false; 42 | } 43 | } 44 | 45 | if (end_offset > text->size()) { 46 | return false; 47 | } 48 | 49 | return true; 50 | } 51 | 52 | std::pair TextSlice::split(Point split_point) const { 53 | Point absolute_split_point = Point::min( 54 | end_position, 55 | start_position.traverse(split_point) 56 | ); 57 | 58 | return std::pair{ 59 | TextSlice{text, start_position, absolute_split_point}, 60 | TextSlice{text, absolute_split_point, end_position} 61 | }; 62 | } 63 | 64 | std::pair TextSlice::split(uint32_t split_offset) const { 65 | return split(position_for_offset(split_offset)); 66 | } 67 | 68 | Point TextSlice::position_for_offset(uint32_t offset, uint32_t min_row) const { 69 | return text->position_for_offset( 70 | offset + start_offset(), 71 | start_position.row + 
min_row 72 | ).traversal(start_position); 73 | } 74 | 75 | TextSlice TextSlice::prefix(Point prefix_end) const { 76 | return split(prefix_end).first; 77 | } 78 | 79 | TextSlice TextSlice::prefix(uint32_t prefix_end) const { 80 | return split(prefix_end).first; 81 | } 82 | 83 | TextSlice TextSlice::suffix(Point suffix_start) const { 84 | return split(suffix_start).second; 85 | } 86 | 87 | TextSlice TextSlice::slice(Range range) const { 88 | return suffix(range.start).prefix(range.extent()); 89 | } 90 | 91 | Point TextSlice::extent() const { 92 | return end_position.traversal(start_position); 93 | } 94 | 95 | const char16_t *TextSlice::data() const { 96 | return text->data() + start_offset(); 97 | } 98 | 99 | uint32_t TextSlice::size() const { 100 | return end_offset() - start_offset(); 101 | } 102 | 103 | bool TextSlice::empty() const { 104 | return size() == 0; 105 | } 106 | 107 | Text::const_iterator TextSlice::begin() const { 108 | return text->cbegin() + start_offset(); 109 | } 110 | 111 | Text::const_iterator TextSlice::end() const { 112 | return text->cbegin() + end_offset(); 113 | } 114 | 115 | uint16_t TextSlice::front() const { 116 | return *begin(); 117 | } 118 | 119 | uint16_t TextSlice::back() const { 120 | return *(end() - 1); 121 | } 122 | -------------------------------------------------------------------------------- /src/core/text-slice.h: -------------------------------------------------------------------------------- 1 | #ifndef FLAT_TEXT_SLICE_H_ 2 | #define FLAT_TEXT_SLICE_H_ 3 | 4 | #include 5 | #include "point.h" 6 | #include "range.h" 7 | #include "text.h" 8 | 9 | class TextSlice { 10 | public: 11 | const Text *text; 12 | Point start_position; 13 | Point end_position; 14 | 15 | TextSlice(const Text *text, Point start_position, Point end_position); 16 | size_t start_offset() const; 17 | size_t end_offset() const; 18 | 19 | TextSlice(); 20 | TextSlice(const Text &text); 21 | std::pair split(Point) const; 22 | std::pair split(uint32_t) const; 23 
| TextSlice prefix(Point) const; 24 | TextSlice prefix(uint32_t) const; 25 | TextSlice suffix(Point) const; 26 | TextSlice slice(Range range) const; 27 | Point position_for_offset(uint32_t offset, uint32_t min_row = 0) const; 28 | Point extent() const; 29 | uint16_t front() const; 30 | uint16_t back() const; 31 | bool is_valid() const; 32 | 33 | const char16_t *data() const; 34 | uint32_t size() const; 35 | bool empty() const; 36 | 37 | Text::const_iterator begin() const; 38 | Text::const_iterator end() const; 39 | }; 40 | 41 | #endif // FLAT_TEXT_SLICE_H_ 42 | -------------------------------------------------------------------------------- /src/core/text.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_TEXT_H_ 2 | #define SUPERSTRING_TEXT_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "serializer.h" 9 | #include "point.h" 10 | #include "optional.h" 11 | 12 | class TextSlice; 13 | 14 | struct ClipResult { 15 | Point position; 16 | uint32_t offset; 17 | }; 18 | 19 | class Text { 20 | friend class TextSlice; 21 | 22 | public: 23 | static Point extent(const std::u16string &); 24 | 25 | std::u16string content; 26 | std::vector line_offsets; 27 | Text(const std::u16string &&, const std::vector &&); 28 | 29 | using const_iterator = std::u16string::const_iterator; 30 | 31 | Text(); 32 | Text(const std::u16string &); 33 | Text(std::u16string &&); 34 | Text(TextSlice slice); 35 | Text(Deserializer &deserializer); 36 | template 37 | Text(Iter begin, Iter end) : Text(std::u16string{begin, end}) {} 38 | 39 | static Text concat(TextSlice a, TextSlice b); 40 | static Text concat(TextSlice a, TextSlice b, TextSlice c); 41 | void splice(Point start, Point deletion_extent, TextSlice inserted_slice); 42 | 43 | uint16_t at(Point position) const; 44 | uint16_t at(uint32_t offset) const; 45 | const_iterator begin() const; 46 | const_iterator end() const; 47 | inline const_iterator cbegin() const { 
// Helpers for working with {row, column} points in the JavaScript tests.
//
// Each helper is a module-level function declaration so that helpers can call
// one another by name. The name of a function *expression* is visible only
// inside that function, so the previous `exports.min = function min ...`
// form could not reach `compare`.

// A point whose row and column are both Infinity.
const INFINITY_POINT = Object.freeze({row: Infinity, column: Infinity})

// Three-way comparison of two numbers: -1, 0 or 1.
function compareNumbers (a, b) {
  if (a < b) {
    return -1
  } else if (a > b) {
    return 1
  } else {
    return 0
  }
}

// Three-way comparison of two points: by row first, then by column.
function compare (a, b) {
  if (a.row === b.row) {
    return compareNumbers(a.column, b.column)
  } else {
    return compareNumbers(a.row, b.row)
  }
}

// True when the point is (0, 0).
function isZero (point) {
  return (point.row === 0 && point.column === 0)
}

// True when either coordinate of the point is Infinity.
function isInfinity (point) {
  return (point.row === Infinity || point.column === Infinity)
}

// Returns whichever of the two points compares lower; `a` on a tie.
function min (a, b) {
  if (compare(a, b) <= 0) {
    return a
  } else {
    return b
  }
}

// Returns the point reached by moving `distance` from `start`. A distance
// with no rows adds to the column; otherwise rows are added and the column is
// taken from the distance.
function traverse (start, distance) {
  if (distance.row === 0) {
    return {
      row: start.row,
      column: start.column + distance.column
    }
  } else {
    return {
      row: start.row + distance.row,
      column: distance.column
    }
  }
}

// Returns the distance from `start` to `end`, in the form `traverse` accepts.
function traversalDistance (end, start) {
  if (end.row === start.row) {
    return {row: 0, column: end.column - start.column}
  } else {
    return {row: end.row - start.row, column: end.column}
  }
}

// Formats a point as "(row, column)".
function format (point) {
  return `(${point.row}, ${point.column})`
}

exports.INFINITY_POINT = INFINITY_POINT
exports.compare = compare
exports.isZero = isZero
exports.isInfinity = isInfinity
exports.min = min
exports.traverse = traverse
exports.traversalDistance = traversalDistance
exports.format = format
this.getExtent()}, 47 | regex 48 | ) 49 | } 50 | 51 | searchAllInRange (range, regex) { 52 | const ranges = [] 53 | const text = this.getTextInRange(range.start, range.end) 54 | let match 55 | while (match = regex.exec(text)) { 56 | const start = pointHelpers.traverse(range.start, textHelpers.getExtent(text.slice(0, match.index))) 57 | const extent = textHelpers.getExtent(match[0]) 58 | ranges.push({start, end: pointHelpers.traverse(start, extent)}) 59 | if (match[0].length === 0) regex.lastIndex++ 60 | } 61 | return ranges 62 | } 63 | 64 | getExtent () { 65 | const row = this.lines.length - 1 66 | return {row, column: this.lines[row].length} 67 | } 68 | 69 | performRandomSplice (upperCase) { 70 | let deletedRange = this.buildRandomRange() 71 | let start = deletedRange.start 72 | let deletedText = this.getTextInRange(start, deletedRange.end) 73 | let deletedExtent = pointHelpers.traversalDistance(deletedRange.end, deletedRange.start) 74 | let insertedText = this.buildRandomLines(0, 3, upperCase).join('\n') 75 | let insertedExtent = textHelpers.getExtent(insertedText) 76 | this.splice(start, deletedExtent, insertedText) 77 | return {start, deletedExtent, insertedExtent, deletedText, insertedText} 78 | } 79 | 80 | splice (start, deletedExtent, insertedText) { 81 | let deletedText = this.getTextInRange(start, pointHelpers.traverse(start, deletedExtent)) 82 | let end = pointHelpers.traverse(start, deletedExtent) 83 | let replacementLines = insertedText.split('\n') 84 | 85 | replacementLines[0] = 86 | this.lines[start.row].substring(0, start.column) + replacementLines[0] 87 | replacementLines[replacementLines.length - 1] = 88 | replacementLines[replacementLines.length - 1] + this.lines[end.row].substring(end.column) 89 | 90 | this.lines.splice(start.row, deletedExtent.row + 1, ...replacementLines) 91 | return deletedText 92 | } 93 | 94 | characterAtPosition ({row, column}) { 95 | return this.lines[row][column] 96 | } 97 | 98 | buildRandomLines (min, max, upperCase) { 99 
// Helpers for measuring and slicing strings by {row, column} points in the
// JavaScript tests.
//
// Each helper is a module-level function declaration so that `getPrefix` and
// `getSuffix` can call `characterIndexForPoint` by name. The name of a
// function *expression* is visible only inside that function.

// Shared global regex; every user resets `lastIndex` before scanning.
const NEWLINE_REG_EXP = /\n/g

// Returns the {row, column} extent of `text`: the number of newlines it
// contains and the length of its last line.
function getExtent (text) {
  let lastLineStartIndex = 0
  let row = 0
  NEWLINE_REG_EXP.lastIndex = 0
  while (NEWLINE_REG_EXP.exec(text)) {
    row++
    lastLineStartIndex = NEWLINE_REG_EXP.lastIndex
  }
  let column = text.length - lastLineStartIndex
  return {row, column}
}

// Returns the index in `text` of the character at `point`. When `text` has
// fewer rows than `point.row`, returns `text.length`.
function characterIndexForPoint (text, point) {
  let {row, column} = point
  NEWLINE_REG_EXP.lastIndex = 0
  while (row-- > 0) {
    let matches = NEWLINE_REG_EXP.exec(text)
    if (matches == null) {
      return text.length
    }
  }
  // lastIndex now sits just past the newline that ends the previous row.
  return NEWLINE_REG_EXP.lastIndex + column
}

// Returns the part of `text` that comes before the point `prefixExtent`.
function getPrefix (text, prefixExtent) {
  return text.substring(0, characterIndexForPoint(text, prefixExtent))
}

// Returns the part of `text` that starts at the point `prefixExtent`.
function getSuffix (text, prefixExtent) {
  return text.substring(characterIndexForPoint(text, prefixExtent))
}

exports.getExtent = getExtent
exports.getPrefix = getPrefix
exports.getSuffix = getSuffix
exports.characterIndexForPoint = characterIndexForPoint
// The input ends partway through a multi-byte UTF-8 sequence. A plain decode
// stops before the incomplete bytes; a decode with the trailing `true`
// argument consumes them and emits replacement characters.
TEST_CASE("EncodingConversion::decode - invalid byte sequences at the end of the input") {
  auto conversion = transcoding_from("UTF-8");
  string input("ab" "\xf0\x9f"); // incomplete 4-byte code point for '😁' at the end of the stream

  // Without the end-of-input flag only the two complete bytes are consumed.
  u16string string;
  size_t bytes_encoded = conversion->decode(string, input.data(), input.size());
  REQUIRE(bytes_encoded == 2);
  REQUIRE(string == u"ab");

  // Passing `true` as the last argument (presumably the `is_end` flag; confirm
  // against encoding-conversion.h) makes the decoder consume all four bytes
  // and emit one U+FFFD for each of the two dangling bytes.
  string.clear();
  bytes_encoded = conversion->decode(string, input.data(), input.size(), true);
  REQUIRE(bytes_encoded == 4);
  REQUIRE(string == u"ab" "\ufffd" "\ufffd");
}
= 0; 79 | 80 | // The 'γ' requires to UTF-8 bytes, so it doesn't fit in the output buffer 81 | bytes_encoded = conversion->encode( 82 | string, &start, string.size(), output.data(), output.size()); 83 | REQUIRE(std::string(output.data(), bytes_encoded) == "ab"); 84 | 85 | bytes_encoded = conversion->encode( 86 | string, &start, string.size(), output.data(), output.size()); 87 | REQUIRE(std::string(output.data(), bytes_encoded) == "γd"); 88 | 89 | bytes_encoded = conversion->encode( 90 | string, &start, string.size(), output.data(), output.size()); 91 | REQUIRE(std::string(output.data(), bytes_encoded) == "efg"); 92 | } 93 | 94 | TEST_CASE("EncodingConversion::encode - four-byte UTF-16 characters") { 95 | auto conversion = transcoding_to("UTF-8"); 96 | u16string string = u"ab" "\xd83d" "\xde01" "cd"; // 'ab😁cd' 97 | 98 | vector output(10); 99 | size_t bytes_encoded = 0, start = 0; 100 | 101 | bytes_encoded = conversion->encode( 102 | string, &start, string.size(), output.data(), output.size()); 103 | REQUIRE(std::string(output.data(), bytes_encoded) == "ab" "\xf0\x9f" "\x98\x81" "cd"); 104 | 105 | // The end offset, 3, is in the middle of the 4-byte character. 106 | start = 0; 107 | bytes_encoded = conversion->encode( 108 | string, &start, 3, output.data(), output.size()); 109 | REQUIRE(std::string(output.data(), bytes_encoded) == "ab"); 110 | 111 | // We can pick up where we left off. 
112 | bytes_encoded += conversion->encode( 113 | string, &start, string.size(), output.data() + bytes_encoded, output.size() - bytes_encoded); 114 | REQUIRE(std::string(output.data(), bytes_encoded) == "ab" "\xf0\x9f" "\x98\x81" "cd"); 115 | } 116 | 117 | TEST_CASE("EncodingConversion::encode - invalid characters in the middle of the string") { 118 | auto conversion = transcoding_to("UTF-8"); 119 | u16string string = u"abc" "\xD800" "def"; 120 | 121 | vector output(10); 122 | size_t bytes_encoded = 0, start = 0; 123 | 124 | bytes_encoded = conversion->encode( 125 | string, &start, string.size(), output.data(), output.size()); 126 | REQUIRE(std::string(output.data(), bytes_encoded) == "abc" "\ufffd" "def"); 127 | 128 | // Here, the invalid character occurs at the end of a chunk. 129 | start = 0; 130 | bytes_encoded = conversion->encode( 131 | string, &start, 4, output.data(), output.size()); 132 | bytes_encoded += conversion->encode( 133 | string, &start, string.size(), output.data() + bytes_encoded, output.size() - bytes_encoded); 134 | REQUIRE(std::string(output.data(), bytes_encoded) == "abc" "\ufffd" "def"); 135 | } 136 | 137 | TEST_CASE("EncodingConversion::encode - invalid characters at the end of the string") { 138 | auto conversion = transcoding_to("UTF-8"); 139 | u16string string = u"abc" "\xD800"; 140 | 141 | vector output(10); 142 | size_t bytes_encoded = 0, start = 0; 143 | 144 | bytes_encoded = conversion->encode( 145 | string, &start, string.size(), output.data(), output.size(), true); 146 | REQUIRE(std::string(output.data(), bytes_encoded) == "abc" "\ufffd"); 147 | } 148 | -------------------------------------------------------------------------------- /test/native/test-helpers.cc: -------------------------------------------------------------------------------- 1 | #include "test-helpers.h" 2 | #include "patch.h" 3 | #include "range.h" 4 | #include "text-buffer.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using 
std::vector; 12 | using std::u16string; 13 | 14 | bool text_eq(const Text *left, const Text *right) { 15 | if (left == right) 16 | return true; 17 | if (!left && right) 18 | return false; 19 | if (left && !right) 20 | return false; 21 | return *left == *right; 22 | } 23 | 24 | bool operator==(const Patch::Change &left, const Patch::Change &right) { 25 | return left.old_start == right.old_start && 26 | left.new_start == right.new_start && left.old_end == right.old_end && 27 | left.new_end == right.new_end && 28 | text_eq(left.old_text, right.old_text) && 29 | text_eq(left.new_text, right.new_text); 30 | } 31 | 32 | std::unique_ptr get_text(const u16string content) { 33 | return std::unique_ptr { new Text(content) }; 34 | } 35 | 36 | std::u16string get_random_string(Generator &rand, uint32_t character_count) { 37 | u16string content; 38 | content.reserve(character_count); 39 | for (uint i = 0; i < character_count; i++) { 40 | if (rand() % 20 < 1) { 41 | content.push_back('\n'); 42 | } else if (rand() % 20 < 1) { 43 | content.push_back('\r'); 44 | content.push_back('\n'); 45 | i++; 46 | } else if (rand() % 20 < 1) { 47 | content.push_back('\r'); 48 | } else { 49 | uint16_t character = 'a' + (rand() % 26); 50 | content.push_back(character); 51 | } 52 | } 53 | return content; 54 | } 55 | 56 | Text get_random_text(Generator &rand) { 57 | return Text {get_random_string(rand)}; 58 | } 59 | 60 | Range get_random_range(Generator &rand, const Text &text) { 61 | uint32_t start_row = rand() % (text.extent().row + 1); 62 | uint32_t max_column = text.line_length_for_row(start_row); 63 | uint32_t start_column = 0; 64 | if (max_column > 0) start_column = rand() % max_column; 65 | Point start {start_row, start_column}; 66 | Point end {start}; 67 | while (rand() % 10 < 3) { 68 | end = text.clip_position(end.traverse(Point(rand() % 2, rand() % 10))).position; 69 | } 70 | return {start, end}; 71 | } 72 | 73 | Range get_random_range(Generator &rand, TextBuffer &buffer) { 74 | return 
get_random_range(rand, buffer.text()); 75 | } 76 | -------------------------------------------------------------------------------- /test/native/test-helpers.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPERSTRING_TEST_HELPERS_H 2 | #define SUPERSTRING_TEST_HELPERS_H 3 | 4 | #include "patch.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "range.h" 12 | #include "text.h" 13 | #include "text-buffer.h" 14 | #include 15 | 16 | using std::cout; 17 | using std::cerr; 18 | 19 | class TextBuffer; 20 | 21 | class Generator { 22 | std::default_random_engine engine; 23 | std::uniform_int_distribution distribution; 24 | 25 | public: 26 | Generator(uint32_t seed) : engine{seed} {} 27 | uint32_t operator()() { return distribution(engine); } 28 | }; 29 | 30 | bool operator==(const Patch::Change &left, const Patch::Change &right); 31 | std::unique_ptr get_text(const std::u16string content); 32 | std::u16string get_random_string(Generator &, uint32_t character_count = 20); 33 | Text get_random_text(Generator &); 34 | Range get_random_range(Generator &, const Text &); 35 | Range get_random_range(Generator &, TextBuffer &); 36 | 37 | namespace std { 38 | inline std::ostream &operator<<(std::ostream &stream, const std::u16string &text) { 39 | for (uint16_t character : text) { 40 | if (character == '\r') { 41 | stream << "\\\\r"; 42 | } else if (character < 255) { 43 | stream << static_cast(character); 44 | } else { 45 | stream << "\\u"; 46 | stream << character; 47 | } 48 | } 49 | 50 | return stream; 51 | } 52 | 53 | inline std::ostream &operator<<(std::ostream &stream, const TextBuffer::SubsequenceMatch &match) { 54 | stream << "SubsequenceMatch{ word: " << match.word << ", positions: ["; 55 | 56 | for (size_t i = 0; i < match.positions.size(); i++) { 57 | stream << match.positions[i]; 58 | if (i < match.positions.size() - 1) stream << ", "; 59 | } 60 | 61 | stream << "], 
match_indices: ["; 62 | 63 | for (size_t i = 0; i < match.match_indices.size(); i++) { 64 | stream << match.match_indices[i]; 65 | if (i < match.match_indices.size() - 1) stream << ", "; 66 | } 67 | 68 | stream << "], score: " << match.score << " }"; 69 | 70 | return stream; 71 | } 72 | } 73 | 74 | template 75 | std::ostream &operator<<(std::ostream &stream, const optional &value) { 76 | if (value) { 77 | return stream << *value; 78 | } else { 79 | return stream << "nullopt"; 80 | } 81 | } 82 | 83 | #endif // SUPERSTRING_TEST_HELPERS_H 84 | -------------------------------------------------------------------------------- /test/native/tests.cc: -------------------------------------------------------------------------------- 1 | // Because this file is slow to compile, we separate it from patch_test.cc 2 | // for a faster feedback loop 3 | 4 | #define CATCH_CONFIG_MAIN 5 | #include 6 | -------------------------------------------------------------------------------- /test/native/text-diff-test.cc: -------------------------------------------------------------------------------- 1 | #include "test-helpers.h" 2 | #include "text.h" 3 | #include "text-slice.h" 4 | #include "text-diff.h" 5 | 6 | using Change = Patch::Change; 7 | using std::vector; 8 | 9 | TEST_CASE("text_diff - multiple lines") { 10 | Text old_text{u"abc\nghi\njk\nmno\n"}; 11 | Text new_text{u"abc\ndef\nghi\njkl\nmno\n"}; 12 | 13 | Patch patch = text_diff(old_text, new_text); 14 | 15 | REQUIRE(patch.get_changes() == vector({ 16 | Change{ 17 | Point{1, 0}, Point{1, 0}, 18 | Point{1, 0}, Point{2, 0}, 19 | get_text(u"").get(), get_text(u"def\n").get(), 20 | 0, 0, 0 21 | }, 22 | Change{ 23 | Point{2, 2}, Point{2, 2}, 24 | Point{3, 2}, Point{3, 3}, 25 | get_text(u"").get(), get_text(u"l").get(), 26 | 0, 3, 0 27 | } 28 | })); 29 | 30 | // We temporarily move the Text's content in order to diff it without 31 | // copying. Check that the text is unchanged afterwards. 
32 | REQUIRE(old_text == Text(u"abc\nghi\njk\nmno\n")); 33 | REQUIRE(new_text == Text(u"abc\ndef\nghi\njkl\nmno\n")); 34 | } 35 | 36 | TEST_CASE("text_diff - single line") { 37 | Text old_text{u"abcdefghij"}; 38 | Text new_text{u"abcxyefij"}; 39 | 40 | Patch patch = text_diff(old_text, new_text); 41 | 42 | REQUIRE(patch.get_changes() == vector({ 43 | Change{ 44 | Point{0, 3}, Point{0, 4}, 45 | Point{0, 3}, Point{0, 5}, 46 | get_text(u"d").get(), get_text(u"xy").get(), 47 | 0, 0, 0 48 | 49 | }, 50 | Change{ 51 | Point{0, 6}, Point{0, 8}, 52 | Point{0, 7}, Point{0, 7}, 53 | get_text(u"gh").get(), get_text(u"").get(), 54 | 1, 2, 0 55 | }, 56 | })); 57 | } 58 | 59 | TEST_CASE("text_diff - old text is empty") { 60 | Text old_text{u""}; 61 | Text new_text{u"abc\ndef\nghi\njkl\n"}; 62 | 63 | Patch patch = text_diff(old_text, new_text); 64 | 65 | REQUIRE(patch.get_changes() == vector({ 66 | Change{ 67 | Point{0, 0}, Point{0, 0}, 68 | Point{0, 0}, Point{4, 0}, 69 | get_text(u"").get(), get_text(u"abc\ndef\nghi\njkl\n").get(), 70 | 0, 0, 0 71 | }, 72 | })); 73 | } 74 | 75 | TEST_CASE("text_diff - old text is a prefix of new text") { 76 | Text old_text{u"abc\ndef\n"}; 77 | Text new_text{u"abc\ndef\nghi\njkl\n"}; 78 | 79 | Patch patch = text_diff(old_text, new_text); 80 | 81 | REQUIRE(patch.get_changes() == vector({ 82 | Change{ 83 | Point{2, 0}, Point{2, 0}, 84 | Point{2, 0}, Point{4, 0}, 85 | get_text(u"").get(), get_text(u"ghi\njkl\n").get(), 86 | 0, 0, 0 87 | }, 88 | })); 89 | } 90 | 91 | TEST_CASE("text_diff - old text is a suffix of new text") { 92 | Text old_text{u"ghi\njkl\n"}; 93 | Text new_text{u"abc\ndef\nghi\njkl\n"}; 94 | 95 | Patch patch = text_diff(old_text, new_text); 96 | 97 | REQUIRE(patch.get_changes() == vector({ 98 | Change{ 99 | Point{0, 0}, Point{0, 0}, 100 | Point{0, 0}, Point{2, 0}, 101 | get_text(u"").get(), get_text(u"abc\ndef\n").get(), 102 | 0, 0, 0 103 | }, 104 | })); 105 | } 106 | 107 | TEST_CASE("text_diff - randomized changes") { 108 | auto t = 
time(nullptr); 109 | for (uint i = 0; i < 100; i++) { 110 | uint32_t seed = t * 1000 + i; 111 | Generator rand(seed); 112 | cout << "seed: " << seed << "\n"; 113 | 114 | Text old_text{get_random_string(rand, 100)}; 115 | Text new_text = old_text; 116 | 117 | // cout << "extent: " << new_text.extent() << " text:\n" << new_text << "\n\n"; 118 | 119 | for (uint j = 0; j < 1 + rand() % 10; j++) { 120 | // cout << "j: " << j << "\n"; 121 | 122 | Range deleted_range = get_random_range(rand, new_text); 123 | Text inserted_text{get_random_string(rand, 3)}; 124 | 125 | new_text.splice(deleted_range.start, deleted_range.extent(), inserted_text); 126 | 127 | // cout << "replace " << deleted_range << " with " << inserted_text << "\n\n"; 128 | // cout << "extent: " << new_text.extent() << " text:\n" << new_text << "\n\n"; 129 | } 130 | 131 | Patch patch = text_diff(old_text, new_text); 132 | 133 | for (const Change &change : patch.get_changes()) { 134 | REQUIRE( 135 | *change.new_text == 136 | Text(TextSlice(new_text).slice(Range{change.new_start, change.new_end})) 137 | ); 138 | 139 | old_text.splice( 140 | change.new_start, 141 | change.old_end.traversal(change.old_start), 142 | *change.new_text 143 | ); 144 | } 145 | 146 | REQUIRE(old_text == new_text); 147 | } 148 | } -------------------------------------------------------------------------------- /test/native/text-test.cc: -------------------------------------------------------------------------------- 1 | #include "test-helpers.h" 2 | #include "text.h" 3 | #include "text-slice.h" 4 | 5 | TEST_CASE("Text::split") { 6 | Text text {u"abc\ndef\r\nghi"}; 7 | TextSlice base_slice {text}; 8 | 9 | { 10 | auto slices = base_slice.split({0, 2}); 11 | REQUIRE(Text(slices.first) == Text(u"ab")); 12 | REQUIRE(Text(slices.second) == Text(u"c\ndef\r\nghi")); 13 | } 14 | 15 | { 16 | auto slices = base_slice.split({1, 2}); 17 | REQUIRE(Text(slices.first) == Text(u"abc\nde")); 18 | REQUIRE(Text(slices.second) == Text(u"f\r\nghi")); 19 | } 
20 | 21 | { 22 | auto slices = base_slice.split({1, 3}); 23 | REQUIRE(Text(slices.first) == Text(u"abc\ndef")); 24 | REQUIRE(Text(slices.second) == Text(u"\r\nghi")); 25 | } 26 | 27 | { 28 | auto slices = base_slice.split({2, 0}); 29 | REQUIRE(Text(slices.first) == Text(u"abc\ndef\r\n")); 30 | REQUIRE(Text(slices.second) == Text(u"ghi")); 31 | } 32 | 33 | { 34 | auto slices = base_slice.split({2, 3}); 35 | REQUIRE(Text(slices.first) == Text(u"abc\ndef\r\nghi")); 36 | REQUIRE(Text(slices.second) == Text(u"")); 37 | } 38 | } 39 | 40 | TEST_CASE("Text::concat") { 41 | Text text {u"abc\ndef\r\nghi"}; 42 | TextSlice base_slice {text}; 43 | 44 | REQUIRE(Text::concat(base_slice, base_slice) == Text(u"abc\ndef\r\nghiabc\ndef\r\nghi")); 45 | 46 | { 47 | auto prefix = base_slice.prefix({0, 2}); 48 | auto suffix = base_slice.suffix({2, 2}); 49 | REQUIRE(Text::concat(prefix, suffix) == Text(u"abi")); 50 | } 51 | 52 | { 53 | auto prefix = base_slice.prefix({1, 3}); 54 | auto suffix = base_slice.suffix({2, 2}); 55 | REQUIRE(Text::concat(prefix, suffix) == Text(u"abc\ndefi")); 56 | } 57 | 58 | { 59 | auto prefix = base_slice.prefix({1, 3}); 60 | auto suffix = base_slice.suffix({2, 3}); 61 | REQUIRE(Text::concat(prefix, suffix) == Text(u"abc\ndef")); 62 | } 63 | } 64 | 65 | TEST_CASE("Text::splice") { 66 | Text text {u"abc\ndef\r\nghi\njkl"}; 67 | text.splice({1, 2}, {1, 1}, Text {u"mno\npq\r\nst"}); 68 | REQUIRE(text == Text {u"abc\ndemno\npq\r\nsthi\njkl"}); 69 | text.splice({2, 1}, {2, 1}, Text {u""}); 70 | REQUIRE(text == Text {u"abc\ndemno\npkl"}); 71 | text.splice({1, 1}, {0, 0}, Text {u"uvw"}); 72 | REQUIRE(text == Text {u"abc\nduvwemno\npkl"}); 73 | text.splice(text.extent(), {0, 0}, Text {u"\nxyz\r\nabc"}); 74 | REQUIRE(text == Text {u"abc\nduvwemno\npkl\nxyz\r\nabc"}); 75 | text.splice({0, 0}, {0, 0}, Text {u"def\nghi"}); 76 | REQUIRE(text == Text {u"def\nghiabc\nduvwemno\npkl\nxyz\r\nabc"}); 77 | } 78 | 79 | TEST_CASE("Text::offset_for_position - basic") { 80 | Text 
text {u"abc\ndefg\r\nhijkl"}; 81 | 82 | REQUIRE(text.offset_for_position({0, 2}) == 2); 83 | REQUIRE(text.offset_for_position({0, 3}) == 3); 84 | REQUIRE(text.offset_for_position({0, 4}) == 3); 85 | REQUIRE(text.offset_for_position({0, 8}) == 3); 86 | 87 | REQUIRE(text.offset_for_position({1, 1}) == 5); 88 | REQUIRE(text.offset_for_position({1, 4}) == 8); 89 | REQUIRE(text.offset_for_position({1, 5}) == 8); 90 | REQUIRE(text.offset_for_position({1, 8}) == 8); 91 | 92 | REQUIRE(text.offset_for_position({2, 0}) == 10); 93 | REQUIRE(text.offset_for_position({2, 1}) == 11); 94 | REQUIRE(text.offset_for_position({2, 5}) == 15); 95 | REQUIRE(text.offset_for_position({2, 6}) == 15); 96 | } 97 | 98 | TEST_CASE("Text::offset_for_position - empty lines") { 99 | Text text {u"a\n\nb\r\rc"}; 100 | TextSlice slice(text); 101 | 102 | REQUIRE(text.offset_for_position({0, 1}) == 1); 103 | REQUIRE(text.offset_for_position({0, 2}) == 1); 104 | REQUIRE(text.offset_for_position({0, UINT32_MAX}) == 1); 105 | REQUIRE(text.offset_for_position({1, 0}) == 2); 106 | REQUIRE(slice.position_for_offset(1) == Point(0, 1)); 107 | REQUIRE(text.offset_for_position({1, 1}) == 2); 108 | REQUIRE(text.offset_for_position({1, UINT32_MAX}) == 2); 109 | REQUIRE(slice.position_for_offset(2) == Point(1, 0)); 110 | } 111 | -------------------------------------------------------------------------------- /vendor/libcxx/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ============================================================================== 2 | libc++ License 3 | ============================================================================== 4 | 5 | The libc++ library is dual licensed under both the University of Illinois 6 | "BSD-Like" license and the MIT license. As a user of this code you may choose 7 | to use it under either license. As a contributor, you agree to allow your code 8 | to be used under both. 
9 | 10 | Full text of the relevant licenses is included below. 11 | 12 | ============================================================================== 13 | 14 | University of Illinois/NCSA 15 | Open Source License 16 | 17 | Copyright (c) 2009-2017 by the contributors listed in CREDITS.TXT 18 | 19 | All rights reserved. 20 | 21 | Developed by: 22 | 23 | LLVM Team 24 | 25 | University of Illinois at Urbana-Champaign 26 | 27 | http://llvm.org 28 | 29 | Permission is hereby granted, free of charge, to any person obtaining a copy of 30 | this software and associated documentation files (the "Software"), to deal with 31 | the Software without restriction, including without limitation the rights to 32 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 33 | of the Software, and to permit persons to whom the Software is furnished to do 34 | so, subject to the following conditions: 35 | 36 | * Redistributions of source code must retain the above copyright notice, 37 | this list of conditions and the following disclaimers. 38 | 39 | * Redistributions in binary form must reproduce the above copyright notice, 40 | this list of conditions and the following disclaimers in the 41 | documentation and/or other materials provided with the distribution. 42 | 43 | * Neither the names of the LLVM Team, University of Illinois at 44 | Urbana-Champaign, nor the names of its contributors may be used to 45 | endorse or promote products derived from this Software without specific 46 | prior written permission. 47 | 48 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 49 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 50 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 51 | CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 52 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 53 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE 54 | SOFTWARE. 55 | 56 | ============================================================================== 57 | 58 | Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT 59 | 60 | Permission is hereby granted, free of charge, to any person obtaining a copy 61 | of this software and associated documentation files (the "Software"), to deal 62 | in the Software without restriction, including without limitation the rights 63 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 64 | copies of the Software, and to permit persons to whom the Software is 65 | furnished to do so, subject to the following conditions: 66 | 67 | The above copyright notice and this permission notice shall be included in 68 | all copies or substantial portions of the Software. 69 | 70 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 71 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 72 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 73 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 74 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 75 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 76 | THE SOFTWARE. -------------------------------------------------------------------------------- /vendor/pcre/10.23/AUTHORS: -------------------------------------------------------------------------------- 1 | THE MAIN PCRE2 LIBRARY CODE 2 | --------------------------- 3 | 4 | Written by: Philip Hazel 5 | Email local part: ph10 6 | Email domain: cam.ac.uk 7 | 8 | University of Cambridge Computing Service, 9 | Cambridge, England. 
10 | 11 | Copyright (c) 1997-2017 University of Cambridge 12 | All rights reserved 13 | 14 | 15 | PCRE2 JUST-IN-TIME COMPILATION SUPPORT 16 | -------------------------------------- 17 | 18 | Written by: Zoltan Herczeg 19 | Email local part: hzmester 20 | Email domain: freemail.hu 21 | 22 | Copyright(c) 2010-2017 Zoltan Herczeg 23 | All rights reserved. 24 | 25 | 26 | STACK-LESS JUST-IN-TIME COMPILER 27 | -------------------------------- 28 | 29 | Written by: Zoltan Herczeg 30 | Email local part: hzmester 31 | Email domain: freemail.hu 32 | 33 | Copyright(c) 2009-2017 Zoltan Herczeg 34 | All rights reserved. 35 | 36 | #### 37 | -------------------------------------------------------------------------------- /vendor/pcre/10.23/COPYING: -------------------------------------------------------------------------------- 1 | PCRE2 LICENCE 2 | 3 | Please see the file LICENCE in the PCRE2 distribution for licensing details. 4 | 5 | End 6 | -------------------------------------------------------------------------------- /vendor/pcre/10.23/LICENCE: -------------------------------------------------------------------------------- 1 | PCRE2 LICENCE 2 | ------------- 3 | 4 | PCRE2 is a library of functions to support regular expressions whose syntax 5 | and semantics are as close as possible to those of the Perl 5 language. 6 | 7 | Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as 8 | specified below. The documentation for PCRE2, supplied in the "doc" 9 | directory, is distributed under the same terms as the software itself. The data 10 | in the testdata directory is not copyrighted and is in the public domain. 11 | 12 | The basic library functions are written in C and are freestanding. Also 13 | included in the distribution is a just-in-time compiler that can be used to 14 | optimize pattern matching. This is an optional feature that can be omitted when 15 | the library is built.
16 | 17 | 18 | THE BASIC LIBRARY FUNCTIONS 19 | --------------------------- 20 | 21 | Written by: Philip Hazel 22 | Email local part: ph10 23 | Email domain: cam.ac.uk 24 | 25 | University of Cambridge Computing Service, 26 | Cambridge, England. 27 | 28 | Copyright (c) 1997-2017 University of Cambridge 29 | All rights reserved. 30 | 31 | 32 | PCRE2 JUST-IN-TIME COMPILATION SUPPORT 33 | -------------------------------------- 34 | 35 | Written by: Zoltan Herczeg 36 | Email local part: hzmester 37 | Email domain: freemail.hu 38 | 39 | Copyright(c) 2010-2017 Zoltan Herczeg 40 | All rights reserved. 41 | 42 | 43 | STACK-LESS JUST-IN-TIME COMPILER 44 | -------------------------------- 45 | 46 | Written by: Zoltan Herczeg 47 | Email local part: hzmester 48 | Email domain: freemail.hu 49 | 50 | Copyright(c) 2009-2017 Zoltan Herczeg 51 | All rights reserved. 52 | 53 | 54 | THE "BSD" LICENCE 55 | ----------------- 56 | 57 | Redistribution and use in source and binary forms, with or without 58 | modification, are permitted provided that the following conditions are met: 59 | 60 | * Redistributions of source code must retain the above copyright notice, 61 | this list of conditions and the following disclaimer. 62 | 63 | * Redistributions in binary form must reproduce the above copyright 64 | notice, this list of conditions and the following disclaimer in the 65 | documentation and/or other materials provided with the distribution. 66 | 67 | * Neither the name of the University of Cambridge nor the names of any 68 | contributors may be used to endorse or promote products derived from this 69 | software without specific prior written permission. 70 | 71 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 72 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 73 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 74 | ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 75 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 76 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 77 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 78 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 79 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 80 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 81 | POSSIBILITY OF SUCH DAMAGE. 82 | 83 | End 84 | -------------------------------------------------------------------------------- /vendor/pcre/10.23/src/pcre2_find_bracket.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 
22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | /* This module contains a single function that scans through a compiled pattern 43 | until it finds a capturing bracket with the given number, or, if the number is 44 | negative, an instance of OP_REVERSE for a lookbehind. The function is called 45 | from pcre2_compile.c and also from pcre2_study.c when finding the minimum 46 | matching length. 
*/ 47 | 48 | 49 | #ifdef HAVE_CONFIG_H 50 | #include "config.h" 51 | #endif 52 | 53 | #include "pcre2_internal.h" 54 | 55 | 56 | /************************************************* 57 | * Scan compiled regex for specific bracket * 58 | *************************************************/ 59 | 60 | /* 61 | Arguments: 62 | code points to start of expression 63 | utf TRUE in UTF mode 64 | number the required bracket number or negative to find a lookbehind 65 | 66 | Returns: pointer to the opcode for the bracket, or NULL if not found 67 | */ 68 | 69 | PCRE2_SPTR 70 | PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number) 71 | { 72 | for (;;) 73 | { 74 | PCRE2_UCHAR c = *code; 75 | 76 | if (c == OP_END) return NULL; 77 | 78 | /* XCLASS is used for classes that cannot be represented just by a bit map. 79 | This includes negated single high-valued characters. CALLOUT_STR is used for 80 | callouts with string arguments. In both cases the length in the table is 81 | zero; the actual length is stored in the compiled code. */ 82 | 83 | if (c == OP_XCLASS) code += GET(code, 1); 84 | else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); 85 | 86 | /* Handle lookbehind */ 87 | 88 | else if (c == OP_REVERSE) 89 | { 90 | if (number < 0) return (PCRE2_UCHAR *)code; 91 | code += PRIV(OP_lengths)[c]; 92 | } 93 | 94 | /* Handle capturing bracket */ 95 | 96 | else if (c == OP_CBRA || c == OP_SCBRA || 97 | c == OP_CBRAPOS || c == OP_SCBRAPOS) 98 | { 99 | int n = (int)GET2(code, 1+LINK_SIZE); 100 | if (n == number) return (PCRE2_UCHAR *)code; 101 | code += PRIV(OP_lengths)[c]; 102 | } 103 | 104 | /* Otherwise, we can get the item's length from the table, except that for 105 | repeated character types, we have to test for \p and \P, which have an extra 106 | two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we 107 | must add in its length. 
*/ 108 | 109 | else 110 | { 111 | switch(c) 112 | { 113 | case OP_TYPESTAR: 114 | case OP_TYPEMINSTAR: 115 | case OP_TYPEPLUS: 116 | case OP_TYPEMINPLUS: 117 | case OP_TYPEQUERY: 118 | case OP_TYPEMINQUERY: 119 | case OP_TYPEPOSSTAR: 120 | case OP_TYPEPOSPLUS: 121 | case OP_TYPEPOSQUERY: 122 | if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; 123 | break; 124 | 125 | case OP_TYPEUPTO: 126 | case OP_TYPEMINUPTO: 127 | case OP_TYPEEXACT: 128 | case OP_TYPEPOSUPTO: 129 | if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) 130 | code += 2; 131 | break; 132 | 133 | case OP_MARK: 134 | case OP_PRUNE_ARG: 135 | case OP_SKIP_ARG: 136 | case OP_THEN_ARG: 137 | code += code[1]; 138 | break; 139 | } 140 | 141 | /* Add in the fixed length from the table */ 142 | 143 | code += PRIV(OP_lengths)[c]; 144 | 145 | /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be 146 | followed by a multi-byte character. The length in the table is a minimum, so 147 | we have to arrange to skip the extra bytes. 
*/ 148 | 149 | #ifdef MAYBE_UTF_MULTI 150 | if (utf) switch(c) 151 | { 152 | case OP_CHAR: 153 | case OP_CHARI: 154 | case OP_NOT: 155 | case OP_NOTI: 156 | case OP_EXACT: 157 | case OP_EXACTI: 158 | case OP_NOTEXACT: 159 | case OP_NOTEXACTI: 160 | case OP_UPTO: 161 | case OP_UPTOI: 162 | case OP_NOTUPTO: 163 | case OP_NOTUPTOI: 164 | case OP_MINUPTO: 165 | case OP_MINUPTOI: 166 | case OP_NOTMINUPTO: 167 | case OP_NOTMINUPTOI: 168 | case OP_POSUPTO: 169 | case OP_POSUPTOI: 170 | case OP_NOTPOSUPTO: 171 | case OP_NOTPOSUPTOI: 172 | case OP_STAR: 173 | case OP_STARI: 174 | case OP_NOTSTAR: 175 | case OP_NOTSTARI: 176 | case OP_MINSTAR: 177 | case OP_MINSTARI: 178 | case OP_NOTMINSTAR: 179 | case OP_NOTMINSTARI: 180 | case OP_POSSTAR: 181 | case OP_POSSTARI: 182 | case OP_NOTPOSSTAR: 183 | case OP_NOTPOSSTARI: 184 | case OP_PLUS: 185 | case OP_PLUSI: 186 | case OP_NOTPLUS: 187 | case OP_NOTPLUSI: 188 | case OP_MINPLUS: 189 | case OP_MINPLUSI: 190 | case OP_NOTMINPLUS: 191 | case OP_NOTMINPLUSI: 192 | case OP_POSPLUS: 193 | case OP_POSPLUSI: 194 | case OP_NOTPOSPLUS: 195 | case OP_NOTPOSPLUSI: 196 | case OP_QUERY: 197 | case OP_QUERYI: 198 | case OP_NOTQUERY: 199 | case OP_NOTQUERYI: 200 | case OP_MINQUERY: 201 | case OP_MINQUERYI: 202 | case OP_NOTMINQUERY: 203 | case OP_NOTMINQUERYI: 204 | case OP_POSQUERY: 205 | case OP_POSQUERYI: 206 | case OP_NOTPOSQUERY: 207 | case OP_NOTPOSQUERYI: 208 | if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); 209 | break; 210 | } 211 | #else 212 | (void)(utf); /* Keep compiler happy by referencing function argument */ 213 | #endif /* MAYBE_UTF_MULTI */ 214 | } 215 | } 216 | } 217 | 218 | /* End of pcre2_find_bracket.c */ 219 | -------------------------------------------------------------------------------- /vendor/pcre/10.23/src/pcre2_jit_match.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 
| *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | #ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE 42 | #error This file must be included from pcre2_jit_compile.c. 43 | #endif 44 | 45 | #ifdef SUPPORT_JIT 46 | 47 | static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func) 48 | { 49 | sljit_u8 local_space[MACHINE_STACK_SIZE]; 50 | struct sljit_stack local_stack; 51 | 52 | local_stack.top = (sljit_sw)&local_space; 53 | local_stack.base = local_stack.top; 54 | local_stack.limit = local_stack.base + MACHINE_STACK_SIZE; 55 | local_stack.max_limit = local_stack.limit; 56 | arguments->stack = &local_stack; 57 | return executable_func(arguments); 58 | } 59 | 60 | #endif 61 | 62 | 63 | /************************************************* 64 | * Do a JIT pattern match * 65 | *************************************************/ 66 | 67 | /* This function runs a JIT pattern match. 
68 | 69 | Arguments: 70 | code points to the compiled expression 71 | subject points to the subject string 72 | length length of subject string (may contain binary zeros) 73 | start_offset where to start in the subject string 74 | options option bits 75 | match_data points to a match_data block 76 | mcontext points to a match context 77 | jit_stack points to a JIT stack 78 | 79 | Returns: > 0 => success; value is the number of ovector pairs filled 80 | = 0 => success, but ovector is not big enough 81 | -1 => failed to match (PCRE_ERROR_NOMATCH) 82 | < -1 => some kind of unexpected problem 83 | */ 84 | 85 | PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 86 | pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, 87 | PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, 88 | pcre2_match_context *mcontext) 89 | { 90 | #ifndef SUPPORT_JIT 91 | 92 | (void)code; 93 | (void)subject; 94 | (void)length; 95 | (void)start_offset; 96 | (void)options; 97 | (void)match_data; 98 | (void)mcontext; 99 | return PCRE2_ERROR_JIT_BADOPTION; 100 | 101 | #else /* SUPPORT_JIT */ 102 | 103 | pcre2_real_code *re = (pcre2_real_code *)code; 104 | executable_functions *functions = (executable_functions *)re->executable_jit; 105 | pcre2_jit_stack *jit_stack; 106 | uint32_t oveccount = match_data->oveccount; 107 | uint32_t max_oveccount; 108 | union { 109 | void *executable_func; 110 | jit_function call_executable_func; 111 | } convert_executable_func; 112 | jit_arguments arguments; 113 | int rc; 114 | int index = 0; 115 | 116 | if ((options & PCRE2_PARTIAL_HARD) != 0) 117 | index = 2; 118 | else if ((options & PCRE2_PARTIAL_SOFT) != 0) 119 | index = 1; 120 | 121 | if (functions->executable_funcs[index] == NULL) 122 | return PCRE2_ERROR_JIT_BADOPTION; 123 | 124 | /* Sanity checks should be handled by pcre_exec. 
*/ 125 | arguments.str = subject + start_offset; 126 | arguments.begin = subject; 127 | arguments.end = subject + length; 128 | arguments.match_data = match_data; 129 | arguments.startchar_ptr = subject; 130 | arguments.mark_ptr = NULL; 131 | arguments.options = options; 132 | 133 | if (mcontext != NULL) 134 | { 135 | arguments.callout = mcontext->callout; 136 | arguments.callout_data = mcontext->callout_data; 137 | arguments.offset_limit = mcontext->offset_limit; 138 | arguments.limit_match = (mcontext->match_limit < re->limit_match)? 139 | mcontext->match_limit : re->limit_match; 140 | if (mcontext->jit_callback != NULL) 141 | jit_stack = mcontext->jit_callback(mcontext->jit_callback_data); 142 | else 143 | jit_stack = (pcre2_jit_stack *)mcontext->jit_callback_data; 144 | } 145 | else 146 | { 147 | arguments.callout = NULL; 148 | arguments.callout_data = NULL; 149 | arguments.offset_limit = PCRE2_UNSET; 150 | arguments.limit_match = (MATCH_LIMIT < re->limit_match)? 151 | MATCH_LIMIT : re->limit_match; 152 | jit_stack = NULL; 153 | } 154 | 155 | /* JIT only need two offsets for each ovector entry. Hence 156 | the last 1/3 of the ovector will never be touched. 
*/ 157 | 158 | max_oveccount = functions->top_bracket; 159 | if (oveccount > max_oveccount) 160 | oveccount = max_oveccount; 161 | arguments.oveccount = oveccount << 1; 162 | 163 | 164 | convert_executable_func.executable_func = functions->executable_funcs[index]; 165 | if (jit_stack != NULL) 166 | { 167 | arguments.stack = (struct sljit_stack *)(jit_stack->stack); 168 | rc = convert_executable_func.call_executable_func(&arguments); 169 | } 170 | else 171 | rc = jit_machine_stack_exec(&arguments, convert_executable_func.call_executable_func); 172 | 173 | if (rc > (int)oveccount) 174 | rc = 0; 175 | match_data->code = re; 176 | match_data->subject = subject; 177 | match_data->rc = rc; 178 | match_data->startchar = arguments.startchar_ptr - subject; 179 | match_data->leftchar = 0; 180 | match_data->rightchar = 0; 181 | match_data->mark = arguments.mark_ptr; 182 | match_data->matchedby = PCRE2_MATCHEDBY_JIT; 183 | 184 | return match_data->rc; 185 | 186 | #endif /* SUPPORT_JIT */ 187 | } 188 | 189 | /* End of pcre2_jit_match.c */ 190 | -------------------------------------------------------------------------------- /vendor/pcre/10.23/src/pcre2_jit_misc.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 
7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 
38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | #ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE 43 | #error This file must be included from pcre2_jit_compile.c. 44 | #endif 45 | 46 | 47 | 48 | /************************************************* 49 | * Free JIT read-only data * 50 | *************************************************/ 51 | 52 | void 53 | PRIV(jit_free_rodata)(void *current, void *allocator_data) 54 | { 55 | #ifndef SUPPORT_JIT 56 | (void)current; 57 | (void)allocator_data; 58 | #else /* SUPPORT_JIT */ 59 | void *next; 60 | 61 | SLJIT_UNUSED_ARG(allocator_data); 62 | 63 | while (current != NULL) 64 | { 65 | next = *(void**)current; 66 | SLJIT_FREE(current, allocator_data); 67 | current = next; 68 | } 69 | 70 | #endif /* SUPPORT_JIT */ 71 | } 72 | 73 | /************************************************* 74 | * Free JIT compiled code * 75 | *************************************************/ 76 | 77 | void 78 | PRIV(jit_free)(void *executable_jit, pcre2_memctl *memctl) 79 | { 80 | #ifndef SUPPORT_JIT 81 | (void)executable_jit; 82 | (void)memctl; 83 | #else /* SUPPORT_JIT */ 84 | 85 | executable_functions *functions = (executable_functions *)executable_jit; 86 | void *allocator_data = memctl; 87 | int i; 88 | 89 | for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) 90 | { 91 | if (functions->executable_funcs[i] != NULL) 92 | sljit_free_code(functions->executable_funcs[i]); 93 | PRIV(jit_free_rodata)(functions->read_only_data_heads[i], allocator_data); 94 | } 95 | 96 | SLJIT_FREE(functions, allocator_data); 97 | 98 | #endif /* SUPPORT_JIT */ 99 | } 100 | 101 | 102 | /************************************************* 103 | * Free unused JIT memory * 104 | *************************************************/ 105 | 106 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION 107 | pcre2_jit_free_unused_memory(pcre2_general_context *gcontext) 108 | { 109 | #ifndef SUPPORT_JIT 110 | (void)gcontext; /* Suppress warning */ 111 | 
#else /* SUPPORT_JIT */ 112 | SLJIT_UNUSED_ARG(gcontext); 113 | sljit_free_unused_memory_exec(); 114 | #endif /* SUPPORT_JIT */ 115 | } 116 | 117 | 118 | 119 | /************************************************* 120 | * Allocate a JIT stack * 121 | *************************************************/ 122 | 123 | PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION 124 | pcre2_jit_stack_create(size_t startsize, size_t maxsize, 125 | pcre2_general_context *gcontext) 126 | { 127 | #ifndef SUPPORT_JIT 128 | 129 | (void)gcontext; 130 | (void)startsize; 131 | (void)maxsize; 132 | return NULL; 133 | 134 | #else /* SUPPORT_JIT */ 135 | 136 | pcre2_jit_stack *jit_stack; 137 | 138 | if (startsize < 1 || maxsize < 1) 139 | return NULL; 140 | if (startsize > maxsize) 141 | startsize = maxsize; 142 | startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); 143 | maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); 144 | 145 | jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext); 146 | if (jit_stack == NULL) return NULL; 147 | jit_stack->stack = sljit_allocate_stack(startsize, maxsize, &jit_stack->memctl); 148 | return jit_stack; 149 | 150 | #endif 151 | } 152 | 153 | 154 | /************************************************* 155 | * Assign a JIT stack to a pattern * 156 | *************************************************/ 157 | 158 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION 159 | pcre2_jit_stack_assign(pcre2_match_context *mcontext, pcre2_jit_callback callback, 160 | void *callback_data) 161 | { 162 | #ifndef SUPPORT_JIT 163 | (void)mcontext; 164 | (void)callback; 165 | (void)callback_data; 166 | #else /* SUPPORT_JIT */ 167 | 168 | if (mcontext == NULL) return; 169 | mcontext->jit_callback = callback; 170 | mcontext->jit_callback_data = callback_data; 171 | 172 | #endif /* SUPPORT_JIT */ 173 | } 174 | 175 | 176 | /************************************************* 177 | * Free a JIT stack * 178 | 
*************************************************/ 179 | 180 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION 181 | pcre2_jit_stack_free(pcre2_jit_stack *jit_stack) 182 | { 183 | #ifndef SUPPORT_JIT 184 | (void)jit_stack; 185 | #else /* SUPPORT_JIT */ 186 | if (jit_stack != NULL) 187 | { 188 | sljit_free_stack((struct sljit_stack *)(jit_stack->stack), &jit_stack->memctl); 189 | jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data); 190 | } 191 | #endif /* SUPPORT_JIT */ 192 | } 193 | 194 | 195 | /************************************************* 196 | * Get target CPU type * 197 | *************************************************/ 198 | 199 | const char* 200 | PRIV(jit_get_target)(void) 201 | { 202 | #ifndef SUPPORT_JIT 203 | return "JIT is not supported"; 204 | #else /* SUPPORT_JIT */ 205 | return sljit_get_platform_name(); 206 | #endif /* SUPPORT_JIT */ 207 | } 208 | 209 | 210 | /************************************************* 211 | * Get size of JIT code * 212 | *************************************************/ 213 | 214 | size_t 215 | PRIV(jit_get_size)(void *executable_jit) 216 | { 217 | #ifndef SUPPORT_JIT 218 | (void)executable_jit; 219 | return 0; 220 | #else /* SUPPORT_JIT */ 221 | sljit_uw *executable_sizes = ((executable_functions *)executable_jit)->executable_sizes; 222 | SLJIT_COMPILE_ASSERT(JIT_NUMBER_OF_COMPILE_MODES == 3, number_of_compile_modes_changed); 223 | return executable_sizes[0] + executable_sizes[1] + executable_sizes[2]; 224 | #endif 225 | } 226 | 227 | /* End of pcre2_jit_misc.c */ 228 | -------------------------------------------------------------------------------- /vendor/pcre/10.23/src/pcre2_maketables.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | 
and semantics are as close as possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 
38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | /* This module contains the external function pcre2_maketables(), which builds 43 | character tables for PCRE2 in the current locale. The file is compiled on its 44 | own as part of the PCRE2 library. However, it is also included in the 45 | compilation of dftables.c, in which case the macro DFTABLES is defined. */ 46 | 47 | #ifndef DFTABLES 48 | # ifdef HAVE_CONFIG_H 49 | # include "config.h" 50 | # endif 51 | # include "pcre2_internal.h" 52 | #endif 53 | 54 | 55 | 56 | /************************************************* 57 | * Create PCRE2 character tables * 58 | *************************************************/ 59 | 60 | /* This function builds a set of character tables for use by PCRE2 and returns 61 | a pointer to them. They are build using the ctype functions, and consequently 62 | their contents will depend upon the current locale setting. When compiled as 63 | part of the library, the store is obtained via a general context malloc, if 64 | supplied, but when DFTABLES is defined (when compiling the dftables auxiliary 65 | program) malloc() is used, and the function has a different name so as not to 66 | clash with the prototype in pcre2.h. 67 | 68 | Arguments: none when DFTABLES is defined 69 | else a PCRE2 general context or NULL 70 | Returns: pointer to the contiguous block of data 71 | */ 72 | 73 | #ifdef DFTABLES /* Included in freestanding dftables.c program */ 74 | static const uint8_t *maketables(void) 75 | { 76 | uint8_t *yield = (uint8_t *)malloc(tables_length); 77 | 78 | #else /* Not DFTABLES, compiling the library */ 79 | PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION 80 | pcre2_maketables(pcre2_general_context *gcontext) 81 | { 82 | uint8_t *yield = (uint8_t *)((gcontext != NULL)? 
83 | gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) : 84 | malloc(tables_length)); 85 | #endif /* DFTABLES */ 86 | 87 | int i; 88 | uint8_t *p; 89 | 90 | if (yield == NULL) return NULL; 91 | p = yield; 92 | 93 | /* First comes the lower casing table */ 94 | 95 | for (i = 0; i < 256; i++) *p++ = tolower(i); 96 | 97 | /* Next the case-flipping table */ 98 | 99 | for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i); 100 | 101 | /* Then the character class tables. Don't try to be clever and save effort on 102 | exclusive ones - in some locales things may be different. 103 | 104 | Note that the table for "space" includes everything "isspace" gives, including 105 | VT in the default locale. This makes it work for the POSIX class [:space:]. 106 | From release 8.34 is is also correct for Perl space, because Perl added VT at 107 | release 5.18. 108 | 109 | Note also that it is possible for a character to be alnum or alpha without 110 | being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the 111 | fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must 112 | test for alnum specially. */ 113 | 114 | memset(p, 0, cbit_length); 115 | for (i = 0; i < 256; i++) 116 | { 117 | if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7); 118 | if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7); 119 | if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7); 120 | if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7); 121 | if (i == '_') p[cbit_word + i/8] |= 1 << (i&7); 122 | if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7); 123 | if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7); 124 | if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7); 125 | if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7); 126 | if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7); 127 | if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7); 128 | } 129 | p += cbit_length; 130 | 131 | /* Finally, the character type table. 
In this, we used to exclude VT from the 132 | white space chars, because Perl didn't recognize it as such for \s and for 133 | comments within regexes. However, Perl changed at release 5.18, so PCRE changed 134 | at release 8.34. */ 135 | 136 | for (i = 0; i < 256; i++) 137 | { 138 | int x = 0; 139 | if (isspace(i)) x += ctype_space; 140 | if (isalpha(i)) x += ctype_letter; 141 | if (isdigit(i)) x += ctype_digit; 142 | if (isxdigit(i)) x += ctype_xdigit; 143 | if (isalnum(i) || i == '_') x += ctype_word; 144 | 145 | /* Note: strchr includes the terminating zero in the characters it considers. 146 | In this instance, that is ok because we want binary zero to be flagged as a 147 | meta-character, which in this sense is any character that terminates a run 148 | of data characters. */ 149 | 150 | if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta; 151 | *p++ = x; 152 | } 153 | 154 | return yield; 155 | } 156 | 157 | /* End of pcre2_maketables.c */ 158 | -------------------------------------------------------------------------------- /vendor/pcre/10.23/src/pcre2_match_data.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 
7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 
38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | #ifdef HAVE_CONFIG_H 43 | #include "config.h" 44 | #endif 45 | 46 | #include "pcre2_internal.h" 47 | 48 | 49 | 50 | /************************************************* 51 | * Create a match data block given ovector size * 52 | *************************************************/ 53 | 54 | /* A minimum of 1 is imposed on the number of ovector triplets. */ 55 | 56 | PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION 57 | pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext) 58 | { 59 | pcre2_match_data *yield; 60 | if (oveccount < 1) oveccount = 1; 61 | yield = PRIV(memctl_malloc)( 62 | sizeof(pcre2_match_data) + 3*oveccount*sizeof(PCRE2_SIZE), 63 | (pcre2_memctl *)gcontext); 64 | if (yield == NULL) return NULL; 65 | yield->oveccount = oveccount; 66 | return yield; 67 | } 68 | 69 | 70 | 71 | /************************************************* 72 | * Create a match data block using pattern data * 73 | *************************************************/ 74 | 75 | /* If no context is supplied, use the memory allocator from the code. 
*/ 76 | 77 | PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION 78 | pcre2_match_data_create_from_pattern(const pcre2_code *code, 79 | pcre2_general_context *gcontext) 80 | { 81 | if (gcontext == NULL) gcontext = (pcre2_general_context *)code; 82 | return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1, 83 | gcontext); 84 | } 85 | 86 | 87 | 88 | /************************************************* 89 | * Free a match data block * 90 | *************************************************/ 91 | 92 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION 93 | pcre2_match_data_free(pcre2_match_data *match_data) 94 | { 95 | if (match_data != NULL) 96 | match_data->memctl.free(match_data, match_data->memctl.memory_data); 97 | } 98 | 99 | 100 | 101 | /************************************************* 102 | * Get last mark in match * 103 | *************************************************/ 104 | 105 | PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION 106 | pcre2_get_mark(pcre2_match_data *match_data) 107 | { 108 | return match_data->mark; 109 | } 110 | 111 | 112 | 113 | /************************************************* 114 | * Get pointer to ovector * 115 | *************************************************/ 116 | 117 | PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION 118 | pcre2_get_ovector_pointer(pcre2_match_data *match_data) 119 | { 120 | return match_data->ovector; 121 | } 122 | 123 | 124 | 125 | /************************************************* 126 | * Get number of ovector slots * 127 | *************************************************/ 128 | 129 | PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION 130 | pcre2_get_ovector_count(pcre2_match_data *match_data) 131 | { 132 | return match_data->oveccount; 133 | } 134 | 135 | 136 | 137 | /************************************************* 138 | * Get starting code unit in match * 139 | *************************************************/ 140 | 141 | PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION 142 | 
pcre2_get_startchar(pcre2_match_data *match_data) 143 | { 144 | return match_data->startchar; 145 | } 146 | 147 | /* End of pcre2_match_data.c */ 148 | -------------------------------------------------------------------------------- /vendor/pcre/10.23/src/pcre2_newline.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | /* This module contains internal functions for testing newlines when more than 43 | one kind of newline is to be recognized. When a newline is found, its length is 44 | returned. In principle, we could implement several newline "types", each 45 | referring to a different set of newline characters. At present, PCRE2 supports 46 | only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF, 47 | and NLTYPE_ANY. The full list of Unicode newline characters is taken from 48 | http://unicode.org/unicode/reports/tr18/. */ 49 | 50 | 51 | #ifdef HAVE_CONFIG_H 52 | #include "config.h" 53 | #endif 54 | 55 | #include "pcre2_internal.h" 56 | 57 | 58 | 59 | /************************************************* 60 | * Check for newline at given position * 61 | *************************************************/ 62 | 63 | /* This function is called only via the IS_NEWLINE macro, which does so only 64 | when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed 65 | newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit 66 | pointed to by ptr is less than the end of the string. 
67 | 68 | Arguments: 69 | ptr pointer to possible newline 70 | type the newline type 71 | endptr pointer to the end of the string 72 | lenptr where to return the length 73 | utf TRUE if in utf mode 74 | 75 | Returns: TRUE or FALSE 76 | */ 77 | 78 | BOOL 79 | PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr, 80 | uint32_t *lenptr, BOOL utf) 81 | { 82 | uint32_t c; 83 | 84 | #ifdef SUPPORT_UNICODE 85 | if (utf) { GETCHAR(c, ptr); } else c = *ptr; 86 | #else 87 | (void)utf; 88 | c = *ptr; 89 | #endif /* SUPPORT_UNICODE */ 90 | 91 | if (type == NLTYPE_ANYCRLF) switch(c) 92 | { 93 | case CHAR_LF: 94 | *lenptr = 1; 95 | return TRUE; 96 | 97 | case CHAR_CR: 98 | *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; 99 | return TRUE; 100 | 101 | default: 102 | return FALSE; 103 | } 104 | 105 | /* NLTYPE_ANY */ 106 | 107 | else switch(c) 108 | { 109 | #ifdef EBCDIC 110 | case CHAR_NEL: 111 | #endif 112 | case CHAR_LF: 113 | case CHAR_VT: 114 | case CHAR_FF: 115 | *lenptr = 1; 116 | return TRUE; 117 | 118 | case CHAR_CR: 119 | *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; 120 | return TRUE; 121 | 122 | #ifndef EBCDIC 123 | #if PCRE2_CODE_UNIT_WIDTH == 8 124 | case CHAR_NEL: 125 | *lenptr = utf? 2 : 1; 126 | return TRUE; 127 | 128 | case 0x2028: /* LS */ 129 | case 0x2029: /* PS */ 130 | *lenptr = 3; 131 | return TRUE; 132 | 133 | #else /* 16-bit or 32-bit code units */ 134 | case CHAR_NEL: 135 | case 0x2028: /* LS */ 136 | case 0x2029: /* PS */ 137 | *lenptr = 1; 138 | return TRUE; 139 | #endif 140 | #endif /* Not EBCDIC */ 141 | 142 | default: 143 | return FALSE; 144 | } 145 | } 146 | 147 | 148 | 149 | /************************************************* 150 | * Check for newline at previous position * 151 | *************************************************/ 152 | 153 | /* This function is called only via the WAS_NEWLINE macro, which does so only 154 | when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. 
The case of a fixed 155 | newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial 156 | value of ptr is greater than the start of the string that is being processed. 157 | 158 | Arguments: 159 | ptr pointer to possible newline 160 | type the newline type 161 | startptr pointer to the start of the string 162 | lenptr where to return the length 163 | utf TRUE if in utf mode 164 | 165 | Returns: TRUE or FALSE 166 | */ 167 | 168 | BOOL 169 | PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr, 170 | uint32_t *lenptr, BOOL utf) 171 | { 172 | uint32_t c; 173 | ptr--; 174 | 175 | #ifdef SUPPORT_UNICODE 176 | if (utf) 177 | { 178 | BACKCHAR(ptr); 179 | GETCHAR(c, ptr); 180 | } 181 | else c = *ptr; 182 | #else 183 | (void)utf; 184 | c = *ptr; 185 | #endif /* SUPPORT_UNICODE */ 186 | 187 | if (type == NLTYPE_ANYCRLF) switch(c) 188 | { 189 | case CHAR_LF: 190 | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; 191 | return TRUE; 192 | 193 | case CHAR_CR: 194 | *lenptr = 1; 195 | return TRUE; 196 | 197 | default: 198 | return FALSE; 199 | } 200 | 201 | /* NLTYPE_ANY */ 202 | 203 | else switch(c) 204 | { 205 | case CHAR_LF: 206 | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; 207 | return TRUE; 208 | 209 | #ifdef EBCDIC 210 | case CHAR_NEL: 211 | #endif 212 | case CHAR_VT: 213 | case CHAR_FF: 214 | case CHAR_CR: 215 | *lenptr = 1; 216 | return TRUE; 217 | 218 | #ifndef EBCDIC 219 | #if PCRE2_CODE_UNIT_WIDTH == 8 220 | case CHAR_NEL: 221 | *lenptr = utf? 
2 : 1; 222 | return TRUE; 223 | 224 | case 0x2028: /* LS */ 225 | case 0x2029: /* PS */ 226 | *lenptr = 3; 227 | return TRUE; 228 | 229 | #else /* 16-bit or 32-bit code units */ 230 | case CHAR_NEL: 231 | case 0x2028: /* LS */ 232 | case 0x2029: /* PS */ 233 | *lenptr = 1; 234 | return TRUE; 235 | #endif 236 | #endif /* Not EBCDIC */ 237 | 238 | default: 239 | return FALSE; 240 | } 241 | } 242 | 243 | /* End of pcre2_newline.c */ 244 | -------------------------------------------------------------------------------- /vendor/pcre/10.23/src/pcre2_ord2utf.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 
26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | /* This file contains a function that converts a Unicode character code point 43 | into a UTF string. The behaviour is different for each code unit width. */ 44 | 45 | 46 | #ifdef HAVE_CONFIG_H 47 | #include "config.h" 48 | #endif 49 | 50 | #include "pcre2_internal.h" 51 | 52 | 53 | /* If SUPPORT_UNICODE is not defined, this function will never be called. 54 | Supply a dummy function because some compilers do not like empty source 55 | modules. 
*/ 56 | 57 | #ifndef SUPPORT_UNICODE 58 | unsigned int 59 | PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer) 60 | { 61 | (void)(cvalue); 62 | (void)(buffer); 63 | return 0; 64 | } 65 | #else /* SUPPORT_UNICODE */ 66 | 67 | 68 | /************************************************* 69 | * Convert code point to UTF * 70 | *************************************************/ 71 | 72 | /* 73 | Arguments: 74 | cvalue the character value 75 | buffer pointer to buffer for result 76 | 77 | Returns: number of code units placed in the buffer 78 | */ 79 | 80 | unsigned int 81 | PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer) 82 | { 83 | /* Convert to UTF-8 */ 84 | 85 | #if PCRE2_CODE_UNIT_WIDTH == 8 86 | int i, j; 87 | for (i = 0; i < PRIV(utf8_table1_size); i++) 88 | if ((int)cvalue <= PRIV(utf8_table1)[i]) break; 89 | buffer += i; 90 | for (j = i; j > 0; j--) 91 | { 92 | *buffer-- = 0x80 | (cvalue & 0x3f); 93 | cvalue >>= 6; 94 | } 95 | *buffer = PRIV(utf8_table2)[i] | cvalue; 96 | return i + 1; 97 | 98 | /* Convert to UTF-16 */ 99 | 100 | #elif PCRE2_CODE_UNIT_WIDTH == 16 101 | if (cvalue <= 0xffff) 102 | { 103 | *buffer = (PCRE2_UCHAR)cvalue; 104 | return 1; 105 | } 106 | cvalue -= 0x10000; 107 | *buffer++ = 0xd800 | (cvalue >> 10); 108 | *buffer = 0xdc00 | (cvalue & 0x3ff); 109 | return 2; 110 | 111 | /* Convert to UTF-32 */ 112 | 113 | #else 114 | *buffer = (PCRE2_UCHAR)cvalue; 115 | return 1; 116 | #endif 117 | } 118 | #endif /* SUPPORT_UNICODE */ 119 | 120 | /* End of pcre_ord2utf.c */ 121 | -------------------------------------------------------------------------------- /vendor/pcre/10.23/src/pcre2_string_utils.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as 
possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 
38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | /* This module contains internal functions for comparing and finding the length 42 | of strings. These are used instead of strcmp() etc because the standard 43 | functions work only on 8-bit data. */ 44 | 45 | 46 | #ifdef HAVE_CONFIG_H 47 | #include "config.h" 48 | #endif 49 | 50 | #include "pcre2_internal.h" 51 | 52 | 53 | /************************************************* 54 | * Compare two zero-terminated PCRE2 strings * 55 | *************************************************/ 56 | 57 | /* 58 | Arguments: 59 | str1 first string 60 | str2 second string 61 | 62 | Returns: 0, 1, or -1 63 | */ 64 | 65 | int 66 | PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2) 67 | { 68 | PCRE2_UCHAR c1, c2; 69 | while (*str1 != '\0' || *str2 != '\0') 70 | { 71 | c1 = *str1++; 72 | c2 = *str2++; 73 | if (c1 != c2) return ((c1 > c2) << 1) - 1; 74 | } 75 | return 0; 76 | } 77 | 78 | 79 | /************************************************* 80 | * Compare zero-terminated PCRE2 & 8-bit strings * 81 | *************************************************/ 82 | 83 | /* As the 8-bit string is almost always a literal, its type is specified as 84 | const char *. 
85 | 86 | Arguments: 87 | str1 first string 88 | str2 second string 89 | 90 | Returns: 0, 1, or -1 91 | */ 92 | 93 | int 94 | PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2) 95 | { 96 | PCRE2_UCHAR c1, c2; 97 | while (*str1 != '\0' || *str2 != '\0') 98 | { 99 | c1 = *str1++; 100 | c2 = *str2++; 101 | if (c1 != c2) return ((c1 > c2) << 1) - 1; 102 | } 103 | return 0; 104 | } 105 | 106 | 107 | /************************************************* 108 | * Compare two PCRE2 strings, given a length * 109 | *************************************************/ 110 | 111 | /* 112 | Arguments: 113 | str1 first string 114 | str2 second string 115 | len the length 116 | 117 | Returns: 0, 1, or -1 118 | */ 119 | 120 | int 121 | PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len) 122 | { 123 | PCRE2_UCHAR c1, c2; 124 | for (; len > 0; len--) 125 | { 126 | c1 = *str1++; 127 | c2 = *str2++; 128 | if (c1 != c2) return ((c1 > c2) << 1) - 1; 129 | } 130 | return 0; 131 | } 132 | 133 | 134 | /************************************************* 135 | * Compare PCRE2 string to 8-bit string by length * 136 | *************************************************/ 137 | 138 | /* As the 8-bit string is almost always a literal, its type is specified as 139 | const char *. 
140 | 141 | Arguments: 142 | str1 first string 143 | str2 second string 144 | len the length 145 | 146 | Returns: 0, 1, or -1 147 | */ 148 | 149 | int 150 | PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len) 151 | { 152 | PCRE2_UCHAR c1, c2; 153 | for (; len > 0; len--) 154 | { 155 | c1 = *str1++; 156 | c2 = *str2++; 157 | if (c1 != c2) return ((c1 > c2) << 1) - 1; 158 | } 159 | return 0; 160 | } 161 | 162 | 163 | /************************************************* 164 | * Find the length of a PCRE2 string * 165 | *************************************************/ 166 | 167 | /* 168 | Argument: the string 169 | Returns: the length 170 | */ 171 | 172 | PCRE2_SIZE 173 | PRIV(strlen)(PCRE2_SPTR str) 174 | { 175 | PCRE2_SIZE c = 0; 176 | while (*str++ != 0) c++; 177 | return c; 178 | } 179 | 180 | 181 | /************************************************* 182 | * Copy 8-bit 0-terminated string to PCRE2 string * 183 | *************************************************/ 184 | 185 | /* Arguments: 186 | str1 buffer to receive the string 187 | str2 8-bit string to be copied 188 | 189 | Returns: the number of code units used (excluding trailing zero) 190 | */ 191 | 192 | PCRE2_SIZE 193 | PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2) 194 | { 195 | PCRE2_UCHAR *t = str1; 196 | while (*str2 != 0) *t++ = *str2++; 197 | *t = 0; 198 | return t - str1; 199 | } 200 | 201 | /* End of pcre2_string_utils.c */ 202 | -------------------------------------------------------------------------------- /vendor/pcre/10.23/src/sljit/sljitConfig.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Stack-less Just-In-Time compiler 3 | * 4 | * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, are 7 | * permitted provided that the following conditions are met: 8 | * 9 | * 1. 
Redistributions of source code must retain the above copyright notice, this list of 10 | * conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list 13 | * of conditions and the following disclaimer in the documentation and/or other materials 14 | * provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 19 | * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 21 | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 24 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef _SLJIT_CONFIG_H_ 28 | #define _SLJIT_CONFIG_H_ 29 | 30 | /* --------------------------------------------------------------------- */ 31 | /* Custom defines */ 32 | /* --------------------------------------------------------------------- */ 33 | 34 | /* Put your custom defines here. This empty section will never change 35 | which helps maintaining patches (with diff / patch utilities). */ 36 | 37 | /* --------------------------------------------------------------------- */ 38 | /* Architecture */ 39 | /* --------------------------------------------------------------------- */ 40 | 41 | /* Architecture selection. 
*/ 42 | /* #define SLJIT_CONFIG_X86_32 1 */ 43 | /* #define SLJIT_CONFIG_X86_64 1 */ 44 | /* #define SLJIT_CONFIG_ARM_V5 1 */ 45 | /* #define SLJIT_CONFIG_ARM_V7 1 */ 46 | /* #define SLJIT_CONFIG_ARM_THUMB2 1 */ 47 | /* #define SLJIT_CONFIG_ARM_64 1 */ 48 | /* #define SLJIT_CONFIG_PPC_32 1 */ 49 | /* #define SLJIT_CONFIG_PPC_64 1 */ 50 | /* #define SLJIT_CONFIG_MIPS_32 1 */ 51 | /* #define SLJIT_CONFIG_MIPS_64 1 */ 52 | /* #define SLJIT_CONFIG_SPARC_32 1 */ 53 | /* #define SLJIT_CONFIG_TILEGX 1 */ 54 | 55 | /* #define SLJIT_CONFIG_AUTO 1 */ 56 | /* #define SLJIT_CONFIG_UNSUPPORTED 1 */ 57 | 58 | /* --------------------------------------------------------------------- */ 59 | /* Utilities */ 60 | /* --------------------------------------------------------------------- */ 61 | 62 | /* Useful for thread-safe compiling of global functions. */ 63 | #ifndef SLJIT_UTIL_GLOBAL_LOCK 64 | /* Enabled by default */ 65 | #define SLJIT_UTIL_GLOBAL_LOCK 1 66 | #endif 67 | 68 | /* Implements a stack like data structure (by using mmap / VirtualAlloc). */ 69 | #ifndef SLJIT_UTIL_STACK 70 | /* Enabled by default */ 71 | #define SLJIT_UTIL_STACK 1 72 | #endif 73 | 74 | /* Single threaded application. Does not require any locks. */ 75 | #ifndef SLJIT_SINGLE_THREADED 76 | /* Disabled by default. */ 77 | #define SLJIT_SINGLE_THREADED 0 78 | #endif 79 | 80 | /* --------------------------------------------------------------------- */ 81 | /* Configuration */ 82 | /* --------------------------------------------------------------------- */ 83 | 84 | /* If SLJIT_STD_MACROS_DEFINED is not defined, the application should 85 | define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMCPY, and NULL. */ 86 | #ifndef SLJIT_STD_MACROS_DEFINED 87 | /* Disabled by default. */ 88 | #define SLJIT_STD_MACROS_DEFINED 0 89 | #endif 90 | 91 | /* Executable code allocation: 92 | If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should 93 | define SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_EXEC_OFFSET. 
*/ 94 | #ifndef SLJIT_EXECUTABLE_ALLOCATOR 95 | /* Enabled by default. */ 96 | #define SLJIT_EXECUTABLE_ALLOCATOR 1 97 | 98 | /* When SLJIT_PROT_EXECUTABLE_ALLOCATOR is enabled SLJIT uses 99 | an allocator which does not set writable and executable 100 | permission flags at the same time. The trade-off is increased 101 | memory consumption and disabled dynamic code modifications. */ 102 | #ifndef SLJIT_PROT_EXECUTABLE_ALLOCATOR 103 | /* Disabled by default. */ 104 | #define SLJIT_PROT_EXECUTABLE_ALLOCATOR 0 105 | #endif 106 | 107 | #endif 108 | 109 | /* Force cdecl calling convention even if a better calling 110 | convention (e.g. fastcall) is supported by the C compiler. 111 | If this option is enabled, C functions without 112 | SLJIT_CALL can also be called from JIT code. */ 113 | #ifndef SLJIT_USE_CDECL_CALLING_CONVENTION 114 | /* Disabled by default */ 115 | #define SLJIT_USE_CDECL_CALLING_CONVENTION 0 116 | #endif 117 | 118 | /* Return with error when an invalid argument is passed. */ 119 | #ifndef SLJIT_ARGUMENT_CHECKS 120 | /* Disabled by default */ 121 | #define SLJIT_ARGUMENT_CHECKS 0 122 | #endif 123 | 124 | /* Debug checks (assertions, etc.). */ 125 | #ifndef SLJIT_DEBUG 126 | /* Enabled by default */ 127 | #define SLJIT_DEBUG 1 128 | #endif 129 | 130 | /* Verbose operations. */ 131 | #ifndef SLJIT_VERBOSE 132 | /* Enabled by default */ 133 | #define SLJIT_VERBOSE 1 134 | #endif 135 | 136 | /* 137 | SLJIT_IS_FPU_AVAILABLE 138 | The availability of the FPU can be controlled by SLJIT_IS_FPU_AVAILABLE. 139 | zero value - FPU is NOT present. 140 | nonzero value - FPU is present.
141 | */ 142 | 143 | /* For further configurations, see the beginning of sljitConfigInternal.h */ 144 | 145 | #endif 146 | -------------------------------------------------------------------------------- /vendor/pcre/README.md: -------------------------------------------------------------------------------- 1 | Superstring contains a copy of the [PCRE](http://www.pcre.org/) regex engine. 2 | 3 | The `10.23` directory contains a subset of the PCRE distribution obtained from [here](https://ftp.pcre.org/pub/pcre/pcre2-10.23.zip). 4 | 5 | The file that `superstring` uses to compile PCRE, `pcre.gyp`, was created based on PCRE's `NON-AUTOTOOLS-BUILD` instructions. -------------------------------------------------------------------------------- /vendor/pcre/pcre.gyp: -------------------------------------------------------------------------------- 1 | { 2 | "targets": [ 3 | { 4 | "target_name": "pcre", 5 | "type": "static_library", 6 | "sources": [ 7 | "pcre2_chartables.c", 8 | "10.23/src/pcre2_auto_possess.c", 9 | "10.23/src/pcre2_compile.c", 10 | "10.23/src/pcre2_config.c", 11 | "10.23/src/pcre2_context.c", 12 | "10.23/src/pcre2_dfa_match.c", 13 | "10.23/src/pcre2_error.c", 14 | "10.23/src/pcre2_find_bracket.c", 15 | "10.23/src/pcre2_jit_compile.c", 16 | "10.23/src/pcre2_maketables.c", 17 | "10.23/src/pcre2_match.c", 18 | "10.23/src/pcre2_match_data.c", 19 | "10.23/src/pcre2_newline.c", 20 | "10.23/src/pcre2_ord2utf.c", 21 | "10.23/src/pcre2_pattern_info.c", 22 | "10.23/src/pcre2_serialize.c", 23 | "10.23/src/pcre2_string_utils.c", 24 | "10.23/src/pcre2_study.c", 25 | "10.23/src/pcre2_substitute.c", 26 | "10.23/src/pcre2_substring.c", 27 | "10.23/src/pcre2_tables.c", 28 | "10.23/src/pcre2_ucd.c", 29 | "10.23/src/pcre2_valid_utf.c", 30 | "10.23/src/pcre2_xclass.c", 31 | ], 32 | "include_dirs": [ 33 | "include", 34 | "10.23/src" 35 | ], 36 | "defines": [ 37 | "HAVE_CONFIG_H", 38 | "PCRE2_CODE_UNIT_WIDTH=16", 39 | "SUPPORT_JIT", 40 | ], 41 | "cflags": [ 42 | 
"-Wno-unused-function" 43 | ], 44 | 'xcode_settings': { 45 | 'OTHER_CFLAGS': [ 46 | '-Wno-unused-function' 47 | ], 48 | }, 49 | "direct_dependent_settings": { 50 | "include_dirs": [ 51 | "include" 52 | ], 53 | "defines": [ 54 | "PCRE2_CODE_UNIT_WIDTH=16", 55 | ] 56 | } 57 | } 58 | ] 59 | } --------------------------------------------------------------------------------