├── .circleci
    └── config.yml
├── .clang-format
├── .github
    └── workflows
    │   ├── ci.yml
    │   └── codeql.yml
├── .gitignore
├── .gitmodules
├── .npmignore
├── LICENSE
├── README.md
├── appveyor.yml
├── benchmark
    ├── large-text-buffer.benchmark.js
    ├── marker-index.benchmark.js
    ├── native
    │   └── marker-index-benchmark.cc
    └── text-buffer.benchmark.js
├── binding.gyp
├── index.js
├── package.json
├── script
    ├── build-browser-version.sh
    ├── install-emscripten.sh
    ├── test-native.js
    └── test-with-debug-graph.sh
├── src
    ├── bindings
    │   ├── bindings.cc
    │   ├── em
    │   │   ├── auto-wrap.h
    │   │   ├── epilogue.js
    │   │   ├── marker-index.cc
    │   │   ├── patch.cc
    │   │   ├── point.cc
    │   │   ├── prologue.js
    │   │   ├── range.cc
    │   │   └── text-buffer.cc
    │   ├── marker-index-wrapper.cc
    │   ├── marker-index-wrapper.h
    │   ├── noop.h
    │   ├── number-conversion.h
    │   ├── patch-wrapper.cc
    │   ├── patch-wrapper.h
    │   ├── point-wrapper.cc
    │   ├── point-wrapper.h
    │   ├── range-wrapper.cc
    │   ├── range-wrapper.h
    │   ├── string-conversion.cc
    │   ├── string-conversion.h
    │   ├── text-buffer-snapshot-wrapper.cc
    │   ├── text-buffer-snapshot-wrapper.h
    │   ├── text-buffer-wrapper.cc
    │   ├── text-buffer-wrapper.h
    │   ├── text-reader.cc
    │   ├── text-reader.h
    │   ├── text-writer.cc
    │   └── text-writer.h
    └── core
    │   ├── encoding-conversion.cc
    │   ├── encoding-conversion.h
    │   ├── flat_set.h
    │   ├── libmba-diff.cc
    │   ├── libmba-diff.h
    │   ├── marker-index.cc
    │   ├── marker-index.h
    │   ├── optional.h
    │   ├── patch.cc
    │   ├── patch.h
    │   ├── point.cc
    │   ├── point.h
    │   ├── range.cc
    │   ├── range.h
    │   ├── regex.cc
    │   ├── regex.h
    │   ├── serializer.h
    │   ├── text-buffer.cc
    │   ├── text-buffer.h
    │   ├── text-diff.cc
    │   ├── text-diff.h
    │   ├── text-slice.cc
    │   ├── text-slice.h
    │   ├── text.cc
    │   └── text.h
├── test
    ├── js
    │   ├── helpers
    │   │   ├── point-helpers.js
    │   │   ├── test-document.js
    │   │   ├── text-helpers.js
    │   │   └── words.js
    │   ├── marker-index.test.js
    │   ├── patch.test.js
    │   └── text-buffer.test.js
    └── native
    │   ├── encoding-conversion-test.cc
    │   ├── patch-test.cc
    │   ├── test-helpers.cc
    │   ├── test-helpers.h
    │   ├── tests.cc
    │   ├── text-buffer-test.cc
    │   ├── text-diff-test.cc
    │   └── text-test.cc
└── vendor
    ├── catch.hpp
    ├── libcxx
        ├── LICENSE.txt
        └── utf8-conversions.h
    └── pcre
        ├── 10.23
            ├── AUTHORS
            ├── COPYING
            ├── LICENCE
            ├── NON-AUTOTOOLS-BUILD
            ├── README
            └── src
            │   ├── config.h.generic
            │   ├── config.h.in
            │   ├── pcre2.h.generic
            │   ├── pcre2.h.in
            │   ├── pcre2_auto_possess.c
            │   ├── pcre2_chartables.c.dist
            │   ├── pcre2_compile.c
            │   ├── pcre2_config.c
            │   ├── pcre2_context.c
            │   ├── pcre2_dfa_match.c
            │   ├── pcre2_error.c
            │   ├── pcre2_find_bracket.c
            │   ├── pcre2_internal.h
            │   ├── pcre2_intmodedep.h
            │   ├── pcre2_jit_compile.c
            │   ├── pcre2_jit_match.c
            │   ├── pcre2_jit_misc.c
            │   ├── pcre2_maketables.c
            │   ├── pcre2_match.c
            │   ├── pcre2_match_data.c
            │   ├── pcre2_newline.c
            │   ├── pcre2_ord2utf.c
            │   ├── pcre2_pattern_info.c
            │   ├── pcre2_printint.c
            │   ├── pcre2_serialize.c
            │   ├── pcre2_string_utils.c
            │   ├── pcre2_study.c
            │   ├── pcre2_substitute.c
            │   ├── pcre2_substring.c
            │   ├── pcre2_tables.c
            │   ├── pcre2_ucd.c
            │   ├── pcre2_ucp.h
            │   ├── pcre2_valid_utf.c
            │   ├── pcre2_xclass.c
            │   └── sljit
            │       ├── sljitConfig.h
            │       ├── sljitConfigInternal.h
            │       ├── sljitExecAllocator.c
            │       ├── sljitLir.c
            │       ├── sljitLir.h
            │       ├── sljitNativeARM_32.c
            │       ├── sljitNativeARM_64.c
            │       ├── sljitNativeARM_T2_32.c
            │       ├── sljitNativeMIPS_32.c
            │       ├── sljitNativeMIPS_64.c
            │       ├── sljitNativeMIPS_common.c
            │       ├── sljitNativePPC_32.c
            │       ├── sljitNativePPC_64.c
            │       ├── sljitNativePPC_common.c
            │       ├── sljitNativeSPARC_32.c
            │       ├── sljitNativeSPARC_common.c
            │       ├── sljitNativeTILEGX-encoder.c
            │       ├── sljitNativeTILEGX_64.c
            │       ├── sljitNativeX86_32.c
            │       ├── sljitNativeX86_64.c
            │       ├── sljitNativeX86_common.c
            │       └── sljitUtils.c
        ├── README.md
        ├── include
            ├── config.h
            └── pcre2.h
        ├── pcre.gyp
        └── pcre2_chartables.c


/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | # CircleCI pipeline: builds the emscripten (browser) bundle on a macOS
 2 | # executor, then lints and runs the browser, Node.js and native test suites.
 3 | version: 2
 4 | 
 5 | jobs:
 6 |   build:
 7 |     environment:
 8 |       XCODE_SCHEME: test
 9 |       XCODE_WORKSPACE: test
10 |       XCODE_PROJECT: test
11 |       NODE_VERSION: '10.2.1'
12 |     macos:
13 |       xcode: '8.3.3'
14 |     steps:
15 |     - checkout
16 |     - run:
17 |         name: Update submodules
18 |         command: git submodule update --init
19 |     - restore_cache:
20 |         key: node-{{ .Environment.NODE_VERSION }}
21 |     - restore_cache:
22 |         key: emsdk-{{ checksum "script/install-emscripten.sh" }}
23 |     - run:
24 |         name: Install Node.js with nvm
25 |         command: |
26 |           export NVM_DIR=${HOME}/.nvm
27 |           curl -o- https://raw.githubusercontent.com/creationix/nvm/v0.33.8/install.sh | bash
28 |           [ -s "${NVM_DIR}/nvm.sh" ] && \. "${NVM_DIR}/nvm.sh"
29 |           nvm install ${NODE_VERSION}
30 |           nvm alias default ${NODE_VERSION}
31 |           echo "[ -s \"${NVM_DIR}/nvm.sh\" ] && . \"${NVM_DIR}/nvm.sh\"" >> $BASH_ENV
32 |     - run:
33 |         name: Install node dependencies
34 |         command: npm install
35 |     - run:
36 |         name: Install emscripten
37 |         command: script/install-emscripten.sh
38 |     - save_cache:
39 |         key: node-{{ .Environment.NODE_VERSION }}
40 |         paths:
41 |         # nvm lives in ${HOME}/.nvm (NVM_DIR above); a bare `.nvm` is resolved
42 |         # against the working directory, where nothing is installed.
43 |         - ~/.nvm
44 |     - save_cache:
45 |         key: emsdk-{{ checksum "script/install-emscripten.sh" }}
46 |         paths:
47 |         # NOTE(review): relative to the working directory; confirm these match
48 |         # where script/install-emscripten.sh puts the SDK and its cache.
49 |         - .emscripten_cache
50 |         - emsdk-portable
51 |     - run:
52 |         name: Build browser version
53 |         command: npm run build:browser
54 |     - run:
55 |         name: Lint JavaScript
56 |         command: npm run standard
57 |     - run:
58 |         name: Test Browser
59 |         command: npm run test:browser
60 |     - run:
61 |         name: Test Node.js
62 |         command: npm run test:node
63 |     - run:
64 |         name: Test native
65 |         command: npm run test:native
66 | 


--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | Language:        Cpp
3 | BasedOnStyle:  Google
4 | ColumnLimit:     120
5 | ...
6 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | # CI: lint and run the Node.js and native test suites on every push and PR.
 2 | name: ci
 3 | on:
 4 |   - pull_request
 5 |   - push
 6 | 
 7 | jobs:
 8 |   Test:
 9 |     # Commits whose message contains "[skip ci]" are handled by the Skip job.
10 |     if: "!contains(github.event.head_commit.message, '[skip ci]')"
11 |     runs-on: ${{ matrix.os }}
12 |     strategy:
13 |       fail-fast: false
14 |       matrix:
15 |         os:
16 |           - ubuntu-latest
17 |           - macos-latest
18 |           - windows-latest
19 |         # Referenced by the job name, the cache key and the setup-node step.
20 |         node_version:
21 |           - 14
22 |     name: Node ${{ matrix.node_version }} on ${{ matrix.os }}
23 | 
24 |     steps:
25 |       - uses: actions/checkout@v2
26 |         with:
27 |           submodules: true
28 |       - name: Cache
29 |         uses: actions/cache@v2
30 |         with:
31 |           # Plain scalar: quotes inside a block scalar are part of the value.
32 |           path: node_modules
33 |           key: ${{ runner.os }}-${{ matrix.node_version }}-${{ hashFiles('package.json') }}
34 | 
35 |       - name: Setup node
36 |         uses: actions/setup-node@v2-beta
37 |         with:
38 |           node-version: ${{ matrix.node_version }}
39 | 
40 |       - name: Install dependencies
41 |         run: npm install
42 | 
43 |       - name: Lint
44 |         run: npm run standard
45 | 
46 |       - name: Run tests
47 |         run: |
48 |           npm run test:node
49 |           npm run test:native
50 | 
51 |   Skip:
52 |     if: contains(github.event.head_commit.message, '[skip ci]')
53 |     runs-on: ubuntu-latest
54 |     steps:
55 |       - name: Skip CI 🚫
56 |         run: echo skip ci
57 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
 1 | # CodeQL static analysis of the JavaScript and C++ sources, on push and weekly.
 2 | name: 'Code Scanning - Action'
 3 | 
 4 | on:
 5 |   push:
 6 |   schedule:
 7 |     # Sundays at 00:00.
 8 |     - cron: '0 0 * * 0'
 9 | 
10 | jobs:
11 |   CodeQL-Build:
12 |     strategy:
13 |       fail-fast: false
14 | 
15 |     # CodeQL runs on ubuntu-latest, windows-latest, and macos-latest
16 |     runs-on: ubuntu-latest
17 | 
18 |     steps:
19 |       - name: Checkout repository
20 |         uses: actions/checkout@v2
21 | 
22 |       # Set up the CodeQL tooling for the languages listed below.
23 |       - name: Initialize CodeQL
24 |         uses: github/codeql-action/init@v1
25 |         with:
26 |           languages: javascript, cpp
27 | 
28 |       - run: |
29 |           npm install
30 |           npm run standard
31 | 
32 |       - name: Perform CodeQL Analysis
33 |         uses: github/codeql-action/analyze@v1
34 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | build
3 | .DS_Store
4 | .clang_complete
5 | 
6 | /browser.js
7 | emsdk-portable
8 | package-lock.json
9 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "vendor/win-iconv"]
2 | 	path = vendor/win-iconv
3 | 	url = https://github.com/win-iconv/win-iconv
4 | 


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
 1 | # Ignore everything by default; the negated patterns below list what is published.
 2 | *
 3 | 
 4 | !README.md
 5 | !LICENSE
 6 | !index.js
 7 | !browser.js
 8 | !src/core/*
 9 | !src/bindings/*.h
10 | !src/bindings/*.cc
11 | 
12 | !vendor/libcxx/*
13 | 
14 | !vendor/pcre/pcre.gyp
15 | !vendor/pcre/pcre2_chartables.c
16 | !vendor/pcre/include/*.h
17 | !vendor/pcre/10.23/src/*.h
18 | !vendor/pcre/10.23/src/*.c
19 | !vendor/pcre/10.23/src/sljit/*
20 | !vendor/pcre/10.23/COPYING
21 | # The vendored PCRE licence file uses the British spelling.
22 | !vendor/pcre/10.23/LICENCE
23 | 
24 | !vendor/win-iconv/iconv.h
25 | !vendor/win-iconv/win_iconv.c
26 | !vendor/win-iconv/readme.txt
27 | 
28 | !package.json
29 | !binding.gyp
30 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 GitHub
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ##### Atom and all repositories under Atom will be archived on December 15, 2022. Learn more in our [official announcement](https://github.blog/2022-06-08-sunsetting-atom/)
  2 |  # Superstring
  3 | [![ci](https://github.com/atom/superstring/actions/workflows/ci.yml/badge.svg)](https://github.com/atom/superstring/actions/workflows/ci.yml)  
  4 | 
  5 | Native library at the core of Atom's text editor.
  6 | 
  7 | ## Components:
  8 | 
  9 | ### Patch
 10 | 
 11 | This data structure represents a transformation from input to output text, and it's useful for combining changes that occur at different points in time and space.
 12 | 
 13 | Example:
 14 | ```js
 15 | const patch = new Patch
 16 | 
 17 | // At column 5, replace the string 'abc' with '1234':
 18 | patch.splice({row: 0, column: 5}, {row: 0, column: 3}, {row: 0, column: 4}, 'abc', '1234')
 19 | 
 20 | // Then at column 7, replace 3 characters with 4 characters:
 21 | patch.splice({row: 0, column: 7}, {row: 0, column: 3}, {row: 0, column: 4}, '34d', '5678')
 22 | 
 23 | // Retrieve the consolidated changes:
 24 | assert.deepEqual(patch.getChanges(), [
 25 |   {
 26 |     oldStart: {row: 0, column: 5},
 27 |     oldEnd: {row: 0, column: 9},
 28 |     oldText: 'abcd',
 29 |     newStart: {row: 0, column: 5},
 30 |     newEnd: {row: 0, column: 11},
 31 |     newText: '125678'
 32 |   }
 33 | ])
 34 | ```
 35 | 
 36 | ### MarkerIndex
 37 | 
 38 | This data structure is used to track logical locations in a text buffer as the contents of the buffer are changed.
 39 | 
 40 | Example:
 41 | 
 42 | ```js
 43 | const index = new MarkerIndex
 44 | 
 45 | // Associate a marker id with two ordered start and end points
 46 | index.insert(1, {row: 2, column: 5}, {row: 4, column: 10})
 47 | 
 48 | // Splice represents a change to the text file
 49 | // you pass it a starting point, then points representing the old and new extent
 50 | index.splice({row: 3, column: 5}, {row: 0, column: 0}, {row: 1, column: 0})
 51 | 
 52 | // The marker's end point was updated by the splice
 53 | assert.deepEqual(index.getEnd(1), {row: 5, column: 10})
 54 | ```
 55 | 
 56 | #### API
 57 | 
 58 | ##### `insert (id, start, end)`
 59 | 
 60 | Associates the given non-negative integer with a range represented by two `{row: number, column: number}` objects.
 61 | 
 62 | ##### `splice (start, oldExtent, newExtent)`
 63 | 
 64 | Update the locations of all markers based on the description of a change to the text. The range of the replaced text is described by *traversing* from `start` by `oldExtent`. The range of the new text is described by *traversing* from `start` by `newExtent`.
 65 | 
 66 | *Traversal* means that beginning with the `start` location, we arrive at a new location by performing X line feeds and carriage returns and then walking forward Y columns, where X is the `row` of the given traversal extent and Y is its `column`. In other words, `start`, `oldExtent`, and `newExtent` describe two ranges in the file: the spatial before and after of a change.
 67 | 
 68 | This method returns an object that describes what markers were *invalidated* by the change based on various invalidation strategies. If a marker is in a set for a given strategy, it was invalidated according to that strategy. The strategies are as follows:
 69 | 
 70 | * `touch` Contains markers that the change touched in any way.
 71 | * `inside` Contains markers that the change touched, but not markers with endpoints immediately adjacent to the change.
 72 | * `overlap` Contains markers that had one or both of their endpoints surrounded by the change.
 73 | * `surround` Contains markers that had both endpoints surrounded by the change.
 74 | 
 75 | ##### `setExclusive (markerId, boolean)`
 76 | 
 77 | This method lets you control the behavior of a marker when splices start and/or end at the marker's endpoints.
 78 | 
 79 | By default, we consider markers to be *inclusive*: that is, splices exactly at the beginning of the marked range will be considered to begin inside the marker (meaning that the marker's start position **will not** move), and splices exactly at the end of the marked range will be considered to end inside the marker (meaning that the marker's end position **will** move).
 80 | 
 81 | *Exclusive* markers, on the other hand, exhibit a slightly different behavior: in fact, splices exactly at the beginning of the marked range will be considered to begin outside the marker (meaning that the marker's start position **will** move), and splices exactly at the end of the marked range will be considered to end outside the marker (meaning that the marker's end position **will not** move).
 82 | 
 83 | Please note that, independently of whether a marker is inclusive or exclusive, its end **will always** be moved when its start gets moved as a result of a splice.
 84 | 
 85 | ##### `isExclusive (markerId)`
 86 | 
 87 | Returns whether the given marker id has been set to behave exclusively via `setExclusive`.
 88 | 
 89 | ##### `delete (markerId)`
 90 | 
 91 | Removes the specified marker from the index.
 92 | 
 93 | ##### `getRange (markerId)`
 94 | 
 95 | Returns the range for the given marker id, in the form of an object with `start` and `end` points.
 96 | 
 97 | ##### `getStart (markerId)`
 98 | 
 99 | Returns a `{row: number, column: number}` object representing the start of the specified marker.
100 | 
101 | ##### `getEnd (markerId)`
102 | 
103 | Returns a `{row: number, column: number}` object representing the end of the specified marker.
104 | 
105 | ##### `dump ()`
106 | 
107 | Returns the current location of every marker in the index, represented as an object mapping marker ids to range objects. For example:
108 | 
109 | ```js
110 | {
111 |   '1': {start: {row: 2, column: 5}, end: {row: 5, column: 10}},
112 |   '2': {start: {row: 4, column: 10}, end: {row: 6, column: 3}}
113 | }
114 | ```
115 | 
116 | ##### `findIntersecting (start, end = start)`
117 | 
118 | Returns a set with the ids of all markers intersecting the specified point range.
119 | 
120 | ##### `findContaining (start, end = start)`
121 | 
122 | Returns a set with the ids of all markers containing the specified point range.
123 | 
124 | ##### `findContainedIn (start, end)`
125 | 
126 | Returns a set with the ids of all markers contained in the specified point range.
127 | 
128 | ##### `findStartingIn (start, end)`
129 | 
130 | Returns a set with the ids of all markers starting in the specified point range.
131 | 
132 | ##### `findEndingIn (start, end)`
133 | 
134 | Returns a set with the ids of all markers ending in the specified point range.
135 | 
136 | ##### `findStartingAt (position)`
137 | 
138 | Returns a set with the ids of all markers starting at the specified point.
139 | 
140 | ##### `findEndingAt (position)`
141 | 
142 | Returns a set with the ids of all markers ending at the specified point.
143 | 
144 | ##### `findBoundariesIn (start, end)`
145 | 
146 | A boundary is a position in the index where a marker starts or ends. Multiple markers starting and/or ending at the same position describe only one boundary. This method returns an object containing all the boundaries in the specified point range, and an array of marker ids that overlap the specified start position. For example:
147 | 
148 | ```js
149 | {
150 |   containingStart: [1, 2, 3, 4],
151 |   boundaries: [
152 |     {position: {row: 0, column: 1}, starting: new Set([5, 6]), ending: new Set()},
153 |     {position: {row: 1, column: 0}, starting: new Set(), ending: new Set([5])},
154 |     {position: {row: 2, column: 0}, starting: new Set(), ending: new Set([6])}
155 |   ]
156 | }
157 | ```
158 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | # empty appveyor
2 | build: off
3 | 
4 | branches:
5 |   only:
6 |   - non-existing
7 | 


--------------------------------------------------------------------------------
/benchmark/large-text-buffer.benchmark.js:
--------------------------------------------------------------------------------
 1 | const http = require('http')
 2 | const fs = require('fs')
 3 | const unzip = require('unzip')
 4 | const { TextBuffer } = require('..')
 5 | 
 6 | const unzipper = unzip.Parse()
 7 | 
 8 | const getText = () => {
 9 |   return new Promise(resolve => {
10 |     console.log('fetching text file...')
11 |     const req = http.get({
12 |       hostname: 'www.acleddata.com',
13 |       port: 80,
14 |       // 51 MB text file
15 |       path: '/wp-content/uploads/2017/01/ACLED-Version-7-All-Africa-1997-2016_csv_dyadic-file.zip',
16 |       agent: false
17 |     }, res => {
18 |       res
19 |         .pipe(unzipper)
20 |         .on('entry', entry => {
21 |           let data = '';
22 |           entry.on('data', chunk => data += chunk);
23 |           entry.on('end', () => {
24 |             resolve(data)
25 |           });
26 |         })
27 |     })
28 | 
29 |     req.end()
30 |   })
31 | }
32 | 
33 | const timer = size => `Time to find "cat" in ${size} file`
34 | 
35 | getText().then(txt => {
36 |   const buffer = new TextBuffer()
37 | 
38 |   console.log('running findWordsWithSubsequence tests...')
39 | 
40 |   const sizes = [['10b', 10], ['100b', 100], ['1kb', 1000], ['1MB', 1000000], ['51MB', 100000000]]
41 | 
42 |   const test = size => {
43 |     const _timer = timer(size[0])
44 |     buffer.setText(txt.slice(0, size[1]))
45 |     console.time(_timer)
46 |     return buffer.findWordsWithSubsequence('cat', '', 100).then(sugs => {
47 |       console.timeEnd(_timer)
48 |     })
49 |   }
50 | 
51 |   return sizes.reduce((promise, size) => {
52 |     return promise.then(() => test(size))
53 |   }, Promise.resolve())
54 | }).then(() => {
55 |   console.log('finished')
56 | })
57 | 


--------------------------------------------------------------------------------
/benchmark/marker-index.benchmark.js:
--------------------------------------------------------------------------------
  1 | 'use strict';
  2 | 
  3 | const Random = require('random-seed')
  4 | const {MarkerIndex} = require('..')
  5 | const {traverse, traversalDistance, compare} = require('../test/js/helpers/point-helpers')
  6 | 
  7 | let random = new Random(1)
  8 | let markerIds = []
  9 | let idCounter = 1
 10 | let lastInsertionEnd = {row: 0, column: 0}
 11 | let markerIndex = null
 12 | let sequentialInsertOperations = []
 13 | let insertOperations = []
 14 | let spliceOperations = []
 15 | let deleteOperations = []
 16 | let rangeQueryOperations = []
 17 | 
 18 | function runBenchmark () {
 19 |   for (let i = 0; i < 40000; i++) {
 20 |     enqueueSequentialInsert()
 21 |   }
 22 | 
 23 |   for (let i = 0; i < 40000; i++) {
 24 |     enqueueInsert()
 25 |     enqueueSplice()
 26 |     enqueueDelete()
 27 |   }
 28 | 
 29 |   for (let i = 0; i < 500; i++) {
 30 |     enqueueRangeQuery()
 31 |   }
 32 | 
 33 |   markerIndex = new MarkerIndex()
 34 |   profileOperations('sequential inserts', sequentialInsertOperations)
 35 | 
 36 |   markerIndex = new MarkerIndex()
 37 |   profileOperations('inserts', insertOperations)
 38 |   profileOperations('range queries', rangeQueryOperations)
 39 |   profileOperations('splices', spliceOperations)
 40 |   profileOperations('deletes', deleteOperations)
 41 | }
 42 | 
 43 | function profileOperations (name, operations) {
 44 |   console.time(name)
 45 |   for (let i = 0, n = operations.length; i < n; i++) {
 46 |     const operation = operations[i]
 47 |     markerIndex[operation[0]].apply(markerIndex, operation[1])
 48 |   }
 49 |   console.timeEnd(name)
 50 | }
 51 | 
 52 | function enqueueSequentialInsert () {
 53 |   let id = (idCounter++).toString()
 54 |   let row, startColumn, endColumn
 55 |   if (random(10) < 3) {
 56 |     row = lastInsertionEnd.row + 1 + random(3)
 57 |     startColumn = random(100)
 58 |     endColumn = startColumn + random(20)
 59 |   } else {
 60 |     row = lastInsertionEnd.row
 61 |     startColumn = lastInsertionEnd.column + 1 + random(20)
 62 |     endColumn = startColumn + random(20)
 63 |   }
 64 |   lastInsertionEnd = {row, column: endColumn}
 65 |   sequentialInsertOperations.push(['insert', [id, {row, column: startColumn}, lastInsertionEnd]])
 66 | }
 67 | 
 68 | function enqueueInsert () {
 69 |   let id = (idCounter++).toString()
 70 |   let range = getRange()
 71 |   let start = range[0]
 72 |   let end = range[1]
 73 |   let exclusive = Boolean(random(2))
 74 |   markerIds.push(id)
 75 |   insertOperations.push(['insert', [id, start, end]])
 76 |   insertOperations.push(['setExclusive', [id, exclusive]])
 77 | }
 78 | 
 79 | function enqueueSplice () {
 80 |   spliceOperations.push(['splice', getSplice()])
 81 | }
 82 | 
 83 | function enqueueRangeQuery() {
 84 |   rangeQueryOperations.push(['findIntersecting', getRange()])
 85 | }
 86 | 
 87 | function enqueueDelete () {
 88 |   let id = markerIds.splice(random(markerIds.length), 1)
 89 |   deleteOperations.push(['delete', [id]])
 90 | }
 91 | 
 92 | function getRange () {
 93 |   let start = {row: random(100), column: random(100)}
 94 |   let end = start
 95 |   while (random(3) > 0) {
 96 |     end = traverse(end, {row: random.intBetween(-10, 10), column: random.intBetween(-10, 10)})
 97 |   }
 98 |   end.row = Math.max(end.row, 0)
 99 |   end.column = Math.max(end.column, 0)
100 | 
101 |   if (compare(start, end) <= 0) {
102 |     return [start, end]
103 |   } else {
104 |     return [end, start]
105 |   }
106 | }
107 | 
108 | function getSplice () {
109 |   let range = getRange()
110 |   let start = range[0]
111 |   let oldEnd = range[1]
112 |   let oldExtent = traversalDistance(oldEnd, start)
113 |   let newExtent = {row: 0, column: 0}
114 |   while (random(2)) {
115 |     newExtent = traverse(newExtent, {row: random(10), column: random(10)})
116 |   }
117 |   return [start, oldExtent, newExtent]
118 | }
119 | 
120 | runBenchmark()
121 | 


--------------------------------------------------------------------------------
/benchmark/native/marker-index-benchmark.cc:
--------------------------------------------------------------------------------
 1 | #include <chrono>
 2 | #include <iostream>
 3 | #include <vector>
 4 | #include <stdlib.h>
 5 | #include "catch.hpp"
 6 | #include "point.h"
 7 | #include "range.h"
 8 | #include "marker-index.h"
 9 | 
10 | using namespace std::chrono;
11 | using std::vector;
12 | 
13 | // Builds a Range whose start has a random row and column below 100. When
14 | // rand() % 10 < 5 the end is the start traversed by a random Point with row
15 | // and column below 10; otherwise the end equals the start.
16 | Range get_random_range() {
17 |   Point start(rand() % 100, rand() % 100);
18 |   Point end = start;
19 |   if (rand() % 10 < 5) {
20 |     end = end.traverse(Point(rand() % 10, rand() % 10));
21 |   }
22 |   return Range{start, end};
23 | }
24 | 
25 | // Times the insertion of `count` pre-generated random ranges into a MarkerIndex.
26 | TEST_CASE("MarkerIndex::insert") {
27 |   srand(0);  // fixed seed, so the generated ranges do not vary between runs
28 |   MarkerIndex marker_index;
29 |   vector<Range> ranges;
30 |   // `unsigned` instead of `uint`, which is not a standard C++ type name.
31 |   const unsigned count = 20000;
32 |   ranges.reserve(count);
33 | 
34 |   for (unsigned i = 0; i < count; i++) {
35 |     ranges.push_back(get_random_range());
36 |   }
37 | 
38 |   // steady_clock is monotonic, so the measurement cannot be skewed by
39 |   // wall-clock adjustments the way system_clock can.
40 |   const steady_clock::time_point start = steady_clock::now();
41 |   for (unsigned i = 0; i < count; i++) {
42 |     marker_index.insert(i, ranges[i].start, ranges[i].end);
43 |   }
44 |   const milliseconds elapsed = duration_cast<milliseconds>(steady_clock::now() - start);
45 |   std::cout << "Inserting " << count << " markers took " << elapsed.count() << "ms" << std::endl;
46 | }
47 | 


--------------------------------------------------------------------------------
/benchmark/text-buffer.benchmark.js:
--------------------------------------------------------------------------------
 1 | const assert = require('assert')
 2 | const {TextBuffer} = require('..')
 3 | 
 4 | const text = 'abc def ghi jkl\n'.repeat(1024 * 1024) // 1024 * 1024 copies of the same 16-character line
 5 | const lines = text.split('\n') // the same text pre-split into rows, used by the plain-RegExp baseline
 6 | const buffer = new TextBuffer(text)
 7 | const trialCount = 10 // repetitions inside each timed section
 8 | // Times `trialCount` searches for `pattern` via TextBuffer#searchSync, then via a RegExp scan of `lines`, logging both durations.
 9 | function benchmarkSearch(description, pattern, expectedPosition) {
10 |   let name = `Search for ${description} - TextBuffer`
11 |   console.time(name)
12 |   for (let i = 0; i < trialCount; i++) {
13 |     assert.deepEqual(buffer.searchSync(pattern), expectedPosition)
14 |   }
15 |   console.timeEnd(name)
16 |   // Baseline: scan the pre-split lines with a plain RegExp, stopping at the first matching row.
17 |   name = `Search for ${description} - lines array`
18 |   console.time(name)
19 |   const regex = new RegExp(pattern)
20 |   for (let i = 0; i < trialCount; i++) {
21 |     for (let row = 0, rowCount = lines.length; row < rowCount; row++) {
22 |       let match = regex.exec(lines[row])
23 |       if (match) { // when no row matches, this baseline asserts nothing
24 |         assert.deepEqual(
25 |           {
26 |             start: {row, column: match.index},
27 |             end: {row, column: match.index + match[0].length}
28 |           },
29 |           expectedPosition
30 |         )
31 |         break // only the first matching row is compared
32 |       }
33 |     }
34 |   }
35 |   console.timeEnd(name)
36 |   console.log()
37 | }
38 | 
39 | benchmarkSearch('simple non-existent pattern', '\t', null)
40 | benchmarkSearch('complex non-existent pattern', '123|456|789', null)
41 | benchmarkSearch('simple existing pattern', 'jkl', {start: {row: 0, column: 12}, end: {row: 0, column: 15}})
42 | benchmarkSearch('complex existing pattern', 'j\\w+', {start: {row: 0, column: 12}, end: {row: 0, column: 15}})


--------------------------------------------------------------------------------
/binding.gyp:
--------------------------------------------------------------------------------
  1 | {
  2 |     "targets": [
  3 |         {
  4 |             "target_name": "superstring",
  5 |             "dependencies": [
  6 |                 "superstring_core"
  7 |             ],
  8 |             "sources": [
  9 |                 "src/bindings/bindings.cc",
 10 |                 "src/bindings/marker-index-wrapper.cc",
 11 |                 "src/bindings/patch-wrapper.cc",
 12 |                 "src/bindings/point-wrapper.cc",
 13 |                 "src/bindings/range-wrapper.cc",
 14 |                 "src/bindings/text-buffer-wrapper.cc",
 15 |                 "src/bindings/text-buffer-snapshot-wrapper.cc",
 16 |                 "src/bindings/text-reader.cc",
 17 |                 "src/bindings/string-conversion.cc",
 18 |                 "src/bindings/text-writer.cc",
 19 |             ],
 20 |             "include_dirs": [
 21 |               "src/core",
 22 |               "<!(node -e \"require('nan')\")"
 23 |             ],
 24 |         },
 25 |         {
 26 |             "target_name": "superstring_core",
 27 |             "type": "static_library",
 28 |             "dependencies": [
 29 |                 "./vendor/pcre/pcre.gyp:pcre",
 30 |             ],
 31 |             "sources": [
 32 |                 "src/core/encoding-conversion.cc",
 33 |                 "src/core/marker-index.cc",
 34 |                 "src/core/patch.cc",
 35 |                 "src/core/point.cc",
 36 |                 "src/core/range.cc",
 37 |                 "src/core/regex.cc",
 38 |                 "src/core/text.cc",
 39 |                 "src/core/text-buffer.cc",
 40 |                 "src/core/text-slice.cc",
 41 |                 "src/core/text-diff.cc",
 42 |                 "src/core/libmba-diff.cc",
 43 |             ],
 44 |             "include_dirs": [
 45 |                 "vendor/libcxx"
 46 |             ],
 47 |             "conditions": [
 48 |                 ['OS=="mac"', {
 49 |                     'link_settings': {
 50 |                         'libraries': ['libiconv.dylib'],
 51 |                     }
 52 |                 }],
 53 |                 ['OS=="win"', {
 54 |                    'sources': [
 55 |                        'vendor/win-iconv/win_iconv.c',
 56 |                     ],
 57 |                     'include_dirs': [
 58 |                         'vendor/win-iconv'
 59 |                     ],
 60 |                     'defines': [
 61 |                         'WINICONV_CONST=',
 62 |                         'PCRE2_STATIC',
 63 |                     ]
 64 |                 }],
 65 |             ],
 66 |         }
 67 |     ],
 68 | 
 69 |     "variables": {
 70 |         "tests": 0
 71 |     },
 72 | 
 73 |     "conditions": [
 74 |         # If --tests is passed to node-gyp configure, we'll build a standalone
 75 |         # executable that runs tests on the patch.
 76 |         ['tests != 0', {
 77 |             "targets": [{
 78 |                 "target_name": "tests",
 79 |                 "type": "executable",
 80 |                 "cflags_cc!": ["-fno-exceptions"],
 81 |                 "defines": [
 82 |                     "CATCH_CONFIG_CPP11_NO_IS_ENUM"
 83 |                 ],
 84 |                 "sources": [
 85 |                     "test/native/test-helpers.cc",
 86 |                     "test/native/tests.cc",
 87 |                     "test/native/encoding-conversion-test.cc",
 88 |                     "test/native/patch-test.cc",
 89 |                     "test/native/text-buffer-test.cc",
 90 |                     "test/native/text-test.cc",
 91 |                     "test/native/text-diff-test.cc",
 92 |                 ],
 93 |                 "include_dirs": [
 94 |                     "vendor",
 95 |                     "src/core",
 96 |                 ],
 97 |                 "dependencies": [
 98 |                     "superstring_core"
 99 |                 ],
100 |                 "conditions": [
101 |                     ['OS=="mac"', {
102 |                         'cflags': [
103 |                             '-mmacosx-version-min=10.8'
104 |                         ],
105 |                         "xcode_settings": {
106 |                             "GCC_ENABLE_CPP_EXCEPTIONS": "YES",
107 |                             'MACOSX_DEPLOYMENT_TARGET': '10.8',
108 |                         }
109 |                     }]
110 |                 ]
111 |             }]
112 |         }]
113 |     ],
114 | 
115 |     "target_defaults": {
116 |         "cflags_cc": ["-std=c++11"],
117 |         "conditions": [
118 |             ['OS=="mac"', {
119 |                 "xcode_settings": {
120 |                     'CLANG_CXX_LIBRARY': 'libc++',
121 |                     'CLANG_CXX_LANGUAGE_STANDARD':'c++11',
122 |                 }
123 |             }],
124 |             ['OS=="win"', {
125 |                 "link_settings": {
126 |                     "libraries": ["ws2_32.lib"]
127 |                 },
128 |                 "defines": [
129 |                     "NOMINMAX"
130 |                 ],
131 |             }]
132 |         ]
133 |     }
134 | }
135 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "superstring",
 3 |   "version": "2.4.4",
 4 |   "description": "A data structure to efficiently represent the results of applying patches.",
 5 |   "main": "./index",
 6 |   "browser": "./browser",
 7 |   "scripts": {
 8 |     "build:node": "node-gyp rebuild",
 9 |     "build:browser": "script/build-browser-version.sh",
10 |     "build": "npm run build:node && npm run build:browser",
11 |     "test:native": "node ./script/test-native.js",
12 |     "test:node": "mocha test/js/*.js",
13 |     "test:browser": "SUPERSTRING_USE_BROWSER_VERSION=1 mocha test/js/*.js",
14 |     "test": "npm run test:node && npm run test:browser",
15 |     "benchmark": "node benchmark/marker-index.benchmark.js",
16 |     "prepublishOnly": "git submodule update --init --recursive && npm run build:browser",
17 |     "standard": "standard --recursive src test"
18 |   },
19 |   "repository": {
20 |     "type": "git",
21 |     "url": "https://github.com/atom/superstring.git"
22 |   },
23 |   "keywords": [
24 |     "text",
25 |     "data-structure"
26 |   ],
27 |   "author": "Nathan Sobo <nathan@github.com>",
28 |   "license": "MIT",
29 |   "bugs": {
30 |     "url": "https://github.com/atom/superstring/issues"
31 |   },
32 |   "homepage": "https://github.com/atom/superstring",
33 |   "dependencies": {
34 |     "nan": "^2.14.2"
35 |   },
36 |   "devDependencies": {
37 |     "chai": "^2.0.0",
38 |     "mocha": "^2.3.4",
39 |     "random-seed": "^0.2.0",
40 |     "standard": "^4.5.4",
41 |     "temp": "^0.8.3",
42 |     "unzip": "^0.1.11"
43 |   },
44 |   "standard": {
45 |     "global": [
46 |       "describe",
47 |       "it",
48 |       "expect"
49 |     ]
50 |   }
51 | }
52 | 


--------------------------------------------------------------------------------
/script/build-browser-version.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Builds browser.js with emscripten: first compiles the vendored PCRE2 sources
 4 | # (16-bit code units) into build/pcre.o, then compiles and links the core and
 5 | # emscripten binding sources against it. Extra arguments are passed to em++.
 6 | 
 7 | source emsdk-portable/emsdk_env.sh
 8 | 
 9 | # Stop at the first failing step so that a broken PCRE compile can never be
10 | # followed by a link against a stale build/pcre.o. Enabled only after the SDK
11 | # environment script has been sourced, so that script runs as it did before.
12 | set -e
13 | 
14 | mkdir -p build
15 | 
16 | emcc                                         \
17 |   -o build/pcre.o                            \
18 |   -O3                                        \
19 |   -I vendor/pcre/10.23/src                   \
20 |   -I vendor/pcre/include                     \
21 |   -D HAVE_CONFIG_H                           \
22 |   -D PCRE2_CODE_UNIT_WIDTH=16                \
23 |   vendor/pcre/pcre2_chartables.c             \
24 |   vendor/pcre/10.23/src/pcre2_auto_possess.c \
25 |   vendor/pcre/10.23/src/pcre2_compile.c      \
26 |   vendor/pcre/10.23/src/pcre2_config.c       \
27 |   vendor/pcre/10.23/src/pcre2_context.c      \
28 |   vendor/pcre/10.23/src/pcre2_dfa_match.c    \
29 |   vendor/pcre/10.23/src/pcre2_error.c        \
30 |   vendor/pcre/10.23/src/pcre2_find_bracket.c \
31 |   vendor/pcre/10.23/src/pcre2_jit_compile.c  \
32 |   vendor/pcre/10.23/src/pcre2_maketables.c   \
33 |   vendor/pcre/10.23/src/pcre2_match.c        \
34 |   vendor/pcre/10.23/src/pcre2_match_data.c   \
35 |   vendor/pcre/10.23/src/pcre2_newline.c      \
36 |   vendor/pcre/10.23/src/pcre2_ord2utf.c      \
37 |   vendor/pcre/10.23/src/pcre2_pattern_info.c \
38 |   vendor/pcre/10.23/src/pcre2_serialize.c    \
39 |   vendor/pcre/10.23/src/pcre2_string_utils.c \
40 |   vendor/pcre/10.23/src/pcre2_study.c        \
41 |   vendor/pcre/10.23/src/pcre2_substitute.c   \
42 |   vendor/pcre/10.23/src/pcre2_substring.c    \
43 |   vendor/pcre/10.23/src/pcre2_tables.c       \
44 |   vendor/pcre/10.23/src/pcre2_ucd.c          \
45 |   vendor/pcre/10.23/src/pcre2_valid_utf.c    \
46 |   vendor/pcre/10.23/src/pcre2_xclass.c
47 | 
48 | em++                                    \
49 |   --bind                                \
50 |   -o browser.js                         \
51 |   -O3                                   \
52 |   -I src/bindings/em                    \
53 |   -I src/core                           \
54 |   -I vendor/libcxx                      \
55 |   -I vendor/pcre/include                \
56 |   -D PCRE2_CODE_UNIT_WIDTH=16           \
57 |   -xc++                                 \
58 |   --pre-js src/bindings/em/prologue.js  \
59 |   --post-js src/bindings/em/epilogue.js \
60 |   src/core/*.cc                         \
61 |   src/bindings/em/*.cc                  \
62 |   build/pcre.o                          \
63 |   -s TOTAL_MEMORY=134217728             \
64 |   --memory-init-file 0                  \
65 |   "$@"
66 | 


--------------------------------------------------------------------------------
/script/install-emscripten.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Downloads the portable emscripten SDK installer (unless ./emsdk-portable/emsdk
 4 | # already exists), then installs and activates the pinned SDK version.
 5 | 
 6 | # Abort on the first failure. `pipefail` makes a failed download abort the
 7 | # script even though `curl` is not the last command of its pipeline.
 8 | set -eo pipefail
 9 | 
10 | EMSCRIPTEN_DOWNLOAD_URL='https://s3.amazonaws.com/mozilla-games/emscripten/releases/emsdk-portable.tar.gz'
11 | EMSDK_PATH="./emsdk-portable/emsdk"
12 | SDK_VERSION='sdk-1.37.9-64bit'
13 | 
14 | if [ ! -f "$EMSDK_PATH" ]; then
15 |   echo 'Downloading emscripten SDK installer...'
16 |   # --fail turns an HTTP error into a non-zero exit instead of piping the
17 |   # error page into tar.
18 |   curl --fail "$EMSCRIPTEN_DOWNLOAD_URL" | tar xz
19 | fi
20 | 
21 | echo 'Installing emscripten SDK...'
22 | 
23 | # Workaround https://github.com/juj/emsdk/pull/74
24 | sed -i{} "s_/kripken/emscripten/'_/kripken/emscripten'_" "$EMSDK_PATH"
25 | sed -i{} "s_/WebAssembly/binaryen/'_/WebAssembly/binaryen'_" "$EMSDK_PATH"
26 | 
27 | "$EMSDK_PATH" update
28 | "$EMSDK_PATH" list
29 | "$EMSDK_PATH" install "$SDK_VERSION"
30 | "$EMSDK_PATH" activate "$SDK_VERSION"
31 | 


--------------------------------------------------------------------------------
/script/test-native.js:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | 
 3 | const fs = require('fs')
 4 | const path = require('path')
 5 | const {spawnSync} = require('child_process')
 6 | 
 7 | const testsPath = path.resolve(__dirname, '..', 'build', 'Debug', 'tests')
 8 | const dotPath = path.resolve(__dirname, '..', 'build', 'debug.dot')
 9 | const htmlPath = path.join(__dirname, '..', 'build', 'debug.html')
10 | 
11 | if (fs.existsSync(testsPath)) {
12 |   run('node-gyp', ['build'])
13 | } else {
14 |   run('node-gyp', ['rebuild', '--debug', '--tests'])
15 | }
16 | 
17 | const args = process.argv.slice(2)
18 | 
19 | switch (args[0]) {
20 |   case '-d':
21 |   case '--debug':
22 |     args.shift()
23 |     run('lldb', [testsPath, '--', ...args])
24 |     break
25 | 
26 |   case '-v':
27 |   case '--valgrind':
28 |     args.shift()
29 |     run('valgrind', ['--leak-check=full', testsPath, args[0]])
30 |     break
31 | 
32 |   case '-s':
33 |   case '--svg':
34 |     args.shift()
35 | 
36 |     let dotFile = fs.openSync(dotPath, 'w')
37 |     const {status} = spawnSync(testsPath, args, {stdio: ['ignore', 1, dotFile]})
38 |     fs.closeSync(dotFile)
39 | 
40 |     dotFile = fs.openSync(dotPath, 'r')
41 |     let htmlFile = fs.openSync(htmlPath, 'w')
42 |     fs.writeSync(htmlFile, '<!doctype HTML>\n<style>svg {width: 100%;}</style>\n')
43 |     spawnSync('dot', ['-Tsvg'], {stdio: [dotFile, htmlFile, 2]})
44 |     spawnSync('open', [htmlPath])
45 | 
46 |     process.exit(status)
47 |     break
48 | 
49 |   default:
50 |     run(testsPath, args)
51 |     break
52 | }
53 | 
54 | function run(command, args = [], options = {stdio: 'inherit'}) {
55 |   const {status} = spawnSync(command, args, options)
56 |   if (status !== 0) process.exit(status)
57 | }


--------------------------------------------------------------------------------
/script/test-with-debug-graph.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | echo '<?xml version="1.0"?>' > build/debug.html
4 | echo '<style> svg { width: 100%; margin-bottom: 10px; } </style>' >> build/debug.html
5 | node_modules/.bin/mocha test/js/*.js 2> >(dot -Tsvg >> build/debug.html)
6 | open build/debug.html
7 | 


--------------------------------------------------------------------------------
/src/bindings/bindings.cc:
--------------------------------------------------------------------------------
 1 | #include "marker-index-wrapper.h"
 2 | #include "nan.h"
 3 | #include "patch-wrapper.h"
 4 | #include "range-wrapper.h"
 5 | #include "point-wrapper.h"
 6 | #include "text-writer.h"
 7 | #include "text-reader.h"
 8 | #include "text-buffer-wrapper.h"
 9 | #include "text-buffer-snapshot-wrapper.h"
10 | 
11 | using namespace v8;
12 | 
13 | void Init(Local<Object> exports) {  // Addon initializer: calls each wrapper's static init() in turn.
14 |   PointWrapper::init();
15 |   RangeWrapper::init();
16 |   PatchWrapper::init(exports);  // This and the next four calls receive `exports`; the other three take no arguments.
17 |   MarkerIndexWrapper::init(exports);
18 |   TextBufferWrapper::init(exports);
19 |   TextWriter::init(exports);
20 |   TextReader::init(exports);
21 |   TextBufferSnapshotWrapper::init();
22 | }
23 | 
24 | NODE_MODULE(superstring, Init)  // Registers Init as the initializer of the `superstring` module.
25 | 


--------------------------------------------------------------------------------
/src/bindings/em/epilogue.js:
--------------------------------------------------------------------------------
1 |   return Module;
2 | }));
3 | 


--------------------------------------------------------------------------------
/src/bindings/em/marker-index.cc:
--------------------------------------------------------------------------------
 1 | #include "auto-wrap.h"
 2 | #include "marker-index.h"
 3 | #include <emscripten/bind.h>
 4 | 
 5 | EMSCRIPTEN_BINDINGS(MarkerIndex) {  // embind registration for MarkerIndex and the value objects bound below.
 6 |   emscripten::class_<MarkerIndex>("MarkerIndex")  // snake_case C++ methods are exposed under camelCase JS names.
 7 |     .constructor<>()
 8 |     .constructor<unsigned>()  // Second constructor overload taking one unsigned argument.
 9 |     .function("generateRandomNumber", WRAP(&MarkerIndex::generate_random_number))  // WRAP comes from auto-wrap.h (not shown here).
10 |     .function("insert", WRAP(&MarkerIndex::insert))
11 |     .function("setExclusive", WRAP(&MarkerIndex::set_exclusive))
12 |     .function("remove", WRAP(&MarkerIndex::remove))
13 |     .function("splice", WRAP(&MarkerIndex::splice))
14 |     .function("has", WRAP(&MarkerIndex::has))
15 |     .function("getStart", WRAP(&MarkerIndex::get_start))
16 |     .function("getEnd", WRAP(&MarkerIndex::get_end))
17 |     .function("getRange", WRAP(&MarkerIndex::get_range))
18 |     .function("compare", WRAP(&MarkerIndex::compare))
19 |     .function("findIntersecting", WRAP(&MarkerIndex::find_intersecting))
20 |     .function("findContaining", WRAP(&MarkerIndex::find_containing))
21 |     .function("findContainedIn", WRAP(&MarkerIndex::find_contained_in))
22 |     .function("findStartingIn", WRAP(&MarkerIndex::find_starting_in))
23 |     .function("findStartingAt", WRAP(&MarkerIndex::find_starting_at))
24 |     .function("findEndingIn", WRAP(&MarkerIndex::find_ending_in))
25 |     .function("findEndingAt", WRAP(&MarkerIndex::find_ending_at))
26 |     .function("findBoundariesAfter", WRAP(&MarkerIndex::find_boundaries_after))
27 |     .function("dump", WRAP(&MarkerIndex::dump));
28 | 
29 |   emscripten::value_object<MarkerIndex::SpliceResult>("SpliceResult")  // Fields bound through WRAP_FIELD.
30 |     .field("touch", WRAP_FIELD(MarkerIndex::SpliceResult, touch))
31 |     .field("inside", WRAP_FIELD(MarkerIndex::SpliceResult, inside))
32 |     .field("overlap", WRAP_FIELD(MarkerIndex::SpliceResult, overlap))
33 |     .field("surround", WRAP_FIELD(MarkerIndex::SpliceResult, surround));
34 | 
35 |   emscripten::value_object<MarkerIndex::BoundaryQueryResult>("BoundaryQueryResult")  // Fields bound directly by member pointer.
36 |     .field("containing_start", &MarkerIndex::BoundaryQueryResult::containing_start)  // Note: this JS field name stays snake_case.
37 |     .field("boundaries", &MarkerIndex::BoundaryQueryResult::boundaries);
38 | 
39 |   emscripten::value_object<MarkerIndex::Boundary>("Boundary")  // Fields bound directly by member pointer.
40 |     .field("position", &MarkerIndex::Boundary::position)
41 |     .field("starting", &MarkerIndex::Boundary::starting)
42 |     .field("ending", &MarkerIndex::Boundary::ending);
43 | }
44 | 


--------------------------------------------------------------------------------
/src/bindings/em/patch.cc:
--------------------------------------------------------------------------------
  1 | #include <memory>
  2 | #include <vector>
  3 | #include "auto-wrap.h"
  4 | #include "patch.h"
  5 | #include <emscripten/bind.h>
  6 | #include <emscripten/val.h>
  7 | 
  8 | using std::runtime_error;
  9 | using std::string;
 10 | using std::vector;
 11 | 
 12 | template <>  // Explicit specialization of emscripten::val::as for `Patch const *`.
 13 | inline Patch const *emscripten::val::as<Patch const *>(void) const {
 14 |   using namespace emscripten;
 15 |   using namespace internal;
 16 | 
 17 |   EM_DESTRUCTORS destructors;
 18 |   EM_GENERIC_WIRE_TYPE result = _emval_as(  // Converts this handle using the type id of AllowedRawPointer<Patch const>.
 19 |     handle,
 20 |     TypeID<AllowedRawPointer<Patch const>>::get(),
 21 |     &destructors
 22 |   );
 23 |   DestructorsRunner destructors_runner(destructors);  // NOTE(review): presumably runs the collected destructors on scope exit - confirm against the emscripten version in use.
 24 | 
 25 |   return fromGenericWireType<Patch *>(result);  // Wire value is read back as a (non-const) Patch pointer.
 26 | }
 27 | 
 28 | // Creates a heap-allocated Patch from a JS options value. The
 29 | // `mergeAdjacentChanges` property is only read when `value` itself is truthy.
 30 | Patch *constructor(emscripten::val value) {
 31 |   const bool merge_adjacent_changes =
 32 |     value.as<bool>() && value["mergeAdjacentChanges"].as<bool>();
 33 |   return new Patch(merge_adjacent_changes);
 34 | }
 35 | 
 36 | vector<uint8_t> serialize(Patch &patch) {  // Returns the bytes produced by Patch::serialize for `patch`.
 37 |   vector<uint8_t> output;
 38 |   Serializer serializer(output);  // The serializer is constructed over `output`, which is what gets returned.
 39 |   patch.serialize(serializer);
 40 |   return output;
 41 | }
 42 | 
 43 | // Folds `patches` into a single new Patch via Patch::combine, flipping the
 44 | // direction flag after each successful step. Returns nullptr, freeing the
 45 | // partial result, as soon as one combine call fails.
 46 | Patch *compose(vector<Patch const *> const &patches) {
 47 |   std::unique_ptr<Patch> composed(new Patch());
 48 |   bool left_to_right = true;
 49 |   for (const Patch *patch : patches) {
 50 |     if (!composed->combine(*patch, left_to_right)) return nullptr;
 51 |     left_to_right = !left_to_right;
 52 |   }
 53 |   return composed.release();
 54 | }
 55 | 
 56 | Patch *deserialize(const vector<uint8_t> &bytes) {  // Builds a heap-allocated Patch from a Deserializer over `bytes`.
 57 |   Deserializer deserializer(bytes);
 58 |   return new Patch(deserializer);  // Returned as a raw pointer; the binding registers it with allow_raw_pointers().
 59 | }
 60 | 
 61 | bool splice(Patch &patch, Point start, Point deleted_extent, Point inserted_extent) {  // Forwards to the three-argument Patch::splice and returns its result.
 62 |   return patch.splice(
 63 |     start,
 64 |     deleted_extent,
 65 |     inserted_extent
 66 |   );
 67 | }
 68 | 
 69 | bool splice_with_text(Patch &patch, Point start, Point deleted_extent, Point inserted_extent,  // Like splice, but also passes the deleted and inserted text.
 70 |                       const string &deleted_text, const string &inserted_text) {
 71 |   return patch.splice(
 72 |     start,
 73 |     deleted_extent,
 74 |     inserted_extent,
 75 |     Text(deleted_text.begin(), deleted_text.end()),  // Each std::string is converted to a Text built from its character range.
 76 |     Text(inserted_text.begin(), inserted_text.end())
 77 |   );
 78 | }
 79 | 
 80 | template <typename T>  // Setter that ignores its value and leaves `change` untouched.
 81 | void change_set_noop(Patch::Change &change, T const &) {}  // NOTE(review): not referenced anywhere in this file - confirm whether it is still needed.
 82 | 
 83 | EMSCRIPTEN_BINDINGS(Patch) {  // embind registration for Patch and its Change value object.
 84 |   emscripten::class_<Patch>("Patch")
 85 |     .constructor<>()
 86 |     .constructor<emscripten::val>(WRAP_STATIC(&constructor), emscripten::allow_raw_pointers())  // One-argument construction goes through the free function `constructor`.
 87 |     .function("splice", splice)  // "splice" is registered twice: this overload takes three Points...
 88 |     .function("splice", splice_with_text)  // ...and this one additionally takes the deleted and inserted text.
 89 |     .function("spliceOld", WRAP(&Patch::splice_old))
 90 |     .function("copy", WRAP(&Patch::copy))
 91 |     .function("invert", WRAP(&Patch::invert))
 92 |     .function("getChanges", WRAP(&Patch::get_changes))
 93 |     .function("getChangesInNewRange", WRAP(&Patch::grab_changes_in_new_range))  // The JS "get*" names here map to Patch::grab_* methods.
 94 |     .function("getChangesInOldRange", WRAP(&Patch::grab_changes_in_old_range))
 95 |     .function("getChangeCount", WRAP(&Patch::get_change_count))
 96 |     .function("changeForOldPosition", WRAP(&Patch::grab_change_starting_before_old_position))
 97 |     .function("changeForNewPosition", WRAP(&Patch::grab_change_starting_before_new_position))
 98 |     .function("getBounds", WRAP(&Patch::get_bounds))
 99 |     .function("rebalance", WRAP(&Patch::rebalance))
100 |     .function("serialize", WRAP(&serialize))  // Bound to the free function `serialize` defined in this file.
101 |     .class_function("compose", WRAP_STATIC(&compose), emscripten::allow_raw_pointers())  // Static functions returning raw Patch pointers.
102 |     .class_function("deserialize", WRAP_STATIC(&deserialize), emscripten::allow_raw_pointers());
103 | 
104 |   emscripten::value_object<Patch::Change>("Change")  // snake_case members are exposed as camelCase fields.
105 |     .field("oldStart", WRAP_FIELD(Patch::Change, old_start))
106 |     .field("oldEnd", WRAP_FIELD(Patch::Change, old_end))
107 |     .field("newStart", WRAP_FIELD(Patch::Change, new_start))
108 |     .field("newEnd", WRAP_FIELD(Patch::Change, new_end))
109 |     .field("oldText", WRAP_FIELD(Patch::Change, old_text))
110 |     .field("newText", WRAP_FIELD(Patch::Change, new_text));
111 | }
112 | 


--------------------------------------------------------------------------------
/src/bindings/em/point.cc:
--------------------------------------------------------------------------------
 1 | #include "point.h"
 2 | #include <emscripten/bind.h>
 3 | #include <algorithm>
 4 | #include <limits>
 5 | 
 6 | // Restricts a JavaScript number to the range of `unsigned` before it is stored
 7 | // in a Point: NaN and anything not greater than zero become 0, larger values
 8 | // are capped at the maximum `unsigned` value.
 9 | static double clamp_coordinate(double value) {
10 |   // `!(value > 0)` also catches NaN. A plain `value < 0` test lets NaN pass
11 |   // through std::min unchanged and reach the conversion into the Point field,
12 |   // which is undefined for NaN (assuming the fields are integers, as the
13 |   // `unsigned` bound suggests).
14 |   if (!(value > 0)) return 0;
15 |   return std::min(value, static_cast<double>(std::numeric_limits<unsigned>::max()));
16 | }
17 | 
18 | // Reads the row as a double for JavaScript.
19 | double get_row(const Point &point) {
20 |   return point.row;
21 | }
22 | 
23 | // Stores `row` after clamping it with clamp_coordinate.
24 | void set_row(Point &point, double row) {
25 |   point.row = clamp_coordinate(row);
26 | }
27 | 
28 | // Reads the column as a double for JavaScript.
29 | double get_column(const Point &point) {
30 |   return point.column;
31 | }
32 | 
33 | // Stores `column` after clamping it with clamp_coordinate.
34 | void set_column(Point &point, double column) {
35 |   point.column = clamp_coordinate(column);
36 | }
37 | 
38 | // Exposes Point to JavaScript as a value object whose `row` and `column`
39 | // fields go through the accessors above.
40 | EMSCRIPTEN_BINDINGS(Point) {
41 |   emscripten::value_object<Point>("Point")
42 |     .field("row", &get_row, &set_row)
43 |     .field("column", &get_column, &set_column);
44 | }
45 | 


--------------------------------------------------------------------------------
/src/bindings/em/prologue.js:
--------------------------------------------------------------------------------
 1 | (function (root, factory) {
 2 |   if (typeof define === 'function' && define.amd) {
 3 |     define([], factory);
 4 |   } else if (typeof exports === 'object') {
 5 |     module.exports = factory();
 6 |   } else {
 7 |     window.Superstring = factory();
 8 |   }
 9 | }(this, function () {
10 | 


--------------------------------------------------------------------------------
/src/bindings/em/range.cc:
--------------------------------------------------------------------------------
 1 | #include "auto-wrap.h"
 2 | #include "range.h"
 3 | #include <emscripten/bind.h>
 4 | 
 5 | EMSCRIPTEN_BINDINGS(Range) {  // embind registration for Range.
 6 |   emscripten::value_object<Range>("Range")  // Exposed as a value object with `start` and `end` fields.
 7 |     .field("start", WRAP_FIELD(Range, start))
 8 |     .field("end", WRAP_FIELD(Range, end));
 9 | }
10 | 


--------------------------------------------------------------------------------
/src/bindings/em/text-buffer.cc:
--------------------------------------------------------------------------------
  1 | #include "auto-wrap.h"
  2 | #include "text-buffer.h"
  3 | #include "marker-index.h"
  4 | #include <emscripten/bind.h>
  5 | 
  6 | using std::string;
  7 | using std::u16string;
  8 | 
  9 | static TextBuffer *construct(const std::wstring &text) {  // Creates a heap-allocated TextBuffer from `text`.
 10 |   return new TextBuffer(u16string(text.begin(), text.end()));  // Each wchar_t is converted element-wise into a char16_t.
 11 | }
 12 | 
 13 | // Finds the first match of `js_pattern` inside `range`. The result is the
 14 | // regex error message when compilation reports one, the value returned by
 15 | // TextBuffer::find when there is a match, and null otherwise.
 16 | static emscripten::val find_sync(TextBuffer &buffer, std::wstring js_pattern, bool ignore_case, bool unicode, Range range) {
 17 |   u16string pattern(js_pattern.begin(), js_pattern.end());
 18 |   u16string compile_error;
 19 |   Regex regex(pattern, &compile_error, ignore_case, unicode);
 20 |   if (!compile_error.empty()) {
 21 |     return emscripten::val(string(compile_error.begin(), compile_error.end()));
 22 |   }
 23 | 
 24 |   auto match = buffer.find(regex, range);
 25 |   if (!match) return emscripten::val::null();
 26 |   return emscripten::val(*match);
 27 | }
 28 | 
 29 | static emscripten::val find_all_sync(TextBuffer &buffer, std::wstring js_pattern, bool ignore_case, bool unicode, Range range) {  // Returns every match in `range`, or the regex error message as a string.
 30 |   u16string pattern(js_pattern.begin(), js_pattern.end());
 31 |   u16string error_message;
 32 |   Regex regex(pattern, &error_message, ignore_case, unicode);
 33 |   if (!error_message.empty()) {  // A non-empty message means the pattern was rejected.
 34 |     return emscripten::val(string(error_message.begin(), error_message.end()));
 35 |   }
 36 | 
 37 |   return em_transmit(buffer.find_all(regex, range));  // em_transmit is declared outside this file (presumably auto-wrap.h).
 38 | }
 39 | 
 40 | static emscripten::val find_and_mark_all_sync(TextBuffer &buffer, MarkerIndex &index, unsigned next_id,  // Forwards to TextBuffer::find_and_mark_all, or returns the regex error message as a string.
 41 |                                               bool exclusive, std::wstring js_pattern, bool ignore_case, bool unicode,
 42 |                                               Range range) {
 43 |   u16string pattern(js_pattern.begin(), js_pattern.end());
 44 |   u16string error_message;
 45 |   Regex regex(pattern, &error_message, ignore_case, unicode);
 46 |   if (!error_message.empty()) {  // A non-empty message means the pattern was rejected.
 47 |     return emscripten::val(string(error_message.begin(), error_message.end()));
 48 |   }
 49 | 
 50 |   return emscripten::val(buffer.find_and_mark_all(index, next_id, exclusive, regex, range));
 51 | }
 52 | 
 53 | static emscripten::val line_ending_for_row(TextBuffer &buffer, uint32_t row) {
 54 |   auto line_ending = buffer.line_ending_for_row(row);
 55 |   if (line_ending) {
 56 |     string result;
 57 |     for (const uint16_t *character = line_ending; *character != 0; character++) {
 58 |       result += (char)*character;
 59 |     }
 60 |     return emscripten::val(result);
 61 |   }
 62 |   return emscripten::val::undefined();
 63 | }
 64 | 
 65 | static uint32_t character_index_for_position(TextBuffer &buffer, Point position) {  // Returns the offset of `position` after clipping it to the buffer.
 66 |   return buffer.clip_position(position).offset;
 67 | }
 68 | 
 69 | static uint32_t get_line_count(TextBuffer &buffer) {  // Line count is the row of the buffer's extent plus one.
 70 |   return buffer.extent().row + 1;
 71 | }
 72 | 
 73 | // Negative indices map to the origin; any other index is looked up by offset.
 74 | static Point position_for_character_index(TextBuffer &buffer, long index) {
 75 |   if (index < 0) return Point{0, 0};
 76 |   return buffer.position_for_offset(static_cast<uint32_t>(index));
 77 | }
 78 | 
 79 | EMSCRIPTEN_BINDINGS(TextBuffer) {  // embind registration for TextBuffer and its SubsequenceMatch value object.
 80 |   emscripten::class_<TextBuffer>("TextBuffer")
 81 |     .constructor<>()
 82 |     .constructor(construct, emscripten::allow_raw_pointers())  // Construction from text goes through the free function `construct`.
 83 |     .function("getText", WRAP(&TextBuffer::text))
 84 |     .function("setText", WRAP_OVERLOAD(&TextBuffer::set_text, void (TextBuffer::*)(u16string &&)))  // WRAP_OVERLOAD names the exact overload to bind.
 85 |     .function("getCharacterAtPosition", WRAP(&TextBuffer::character_at))
 86 |     .function("getTextInRange", WRAP(&TextBuffer::text_in_range))
 87 |     .function("setTextInRange", WRAP_OVERLOAD(&TextBuffer::set_text_in_range, void (TextBuffer::*)(Range, u16string &&)))
 88 |     .function("getLength", &TextBuffer::size)
 89 |     .function("getExtent", &TextBuffer::extent)
 90 |     .function("getLineCount", get_line_count)  // Unqualified names in this list are the static helpers defined in this file.
 91 |     .function("hasAstral", &TextBuffer::has_astral)
 92 |     .function("reset", WRAP(&TextBuffer::reset))
 93 |     .function("lineLengthForRow", WRAP(&TextBuffer::line_length_for_row))
 94 |     .function("lineEndingForRow", line_ending_for_row)
 95 |     .function("lineForRow", WRAP(&TextBuffer::line_for_row))
 96 |     .function("characterIndexForPosition", character_index_for_position)
 97 |     .function("positionForCharacterIndex", position_for_character_index)
 98 |     .function("isModified", WRAP_OVERLOAD(&TextBuffer::is_modified, bool (TextBuffer::*)() const))
 99 |     .function("findSync", find_sync)
100 |     .function("findAllSync", find_all_sync)
101 |     .function("findAndMarkAllSync", find_and_mark_all_sync)
102 |     .function("findWordsWithSubsequenceInRange", WRAP(&TextBuffer::find_words_with_subsequence_in_range));
103 | 
104 |   emscripten::value_object<TextBuffer::SubsequenceMatch>("SubsequenceMatch")  // snake_case members are exposed as camelCase fields.
105 |     .field("word", WRAP_FIELD(TextBuffer::SubsequenceMatch, word))
106 |     .field("positions", WRAP_FIELD(TextBuffer::SubsequenceMatch, positions))
107 |     .field("matchIndices", WRAP_FIELD(TextBuffer::SubsequenceMatch, match_indices))
108 |     .field("score", WRAP_FIELD(TextBuffer::SubsequenceMatch, score));
109 | }
110 | 


--------------------------------------------------------------------------------
/src/bindings/marker-index-wrapper.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <unordered_map>
 4 | #include <vector>
 5 | 
 6 | #include "nan.h"
 7 | #include "marker-index.h"
 8 | #include "optional.h"
 9 | #include "range.h"
10 | 
11 | // Nan::ObjectWrap subclass that owns a MarkerIndex. The static methods taking
12 | // a Nan::FunctionCallbackInfo are the callbacks for the JS-facing methods; the
13 | // remaining static helpers convert values between JS and C++.
14 | class MarkerIndexWrapper : public Nan::ObjectWrap {
15 | public:
16 |   static void init(v8::Local<v8::Object> exports);
17 |   static MarkerIndex *from_js(v8::Local<v8::Value>);
18 | 
19 | private:
20 |   static void construct(const Nan::FunctionCallbackInfo<v8::Value> &info);
21 |   static void generate_random_number(const Nan::FunctionCallbackInfo<v8::Value> &info);
22 |   static bool is_finite(v8::Local<v8::Integer> number);
23 |   static v8::Local<v8::Set> marker_ids_set_to_js(const MarkerIndex::MarkerIdSet &marker_ids);
24 |   static v8::Local<v8::Array> marker_ids_vector_to_js(const std::vector<MarkerIndex::MarkerId> &marker_ids);
25 |   static v8::Local<v8::Object> snapshot_to_js(const std::unordered_map<MarkerIndex::MarkerId, Range> &snapshot);
26 |   static optional<MarkerIndex::MarkerId> marker_id_from_js(v8::Local<v8::Value> value);
27 |   static optional<unsigned> unsigned_from_js(v8::Local<v8::Value> value);
28 |   static optional<bool> bool_from_js(v8::Local<v8::Value> value);
29 |   static void insert(const Nan::FunctionCallbackInfo<v8::Value> &info);
30 |   static void set_exclusive(const Nan::FunctionCallbackInfo<v8::Value> &info);
31 |   static void remove(const Nan::FunctionCallbackInfo<v8::Value> &info);
32 |   static void has(const Nan::FunctionCallbackInfo<v8::Value> &info);
33 |   static void splice(const Nan::FunctionCallbackInfo<v8::Value> &info);
34 |   static void get_start(const Nan::FunctionCallbackInfo<v8::Value> &info);
35 |   static void get_end(const Nan::FunctionCallbackInfo<v8::Value> &info);
36 |   static void get_range(const Nan::FunctionCallbackInfo<v8::Value> &info);
37 |   static void compare(const Nan::FunctionCallbackInfo<v8::Value> &info);
38 |   static void find_intersecting(const Nan::FunctionCallbackInfo<v8::Value> &info);
39 |   static void find_containing(const Nan::FunctionCallbackInfo<v8::Value> &info);
40 |   static void find_contained_in(const Nan::FunctionCallbackInfo<v8::Value> &info);
41 |   static void find_starting_in(const Nan::FunctionCallbackInfo<v8::Value> &info);
42 |   static void find_starting_at(const Nan::FunctionCallbackInfo<v8::Value> &info);
43 |   static void find_ending_in(const Nan::FunctionCallbackInfo<v8::Value> &info);
44 |   static void find_ending_at(const Nan::FunctionCallbackInfo<v8::Value> &info);
45 |   static void find_boundaries_after(const Nan::FunctionCallbackInfo<v8::Value> &info);
46 |   static void dump(const Nan::FunctionCallbackInfo<v8::Value> &info);
47 |   MarkerIndexWrapper(unsigned seed);
48 |   MarkerIndex marker_index;
49 | };
50 | 


--------------------------------------------------------------------------------
/src/bindings/noop.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "nan.h"
4 | 
5 | static void noop(const Nan::FunctionCallbackInfo<v8::Value>&) {}  // Callback that ignores its call info and does nothing.
6 | 


--------------------------------------------------------------------------------
/src/bindings/number-conversion.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPERSTRING_NUMBER_CONVERSION_H
 2 | #define SUPERSTRING_NUMBER_CONVERSION_H
 3 | 
 4 | #include "nan.h"
 5 | #include "optional.h"
 6 | 
 7 | namespace number_conversion {
 8 |   // Converts `js_value` to a T in two steps: first to a v8::Number, then to T
 9 |   // via Nan::To<T>. Yields an empty optional when either step fails.
10 |   template<typename T>
11 |   optional<T> number_from_js(v8::Local<v8::Value> js_value) {
12 |     v8::Local<v8::Number> as_number;
13 |     if (!Nan::To<v8::Number>(js_value).ToLocal(&as_number)) {
14 |       return optional<T>{};
15 |     }
16 | 
17 |     auto converted = Nan::To<T>(as_number);
18 |     if (!converted.IsJust()) {
19 |       return optional<T>{};
20 |     }
21 |     return converted.FromJust();
22 |   }
23 | }
24 | 
25 | #endif // SUPERSTRING_NUMBER_CONVERSION_H
26 | 


--------------------------------------------------------------------------------
/src/bindings/patch-wrapper.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <nan.h>
 4 | #include "patch.h"
 5 | 
 6 | // Nan::ObjectWrap subclass that owns a Patch. The private static methods are
 7 | // the Nan callbacks for the JS-facing methods; from_patch produces a JS value
 8 | // from a Patch that is moved in.
 9 | class PatchWrapper : public Nan::ObjectWrap {
10 |  public:
11 |   static void init(v8::Local<v8::Object> exports);
12 |   static v8::Local<v8::Value> from_patch(Patch &&);
13 | 
14 |  private:
15 |   PatchWrapper(Patch &&patch);
16 |   static void construct(const Nan::FunctionCallbackInfo<v8::Value> &info);
17 |   static void splice(const Nan::FunctionCallbackInfo<v8::Value> &info);
18 |   static void splice_old(const Nan::FunctionCallbackInfo<v8::Value> &info);
19 |   static void copy(const Nan::FunctionCallbackInfo<v8::Value> &info);
20 |   static void invert(const Nan::FunctionCallbackInfo<v8::Value> &info);
21 |   static void get_changes(const Nan::FunctionCallbackInfo<v8::Value> &info);
22 |   static void get_changes_in_old_range(const Nan::FunctionCallbackInfo<v8::Value> &info);
23 |   static void get_changes_in_new_range(const Nan::FunctionCallbackInfo<v8::Value> &info);
24 |   static void change_for_old_position(const Nan::FunctionCallbackInfo<v8::Value> &info);
25 |   static void change_for_new_position(const Nan::FunctionCallbackInfo<v8::Value> &info);
26 |   static void serialize(const Nan::FunctionCallbackInfo<v8::Value> &info);
27 |   static void deserialize(const Nan::FunctionCallbackInfo<v8::Value> &info);
28 |   static void compose(const Nan::FunctionCallbackInfo<v8::Value> &info);
29 |   static void get_dot_graph(const Nan::FunctionCallbackInfo<v8::Value> &info);
30 |   static void get_json(const Nan::FunctionCallbackInfo<v8::Value> &info);
31 |   static void get_change_count(const Nan::FunctionCallbackInfo<v8::Value> &info);
32 |   static void get_bounds(const Nan::FunctionCallbackInfo<v8::Value> &info);
33 |   static void rebalance(const Nan::FunctionCallbackInfo<v8::Value> &info);
34 | 
35 |   Patch patch;
36 | };
37 | 


--------------------------------------------------------------------------------
/src/bindings/point-wrapper.cc:
--------------------------------------------------------------------------------
 1 | #include "point-wrapper.h"
 2 | #include <cmath>
 3 | #include "nan.h"
 4 | 
 5 | using namespace v8;
 6 | 
 7 | static Nan::Persistent<String> row_string;
 8 | static Nan::Persistent<String> column_string;
 9 | static Nan::Persistent<v8::Function> constructor;
10 | 
11 | // Clamps a JS integer to the uint32_t range: NaN and values <= 0 give 0.
12 | static uint32_t number_from_js(Local<Integer> js_number) {
13 |   double number = Nan::To<double>(js_number).FromMaybe(0);
14 |   if (!(number > 0)) return 0;
15 |   // Saturate: casting an out-of-range double (incl. +Infinity) to uint32_t is undefined.
16 |   if (number >= UINT32_MAX) return UINT32_MAX;
17 |   return static_cast<uint32_t>(number);
18 | }
19 | 
20 | // Builds a Point from a JS object's `row` and `column` properties. Returns an
21 | // empty optional, with a JS exception pending, when the value is unusable.
22 | optional<Point> PointWrapper::point_from_js(Local<Value> value) {
23 |   const char *message = "Expected an object with 'row' and 'column' properties.";
24 |   Local<Object> object;
25 |   if (!Nan::To<Object>(value).ToLocal(&object)) {
26 |     Nan::ThrowTypeError(message);
27 |     return optional<Point>{};
28 |   }
29 |   uint32_t coordinates[2] = {0, 0};
30 |   const Nan::Persistent<String> *keys[2] = {&row_string, &column_string};
31 |   for (int i = 0; i < 2; i++) {
32 |     // A throwing getter yields no value; propagate its exception, don't abort via ToLocalChecked().
33 |     Local<Value> js_value;
34 |     if (!Nan::Get(object, Nan::New(*keys[i])).ToLocal(&js_value)) return optional<Point>{};
35 |     Local<Integer> js_integer;
36 |     if (!Nan::To<Integer>(js_value).ToLocal(&js_integer)) {
37 |       Nan::ThrowTypeError(message);
38 |       return optional<Point>{};
39 |     }
40 |     coordinates[i] = number_from_js(js_integer);
41 |   }
42 |   return Point(coordinates[0], coordinates[1]);
43 | }
44 | 
45 | // Caches the property-name strings and builds the JS `Point` constructor.
46 | void PointWrapper::init() {
47 |   row_string.Reset(Nan::New("row").ToLocalChecked());  // straight from the Local; no temporary Persistent
48 |   column_string.Reset(Nan::New("column").ToLocalChecked());
49 |   Local<FunctionTemplate> constructor_template = Nan::New<FunctionTemplate>(construct);
50 |   constructor_template->SetClassName(Nan::New<String>("Point").ToLocalChecked());
51 |   constructor_template->InstanceTemplate()->SetInternalFieldCount(1);
52 |   Nan::SetAccessor(constructor_template->InstanceTemplate(), Nan::New(row_string), get_row);
53 |   Nan::SetAccessor(constructor_template->InstanceTemplate(), Nan::New(column_string), get_column);
54 |   constructor.Reset(Nan::GetFunction(constructor_template).ToLocalChecked());
55 | }
56 | 
57 | // Wraps `point` in a new JS Point object; yields JS null if instantiation fails.
58 | Local<Value> PointWrapper::from_point(Point point) {
59 |   Local<Function> make_point = Nan::New(constructor);
60 |   Local<Object> js_point;
61 |   if (!make_point->NewInstance(Nan::GetCurrentContext()).ToLocal(&js_point)) return Nan::Null();
62 |   auto *wrapper = new PointWrapper(point);
63 |   wrapper->Wrap(js_point);
64 |   return js_point;
65 | }
66 | 
67 | PointWrapper::PointWrapper(Point point) : point(point) {}  // stores a copy of `point`
68 | // Constructor callback given to the JS function template; it does nothing (from_point() attaches the wrapper).
69 | void PointWrapper::construct(const Nan::FunctionCallbackInfo<Value> &info) {}
70 | 
71 | // Accessor for the JS `row` property: returns the wrapped point's row.
72 | void PointWrapper::get_row(v8::Local<v8::String> property, const Nan::PropertyCallbackInfo<v8::Value> &info) {
73 |   const Point &wrapped = Nan::ObjectWrap::Unwrap<PointWrapper>(info.This())->point;
74 |   info.GetReturnValue().Set(Nan::New(wrapped.row));
75 | }
76 | 
77 | // Accessor for the JS `column` property: returns the wrapped point's column.
78 | void PointWrapper::get_column(v8::Local<v8::String> property, const Nan::PropertyCallbackInfo<v8::Value> &info) {
79 |   const Point &wrapped = Nan::ObjectWrap::Unwrap<PointWrapper>(info.This())->point;
80 |   info.GetReturnValue().Set(Nan::New(wrapped.column));
81 | }
82 | 


--------------------------------------------------------------------------------
/src/bindings/point-wrapper.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPERSTRING_POINT_WRAPPER_H
 2 | #define SUPERSTRING_POINT_WRAPPER_H
 3 | 
 4 | #include "nan.h"
 5 | #include "optional.h"
 6 | #include "point.h"
 7 | 
 8 | class PointWrapper : public Nan::ObjectWrap {  // JS-facing wrapper around a native Point
 9 | public:
10 |   static void init();
11 |   static v8::Local<v8::Value> from_point(Point point);  // native Point -> JS object (JS null on failure)
12 |   static optional<Point> point_from_js(v8::Local<v8::Value>);  // JS {row, column} -> Point; empty on failure
13 | 
14 | private:
15 |   PointWrapper(Point point);
16 |   // Constructor callback passed to the JS function template.
17 |   static void construct(const Nan::FunctionCallbackInfo<v8::Value> &info);
18 |   // Accessor callback for the `row` property.
19 |   static void get_row(v8::Local<v8::String> property,
20 |                      const Nan::PropertyCallbackInfo<v8::Value> &info);
21 |   // Accessor callback for the `column` property.
22 |   static void get_column(v8::Local<v8::String> property,
23 |                         const Nan::PropertyCallbackInfo<v8::Value> &info);
24 |   // The native point this object wraps.
25 |   Point point;
26 | };
27 | 
28 | #endif // SUPERSTRING_POINT_WRAPPER_H
29 | 


--------------------------------------------------------------------------------
/src/bindings/range-wrapper.cc:
--------------------------------------------------------------------------------
 1 | #include "range-wrapper.h"
 2 | #include "point-wrapper.h"
 3 | #include "nan.h"
 4 | 
 5 | using namespace v8;
 6 | 
 7 | static Nan::Persistent<String> start_string;
 8 | static Nan::Persistent<String> end_string;
 9 | static Nan::Persistent<v8::Function> constructor;
10 | 
11 | // Builds a Range from a JS object's `start` and `end` points. Every lookup is
12 | // checked with ToLocal, so bad input raises a TypeError instead of aborting.
13 | optional<Range> RangeWrapper::range_from_js(Local<Value> value) {
14 |   Local<Object> object;
15 |   Local<Value> js_start, js_end;
16 |   if (Nan::To<Object>(value).ToLocal(&object) &&
17 |       Nan::Get(object, Nan::New(start_string)).ToLocal(&js_start)) {
18 |     auto start = PointWrapper::point_from_js(js_start);
19 |     if (start && Nan::Get(object, Nan::New(end_string)).ToLocal(&js_end)) {
20 |       auto end = PointWrapper::point_from_js(js_end);
21 |       if (end) return Range{*start, *end};
22 |     }
23 |   }
24 |   Nan::ThrowTypeError("Expected an object with 'start' and 'end' properties.");
25 |   return optional<Range>{};
26 | }
27 | 
28 | // Caches the property-name strings and builds the JS `Range` constructor.
29 | void RangeWrapper::init() {
30 |   start_string.Reset(Nan::New("start").ToLocalChecked());  // straight from the Local; no temporary Persistent
31 |   end_string.Reset(Nan::New("end").ToLocalChecked());
32 |   Local<FunctionTemplate> constructor_template = Nan::New<FunctionTemplate>(construct);
33 |   constructor_template->SetClassName(Nan::New<String>("Range").ToLocalChecked());
34 |   constructor_template->InstanceTemplate()->SetInternalFieldCount(1);
35 |   Nan::SetAccessor(constructor_template->InstanceTemplate(), Nan::New(start_string), get_start);
36 |   Nan::SetAccessor(constructor_template->InstanceTemplate(), Nan::New(end_string), get_end);
37 |   constructor.Reset(Nan::GetFunction(constructor_template).ToLocalChecked());
38 | }
39 | 
40 | // Wraps `range` in a new JS Range object; yields JS null if instantiation fails.
41 | Local<Value> RangeWrapper::from_range(Range range) {
42 |   Local<Function> make_range = Nan::New(constructor);
43 |   Local<Object> js_range;
44 |   if (!make_range->NewInstance(Nan::GetCurrentContext()).ToLocal(&js_range)) return Nan::Null();
45 |   auto *wrapper = new RangeWrapper(range);
46 |   wrapper->Wrap(js_range);
47 |   return js_range;
48 | }
49 | 
50 | RangeWrapper::RangeWrapper(Range range) : range(range) {}  // stores a copy of `range`
51 | // Constructor callback given to the JS function template; it does nothing (from_range() attaches the wrapper).
52 | void RangeWrapper::construct(const Nan::FunctionCallbackInfo<Value> &info) {}
53 | 
54 | // Accessor for the JS `start` property: returns the range's start as a JS Point.
55 | void RangeWrapper::get_start(v8::Local<v8::String> property, const Nan::PropertyCallbackInfo<v8::Value> &info) {
56 |   const Range &wrapped = Nan::ObjectWrap::Unwrap<RangeWrapper>(info.This())->range;
57 |   info.GetReturnValue().Set(PointWrapper::from_point(wrapped.start));
58 | }
59 | 
60 | // Accessor for the JS `end` property: returns the range's end as a JS Point.
61 | void RangeWrapper::get_end(v8::Local<v8::String> property, const Nan::PropertyCallbackInfo<v8::Value> &info) {
62 |   const Range &wrapped = Nan::ObjectWrap::Unwrap<RangeWrapper>(info.This())->range;
63 |   info.GetReturnValue().Set(PointWrapper::from_point(wrapped.end));
64 | }
65 | 


--------------------------------------------------------------------------------
/src/bindings/range-wrapper.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPERSTRING_RANGE_WRAPPER_H
 2 | #define SUPERSTRING_RANGE_WRAPPER_H
 3 | 
 4 | #include "nan.h"
 5 | #include "optional.h"
 6 | #include "point.h"
 7 | #include "range.h"
 8 | 
 9 | class RangeWrapper : public Nan::ObjectWrap {  // JS-facing wrapper around a native Range
10 | public:
11 |   static void init();
12 |   static v8::Local<v8::Value> from_range(Range);  // native Range -> JS object (JS null on failure)
13 |   static optional<Range> range_from_js(v8::Local<v8::Value>);  // JS {start, end} -> Range; empty on failure
14 | 
15 | private:
16 |   RangeWrapper(Range);
17 |   // Constructor callback, then the accessor callbacks for `start` and `end`.
18 |   static void construct(const Nan::FunctionCallbackInfo<v8::Value> &);
19 |   static void get_start(v8::Local<v8::String>, const Nan::PropertyCallbackInfo<v8::Value> &);
20 |   static void get_end(v8::Local<v8::String>, const Nan::PropertyCallbackInfo<v8::Value> &);
21 |   // The native range this object wraps.
22 |   Range range;
23 | };
24 | 
25 | #endif // SUPERSTRING_RANGE_WRAPPER_H
26 | 


--------------------------------------------------------------------------------
/src/bindings/string-conversion.cc:
--------------------------------------------------------------------------------
 1 | #include "string-conversion.h"
 2 | #include "text.h"
 3 | 
 4 | using namespace v8;
 5 | using std::u16string;
 6 | 
 7 | // Copies a JS value's string form into a std::u16string. Throws a JS TypeError
 8 | // and returns an empty optional when the value cannot be converted to a string.
 9 | optional<u16string> string_conversion::string_from_js(Local<Value> value) {
10 |   Local<String> js_string;
11 |   const bool converted = Nan::To<String>(value).ToLocal(&js_string);
12 |   if (!converted) {
13 |     Nan::ThrowTypeError("Expected a string.");
14 |     return optional<u16string>{};
15 |   }
16 |   // Size the destination up front; Write() then fills it without a terminator.
17 |   u16string text(js_string->Length(), u'\0');
18 |   auto *destination = reinterpret_cast<uint16_t *>(&text[0]);
19 |   js_string->Write(
20 | #if NODE_MAJOR_VERSION >= 12
21 |     Isolate::GetCurrent(),  // extra leading argument that Nan doesn't wrap
22 | #endif
23 |     destination,
24 |     0,
25 |     -1,
26 |     String::WriteOptions::NO_NULL_TERMINATION
27 |   );
28 |   return text;
29 | }
30 | 
31 | Local<String> string_conversion::string_to_js(const u16string &text, const char *failure_message) {
32 |   Local<String> result;
33 |   if (Nan::New<String>(
34 |     reinterpret_cast<const uint16_t *>(text.data()),
35 |     text.size()
36 |   ).ToLocal(&result)) {
37 |     return result;
38 |   } else {
39 |     if (!failure_message) failure_message = "Couldn't convert text to a String";
40 |     Nan::ThrowError(failure_message);
41 |     return Nan::New<String>("").ToLocalChecked();
42 |   }
43 | }
44 | 
45 | Local<String> string_conversion::char_to_js(const uint16_t c, const char *failure_message) {
46 |   Local<String> result;
47 |   if (Nan::New<String>(&c, 1).ToLocal(&result)) {
48 |     return result;
49 |   } else {
50 |     if (!failure_message) failure_message = "Couldn't convert character to a String";
51 |     Nan::ThrowError(failure_message);
52 |     return Nan::New<String>("").ToLocalChecked();
53 |   }
54 | }
55 | 


--------------------------------------------------------------------------------
/src/bindings/string-conversion.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPERSTRING_STRING_CONVERSION_H
 2 | #define SUPERSTRING_STRING_CONVERSION_H
 3 | 
 4 | #include <string>
 5 | #include "nan.h"
 6 | #include "optional.h"
 7 | #include "text.h"
 8 | 
 9 | namespace string_conversion {
10 |   // Converts UTF-16 text to a JS string. On failure, throws a JS error using
11 |   // `failure_message` (or a default message) and returns an empty JS string.
12 |   v8::Local<v8::String> string_to_js(const std::u16string &, const char *failure_message = nullptr);
13 | 
14 |   // Converts a single UTF-16 code unit to a JS string, failing the same way.
15 |   v8::Local<v8::String> char_to_js(const std::uint16_t, const char *failure_message = nullptr);
16 | 
17 |   // Converts a JS value to UTF-16 text; throws a TypeError and returns empty on failure.
18 |   optional<std::u16string> string_from_js(v8::Local<v8::Value>);
19 | }
20 | 
21 | #endif // SUPERSTRING_STRING_CONVERSION_H
22 | 


--------------------------------------------------------------------------------
/src/bindings/text-buffer-snapshot-wrapper.cc:
--------------------------------------------------------------------------------
 1 | #include "text-buffer.h"
 2 | #include "text-buffer-wrapper.h"
 3 | #include "text-buffer-snapshot-wrapper.h"
 4 | 
 5 | using namespace v8;
 6 | 
 7 | static Nan::Persistent<v8::Function> snapshot_wrapper_constructor;
 8 | 
 9 | // Builds the JS `Snapshot` class, whose prototype exposes `destroy`, and
10 | // stores its constructor for new_instance().
11 | void TextBufferSnapshotWrapper::init() {
12 |   Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(construct);
13 |   tpl->SetClassName(Nan::New("Snapshot").ToLocalChecked());
14 |   tpl->InstanceTemplate()->SetInternalFieldCount(1);
15 | 
16 |   Local<String> destroy_name = Nan::New("destroy").ToLocalChecked();
17 |   Local<FunctionTemplate> destroy_tpl = Nan::New<FunctionTemplate>(destroy);
18 |   Nan::SetTemplate(tpl->PrototypeTemplate(), destroy_name, destroy_tpl, None);
19 |   snapshot_wrapper_constructor.Reset(Nan::GetFunction(tpl).ToLocalChecked());
20 | }
21 | 
22 | TextBufferSnapshotWrapper::TextBufferSnapshotWrapper(Local<Object> js_buffer, void *snapshot) :
23 |   snapshot{snapshot} {
24 |   slices_ = reinterpret_cast<TextBuffer::Snapshot *>(snapshot)->primitive_chunks();
25 |   js_text_buffer.Reset(Isolate::GetCurrent(), js_buffer);
26 | }
27 | 
28 | // Frees the snapshot unless destroy() already did, then releases the buffer handle.
29 | TextBufferSnapshotWrapper::~TextBufferSnapshotWrapper() {
30 |   delete reinterpret_cast<TextBuffer::Snapshot *>(snapshot);  // no-op when already null
31 |   js_text_buffer.Reset();  // a default-traits v8::Persistent is not released by its destructor
32 | }
33 | 
34 | // Instantiates the JS Snapshot class around `snapshot`; returns JS null if that fails.
35 | Local<Value> TextBufferSnapshotWrapper::new_instance(Local<Object> js_buffer, void *snapshot) {
36 |   Local<Function> make_snapshot = Nan::New(snapshot_wrapper_constructor);
37 |   Local<Object> js_snapshot;
38 |   if (!Nan::NewInstance(make_snapshot).ToLocal(&js_snapshot)) return Nan::Null();
39 |   auto *wrapper = new TextBufferSnapshotWrapper(js_buffer, snapshot);
40 |   wrapper->Wrap(js_snapshot);
41 |   return js_snapshot;
42 | }
43 | 
44 | void TextBufferSnapshotWrapper::construct(const Nan::FunctionCallbackInfo<Value> &info) {
45 |   info.GetReturnValue().Set(Nan::Null());  // constructor callback: only sets the return value to null
46 | }
47 | 
48 | // JS `destroy()`: deletes the native snapshot right away; repeat calls do nothing.
49 | void TextBufferSnapshotWrapper::destroy(const Nan::FunctionCallbackInfo<Value> &info) {
50 |   auto *wrapper = Nan::ObjectWrap::Unwrap<TextBufferSnapshotWrapper>(Nan::To<Object>(info.This()).ToLocalChecked());
51 |   if (wrapper->snapshot == nullptr) return;
52 |   delete reinterpret_cast<TextBuffer::Snapshot *>(wrapper->snapshot);
53 |   wrapper->snapshot = nullptr;
54 | }
55 | 


--------------------------------------------------------------------------------
/src/bindings/text-buffer-snapshot-wrapper.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPERSTRING_TEXT_BUFFER_SNAPSHOT_WRAPPER_H
 2 | #define SUPERSTRING_TEXT_BUFFER_SNAPSHOT_WRAPPER_H
 3 | 
 4 | #include "nan.h"
 5 | #include <string>
 6 | 
 7 | // This header can be included by other native node modules, allowing them
 8 | // to access the content of a TextBuffer::Snapshot without having to call
 9 | // any superstring APIs.
10 | 
11 | class TextBufferSnapshotWrapper : public Nan::ObjectWrap {
12 | public:
13 |   static void init();
14 |   // Wraps a snapshot pointer (passed as void *) in a new JS object; JS null on failure.
15 |   static v8::Local<v8::Value> new_instance(v8::Local<v8::Object>, void *);
16 |   // Pointer/uint32_t pairs cached at construction (presumably text pointer and length — confirm).
17 |   inline const std::vector<std::pair<const char16_t *, uint32_t>> *slices() {
18 |     return &slices_;
19 |   }
20 | 
21 | private:
22 |   TextBufferSnapshotWrapper(v8::Local<v8::Object> js_buffer, void *snapshot);
23 |   ~TextBufferSnapshotWrapper();
24 |   // Constructor callback and the callback behind the JS `destroy` method.
25 |   static void construct(const Nan::FunctionCallbackInfo<v8::Value> &info);
26 |   static void destroy(const Nan::FunctionCallbackInfo<v8::Value> &info);
27 |   // `snapshot` holds a TextBuffer::Snapshot * as void *; it is null once destroyed.
28 |   v8::Persistent<v8::Object> js_text_buffer;
29 |   void *snapshot;
30 |   std::vector<std::pair<const char16_t *, uint32_t>> slices_;
31 | };
32 | 
33 | #endif // SUPERSTRING_TEXT_BUFFER_SNAPSHOT_WRAPPER_H
34 | 


--------------------------------------------------------------------------------
/src/bindings/text-buffer-wrapper.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPERSTRING_TEXT_BUFFER_WRAPPER_H
 2 | #define SUPERSTRING_TEXT_BUFFER_WRAPPER_H
 3 | 
 4 | #include "nan.h"
 5 | #include "text-buffer.h"
 6 | #include <unordered_set>
 7 | 
 8 | class CancellableWorker {  // interface of the workers tracked in TextBufferWrapper::outstanding_workers
 9 | public:
10 |   virtual void CancelIfQueued() = 0;  // NOTE(review): presumably cancels work that has not started yet — confirm
11 | };
12 | 
13 | class TextBufferWrapper : public Nan::ObjectWrap {
14 | public:
15 |   static void init(v8::Local<v8::Object> exports);
16 |   TextBuffer text_buffer;  // the wrapped native buffer; public, and read directly by TextReader::construct
17 |   std::unordered_set<CancellableWorker *> outstanding_workers;
18 |   // Everything below is a callback with the Nan function-callback signature, except the last helper.
19 | private:
20 |   static void construct(const Nan::FunctionCallbackInfo<v8::Value> &info);
21 |   static void get_length(const Nan::FunctionCallbackInfo<v8::Value> &info);
22 |   static void get_extent(const Nan::FunctionCallbackInfo<v8::Value> &info);
23 |   static void get_line_count(const Nan::FunctionCallbackInfo<v8::Value> &info);
24 |   static void has_astral(const Nan::FunctionCallbackInfo<v8::Value> &info);
25 |   static void get_text(const Nan::FunctionCallbackInfo<v8::Value> &info);
26 |   static void get_character_at_position(const Nan::FunctionCallbackInfo<v8::Value> &info);
27 |   static void get_text_in_range(const Nan::FunctionCallbackInfo<v8::Value> &info);
28 |   static void set_text(const Nan::FunctionCallbackInfo<v8::Value> &info);
29 |   static void set_text_in_range(const Nan::FunctionCallbackInfo<v8::Value> &info);
30 |   static void line_for_row(const Nan::FunctionCallbackInfo<v8::Value> &info);
31 |   static void line_length_for_row(const Nan::FunctionCallbackInfo<v8::Value> &info);
32 |   static void line_ending_for_row(const Nan::FunctionCallbackInfo<v8::Value> &info);
33 |   static void get_lines(const Nan::FunctionCallbackInfo<v8::Value> &info);
34 |   static void character_index_for_position(const Nan::FunctionCallbackInfo<v8::Value> &info);
35 |   static void position_for_character_index(const Nan::FunctionCallbackInfo<v8::Value> &info);
36 |   static void find(const Nan::FunctionCallbackInfo<v8::Value> &info);
37 |   static void find_sync(const Nan::FunctionCallbackInfo<v8::Value> &info);
38 |   static void find_all(const Nan::FunctionCallbackInfo<v8::Value> &info);
39 |   static void find_all_sync(const Nan::FunctionCallbackInfo<v8::Value> &info);
40 |   static void find_and_mark_all_sync(const Nan::FunctionCallbackInfo<v8::Value> &info);
41 |   static void find_words_with_subsequence_in_range(const Nan::FunctionCallbackInfo<v8::Value> &info);
42 |   static void is_modified(const Nan::FunctionCallbackInfo<v8::Value> &info);
43 |   static void load(const Nan::FunctionCallbackInfo<v8::Value> &info);
44 |   static void base_text_matches_file(const Nan::FunctionCallbackInfo<v8::Value> &info);
45 |   static void save(const Nan::FunctionCallbackInfo<v8::Value> &info);
46 |   static void load_sync(const Nan::FunctionCallbackInfo<v8::Value> &info);
47 |   static void save_sync(const Nan::FunctionCallbackInfo<v8::Value> &info);
48 |   static void serialize_changes(const Nan::FunctionCallbackInfo<v8::Value> &info);
49 |   static void deserialize_changes(const Nan::FunctionCallbackInfo<v8::Value> &info);
50 |   static void reset(const Nan::FunctionCallbackInfo<v8::Value> &info);
51 |   static void base_text_digest(const Nan::FunctionCallbackInfo<v8::Value> &info);
52 |   static void get_snapshot(const Nan::FunctionCallbackInfo<v8::Value> &info);
53 |   static void dot_graph(const Nan::FunctionCallbackInfo<v8::Value> &info);
54 |   // NOTE(review): presumably calls CancelIfQueued() on each outstanding worker — confirm in the .cc.
55 |   void cancel_queued_workers();
56 | };
57 | 
58 | #endif // SUPERSTRING_TEXT_BUFFER_WRAPPER_H
59 | 


--------------------------------------------------------------------------------
/src/bindings/text-reader.cc:
--------------------------------------------------------------------------------
  1 | #include "text-slice.h"
  2 | #include "text-reader.h"
  3 | #include "encoding-conversion.h"
  4 | #include "text-buffer-wrapper.h"
  5 | 
  6 | using std::move;
  7 | using std::string;
  8 | using namespace v8;
  9 | 
 10 | // Registers the JS `TextReader` class, with `read`, `end` and `destroy`, on `exports`.
 11 | void TextReader::init(Local<Object> exports) {
 12 |   auto tpl = Nan::New<FunctionTemplate>(construct);
 13 |   tpl->SetClassName(Nan::New<String>("TextReader").ToLocalChecked());
 14 |   tpl->InstanceTemplate()->SetInternalFieldCount(1);
 15 |   Nan::SetTemplate(tpl->PrototypeTemplate(), Nan::New("read").ToLocalChecked(), Nan::New<FunctionTemplate>(read), None);
 16 |   Nan::SetTemplate(tpl->PrototypeTemplate(), Nan::New("end").ToLocalChecked(), Nan::New<FunctionTemplate>(end), None);
 17 |   Nan::SetTemplate(tpl->PrototypeTemplate(), Nan::New("destroy").ToLocalChecked(), Nan::New<FunctionTemplate>(destroy), None);
 18 |   Nan::Set(exports, Nan::New("TextReader").ToLocalChecked(), Nan::GetFunction(tpl).ToLocalChecked());
 19 | }
 20 | 
 21 | // Takes ownership of `snapshot` and starts reading at its first chunk.
 22 | TextReader::TextReader(Local<Object> js_buffer, TextBuffer::Snapshot *snapshot,
 23 |                        EncodingConversion &&conversion) :
 24 |   snapshot{snapshot},
 25 |   slices{snapshot->chunks()},
 26 |   slice_index{0},
 27 |   text_offset{slices.empty() ? 0 : slices[0].start_offset()},  // tolerate a snapshot with no chunks
 28 |   conversion{move(conversion)} {
 29 |   js_text_buffer.Reset(Isolate::GetCurrent(), js_buffer);
 30 | }
 31 | // Frees the snapshot if end()/destroy() has not, and releases the buffer handle.
 32 | TextReader::~TextReader() {
 33 |   delete snapshot;  // deleting a null pointer is a no-op
 34 |   js_text_buffer.Reset();  // a default-traits v8::Persistent is not released by its destructor
 35 | }
 36 | // JS constructor: new TextReader(textBuffer, encodingName); throws on an unknown encoding.
 37 | void TextReader::construct(const Nan::FunctionCallbackInfo<Value> &info) {
 38 |   Local<Object> js_text_buffer;
 39 |   if (!Nan::To<Object>(info[0]).ToLocal(&js_text_buffer)) return;
 40 |   Local<String> js_encoding_name;
 41 |   if (!Nan::To<String>(info[1]).ToLocal(&js_encoding_name)) return;
 42 |   Nan::Utf8String encoding_name(js_encoding_name);
 43 |   auto conversion = transcoding_to(*encoding_name);
 44 |   if (!conversion) {
 45 |     Nan::ThrowError((string("Invalid encoding name: ") + *encoding_name).c_str());
 46 |     return;
 47 |   }
 48 |   // Snapshot only after the last early return: it is a raw owning pointer and
 49 |   // would leak if it were created before argument validation failed.
 50 |   auto &text_buffer = Nan::ObjectWrap::Unwrap<TextBufferWrapper>(js_text_buffer)->text_buffer;
 51 |   TextReader *reader = new TextReader(js_text_buffer, text_buffer.create_snapshot(), move(*conversion));
 52 |   reader->Wrap(info.This());
 53 | }
 54 | 
 55 | // JS `read(buffer)`: encodes as much of the remaining text as fits into the
 56 | // given Uint8Array and returns the number of bytes written.
 57 | void TextReader::read(const Nan::FunctionCallbackInfo<Value> &info) {
 58 |   TextReader *reader = Nan::ObjectWrap::Unwrap<TextReader>(Nan::To<Object>(info.This()).ToLocalChecked());
 59 | 
 60 |   Local<Value> target = info[0];
 61 |   if (!target->IsUint8Array()) {
 62 |     Nan::ThrowError("Expected a buffer");
 63 |     return;
 64 |   }
 65 | 
 66 |   char *const output = node::Buffer::Data(target);
 67 |   const size_t capacity = node::Buffer::Length(target);
 68 |   size_t written_so_far = 0;
 69 |   auto &slices = reader->slices;
 70 | 
 71 |   while (reader->slice_index != slices.size()) {
 72 |     TextSlice &current = slices[reader->slice_index];
 73 |     const size_t slice_end = current.end_offset();
 74 |     const size_t produced = reader->conversion.encode(
 75 |       current.text->content, &reader->text_offset, slice_end,
 76 |       output + written_so_far, capacity - written_so_far
 77 |     );
 78 |     // A step that writes nothing means no more progress can be made in this call.
 79 |     if (produced == 0) break;
 80 |     written_so_far += produced;
 81 |     if (reader->text_offset != slice_end) continue;
 82 |     // Slice exhausted: advance, positioning at the next slice's start if there is one.
 83 |     reader->slice_index++;
 84 |     if (reader->slice_index != slices.size()) reader->text_offset = slices[reader->slice_index].start_offset();
 85 |   }
 86 | 
 87 |   info.GetReturnValue().Set(Nan::New<Number>(written_so_far));
 88 | }
 89 | 
 90 | // JS `end()`: calls flush_preceding_changes() on the snapshot, then deletes it; repeat calls do nothing.
 91 | void TextReader::end(const Nan::FunctionCallbackInfo<Value> &info) {
 92 |   auto *self = Nan::ObjectWrap::Unwrap<TextReader>(Nan::To<Object>(info.This()).ToLocalChecked());
 93 |   if (self->snapshot == nullptr) return;
 94 |   self->snapshot->flush_preceding_changes();
 95 |   delete self->snapshot;
 96 |   self->snapshot = nullptr;
 97 | }
 98 | 
 99 | // JS `destroy()`: deletes the snapshot without flushing; repeat calls do nothing.
100 | void TextReader::destroy(const Nan::FunctionCallbackInfo<Value> &info) {
101 |   auto *self = Nan::ObjectWrap::Unwrap<TextReader>(Nan::To<Object>(info.This()).ToLocalChecked());
102 |   if (self->snapshot == nullptr) return;
103 |   delete self->snapshot;
104 |   self->snapshot = nullptr;
105 | }
106 | 


--------------------------------------------------------------------------------
/src/bindings/text-reader.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPERSTRING_TEXT_READER_H
 2 | #define SUPERSTRING_TEXT_READER_H
 3 | 
 4 | #include "nan.h"
 5 | #include "text.h"
 6 | #include "text-buffer.h"
 7 | #include "encoding-conversion.h"
 8 | 
 9 | class TextReader : public Nan::ObjectWrap {  // JS-facing reader that encodes a buffer snapshot into bytes
10 | public:
11 |   static void init(v8::Local<v8::Object> exports);
12 | 
13 | private:
14 |   TextReader(v8::Local<v8::Object> js_buffer, TextBuffer::Snapshot *snapshot,
15 |              EncodingConversion &&conversion);
16 |   ~TextReader();
17 |   // Constructor callback, then the callbacks behind the JS `read`, `end` and `destroy` methods.
18 |   static void construct(const Nan::FunctionCallbackInfo<v8::Value> &info);
19 |   static void read(const Nan::FunctionCallbackInfo<v8::Value> &info);
20 |   static void end(const Nan::FunctionCallbackInfo<v8::Value> &info);
21 |   static void destroy(const Nan::FunctionCallbackInfo<v8::Value> &info);
22 |   // Read cursor: `slice_index` selects an entry of `slices`, `text_offset` is the position within it.
23 |   v8::Persistent<v8::Object> js_text_buffer;
24 |   TextBuffer::Snapshot *snapshot;  // owned; null after end() or destroy()
25 |   std::vector<TextSlice> slices;
26 |   size_t slice_index;
27 |   size_t text_offset;
28 |   EncodingConversion conversion;
29 | };
30 | 
31 | #endif // SUPERSTRING_TEXT_READER_H
32 | 


--------------------------------------------------------------------------------
/src/bindings/text-writer.cc:
--------------------------------------------------------------------------------
 1 | #include "text-writer.h"
 2 | 
 3 | using std::string;
 4 | using std::move;
 5 | using std::u16string;
 6 | using namespace v8;
 7 | 
 8 | // Registers the JS `TextWriter` class, with `write` and `end` methods, on `exports`.
 9 | void TextWriter::init(Local<Object> exports) {
10 |   auto tpl = Nan::New<FunctionTemplate>(construct);
11 |   tpl->SetClassName(Nan::New<String>("TextWriter").ToLocalChecked());
12 |   tpl->InstanceTemplate()->SetInternalFieldCount(1);
13 |   Nan::SetTemplate(tpl->PrototypeTemplate(), Nan::New("write").ToLocalChecked(), Nan::New<FunctionTemplate>(write), None);
14 |   Nan::SetTemplate(tpl->PrototypeTemplate(), Nan::New("end").ToLocalChecked(), Nan::New<FunctionTemplate>(end), None);
15 |   Nan::Set(exports, Nan::New("TextWriter").ToLocalChecked(), Nan::GetFunction(tpl).ToLocalChecked());
16 | }
17 | 
18 | TextWriter::TextWriter(EncodingConversion &&conversion) : conversion{move(conversion)} {}  // moves the given conversion into the writer
19 | 
20 | void TextWriter::construct(const Nan::FunctionCallbackInfo<Value> &info) {
21 |   Local<String> js_encoding_name;
22 |   if (!Nan::To<String>(info[0]).ToLocal(&js_encoding_name)) return;
23 |   Nan::Utf8String encoding_name(js_encoding_name);
24 |   auto conversion = transcoding_from(*encoding_name);
25 |   if (!conversion) {
26 |     Nan::ThrowError((string("Invalid encoding name: ") + *encoding_name).c_str());
27 |     return;
28 |   }
29 | 
30 |   TextWriter *wrapper = new TextWriter(move(*conversion));
31 |   wrapper->Wrap(info.This());
32 | }
33 | 
34 | // JS `write(chunk)`: appends a chunk to the accumulated text. Uint8Array chunks are
35 | // decoded with the writer's encoding; other values are appended in their JS string form.
36 | void TextWriter::write(const Nan::FunctionCallbackInfo<Value> &info) {
37 |   auto writer = Nan::ObjectWrap::Unwrap<TextWriter>(info.This());
38 |   // Test for bytes first: string coercion also succeeds for a Uint8Array, which
39 |   // would otherwise never reach the decoder.
40 |   if (info[0]->IsUint8Array()) {
41 |     const char *data = node::Buffer::Data(info[0]);
42 |     size_t length = node::Buffer::Length(info[0]);
43 |     auto &leftover = writer->leftover_bytes;
44 |     const bool resuming = !leftover.empty();
45 |     if (resuming) {
46 |       // Continue from the bytes the previous chunk left undecoded.
47 |       leftover.insert(leftover.end(), data, data + length);
48 |       data = leftover.data();
49 |       length = leftover.size();
50 |     }
51 |     size_t bytes_written = writer->conversion.decode(writer->content, data, length);
52 |     if (bytes_written >= length) {
53 |       leftover.clear();
54 |     } else if (resuming) {
55 |       // `data` aliases `leftover` here, so assign() from that range would be undefined.
56 |       leftover.erase(leftover.begin(), leftover.begin() + bytes_written);
57 |     } else {
58 |       leftover.assign(data + bytes_written, data + length);
59 |     }
60 |     return;
61 |   }
62 | 
63 |   Local<String> js_chunk;
64 |   if (!Nan::To<String>(info[0]).ToLocal(&js_chunk)) return;
65 |   size_t size = writer->content.size();
66 |   writer->content.resize(size + js_chunk->Length());
67 |   js_chunk->Write(
68 | #if NODE_MAJOR_VERSION >= 12
69 |     Isolate::GetCurrent(),  // Nan doesn't wrap this functionality
70 | #endif
71 |     reinterpret_cast<uint16_t *>(&writer->content[0]) + size,
72 |     0,
73 |     -1,
74 |     String::WriteOptions::NO_NULL_TERMINATION
75 |   );
76 | }
77 | 
78 | // JS `end()`: decodes the bytes still buffered from earlier writes, flagged as
79 | // the final input.
80 | void TextWriter::end(const Nan::FunctionCallbackInfo<Value> &info) {
81 |   auto writer = Nan::ObjectWrap::Unwrap<TextWriter>(info.This());
82 |   auto &leftover = writer->leftover_bytes;
83 |   if (leftover.empty()) return;
84 | 
85 |   writer->conversion.decode(writer->content, leftover.data(), leftover.size(), true);
86 |   // Drop the consumed bytes so a repeated end() cannot decode them again.
87 |   leftover.clear();
88 | }
89 | 
90 | u16string TextWriter::get_text() {
91 |   return move(content);  // hands the accumulated text to the caller; `content` is left moved-from
92 | }
93 | 


--------------------------------------------------------------------------------
/src/bindings/text-writer.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPERSTRING_TEXT_WRITER_H
 2 | #define SUPERSTRING_TEXT_WRITER_H
 3 | 
 4 | #include "nan.h"
 5 | #include "text.h"
 6 | #include "encoding-conversion.h"
 7 | 
 8 | class TextWriter : public Nan::ObjectWrap {  // JS-facing writer that accumulates decoded UTF-16 text
 9 | public:
10 |   static void init(v8::Local<v8::Object> exports);
11 |   TextWriter(EncodingConversion &&conversion);
12 |   std::u16string get_text();  // moves the accumulated text out of the writer
13 | 
14 | private:
15 |   static void construct(const Nan::FunctionCallbackInfo<v8::Value> &info);
16 |   static void write(const Nan::FunctionCallbackInfo<v8::Value> &info);
17 |   static void end(const Nan::FunctionCallbackInfo<v8::Value> &info);
18 |   // `leftover_bytes` holds input bytes a write() could not decode yet; `content` is the decoded text.
19 |   EncodingConversion conversion;
20 |   std::vector<char> leftover_bytes;
21 |   std::u16string content;
22 | };
23 | 
24 | #endif // SUPERSTRING_TEXT_WRITER_H
25 | 


--------------------------------------------------------------------------------
/src/core/encoding-conversion.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPERSTRING_ENCODING_CONVERSION_H_
 2 | #define SUPERSTRING_ENCODING_CONVERSION_H_
 3 | 
 4 | #include "optional.h"
 5 | #include "text.h"
 6 | #include <stdio.h>
 7 | 
 8 | class EncodingConversion {  // Move-constructible handle for converting between UTF-16 text and another encoding.
 9 |   void *data;  // opaque conversion state; its concrete type is not visible in this header
10 |   int mode;
11 | 
12 |   EncodingConversion(int, void *);  // private: instances come from transcoding_to / transcoding_from (friends below)
13 |   int convert(const char **, const char *, char **, char *) const;
14 | 
15 |  public:
16 |   EncodingConversion(EncodingConversion &&);
17 |   EncodingConversion();
18 |   ~EncodingConversion();
19 | 
20 |   bool encode(const std::u16string &, size_t start_offset, size_t end_offset,
21 |               FILE *stream, std::vector<char> &buffer);  // stream-based overload
22 |   size_t encode(const std::u16string &, size_t *start_offset, size_t end_offset,
23 |                 char *buffer, size_t buffer_size, bool is_last = false);  // buffer-based overload; start_offset is an in/out pointer
24 |   bool decode(std::u16string &, FILE *stream, std::vector<char> &buffer,
25 |               std::function<void(size_t)> progress_callback);  // stream-based overload
26 |   size_t decode(std::u16string &, const char *buffer, size_t buffer_size,
27 |                 bool is_last = false);  // NOTE(review): TextWriter treats the result as the number of input bytes consumed — confirm
28 | 
29 |   friend optional<EncodingConversion> transcoding_to(const char *);
30 |   friend optional<EncodingConversion> transcoding_from(const char *);
31 | };
32 | 
33 | optional<EncodingConversion> transcoding_to(const char *);  // factory; presumably takes an encoding name and yields an empty optional on failure — confirm in the .cc
34 | optional<EncodingConversion> transcoding_from(const char *);  // factory for the opposite direction (same caveat)
35 | 
36 | #endif // SUPERSTRING_ENCODING_CONVERSION_H_
37 | 


--------------------------------------------------------------------------------
/src/core/flat_set.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPERSTRING_FLAT_SET_H
 2 | #define SUPERSTRING_FLAT_SET_H
 3 | 
 4 | #include <vector>
 5 | #include <algorithm>
 6 | 
 7 | template <typename T> class flat_set {  // Set kept as a sorted std::vector without duplicates.
 8 |   typedef std::vector<T> contents_type;
 9 |   contents_type contents;
10 | 
11 | public:
12 |   typedef typename contents_type::iterator iterator;
13 |   typedef typename contents_type::const_iterator const_iterator;
14 | 
15 |   void insert(T value) {  // inserts `value` at its sorted position unless it is already present
16 |     auto iter = std::lower_bound(contents.begin(), contents.end(), value);
17 |     if (iter == contents.end() || *iter != value) {
18 |       contents.insert(iter, value);
19 |     }
20 |   }
21 | 
22 |   void insert(const_iterator start, const_iterator end) {  // inserts each element of [start, end) one at a time
23 |     for (auto i = start; i != end; i++) {
24 |       insert(*i);
25 |     }
26 |   }
27 | 
28 |   iterator erase(const iterator &iter) {  // forwards to vector::erase and returns its result
29 |     return contents.erase(iter);
30 |   }
31 | 
32 |   void erase(T value) {  // removes `value` if present; otherwise does nothing
33 |     auto end = this->end();
34 |     auto iter = std::lower_bound(begin(), end, value);
35 |     if (iter != end && *iter == value) {
36 |       erase(iter);
37 |     }
38 |   }
39 | 
40 |   iterator begin() {
41 |     return contents.begin();
42 |   }
43 | 
44 |   const_iterator begin() const {
45 |     return contents.begin();
46 |   }
47 | 
48 |   iterator end() {
49 |     return contents.end();
50 |   }
51 | 
52 |   const_iterator end() const {
53 |     return contents.end();
54 |   }
55 | 
56 |   size_t count(T value) const {  // 1 if `value` is present, else 0 (binary search)
57 |     return std::binary_search(contents.begin(), contents.end(), value) ? 1 : 0;
58 |   }
59 | 
60 |   size_t size() const {
61 |     return contents.size();
62 |   }
63 | };
64 | 
65 | #endif // SUPERSTRING_FLAT_SET_H
66 | 


--------------------------------------------------------------------------------
/src/core/libmba-diff.h:
--------------------------------------------------------------------------------
 1 | #ifndef MBA_DIFF_H_
 2 | #define MBA_DIFF_H_
 3 | 
 4 | #include <stdint.h>
 5 | #include <vector>
 6 | 
 7 | typedef enum {  // Kind of a single edit in a diff result.
 8 |   DIFF_MATCH = 1,  // numbering starts at 1, so 0 is not a valid op
 9 |   DIFF_DELETE,
10 |   DIFF_INSERT
11 | } diff_op;
12 | 
13 | struct diff_edit {  // One edit: an operation plus the offset/length it applies to.
14 |   diff_op op;
15 |   uint32_t off; /* off into s1 if MATCH or DELETE but s2 if INSERT */
16 |   uint32_t len;
17 | };
18 | 
19 | int diff(  // Diffs two UTF-16 buffers; the meaning of the int result is not visible in this header.
20 |   const char16_t *old_text, uint32_t old_length,
21 |   const char16_t *new_text, uint32_t new_length,
22 |   int dmax, std::vector<diff_edit> *ses  // NOTE(review): `ses` presumably receives the edit script and `dmax` bounds the search — confirm in libmba-diff.cc
23 | );
24 | 
25 | #endif  // MBA_DIFF_H_
26 | 


--------------------------------------------------------------------------------
/src/core/marker-index.h:
--------------------------------------------------------------------------------
  1 | #ifndef MARKER_INDEX_H_
  2 | #define MARKER_INDEX_H_
  3 | 
  4 | #include <random>
  5 | #include <unordered_map>
  6 | #include "flat_set.h"
  7 | #include "point.h"
  8 | #include "range.h"
  9 | 
 10 | class MarkerIndex {  // Index of markers: each MarkerId has a start and an end Point, kept in a tree of Nodes.
 11 | public:
 12 |   using MarkerId = unsigned;
 13 |   using MarkerIdSet = flat_set<MarkerId>;
 14 | 
 15 |   struct SpliceResult {  // marker-id sets returned by splice(), one per category
 16 |     flat_set<MarkerId> touch;
 17 |     flat_set<MarkerId> inside;
 18 |     flat_set<MarkerId> overlap;
 19 |     flat_set<MarkerId> surround;
 20 |   };
 21 | 
 22 |   struct Boundary {  // a position together with the marker ids starting and ending there
 23 |     Point position;
 24 |     flat_set<MarkerId> starting;
 25 |     flat_set<MarkerId> ending;
 26 |   };
 27 | 
 28 |   struct BoundaryQueryResult {  // result type of find_boundaries_after()
 29 |     std::vector<MarkerId> containing_start;
 30 |     std::vector<Boundary> boundaries;
 31 |   };
 32 | 
 33 |   MarkerIndex(unsigned seed = 0u);  // NOTE(review): seed presumably feeds random_engine — confirm in marker-index.cc
 34 |   ~MarkerIndex();
 35 |   int generate_random_number();
 36 |   void insert(MarkerId id, Point start, Point end);
 37 |   void set_exclusive(MarkerId id, bool exclusive);
 38 |   void remove(MarkerId id);
 39 |   bool has(MarkerId id);
 40 |   SpliceResult splice(Point start, Point old_extent, Point new_extent);
 41 |   Point get_start(MarkerId id) const;
 42 |   Point get_end(MarkerId id) const;
 43 |   Range get_range(MarkerId id) const;
 44 | 
 45 |   int compare(MarkerId id1, MarkerId id2) const;
 46 |   flat_set<MarkerId> find_intersecting(Point start, Point end);
 47 |   flat_set<MarkerId> find_containing(Point start, Point end);
 48 |   flat_set<MarkerId> find_contained_in(Point start, Point end);
 49 |   flat_set<MarkerId> find_starting_in(Point start, Point end);
 50 |   flat_set<MarkerId> find_starting_at(Point position);
 51 |   flat_set<MarkerId> find_ending_in(Point start, Point end);
 52 |   flat_set<MarkerId> find_ending_at(Point position);
 53 |   BoundaryQueryResult find_boundaries_after(Point start, size_t max_count);
 54 | 
 55 |   std::unordered_map<MarkerId, Range> dump();  // all markers as an id -> Range map
 56 | 
 57 | private:
 58 |   friend class Iterator;  // NOTE(review): the nested Iterator is only declared further down; check which class this friend declaration actually names
 59 | 
 60 |   struct Node {  // tree node with parent/child links and per-node marker-id sets
 61 |     Node *parent;
 62 |     Node *left;
 63 |     Node *right;
 64 |     Point left_extent;
 65 |     flat_set<MarkerId> left_marker_ids;
 66 |     flat_set<MarkerId> right_marker_ids;
 67 |     flat_set<MarkerId> start_marker_ids;
 68 |     flat_set<MarkerId> end_marker_ids;
 69 |     int priority;  // NOTE(review): with the bubble/rotate helpers below this looks like a treap priority — confirm
 70 | 
 71 |     Node(Node *parent, Point left_extent);
 72 |     bool is_marker_endpoint();
 73 |   };
 74 | 
 75 |   class Iterator {  // cursor over the node tree that tracks the position of the current node
 76 |   public:
 77 |     Iterator(MarkerIndex *marker_index);
 78 |     void reset();
 79 |     Node* insert_marker_start(const MarkerId &id, const Point &start_position, const Point &end_position);
 80 |     Node* insert_marker_end(const MarkerId &id, const Point &start_position, const Point &end_position);
 81 |     Node* insert_splice_boundary(const Point &position, bool is_insertion_end);
 82 |     void find_intersecting(const Point &start, const Point &end, flat_set<MarkerId> *result);
 83 |     void find_contained_in(const Point &start, const Point &end, flat_set<MarkerId> *result);
 84 |     void find_starting_in(const Point &start, const Point &end, flat_set<MarkerId> *result);
 85 |     void find_ending_in(const Point &start, const Point &end, flat_set<MarkerId> *result);
 86 |     void find_boundaries_after(Point start, size_t max_count, BoundaryQueryResult *result);
 87 |     std::unordered_map<MarkerId, Range> dump();
 88 | 
 89 |   private:
 90 |     void ascend();
 91 |     void descend_left();
 92 |     void descend_right();
 93 |     void move_to_successor();
 94 |     void seek_to_first_node_greater_than_or_equal_to(const Point &position);
 95 |     void mark_right(const MarkerId &id, const Point &start_position, const Point &end_position);
 96 |     void mark_left(const MarkerId &id, const Point &start_position, const Point &end_position);
 97 |     Node* insert_left_child(const Point &position);
 98 |     Node* insert_right_child(const Point &position);
 99 |     void check_intersection(const Point &start, const Point &end, flat_set<MarkerId> *results);
100 |     void cache_node_position() const;
101 | 
102 |     MarkerIndex *marker_index;  // non-owning back-pointer to the index being traversed
103 |     Node *current_node;
104 |     Point current_node_position;
105 |     Point left_ancestor_position;
106 |     Point right_ancestor_position;
107 |     std::vector<Point> left_ancestor_position_stack;
108 |     std::vector<Point> right_ancestor_position_stack;
109 |   };
110 | 
111 |   Point get_node_position(const Node *node) const;
112 |   void delete_node(Node *node);
113 |   void delete_subtree(Node *node);
114 |   void bubble_node_up(Node *node);
115 |   void bubble_node_down(Node *node);
116 |   void rotate_node_left(Node *pivot);
117 |   void rotate_node_right(Node *pivot);
118 |   void get_starting_and_ending_markers_within_subtree(const Node *node, flat_set<MarkerId> *starting, flat_set<MarkerId> *ending);
119 |   void populate_splice_invalidation_sets(SpliceResult *invalidated, const Node *start_node, const Node *end_node, const flat_set<MarkerId> &starting_inside_splice, const flat_set<MarkerId> &ending_inside_splice);
120 | 
121 |   std::default_random_engine random_engine;
122 |   std::uniform_int_distribution<int> random_distribution;
123 |   Node *root;
124 |   std::unordered_map<MarkerId, Node*> start_nodes_by_id;  // marker id -> node holding its start
125 |   std::unordered_map<MarkerId, Node*> end_nodes_by_id;  // marker id -> node holding its end
126 |   Iterator iterator;
127 |   flat_set<MarkerId> exclusive_marker_ids;
128 |   mutable std::unordered_map<const Node*, Point> node_position_cache;  // mutable so const member functions can update the cache
129 | };
130 | 
131 | #endif // MARKER_INDEX_H_
132 | 


--------------------------------------------------------------------------------
/src/core/optional.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPERSTRING_OPTIONAL_H
 2 | #define SUPERSTRING_OPTIONAL_H
 3 | 
 4 | #include <utility>
 5 | 
 6 | template <typename T> class optional {  // Minimal optional: always stores a T plus a flag saying whether it holds a value.
 7 |   T value;
 8 |   bool is_some;
 9 | 
10 | public:
11 |   optional(T &&value) : value(std::move(value)), is_some(true) {}
12 |   optional(const T &value) : value(value), is_some(true) {}
13 |   optional() : value(T()), is_some(false) {}  // the empty state still value-initializes a T
14 | 
15 |   T &operator*() { return value; }  // unchecked: returns the stored T even when empty
16 |   const T &operator*() const { return value; }
17 |   const T *operator->() const { return &value; }
18 |   T *operator->() { return &value; }
19 |   operator bool() const { return is_some; }  // implicit conversion: true when a value is held
20 |   bool operator==(const optional<T> &other) const {  // const, so const operands use this overload instead of comparing their bool conversions
21 |     if (is_some) {
22 |       return other.is_some && value == other.value;
23 |     } else {
24 |       return !other.is_some;  // two empty optionals compare equal
25 |     }
26 |   }
27 | };
28 | 
29 | #endif // SUPERSTRING_OPTIONAL_H
30 | 


--------------------------------------------------------------------------------
/src/core/patch.h:
--------------------------------------------------------------------------------
  1 | #ifndef PATCH_H_
  2 | #define PATCH_H_
  3 | 
  4 | #include "optional.h"
  5 | #include "point.h"
  6 | #include "serializer.h"
  7 | #include "text.h"
  8 | #include <memory>
  9 | #include <vector>
 10 | #include <ostream>
 11 | 
 12 | class Patch {  // Set of text changes, each mapping an old range to a new range, stored in a tree of Nodes.
 13 |   struct Node;
 14 |   struct OldCoordinates;
 15 |   struct NewCoordinates;
 16 |   struct PositionStackEntry;
 17 | 
 18 |   Node *root;
 19 |   std::vector<Node *> node_stack;
 20 |   std::vector<PositionStackEntry> left_ancestor_stack;
 21 |   uint32_t change_count;
 22 |   bool merges_adjacent_changes;
 23 | 
 24 | public:
 25 |   struct Change {  // one change as reported to callers
 26 |     Point old_start;
 27 |     Point old_end;
 28 |     Point new_start;
 29 |     Point new_end;
 30 |     Text *old_text;  // raw pointer; ownership is not visible in this header
 31 |     Text *new_text;  // raw pointer; ownership is not visible in this header
 32 |     uint32_t preceding_old_text_size;
 33 |     uint32_t preceding_new_text_size;
 34 |     uint32_t old_text_size;
 35 |   };
 36 | 
 37 |   // Construction and destruction
 38 |   Patch(bool merges_adjacent_changes = true);
 39 |   Patch(Patch &&);
 40 |   Patch(Deserializer &input);
 41 |   Patch &operator=(Patch &&);
 42 |   ~Patch();
 43 |   void serialize(Serializer &serializer);
 44 | 
 45 |   Patch copy();  // explicit copy; only move construction/assignment are declared above
 46 |   Patch invert();
 47 | 
 48 |   // Mutations
 49 |   bool splice(Point new_splice_start,
 50 |               Point new_deletion_extent, Point new_insertion_extent,
 51 |               optional<Text> &&deleted_text = optional<Text>{},
 52 |               optional<Text> &&inserted_text = optional<Text>{},
 53 |               uint32_t deleted_text_size = 0);
 54 |   void splice_old(Point start, Point deletion_extent, Point insertion_extent);
 55 |   bool combine(const Patch &other, bool left_to_right = true);
 56 |   void clear();
 57 |   void rebalance();
 58 | 
 59 |   // Non-splaying reads
 60 |   std::vector<Change> get_changes() const;
 61 |   size_t get_change_count() const;
 62 |   std::vector<Change> get_changes_in_old_range(Point start, Point end) const;
 63 |   std::vector<Change> get_changes_in_new_range(Point start, Point end) const;
 64 |   optional<Change> get_change_starting_before_old_position(Point position) const;
 65 |   optional<Change> get_change_starting_before_new_position(Point position) const;
 66 |   optional<Change> get_change_ending_after_new_position(Point position) const;
 67 |   optional<Change> get_bounds() const;
 68 |   Point new_position_for_new_offset(uint32_t new_offset,
 69 |                                     std::function<uint32_t(Point)> old_offset_for_old_position,
 70 |                                     std::function<Point(uint32_t)> old_position_for_old_offset) const;
 71 | 
 72 |   // Splaying reads
 73 |   std::vector<Change> grab_changes_in_old_range(Point start, Point end);  // non-const counterparts of the get_* reads above
 74 |   std::vector<Change> grab_changes_in_new_range(Point start, Point end);
 75 |   optional<Change> grab_change_starting_before_old_position(Point position);
 76 |   optional<Change> grab_change_starting_before_new_position(Point position);
 77 |   optional<Change> grab_change_ending_after_new_position(Point position, bool exclusive = false);
 78 | 
 79 |   // Debugging
 80 |   std::string get_dot_graph() const;
 81 |   std::string get_json() const;
 82 | 
 83 | private:
 84 |   Patch(Node *root, uint32_t change_count, bool merges_adjacent_changes);
 85 | 
 86 |   template <typename CoordinateSpace>  // NOTE(review): presumably instantiated with OldCoordinates / NewCoordinates — confirm in patch.cc
 87 |   std::vector<Change> get_changes_in_range(Point, Point, bool inclusive) const;
 88 | 
 89 |   template <typename CoordinateSpace>
 90 |   optional<Change> get_change_starting_before_position(Point target) const;
 91 | 
 92 |   template <typename CoordinateSpace>
 93 |   optional<Change> get_change_ending_after_position(Point target) const;
 94 | 
 95 |   template <typename CoordinateSpace>
 96 |   std::vector<Change> grab_changes_in_range(Point, Point, bool inclusive = false);
 97 | 
 98 |   template <typename CoordinateSpace>
 99 |   optional<Change> grab_change_starting_before_position(Point position);
100 | 
101 |   template <typename CoordinateSpace>
102 |   Node *splay_node_starting_before(Point target);
103 | 
104 |   template <typename CoordinateSpace>
105 |   Node *splay_node_starting_after(Point target, optional<Point> exclusive_lower_bound);
106 | 
107 |   template <typename CoordinateSpace>
108 |   Node *splay_node_ending_before(Point target);
109 | 
110 |   template <typename CoordinateSpace>
111 |   Node *splay_node_ending_after(Point target, optional<Point> exclusive_lower_bound);
112 | 
113 |   Change change_for_root_node();
114 | 
115 |   std::pair<optional<Text>, bool> compute_old_text(optional<Text> &&, Point, Point);
116 |   uint32_t compute_old_text_size(uint32_t, Point, Point);
117 | 
118 |   void splay_node(Node *);
119 |   void rotate_node_right(Node *, Node *, Node *);
120 |   void rotate_node_left(Node *, Node *, Node *);
121 |   void delete_root();
122 |   void perform_rebalancing_rotations(uint32_t);
123 |   Node *build_node(Node *, Node *, Point, Point, Point, Point,
124 |                   optional<Text> &&, optional<Text> &&, uint32_t old_text_size);
125 |   void delete_node(Node **);
126 |   void remove_noop_change();
127 | };
128 | 
129 | std::ostream &operator<<(std::ostream &, const Patch::Change &);  // stream output for a single Change; defined outside this header
130 | 
131 | #endif // PATCH_H_
132 | 


--------------------------------------------------------------------------------
/src/core/point.cc:
--------------------------------------------------------------------------------
 1 | #include <climits>
 2 | #include <algorithm>
 3 | #include "point.h"
 4 | 
 5 | Point Point::min(const Point &left, const Point &right) {  // Smaller of the two points; `left` is returned on a tie.
 6 |   return right < left ? right : left;
 7 | }
 8 | 
 9 | Point Point::max(const Point &left, const Point &right) {  // Larger of the two points; `left` is returned on a tie.
10 |   return left < right ? right : left;
11 | }
12 | 
13 | Point Point::max() {  // Largest representable point: both row and column are UINT32_MAX.
14 |   return Point(UINT32_MAX, UINT32_MAX);
15 | }
16 | 
17 | Point::Point() : Point(0, 0) {}  // default point is (0, 0)
18 | 
19 | Point::Point(unsigned row, unsigned column) : row{row}, column{column} {}
20 | 
21 | Point::Point(Deserializer &input) :  // reads row first, then column, matching serialize()
22 |   row{input.read<uint32_t>()},
23 |   column{input.read<uint32_t>()} {}
24 | 
25 | int Point::compare(const Point &other) const {  // Three-way compare: rows first, columns break ties; returns -1, 0 or 1.
26 |   if (row != other.row) {
27 |     return row < other.row ? -1 : 1;
28 |   }
29 |   if (column == other.column) return 0;
30 |   return column < other.column ? -1 : 1;
31 | }
32 | 
33 | bool Point::is_zero() const {  // True only for the point (0, 0).
34 |   return (row | column) == 0;
35 | }
36 | 
37 | static uint32_t checked_add(uint32_t a, uint32_t b) {
38 |   // Add in 64 bits so the sum cannot wrap, then saturate at UINT32_MAX.
39 |   const uint64_t sum = static_cast<uint64_t>(a) + static_cast<uint64_t>(b);
40 |   if (sum > UINT32_MAX) return UINT32_MAX;
41 |   return static_cast<uint32_t>(sum);
42 | }
43 | 
44 | Point Point::traverse(const Point &traversal) const {  // Position reached by moving `traversal` from this point (saturating adds).
45 |   if (traversal.row != 0) {
46 |     return Point(checked_add(row, traversal.row), traversal.column);
47 |   }
48 |   // Zero rows traversed: stay on this row and advance only the column.
49 |   return Point(row, checked_add(column, traversal.column));
50 | }
51 | 
52 | Point Point::traversal(const Point &start) const {  // Extent from `start` to this point, computed with plain unsigned subtraction.
53 |   if (row != start.row) {
54 |     return Point(row - start.row, column);
55 |   }
56 |   // Same row: only the column difference remains.
57 |   return Point(0, column - start.column);
58 | }
59 | 
60 | void Point::serialize(Serializer &output) const {  // Appends row, then column — the order the Deserializer constructor reads them.
61 |   output.append(row);
62 |   output.append(column);
63 | }
64 | 
65 | bool Point::operator==(const Point &other) const {  // every relational operator below is defined through compare()
66 |   return compare(other) == 0;
67 | }
68 | 
69 | bool Point::operator!=(const Point &other) const {
70 |   return compare(other) != 0;
71 | }
72 | 
73 | bool Point::operator<(const Point &other) const {  // row-major ordering: rows compare first, then columns
74 |   return compare(other) < 0;
75 | }
76 | 
77 | bool Point::operator<=(const Point &other) const {
78 |   return compare(other) <= 0;
79 | }
80 | 
81 | bool Point::operator>(const Point &other) const {
82 |   return compare(other) > 0;
83 | }
84 | 
85 | bool Point::operator>=(const Point &other) const {
86 |   return compare(other) >= 0;
87 | }
88 | 


--------------------------------------------------------------------------------
/src/core/point.h:
--------------------------------------------------------------------------------
 1 | #ifndef POINT_H_
 2 | #define POINT_H_
 3 | 
 4 | #include <ostream>
 5 | #include "serializer.h"
 6 | 
 7 | struct Point {  // A (row, column) position or extent with row-major ordering.
 8 |   unsigned row;
 9 |   unsigned column;
10 | 
11 |   static Point min(const Point &left, const Point &right);
12 |   static Point max(const Point &left, const Point &right);
13 |   static Point max();  // overload without arguments: the largest representable point
14 | 
15 |   Point();
16 |   Point(unsigned row, unsigned column);
17 |   Point(Deserializer &input);
18 | 
19 |   int compare(const Point &other) const;  // -1, 0 or 1
20 |   bool is_zero() const;
21 |   Point traverse(const Point &other) const;  // this point advanced by the extent `other`
22 |   Point traversal(const Point &other) const;  // extent from `other` to this point
23 |   void serialize(Serializer &output) const;
24 | 
25 |   bool operator!=(const Point &other) const;
26 |   bool operator==(const Point &other) const;
27 |   bool operator<(const Point &other) const;
28 |   bool operator<=(const Point &other) const;
29 |   bool operator>(const Point &other) const;
30 |   bool operator>=(const Point &other) const;
31 | };
32 | 
33 | inline std::ostream &operator<<(std::ostream &stream, const Point &point) {  // Prints the point as "(row, column)".
34 |   return stream << "(" << point.row << ", " << point.column << ")";
35 | }
36 | 
37 | #endif // POINT_H_
38 | 


--------------------------------------------------------------------------------
/src/core/range.cc:
--------------------------------------------------------------------------------
 1 | #include "range.h"
 2 | 
 3 | Range Range::all_inclusive() {  // Range from (0, 0) to Point::max(), i.e. covering every representable position.
 4 |   return Range{Point(), Point::max()};
 5 | }
 6 | 
 7 | Point Range::extent() const {  // Size of the range, expressed as the traversal from start to end.
 8 |   return end.traversal(start);
 9 | }
10 | 


--------------------------------------------------------------------------------
/src/core/range.h:
--------------------------------------------------------------------------------
 1 | #ifndef RANGE_H_
 2 | #define RANGE_H_
 3 | 
 4 | #include <ostream>
 5 | #include "point.h"
 6 | 
 7 | struct Range {  // Pair of points delimiting a span of text.
 8 |   Point start;
 9 |   Point end;
10 | 
11 |   static Range all_inclusive();
12 | 
13 |   Point extent() const;
14 | 
15 |   bool operator==(const Range &other) const {  // equal when both endpoints match
16 |     return start == other.start && end == other.end;
17 |   }
18 | };
19 | 
20 | inline std::ostream &operator<<(std::ostream &stream, const Range &range) {  // Prints the range as "(start, end)" using Point's formatting.
21 |   return stream << "(" << range.start << ", " << range.end << ")";
22 | }
23 | 
24 | #endif // RANGE_H_
25 | 


--------------------------------------------------------------------------------
/src/core/regex.cc:
--------------------------------------------------------------------------------
  1 | #include "regex.h"
  2 | #include <stdlib.h>
  3 | #include "pcre2.h"
  4 | 
  5 | using std::u16string;
  6 | using MatchResult = Regex::MatchResult;
  7 | 
  8 | const char16_t EMPTY_PATTERN[] = u".{0}";
  9 | 
 10 | Regex::Regex() : code{nullptr} {}  // default-constructed Regex holds no compiled pattern
 11 | 
 12 | static u16string preprocess_pattern(const char16_t *pattern, uint32_t length) {  // Rewrites '\uXXXX' escapes into literal UTF-16 code units before compilation.
 13 |   u16string result;
 14 |   for (unsigned i = 0; i < length;) {  // i is advanced inside the body by 1, 2 or 6
 15 |     char16_t c = pattern[i];
 16 | 
 17 |     // Replace escape sequences like '\u00cf' with their literal UTF16 value
 18 |     if (c == '\\' && i + 1 < length) {
 19 |       if (pattern[i + 1] == 'u') {
 20 |         if (i + 6 <= length) {  // need four characters after "\u"
 21 |           std::string char_code_string(&pattern[i + 2], &pattern[i + 6]);
 22 |           char16_t char_code_value = strtol(char_code_string.data(), nullptr, 16);  // NOTE(review): strtol stops at the first non-hex character, so partially valid digits are still accepted — confirm intended
 23 |           if (char_code_value != 0) {  // 0 is treated as "invalid"; consequently '\u0000' is also handled as invalid
 24 |             result += char_code_value;
 25 |             i += 6;
 26 |             continue;
 27 |           }
 28 |         }
 29 | 
 30 |         // Replace invalid '\u' escape sequences with the literal characters '\' and 'u'
 31 |         result += u"\\\\u";
 32 |         i += 2;
 33 |         continue;
 34 |       } else if (pattern[i + 1] == '\\') {
 35 |         // Prevent '\\u' from UTF16 replacement
 36 |         result += u"\\\\";
 37 |         i += 2;
 38 |         continue;
 39 |       }
 40 |     }
 41 | 
 42 |     result += c;  // any other character is copied through unchanged
 43 |     i++;
 44 |   }
 45 | 
 46 |   return result;
 47 | }
 48 | 
 49 | 
 50 | Regex::Regex(const char16_t *pattern, uint32_t pattern_length, u16string *error_message, bool ignore_case, bool unicode) {  // Compiles `pattern`; on failure `code` is null and the PCRE2 message goes to *error_message.
 51 |   if (pattern_length == 0) {
 52 |     pattern = EMPTY_PATTERN;  // an empty pattern is replaced by ".{0}"
 53 |     pattern_length = 4;  // number of characters in EMPTY_PATTERN
 54 |   }
 55 | 
 56 |   u16string final_pattern = preprocess_pattern(pattern, pattern_length);
 57 | 
 58 |   int error_number = 0;
 59 |   size_t error_offset = 0;
 60 |   uint32_t options = PCRE2_MULTILINE;  // multiline matching is always enabled
 61 |   if (ignore_case) options |= PCRE2_CASELESS;
 62 |   if (unicode) options |= PCRE2_UTF;
 63 |   code = pcre2_compile(
 64 |     reinterpret_cast<const uint16_t *>(final_pattern.data()),
 65 |     final_pattern.size(),
 66 |     options,
 67 |     &error_number,
 68 |     &error_offset,
 69 |     nullptr
 70 |   );
 71 | 
 72 |   if (!code) {
 73 |     uint16_t message_buffer[256];
 74 |     int length = pcre2_get_error_message(error_number, message_buffer, 256);  // negative result is an error code, not a length
 75 |     if (error_message) error_message->assign(message_buffer, message_buffer + (length > 0 ? length : 0));  // clamp so a negative code never becomes a huge size
 76 |     return;
 77 |   }
 78 | 
 79 |   pcre2_jit_compile(  // return value is ignored
 80 |     code,
 81 |     PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_PARTIAL_SOFT
 82 |   );
 83 | }
 84 | 
 85 | Regex::Regex(const u16string &pattern, u16string *error_message, bool ignore_case, bool unicode)  // convenience overload: delegates to the pointer/length constructor
 86 |   : Regex(pattern.data(), pattern.size(), error_message, ignore_case, unicode) {}
 87 | 
 88 | Regex::Regex(Regex &&other) : code{other.code} {  // move: take over the compiled code
 89 |   other.code = nullptr;  // so the moved-from destructor does not free it
 90 | }
 91 | 
 92 | Regex::~Regex() {
 93 |   if (code) pcre2_code_free(code);
 94 | }
 95 | 
 96 | Regex::MatchData::MatchData(const Regex &regex)  // allocates PCRE2 match data sized from the compiled pattern
 97 |   : data{pcre2_match_data_create_from_pattern(regex.code, nullptr)} {}  // NOTE(review): regex.code is null for a default-constructed or failed Regex — confirm callers never pass one
 98 | 
 99 | Regex::MatchData::~MatchData() {
100 |   pcre2_match_data_free(data);
101 | }
102 | 
103 | MatchResult Regex::match(const char16_t *string, size_t length,  // Runs the pattern over `string` from offset 0 and classifies the outcome.
104 |                          MatchData &match_data, unsigned options) const {
105 |   MatchResult result{MatchResult::None, 0, 0};
106 | 
107 |   unsigned int pcre_options = 0;  // each PCRE2 flag is set when the corresponding MatchOptions bit is absent
108 |   if (!(options & MatchOptions::IsEndSearch)) pcre_options |= PCRE2_PARTIAL_HARD;
109 |   if (!(options & MatchOptions::IsBeginningOfLine)) pcre_options |= PCRE2_NOTBOL;
110 |   if (!(options & MatchOptions::IsEndOfLine)) pcre_options |= PCRE2_NOTEOL;
111 | 
112 |   int status = pcre2_match(
113 |     code,
114 |     reinterpret_cast<const uint16_t *>(string),
115 |     length,
116 |     0,  // start offset
117 |     pcre_options,
118 |     match_data.data,
119 |     nullptr
120 |   );
121 | 
122 |   if (status < 0) {  // negative status: partial match, no match, or an error
123 |     switch (status) {
124 |       case PCRE2_ERROR_PARTIAL:
125 |         result.type = MatchResult::Partial;
126 |         result.start_offset = pcre2_get_ovector_pointer(match_data.data)[0];
127 |         result.end_offset = pcre2_get_ovector_pointer(match_data.data)[1];
128 |         break;
129 |       case PCRE2_ERROR_NOMATCH:
130 |         result.type = MatchResult::None;
131 |         break;
132 |       default:
133 |         result.type = MatchResult::Error;  // offsets stay 0 for None and Error
134 |         break;
135 |     }
136 |   } else {
137 |     result.type = MatchResult::Full;
138 |     result.start_offset = pcre2_get_ovector_pointer(match_data.data)[0];  // ovector[0], ovector[1] delimit the whole match
139 |     result.end_offset = pcre2_get_ovector_pointer(match_data.data)[1];
140 |   }
141 | 
142 |   return result;
143 | }
144 | 


--------------------------------------------------------------------------------
/src/core/regex.h:
--------------------------------------------------------------------------------
 1 | #ifndef REGEX_H_
 2 | #define REGEX_H_
 3 | 
 4 | #include "optional.h"
 5 | #include <string>
 6 | 
 7 | struct pcre2_real_code_16;
 8 | struct pcre2_real_match_data_16;
 9 | struct BuildRegexResult;
10 | 
11 | class Regex {  // Owning wrapper around a compiled 16-bit PCRE2 pattern.
12 |   pcre2_real_code_16 *code;  // null when nothing is compiled (default-constructed, moved-from, or failed compile)
13 |   Regex(pcre2_real_code_16 *);
14 | 
15 |  public:
16 |   Regex();
17 |   Regex(const char16_t *, uint32_t, std::u16string *error_message, bool ignore_case = false, bool unicode = false);
18 |   Regex(const std::u16string &, std::u16string *error_message, bool ignore_case = false, bool unicode = false);
19 |   Regex(Regex &&);
20 |   ~Regex();
21 | 
22 |   class MatchData {  // owns the PCRE2 match-data block used by match()
23 |     pcre2_real_match_data_16 *data;
24 |     friend class Regex;
25 | 
26 |    public:
27 |     MatchData(const Regex &);
28 |     ~MatchData();
29 |   };
30 | 
31 |   struct MatchResult {  // outcome of match(): a type plus offsets into the searched string
32 |     enum {
33 |       None,
34 |       Error,
35 |       Partial,
36 |       Full,
37 |     } type;
38 | 
39 |     size_t start_offset;
40 |     size_t end_offset;
41 |   };
42 | 
43 |   enum MatchOptions {  // bit flags combined into the `options` argument of match()
44 |     None = 0,
45 |     IsBeginningOfLine = 1,
46 |     IsEndOfLine = 2,
47 |     IsEndSearch = 4,
48 |   };
49 | 
50 |   MatchResult match(const char16_t *data, size_t length, MatchData &, unsigned options = 0) const;
51 | };
52 | 
53 | struct BuildRegexResult {  // Pairs an optional compiled Regex with an error message string.
54 |   optional<Regex> regex;
55 |   std::u16string error_message;
56 | };
57 | 
58 | #endif  // REGX_H_
59 | 


--------------------------------------------------------------------------------
/src/core/serializer.h:
--------------------------------------------------------------------------------
 1 | #ifndef SERIALIZER_H_
 2 | #define SERIALIZER_H_
 3 | 
 4 | #include <vector>
 5 | #include <cstdint>
 6 | 
 7 | class Serializer {  // Appends integers to a byte vector, least-significant byte first.
 8 |   std::vector<uint8_t> &vector;  // output buffer, held by reference (not owned)
 9 | 
10 |  public:
11 |   inline Serializer(std::vector<uint8_t> &output) :
12 |     vector(output) {};
13 | 
14 |   template <typename T>
15 |   void append(T value) {  // writes sizeof(T) bytes, lowest byte first
16 |     for (auto i = 0u; i < sizeof(T); i++) {
17 |       vector.push_back(value & 0xFF);
18 |       value >>= 8;
19 |     }
20 |   }
21 | };
22 | 
23 | class Deserializer {  // Reads little-endian integers from a byte vector; a short read yields 0.
24 |   const uint8_t *read_ptr;  // invariant: read_ptr <= end_ptr
25 |   const uint8_t *end_ptr;
26 | 
27 |  public:
28 |   inline Deserializer(const std::vector<uint8_t> &input) :
29 |     read_ptr(input.data()),
30 |     end_ptr(input.data() + input.size()) {};
31 | 
32 |   template <typename T>
33 |   T peek() const {  // decodes the next sizeof(T) bytes without consuming them; returns 0 if too few remain
34 |     T value = 0;
35 |     const uint8_t *temp_ptr = read_ptr;
36 |     if (static_cast<size_t>(end_ptr - temp_ptr) >= sizeof(T)) {  // size_t avoids truncating large remaining counts
37 |       for (auto i = 0u; i < sizeof(T); i++) {
38 |         value |= static_cast<T>(*(temp_ptr++)) << static_cast<T>(8 * i);
39 |       }
40 |     }
41 |     return value;
42 |   }
43 | 
44 |   template <typename T>
45 |   T read() {  // peek() plus advance; never moves read_ptr past end_ptr
46 |     T value = peek<T>();
47 |     read_ptr = static_cast<size_t>(end_ptr - read_ptr) >= sizeof(T) ? read_ptr + sizeof(T) : end_ptr;  // clamp on short input so later reads cannot run out of bounds
48 |     return value;
49 |   }
50 | };
51 | 
52 | #endif // SERIALIZER_H_
53 | 


--------------------------------------------------------------------------------
/src/core/text-buffer.h:
--------------------------------------------------------------------------------
  1 | #ifndef SUPERSTRING_TEXT_BUFFER_H_
  2 | #define SUPERSTRING_TEXT_BUFFER_H_
  3 | 
  4 | #include <string>
  5 | #include <vector>
  6 | #include "text.h"
  7 | #include "patch.h"
  8 | #include "point.h"
  9 | #include "range.h"
 10 | #include "regex.h"
 11 | #include "marker-index.h"
 12 | 
 13 | class TextBuffer {  // Editable UTF-16 text buffer built from Layer objects, with search and snapshot support.
 14 |   struct Layer;  // defined in the implementation file
 15 |   Layer *base_layer;
 16 |   Layer *top_layer;
 17 |   void squash_layers(const std::vector<Layer *> &);
 18 |   void consolidate_layers();
 19 | 
 20 | public:
 21 |   static uint32_t MAX_CHUNK_SIZE_TO_COPY;
 22 | 
 23 |   TextBuffer();
 24 |   TextBuffer(std::u16string &&);
 25 |   TextBuffer(const std::u16string &text);
 26 |   ~TextBuffer();
 27 | 
 28 |   uint32_t size() const;
 29 |   Point extent() const;
 30 |   optional<std::u16string> line_for_row(uint32_t row);
 31 |   void with_line_for_row(uint32_t row, const std::function<void(const char16_t *, uint32_t)> &);  // callback variant of line_for_row: receives pointer and length
 32 | 
 33 |   optional<uint32_t> line_length_for_row(uint32_t row);
 34 |   const uint16_t *line_ending_for_row(uint32_t row);
 35 |   ClipResult clip_position(Point);
 36 |   Point position_for_offset(uint32_t offset);
 37 |   std::u16string text();
 38 |   uint16_t character_at(Point position) const;
 39 |   std::u16string text_in_range(Range range);
 40 |   void set_text(std::u16string &&);
 41 |   void set_text(const std::u16string &);
 42 |   void set_text_in_range(Range old_range, std::u16string &&);
 43 |   void set_text_in_range(Range old_range, const std::u16string &);
 44 |   bool is_modified() const;
 45 |   bool has_astral();
 46 |   std::vector<TextSlice> chunks() const;
 47 | 
 48 |   void reset(Text &&);
 49 |   void flush_changes();
 50 |   void serialize_changes(Serializer &);
 51 |   bool deserialize_changes(Deserializer &);
 52 |   const Text &base_text() const;
 53 | 
 54 |   optional<Range> find(const Regex &, Range range = Range::all_inclusive()) const;  // searches default to the whole buffer
 55 |   std::vector<Range> find_all(const Regex &, Range range = Range::all_inclusive()) const;
 56 |   unsigned find_and_mark_all(MarkerIndex &, MarkerIndex::MarkerId, bool exclusive,
 57 |                              const Regex &, Range range = Range::all_inclusive()) const;
 58 | 
 59 |   struct SubsequenceMatch {  // one result of find_words_with_subsequence_in_range
 60 |     std::u16string word;
 61 |     std::vector<Point> positions;
 62 |     std::vector<uint32_t> match_indices;
 63 |     int32_t score;
 64 |     bool operator==(const SubsequenceMatch &) const;
 65 |   };
 66 | 
 67 |   std::vector<SubsequenceMatch> find_words_with_subsequence_in_range(const std::u16string &, const std::u16string &, Range) const;
 68 | 
 69 |   class Snapshot {  // view tied to one Layer of a TextBuffer; constructible only by TextBuffer (private ctor + friend)
 70 |     friend class TextBuffer;
 71 |     TextBuffer &buffer;
 72 |     Layer &layer;
 73 |     Layer &base_layer;
 74 | 
 75 |     Snapshot(TextBuffer &, Layer &, Layer &);
 76 | 
 77 |   public:
 78 |     ~Snapshot();
 79 |     void flush_preceding_changes();
 80 | 
 81 |     uint32_t size() const;
 82 |     Point extent() const;
 83 |     uint32_t line_length_for_row(uint32_t) const;
 84 |     std::vector<TextSlice> chunks() const;
 85 |     std::vector<TextSlice> chunks_in_range(Range) const;
 86 |     std::vector<std::pair<const char16_t *, uint32_t>> primitive_chunks() const;
 87 |     std::u16string text() const;
 88 |     std::u16string text_in_range(Range) const;
 89 |     const Text &base_text() const;
 90 |     optional<Range> find(const Regex &, Range range = Range::all_inclusive()) const;
 91 |     std::vector<Range> find_all(const Regex &, Range range = Range::all_inclusive()) const;
 92 |     std::vector<SubsequenceMatch> find_words_with_subsequence_in_range(std::u16string query, const std::u16string &extra_word_characters, Range range) const;
 93 |   };
 94 | 
 95 |   friend class Snapshot;
 96 |   Snapshot *create_snapshot();  // NOTE(review): returns a raw pointer; presumably the caller deletes it — confirm in text-buffer.cc
 97 | 
 98 |   bool is_modified(const Snapshot *) const;
 99 |   Patch get_inverted_changes(const Snapshot *) const;
100 | 
101 |   size_t layer_count()  const;
102 |   std::string get_dot_graph() const;
103 | };
104 | 
105 | #endif  // SUPERSTRING_TEXT_BUFFER_H_
106 | 


--------------------------------------------------------------------------------
/src/core/text-diff.cc:
--------------------------------------------------------------------------------
  1 | #include "text-diff.h"
  2 | #include "libmba-diff.h"
  3 | #include "text-slice.h"
  4 | #include <vector>
  5 | #include <string.h>
  6 | #include <ostream>
  7 | #include <cassert>
  8 | 
  9 | using std::move;
 10 | using std::ostream;
 11 | using std::vector;
 12 | 
 13 | static Point previous_column(Point position) {
 14 |   assert(position.column > 0);
 15 |   position.column--;
 16 |   return position;
 17 | }
 18 | 
// Edit-distance budget handed to diff(). When diff() reports -1 or a distance
// at or above this value, text_diff() gives up on a fine-grained patch and
// emits one splice that replaces the whole text.
static int MAX_EDIT_DISTANCE = 4 * 1024;
 20 | 
// Builds a Patch that transforms `old_text` into `new_text`.
//
// The character-level edit script comes from diff(). If diff() returns -1 or
// a distance >= MAX_EDIT_DISTANCE, the result is a single splice replacing
// the entire old text. Otherwise each edit is replayed in order:
//   - DIFF_MATCH advances both cursors (and patches up CR/LF boundaries),
//   - DIFF_DELETE splices the removed text out,
//   - DIFF_INSERT splices the added text in.
// Splices are always positioned at `new_position`, i.e. in new-text
// coordinates.
Patch text_diff(const Text &old_text, const Text &new_text) {
  Patch result;
  // Reusable operands for the splices below.
  Text empty;
  Text cr{u"\r"};
  Text lf{u"\n"};

  vector<diff_edit> edit_script;

  int edit_distance = diff(
    old_text.content.data(),
    old_text.content.size(),
    new_text.content.data(),
    new_text.content.size(),
    MAX_EDIT_DISTANCE,
    &edit_script
  );

  // Fallback: replace everything with one change.
  if (edit_distance == -1 || edit_distance >= MAX_EDIT_DISTANCE) {
    result.splice(Point(), old_text.extent(), new_text.extent(), old_text, new_text);
    return result;
  }

  // Cursors into both texts, tracked as flat offsets and as row/column points.
  size_t old_offset = 0;
  size_t new_offset = 0;
  Point old_position;
  Point new_position;

  for (struct diff_edit &edit : edit_script) {
    switch (edit.op) {
      case DIFF_MATCH:
        if (edit.len == 0) continue;

        // If the previous change ended between a CR and an LF, then expand
        // that change downward to include the LF.
        if (new_text.at(new_offset) == '\n' &&
            ((old_offset > 0 && old_text.at(old_offset - 1) == '\r') ||
             (new_offset > 0 && new_text.at(new_offset - 1) == '\r'))) {
          // An LF -> LF splice changes no text; it only widens the change.
          result.splice(new_position, Point(1, 0), Point(1, 0), lf, lf);
          old_position.row++;
          old_position.column = 0;
          new_position.row++;
          new_position.column = 0;
        }

        old_offset += edit.len;
        new_offset += edit.len;
        // clip_crlf is passed as false so a position between CR and LF is
        // reported as-is.
        old_position = old_text.position_for_offset(old_offset, 0, false);
        new_position = new_text.position_for_offset(new_offset, 0, false);

        // If the next change starts between a CR and an LF, then expand that
        // change leftward to include the CR.
        if (new_text.at(new_offset - 1) == '\r' &&
            ((old_offset < old_text.size() && old_text.at(old_offset) == '\n') ||
             (new_offset < new_text.size() && new_text.at(new_offset) == '\n'))) {
          // A CR -> CR splice one column back, again leaving the text unchanged.
          result.splice(previous_column(new_position), Point(0, 1), Point(0, 1), cr, cr);
        }
        break;

      case DIFF_DELETE: {
        // Remove old_text[old_offset, deletion_end); only the old cursor moves.
        uint32_t deletion_end = old_offset + edit.len;
        Text deleted_text{old_text.begin() + old_offset, old_text.begin() + deletion_end};
        old_offset = deletion_end;
        Point next_old_position = old_text.position_for_offset(old_offset, 0, false);
        result.splice(new_position, next_old_position.traversal(old_position), Point(), deleted_text, empty);
        old_position = next_old_position;
        break;
      }

      case DIFF_INSERT: {
        // Add new_text[new_offset, insertion_end); only the new cursor moves.
        uint32_t insertion_end = new_offset + edit.len;
        Text inserted_text{new_text.begin() + new_offset, new_text.begin() + insertion_end};
        new_offset = insertion_end;
        Point next_new_position = new_text.position_for_offset(new_offset, 0, false);
        result.splice(new_position, Point(), next_new_position.traversal(new_position), empty, inserted_text);
        new_position = next_new_position;
        break;
      }
    }
  }

  return result;
}
103 | 


--------------------------------------------------------------------------------
/src/core/text-diff.h:
--------------------------------------------------------------------------------
1 | #ifndef SUPERSTRING_TEXT_DIFF_H
2 | #define SUPERSTRING_TEXT_DIFF_H
3 | 
4 | #include "patch.h"
5 | #include "text.h"
6 | 
// Computes a Patch describing how to turn `old_text` into `new_text`.
Patch text_diff(const Text &old_text, const Text &new_text);
8 | 
9 | #endif  // SUPERSTRING_TEXT_DIFF_H


--------------------------------------------------------------------------------
/src/core/text-slice.cc:
--------------------------------------------------------------------------------
  1 | #include "text-slice.h"
  2 | #include "text.h"
  3 | #include <assert.h>
  4 | 
  5 | TextSlice::TextSlice() :
  6 |   text{nullptr} {}
  7 | 
  8 | TextSlice::TextSlice(const Text *text, Point start_position, Point end_position) :
  9 |   text{text}, start_position{start_position}, end_position{end_position} {}
 10 | 
 11 | TextSlice::TextSlice(const Text &text) :
 12 |   text{&text}, start_position{Point()}, end_position{text.extent()} {}
 13 | 
 14 | size_t TextSlice::start_offset() const {
 15 |   if (start_position.is_zero()) return 0;
 16 |   assert(start_position.row < text->line_offsets.size());
 17 |   return text->line_offsets[start_position.row] + start_position.column;
 18 | }
 19 | 
 20 | size_t TextSlice::end_offset() const {
 21 |   if (end_position.is_zero()) return 0;
 22 |   return text->line_offsets[end_position.row] + end_position.column;
 23 | }
 24 | 
 25 | bool TextSlice::is_valid() const {
 26 |   uint32_t start_offset = this->start_offset();
 27 |   uint32_t end_offset = this->end_offset();
 28 | 
 29 |   if (start_offset > end_offset) {
 30 |     return false;
 31 |   }
 32 | 
 33 |   if (start_position.row + 1 < text->line_offsets.size()) {
 34 |     if (start_offset >= text->line_offsets[start_position.row + 1]) {
 35 |       return false;
 36 |     }
 37 |   }
 38 | 
 39 |   if (end_position.row + 1 < text->line_offsets.size()) {
 40 |     if (end_offset >= text->line_offsets[end_position.row + 1]) {
 41 |       return false;
 42 |     }
 43 |   }
 44 | 
 45 |   if (end_offset > text->size()) {
 46 |     return false;
 47 |   }
 48 | 
 49 |   return true;
 50 | }
 51 | 
 52 | std::pair<TextSlice, TextSlice> TextSlice::split(Point split_point) const {
 53 |   Point absolute_split_point = Point::min(
 54 |     end_position,
 55 |     start_position.traverse(split_point)
 56 |   );
 57 | 
 58 |   return std::pair<TextSlice, TextSlice>{
 59 |     TextSlice{text, start_position, absolute_split_point},
 60 |     TextSlice{text, absolute_split_point, end_position}
 61 |   };
 62 | }
 63 | 
 64 | std::pair<TextSlice, TextSlice> TextSlice::split(uint32_t split_offset) const {
 65 |   return split(position_for_offset(split_offset));
 66 | }
 67 | 
 68 | Point TextSlice::position_for_offset(uint32_t offset, uint32_t min_row) const {
 69 |   return text->position_for_offset(
 70 |     offset + start_offset(),
 71 |     start_position.row + min_row
 72 |   ).traversal(start_position);
 73 | }
 74 | 
 75 | TextSlice TextSlice::prefix(Point prefix_end) const {
 76 |   return split(prefix_end).first;
 77 | }
 78 | 
 79 | TextSlice TextSlice::prefix(uint32_t prefix_end) const {
 80 |   return split(prefix_end).first;
 81 | }
 82 | 
 83 | TextSlice TextSlice::suffix(Point suffix_start) const {
 84 |   return split(suffix_start).second;
 85 | }
 86 | 
 87 | TextSlice TextSlice::slice(Range range) const {
 88 |   return suffix(range.start).prefix(range.extent());
 89 | }
 90 | 
 91 | Point TextSlice::extent() const {
 92 |   return end_position.traversal(start_position);
 93 | }
 94 | 
 95 | const char16_t *TextSlice::data() const {
 96 |   return text->data() + start_offset();
 97 | }
 98 | 
 99 | uint32_t TextSlice::size() const {
100 |   return end_offset() - start_offset();
101 | }
102 | 
103 | bool TextSlice::empty() const {
104 |   return size() == 0;
105 | }
106 | 
107 | Text::const_iterator TextSlice::begin() const {
108 |   return text->cbegin() + start_offset();
109 | }
110 | 
111 | Text::const_iterator TextSlice::end() const {
112 |   return text->cbegin() + end_offset();
113 | }
114 | 
115 | uint16_t TextSlice::front() const {
116 |   return *begin();
117 | }
118 | 
119 | uint16_t TextSlice::back() const {
120 |   return *(end() - 1);
121 | }
122 | 


--------------------------------------------------------------------------------
/src/core/text-slice.h:
--------------------------------------------------------------------------------
 1 | #ifndef FLAT_TEXT_SLICE_H_
 2 | #define FLAT_TEXT_SLICE_H_
 3 | 
 4 | #include <vector>
 5 | #include "point.h"
 6 | #include "range.h"
 7 | #include "text.h"
 8 | 
// A view over part of a Text: a pointer to the text plus start and end
// positions. The slice stores the pointer only, so the referenced Text must
// outlive it.
class TextSlice {
 public:
  // Underlying text; nullptr for a default-constructed slice.
  const Text *text;
  // Slice bounds, expressed as positions within `text`.
  Point start_position;
  Point end_position;

  TextSlice(const Text *text, Point start_position, Point end_position);
  // Flat offsets of the two bounds within `text`.
  size_t start_offset() const;
  size_t end_offset() const;

  TextSlice();
  // Slice covering the whole of `text`.
  TextSlice(const Text &text);
  // Splitting and sub-slicing; Point/offset arguments are relative to the
  // slice start.
  std::pair<TextSlice, TextSlice> split(Point) const;
  std::pair<TextSlice, TextSlice> split(uint32_t) const;
  TextSlice prefix(Point) const;
  TextSlice prefix(uint32_t) const;
  TextSlice suffix(Point) const;
  TextSlice slice(Range range) const;
  Point position_for_offset(uint32_t offset, uint32_t min_row = 0) const;
  // Distance from start to end.
  Point extent() const;
  // First and last code unit; neither checks for an empty slice.
  uint16_t front() const;
  uint16_t back() const;
  // Consistency check of the bounds against `text`.
  bool is_valid() const;

  // Raw access to the covered code units.
  const char16_t *data() const;
  uint32_t size() const;
  bool empty() const;

  Text::const_iterator begin() const;
  Text::const_iterator end() const;
};
40 | 
41 | #endif // FLAT_TEXT_SLICE_H_
42 | 


--------------------------------------------------------------------------------
/src/core/text.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPERSTRING_TEXT_H_
 2 | #define SUPERSTRING_TEXT_H_
 3 | 
 4 | #include <istream>
 5 | #include <functional>
 6 | #include <vector>
 7 | #include <ostream>
 8 | #include "serializer.h"
 9 | #include "point.h"
10 | #include "optional.h"
11 | 
12 | class TextSlice;
13 | 
// Result type of the clip_position() functions: a position together with a
// flat offset.
// NOTE(review): presumably the offset corresponds to the clipped position — confirm in text.cc.
struct ClipResult {
  Point position;
  uint32_t offset;
};
18 | 
// UTF-16 text stored as a std::u16string plus a table of line offsets.
// TextSlice is a friend and indexes `line_offsets` directly.
class Text {
  friend class TextSlice;

 public:
  // Extent (as a Point) of an arbitrary string, without building a Text.
  static Point extent(const std::u16string &);

  // The code units themselves.
  std::u16string content;
  // Indexed by row; TextSlice computes a position's flat offset as
  // line_offsets[row] + column.
  std::vector<uint32_t> line_offsets;
  // Builds a Text from content and a line-offset table supplied together.
  // NOTE(review): const rvalue references cannot be moved from — confirm this is intended.
  Text(const std::u16string &&, const std::vector<uint32_t> &&);

  using const_iterator = std::u16string::const_iterator;

  Text();
  Text(const std::u16string &);
  Text(std::u16string &&);
  Text(TextSlice slice);
  Text(Deserializer &deserializer);
  // Builds the text from an iterator range by first materializing a u16string.
  template<typename Iter>
  Text(Iter begin, Iter end) : Text(std::u16string{begin, end}) {}

  // Concatenation of two or three slices into a new Text.
  static Text concat(TextSlice a, TextSlice b);
  static Text concat(TextSlice a, TextSlice b, TextSlice c);
  // In-place replacement of `deletion_extent` at `start` with `inserted_slice`.
  void splice(Point start, Point deletion_extent, TextSlice inserted_slice);

  // Code-unit access by position or by flat offset.
  uint16_t at(Point position) const;
  uint16_t at(uint32_t offset) const;
  const_iterator begin() const;
  const_iterator end() const;
  inline const_iterator cbegin() const { return begin(); }
  inline const_iterator cend() const { return end(); }
  ClipResult clip_position(Point) const;
  Point extent() const;
  bool empty() const;
  // Conversions between row/column positions and flat offsets.
  uint32_t offset_for_position(Point) const;
  Point position_for_offset(uint32_t, uint32_t min_row = 0, bool clip_crlf = true) const;
  uint32_t line_length_for_row(uint32_t row) const;
  void append(TextSlice);
  void assign(TextSlice);
  void serialize(Serializer &) const;
  uint32_t size() const;
  const char16_t *data() const;
  size_t digest() const;
  void clear();

  bool operator!=(const Text &) const;
  bool operator==(const Text &) const;

  friend std::ostream &operator<<(std::ostream &, const Text &);
};
68 | 
69 | #endif // SUPERSTRING_TEXT_H_
70 | 


--------------------------------------------------------------------------------
/test/js/helpers/point-helpers.js:
--------------------------------------------------------------------------------
// Frozen sentinel point whose row and column are both Infinity.
exports.INFINITY_POINT = Object.freeze({row: Infinity, column: Infinity})
 2 | 
 3 | exports.compare = function compare (a, b) {
 4 |   if (a.row === b.row) {
 5 |     return compareNumbers(a.column, b.column)
 6 |   } else {
 7 |     return compareNumbers(a.row, b.row)
 8 |   }
 9 | }
10 | 
11 | exports.isZero = function isZero (point) {
12 |   return (point.row === 0 && point.column === 0)
13 | }
14 | 
15 | exports.isInfinity = function isInfinity (point) {
16 |   return (point.row === Infinity || point.column === Infinity)
17 | }
18 | 
19 | exports.min = function min (a, b) {
20 |   if (compare(a, b) <= 0) {
21 |     return a
22 |   } else {
23 |     return b
24 |   }
25 | }
26 | 
27 | exports.traverse = function traverse (start, distance) {
28 |   if (distance.row === 0) {
29 |     return {
30 |       row: start.row,
31 |       column: start.column + distance.column
32 |     }
33 |   } else {
34 |     return {
35 |       row: start.row + distance.row,
36 |       column: distance.column
37 |     }
38 |   }
39 | }
40 | 
41 | exports.traversalDistance = function traversalDistance (end, start) {
42 |   if (end.row === start.row) {
43 |     return {row: 0, column: end.column - start.column}
44 |   } else {
45 |     return {row: end.row - start.row, column: end.column}
46 |   }
47 | }
48 | 
49 | exports.format = function format (point) {
50 |   return `(${point.row}, ${point.column})`
51 | }
52 | 
// Three-way comparison of two numbers: -1 if a < b, 1 if a > b, otherwise 0.
function compareNumbers (a, b) {
  if (a < b) return -1
  if (a > b) return 1
  return 0
}
62 | 


--------------------------------------------------------------------------------
/test/js/helpers/test-document.js:
--------------------------------------------------------------------------------
  1 | const Random = require('random-seed')
  2 | const WORDS = require('./words')
  3 | const pointHelpers = require('./point-helpers')
  4 | const textHelpers = require('./text-helpers')
  5 | 
  6 | module.exports =
  7 | class TestDocument {
  8 |   constructor (randomSeed, maxLineCount = 50) {
  9 |     this.random = new Random(randomSeed)
 10 |     this.lines = this.buildRandomLines(1, maxLineCount)
 11 |   }
 12 | 
 13 |   clone () {
 14 |     let clone = Object.create(Object.getPrototypeOf(this))
 15 |     clone.random = this.random
 16 |     clone.lines = this.lines.slice()
 17 |     return clone
 18 |   }
 19 | 
 20 |   getLines () {
 21 |     return this.lines.slice()
 22 |   }
 23 | 
 24 |   getText () {
 25 |     return this.lines.join('\n')
 26 |   }
 27 | 
 28 |   getTextInRange (start, end) {
 29 |     let endRow = Math.min(end.row, this.lines.length - 1)
 30 |     if (start.row === endRow) {
 31 |       return this.lines[start.row].substring(start.column, end.column)
 32 |     } else if (!pointHelpers.isInfinity(start)) {
 33 |       let text = this.lines[start.row].substring(start.column) + '\n'
 34 |       for (let row = start.row + 1; row < endRow; row++) {
 35 |         text += this.lines[row] + '\n'
 36 |       }
 37 |       text += this.lines[endRow].substring(0, end.column)
 38 |       return text
 39 |     } else {
 40 |       return ""
 41 |     }
 42 |   }
 43 | 
 44 |   searchAll (regex) {
 45 |     return this.searchAllInRange(
 46 |       {start: {row: 0, column: 0}, end: this.getExtent()},
 47 |       regex
 48 |     )
 49 |   }
 50 | 
 51 |   searchAllInRange (range, regex) {
 52 |     const ranges = []
 53 |     const text = this.getTextInRange(range.start, range.end)
 54 |     let match
 55 |     while (match = regex.exec(text)) {
 56 |       const start = pointHelpers.traverse(range.start, textHelpers.getExtent(text.slice(0, match.index)))
 57 |       const extent = textHelpers.getExtent(match[0])
 58 |       ranges.push({start, end: pointHelpers.traverse(start, extent)})
 59 |       if (match[0].length === 0) regex.lastIndex++
 60 |     }
 61 |     return ranges
 62 |   }
 63 | 
 64 |   getExtent () {
 65 |     const row = this.lines.length - 1
 66 |     return {row, column: this.lines[row].length}
 67 |   }
 68 | 
 69 |   performRandomSplice (upperCase) {
 70 |     let deletedRange = this.buildRandomRange()
 71 |     let start = deletedRange.start
 72 |     let deletedText = this.getTextInRange(start, deletedRange.end)
 73 |     let deletedExtent = pointHelpers.traversalDistance(deletedRange.end, deletedRange.start)
 74 |     let insertedText = this.buildRandomLines(0, 3, upperCase).join('\n')
 75 |     let insertedExtent = textHelpers.getExtent(insertedText)
 76 |     this.splice(start, deletedExtent, insertedText)
 77 |     return {start, deletedExtent, insertedExtent, deletedText, insertedText}
 78 |   }
 79 | 
 80 |   splice (start, deletedExtent, insertedText) {
 81 |     let deletedText = this.getTextInRange(start, pointHelpers.traverse(start, deletedExtent))
 82 |     let end = pointHelpers.traverse(start, deletedExtent)
 83 |     let replacementLines = insertedText.split('\n')
 84 | 
 85 |     replacementLines[0] =
 86 |       this.lines[start.row].substring(0, start.column) + replacementLines[0]
 87 |     replacementLines[replacementLines.length - 1] =
 88 |       replacementLines[replacementLines.length - 1] + this.lines[end.row].substring(end.column)
 89 | 
 90 |     this.lines.splice(start.row, deletedExtent.row + 1, ...replacementLines)
 91 |     return deletedText
 92 |   }
 93 | 
 94 |   characterAtPosition ({row, column}) {
 95 |     return this.lines[row][column]
 96 |   }
 97 | 
 98 |   buildRandomLines (min, max, upperCase) {
 99 |     let lineCount = this.random.intBetween(min, max - 1)
100 |     let lines = []
101 |     for (let i = 0; i < lineCount; i++) {
102 |       lines.push(this.buildRandomLine(upperCase))
103 |     }
104 |     return lines
105 |   }
106 | 
107 |   buildRandomLine (upperCase) {
108 |     let wordCount = this.random(5)
109 |     let words = []
110 |     for (let i = 0; i < wordCount; i++) {
111 |       words.push(this.buildRandomWord(upperCase))
112 |     }
113 |     return words.join(' ')
114 |   }
115 | 
116 |   buildRandomWord (upperCase) {
117 |     let word = WORDS[this.random(WORDS.length)]
118 |     if (upperCase) word = word.toUpperCase()
119 |     return word
120 |   }
121 | 
122 |   buildRandomRange () {
123 |     const start = this.buildRandomPoint()
124 |     let end = start
125 | 
126 |     if (this.random(10)) {
127 |       do {
128 |         end = pointHelpers.traverse(end, {
129 |           row: this.random(3),
130 |           column: this.random(5)
131 |         })
132 |       } while (this.random(2));
133 |     }
134 | 
135 |     return {start, end: this.clipPosition(end)}
136 |   }
137 | 
138 |   buildRandomPoint () {
139 |     let row = this.random(this.lines.length)
140 |     let column = this.random(this.lines[row].length)
141 |     return {row, column}
142 |   }
143 | 
144 |   clipPosition ({row, column}) {
145 |     if (row >= this.lines.length) {
146 |       row = this.lines.length - 1
147 |       column = this.lines[row].length
148 |     } else if (column > this.lines[row].length) {
149 |       column = this.lines[row].length
150 |     }
151 |     return {row, column}
152 |   }
153 | }
154 | 


--------------------------------------------------------------------------------
/test/js/helpers/text-helpers.js:
--------------------------------------------------------------------------------
// Shared global regex. Because it is stateful (`lastIndex`), each function
// below that uses it resets `lastIndex` to 0 before scanning.
const NEWLINE_REG_EXP = /\n/g
 2 | 
 3 | exports.getExtent = function getExtent (text) {
 4 |   let lastLineStartIndex = 0
 5 |   let row = 0
 6 |   NEWLINE_REG_EXP.lastIndex = 0
 7 |   while (NEWLINE_REG_EXP.exec(text)) {
 8 |     row++
 9 |     lastLineStartIndex = NEWLINE_REG_EXP.lastIndex
10 |   }
11 |   let column = text.length - lastLineStartIndex
12 |   return {row, column}
13 | }
14 | 
15 | exports.getPrefix = function getPrefix (text, prefixExtent) {
16 |   return text.substring(0, characterIndexForPoint(text, prefixExtent))
17 | }
18 | 
19 | exports.getSuffix = function getSuffix (text, prefixExtent) {
20 |   return text.substring(characterIndexForPoint(text, prefixExtent))
21 | }
22 | 
23 | exports.characterIndexForPoint = function characterIndexForPoint(text, point) {
24 |   let {row, column} = point
25 |   NEWLINE_REG_EXP.lastIndex = 0
26 |   while (row-- > 0) {
27 |     let matches = NEWLINE_REG_EXP.exec(text)
28 |     if (matches == null) {
29 |       return text.length
30 |     }
31 |   }
32 |   return NEWLINE_REG_EXP.lastIndex + column
33 | }
34 | 


--------------------------------------------------------------------------------
/test/native/encoding-conversion-test.cc:
--------------------------------------------------------------------------------
  1 | #include "test-helpers.h"
  2 | #include <sstream>
  3 | #include "text.h"
  4 | #include "encoding-conversion.h"
  5 | 
  6 | using std::string;
  7 | using std::stringstream;
  8 | using std::vector;
  9 | using std::u16string;
 10 | 
// Decoding UTF-8 in one call, and in two calls when the first chunk ends
// inside a multi-byte character.
TEST_CASE("EncodingConversion::decode - basic UTF-8") {
  auto conversion = transcoding_from("UTF-8");
  string input("abγdefg\nhijklmnop");

  u16string string;
  conversion->decode(string, input.data(), input.size());
  REQUIRE(string == u"abγdefg\nhijklmnop");

  // This first chunk ends in the middle of the multi-byte 'γ' character, so
  // decoding stops before that character.
  u16string string2;
  size_t bytes_read = conversion->decode(string2, input.data(), 3);
  REQUIRE(bytes_read == 2);

  // We can pick up where we left off and decode the rest of the input.
  conversion->decode(string2, input.data() + 2, input.size() - 2);
  REQUIRE(string2 == u"abγdefg\nhijklmnop");
}
 29 | 
// A single-byte encoding: byte 0xfc must decode to 'ü'.
TEST_CASE("EncodingConversion::decode - basic ISO-8859-1") {
  auto conversion = transcoding_from("ISO-8859-1");
  string input("qrst" "\xfc" "v"); // qrstüv

  u16string string;
  conversion->decode(string, input.data(), input.size());
  REQUIRE(string == u"qrstüv");
}
 38 | 
// Each invalid byte in the middle of the input becomes one U+FFFD
// replacement character, and decoding continues after it.
TEST_CASE("EncodingConversion::decode - invalid byte sequences in the middle of the input") {
  auto conversion = transcoding_from("UTF-8");
  string input("ab" "\xc0" "\xc1" "de");

  u16string string;
  conversion->decode(string, input.data(), input.size());
  REQUIRE(string == u"ab" "\ufffd" "\ufffd" "de");
}
 47 | 
// A truncated multi-byte sequence at the end of the input is left unconsumed
// by default, and replaced with U+FFFD when the input is declared complete.
TEST_CASE("EncodingConversion::decode - invalid byte sequences at the end of the input") {
  auto conversion = transcoding_from("UTF-8");
  string input("ab" "\xf0\x9f"); // incomplete 4-byte code point for '😁' at the end of the stream

  // Without the end flag only the two complete bytes are consumed.
  u16string string;
  size_t bytes_encoded = conversion->decode(string, input.data(), input.size());
  REQUIRE(bytes_encoded == 2);
  REQUIRE(string == u"ab");

  // Passing `true` as the final `is_end` argument makes the decoder consume
  // the trailing incomplete bytes too, emitting one replacement character
  // per byte.
  string.clear();
  bytes_encoded = conversion->decode(string, input.data(), input.size(), true);
  REQUIRE(bytes_encoded == 4);
  REQUIRE(string == u"ab" "\ufffd" "\ufffd");
}
 63 | 
// A 4-byte UTF-8 sequence must decode to a UTF-16 surrogate pair.
TEST_CASE("EncodingConversion::decode - four-byte UTF-16 characters") {
  auto conversion = transcoding_from("UTF-8");
  string input("ab" "\xf0\x9f" "\x98\x81" "cd"); // 'ab😁cd'

  u16string string;
  conversion->decode(string, input.data(), input.size());
  REQUIRE(string == u"ab" "\xd83d" "\xde01" "cd");
}
 72 | 
// Encoding into a 3-byte output buffer in several calls; `start` is advanced
// by each call so the next call resumes where the previous one stopped.
TEST_CASE("EncodingConversion::encode - basic") {
  auto conversion = transcoding_to("UTF-8");
  u16string string = u"abγdefg\nhijklmnop";

  vector<char> output(3);
  size_t bytes_encoded = 0, start = 0;

  // The 'γ' requires two UTF-8 bytes, so it doesn't fit in the output buffer
  // after "ab" and is deferred to the next call.
  bytes_encoded = conversion->encode(
    string, &start, string.size(), output.data(), output.size());
  REQUIRE(std::string(output.data(), bytes_encoded) == "ab");

  bytes_encoded = conversion->encode(
    string, &start, string.size(), output.data(), output.size());
  REQUIRE(std::string(output.data(), bytes_encoded) == "γd");

  bytes_encoded = conversion->encode(
    string, &start, string.size(), output.data(), output.size());
  REQUIRE(std::string(output.data(), bytes_encoded) == "efg");
}
 93 | 
// A surrogate pair must encode to one 4-byte UTF-8 sequence, and must not be
// split when the requested end offset falls between its two halves.
TEST_CASE("EncodingConversion::encode - four-byte UTF-16 characters") {
  auto conversion = transcoding_to("UTF-8");
  u16string string = u"ab" "\xd83d" "\xde01" "cd";  // 'ab😁cd'

  vector<char> output(10);
  size_t bytes_encoded = 0, start = 0;

  bytes_encoded = conversion->encode(
    string, &start, string.size(), output.data(), output.size());
  REQUIRE(std::string(output.data(), bytes_encoded) == "ab" "\xf0\x9f" "\x98\x81" "cd");

  // The end offset, 3, is in the middle of the 4-byte character.
  start = 0;
  bytes_encoded = conversion->encode(
    string, &start, 3, output.data(), output.size());
  REQUIRE(std::string(output.data(), bytes_encoded) == "ab");

  // We can pick up where we left off.
  bytes_encoded += conversion->encode(
    string, &start, string.size(), output.data() + bytes_encoded, output.size() - bytes_encoded);
  REQUIRE(std::string(output.data(), bytes_encoded) == "ab" "\xf0\x9f" "\x98\x81" "cd");
}
116 | 
// An unpaired surrogate in the middle of the string is encoded as U+FFFD,
// whether the string is encoded in one call or split right after it.
TEST_CASE("EncodingConversion::encode - invalid characters in the middle of the string") {
  auto conversion = transcoding_to("UTF-8");
  u16string string = u"abc" "\xD800" "def";

  vector<char> output(10);
  size_t bytes_encoded = 0, start = 0;

  bytes_encoded = conversion->encode(
    string, &start, string.size(), output.data(), output.size());
  REQUIRE(std::string(output.data(), bytes_encoded) == "abc" "\ufffd" "def");

  // Here, the invalid character occurs at the end of a chunk.
  start = 0;
  bytes_encoded = conversion->encode(
    string, &start, 4, output.data(), output.size());
  bytes_encoded += conversion->encode(
    string, &start, string.size(), output.data() + bytes_encoded, output.size() - bytes_encoded);
  REQUIRE(std::string(output.data(), bytes_encoded) == "abc" "\ufffd" "def");
}
136 | 
// With the final argument set to true, an unpaired surrogate at the very end
// of the string is encoded as U+FFFD.
TEST_CASE("EncodingConversion::encode - invalid characters at the end of the string") {
  auto conversion = transcoding_to("UTF-8");
  u16string string = u"abc" "\xD800";

  vector<char> output(10);
  size_t bytes_encoded = 0, start = 0;

  bytes_encoded = conversion->encode(
    string, &start, string.size(), output.data(), output.size(), true);
  REQUIRE(std::string(output.data(), bytes_encoded) == "abc" "\ufffd");
}
148 | 


--------------------------------------------------------------------------------
/test/native/test-helpers.cc:
--------------------------------------------------------------------------------
 1 | #include "test-helpers.h"
 2 | #include "patch.h"
 3 | #include "range.h"
 4 | #include "text-buffer.h"
 5 | #include <catch.hpp>
 6 | #include <cstring>
 7 | #include <memory>
 8 | #include <ostream>
 9 | #include <vector>
10 | 
11 | using std::vector;
12 | using std::u16string;
13 | 
14 | bool text_eq(const Text *left, const Text *right) {
15 |   if (left == right)
16 |     return true;
17 |   if (!left && right)
18 |     return false;
19 |   if (left && !right)
20 |     return false;
21 |   return *left == *right;
22 | }
23 | 
24 | bool operator==(const Patch::Change &left, const Patch::Change &right) {
25 |   return left.old_start == right.old_start &&
26 |          left.new_start == right.new_start && left.old_end == right.old_end &&
27 |          left.new_end == right.new_end &&
28 |          text_eq(left.old_text, right.old_text) &&
29 |          text_eq(left.new_text, right.new_text);
30 | }
31 | 
32 | std::unique_ptr<Text> get_text(const u16string content) {
33 |   return std::unique_ptr<Text> { new Text(content) };
34 | }
35 | 
36 | std::u16string get_random_string(Generator &rand, uint32_t character_count) {
37 |   u16string content;
38 |   content.reserve(character_count);
39 |   for (uint i = 0; i < character_count; i++) {
40 |     if (rand() % 20 < 1) {
41 |       content.push_back('\n');
42 |     } else if (rand() % 20 < 1) {
43 |       content.push_back('\r');
44 |       content.push_back('\n');
45 |       i++;
46 |     } else if (rand() % 20 < 1) {
47 |       content.push_back('\r');
48 |     } else {
49 |       uint16_t character = 'a' + (rand() % 26);
50 |       content.push_back(character);
51 |     }
52 |   }
53 |   return content;
54 | }
55 | 
56 | Text get_random_text(Generator &rand) {
57 |   return Text {get_random_string(rand)};
58 | }
59 | 
60 | Range get_random_range(Generator &rand, const Text &text) {
61 |   uint32_t start_row = rand() % (text.extent().row + 1);
62 |   uint32_t max_column = text.line_length_for_row(start_row);
63 |   uint32_t start_column = 0;
64 |   if (max_column > 0) start_column = rand() % max_column;
65 |   Point start {start_row, start_column};
66 |   Point end {start};
67 |   while (rand() % 10 < 3) {
68 |     end = text.clip_position(end.traverse(Point(rand() % 2, rand() % 10))).position;
69 |   }
70 |   return {start, end};
71 | }
72 | 
73 | Range get_random_range(Generator &rand, TextBuffer &buffer) {
74 |   return get_random_range(rand, buffer.text());
75 | }
76 | 


--------------------------------------------------------------------------------
/test/native/test-helpers.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPERSTRING_TEST_HELPERS_H
 2 | #define SUPERSTRING_TEST_HELPERS_H
 3 | 
 4 | #include "patch.h"
 5 | #include <catch.hpp>
 6 | #include <cstring>
 7 | #include <memory>
 8 | #include <ostream>
 9 | #include <vector>
10 | #include <random>
11 | #include "range.h"
12 | #include "text.h"
13 | #include "text-buffer.h"
14 | #include <iostream>
15 | 
16 | using std::cout;
17 | using std::cerr;
18 | 
19 | class TextBuffer;
20 | 
// Seeded source of pseudo-random 32-bit values for the randomized tests.
// Two generators constructed from the same seed yield the same sequence
// within one build of the test binary.
class Generator {
public:
  // Seeds the underlying engine with `seed`.
  Generator(uint32_t seed) : engine{seed} {}

  // Draws the next value from the default-constructed distribution.
  uint32_t operator()() {
    return distribution(engine);
  }

private:
  std::default_random_engine engine;
  std::uniform_int_distribution<uint32_t> distribution;
};
29 | 
30 | bool operator==(const Patch::Change &left, const Patch::Change &right);
31 | std::unique_ptr<Text> get_text(const std::u16string content);
32 | std::u16string get_random_string(Generator &, uint32_t character_count = 20);
33 | Text get_random_text(Generator &);
34 | Range get_random_range(Generator &, const Text &);
35 | Range get_random_range(Generator &, TextBuffer &);
36 | 
37 | namespace std {
38 |   inline std::ostream &operator<<(std::ostream &stream, const std::u16string &text) {
39 |     for (uint16_t character : text) {
40 |       if (character == '\r') {
41 |         stream << "\\\\r";
42 |       } else if (character < 255) {
43 |         stream << static_cast<char>(character);
44 |       } else {
45 |         stream << "\\u";
46 |         stream << character;
47 |       }
48 |     }
49 | 
50 |     return stream;
51 |   }
52 | 
53 |   inline std::ostream &operator<<(std::ostream &stream, const TextBuffer::SubsequenceMatch &match) {
54 |     stream << "SubsequenceMatch{ word: " <<  match.word << ", positions: [";
55 | 
56 |     for (size_t i = 0; i < match.positions.size(); i++) {
57 |       stream << match.positions[i];
58 |       if (i < match.positions.size() - 1) stream << ", ";
59 |     }
60 | 
61 |     stream << "], match_indices: [";
62 | 
63 |     for (size_t i = 0; i < match.match_indices.size(); i++) {
64 |       stream << match.match_indices[i];
65 |       if (i < match.match_indices.size() - 1) stream << ", ";
66 |     }
67 | 
68 |     stream << "], score: " << match.score << " }";
69 | 
70 |     return stream;
71 |   }
72 | }
73 | 
74 | template <typename T>
75 | std::ostream &operator<<(std::ostream &stream, const optional<T> &value) {
76 |   if (value) {
77 |     return stream << *value;
78 |   } else {
79 |     return stream << "nullopt";
80 |   }
81 | }
82 | 
83 | #endif // SUPERSTRING_TEST_HELPERS_H
84 | 


--------------------------------------------------------------------------------
/test/native/tests.cc:
--------------------------------------------------------------------------------
1 | // Because this file is slow to compile, we separate it from patch_test.cc
2 | // for a faster feedback loop
3 | 
4 | #define CATCH_CONFIG_MAIN
5 | #include <catch.hpp>
6 | 


--------------------------------------------------------------------------------
/test/native/text-diff-test.cc:
--------------------------------------------------------------------------------
  1 | #include "test-helpers.h"
  2 | #include "text.h"
  3 | #include "text-slice.h"
  4 | #include "text-diff.h"
  5 | 
  6 | using Change = Patch::Change;
  7 | using std::vector;
  8 | 
  9 | TEST_CASE("text_diff - multiple lines") {
 10 |   Text old_text{u"abc\nghi\njk\nmno\n"};
 11 |   Text new_text{u"abc\ndef\nghi\njkl\nmno\n"};
 12 | 
 13 |   Patch patch = text_diff(old_text, new_text);
 14 | 
 15 |   REQUIRE(patch.get_changes() == vector<Change>({
 16 |     Change{
 17 |       Point{1, 0}, Point{1, 0},
 18 |       Point{1, 0}, Point{2, 0},
 19 |       get_text(u"").get(), get_text(u"def\n").get(),
 20 |       0, 0, 0
 21 |     },
 22 |     Change{
 23 |       Point{2, 2}, Point{2, 2},
 24 |       Point{3, 2}, Point{3, 3},
 25 |       get_text(u"").get(), get_text(u"l").get(),
 26 |       0, 3, 0
 27 |     }
 28 |   }));
 29 | 
 30 |   // We temporarily move the Text's content in order to diff it without
 31 |   // copying. Check that the text is unchanged afterwards.
 32 |   REQUIRE(old_text == Text(u"abc\nghi\njk\nmno\n"));
 33 |   REQUIRE(new_text == Text(u"abc\ndef\nghi\njkl\nmno\n"));
 34 | }
 35 | 
 36 | TEST_CASE("text_diff - single line") {
 37 |   Text old_text{u"abcdefghij"};
 38 |   Text new_text{u"abcxyefij"};
 39 | 
 40 |   Patch patch = text_diff(old_text, new_text);
 41 | 
 42 |   REQUIRE(patch.get_changes() == vector<Change>({
 43 |     Change{
 44 |       Point{0, 3}, Point{0, 4},
 45 |       Point{0, 3}, Point{0, 5},
 46 |       get_text(u"d").get(), get_text(u"xy").get(),
 47 |       0, 0, 0
 48 | 
 49 |     },
 50 |     Change{
 51 |       Point{0, 6}, Point{0, 8},
 52 |       Point{0, 7}, Point{0, 7},
 53 |       get_text(u"gh").get(), get_text(u"").get(),
 54 |       1, 2, 0
 55 |     },
 56 |   }));
 57 | }
 58 | 
 59 | TEST_CASE("text_diff - old text is empty") {
 60 |   Text old_text{u""};
 61 |   Text new_text{u"abc\ndef\nghi\njkl\n"};
 62 | 
 63 |   Patch patch = text_diff(old_text, new_text);
 64 | 
 65 |   REQUIRE(patch.get_changes() == vector<Change>({
 66 |     Change{
 67 |       Point{0, 0}, Point{0, 0},
 68 |       Point{0, 0}, Point{4, 0},
 69 |       get_text(u"").get(), get_text(u"abc\ndef\nghi\njkl\n").get(),
 70 |       0, 0, 0
 71 |     },
 72 |   }));
 73 | }
 74 | 
 75 | TEST_CASE("text_diff - old text is a prefix of new text") {
 76 |   Text old_text{u"abc\ndef\n"};
 77 |   Text new_text{u"abc\ndef\nghi\njkl\n"};
 78 | 
 79 |   Patch patch = text_diff(old_text, new_text);
 80 | 
 81 |   REQUIRE(patch.get_changes() == vector<Change>({
 82 |     Change{
 83 |       Point{2, 0}, Point{2, 0},
 84 |       Point{2, 0}, Point{4, 0},
 85 |       get_text(u"").get(), get_text(u"ghi\njkl\n").get(),
 86 |       0, 0, 0
 87 |     },
 88 |   }));
 89 | }
 90 | 
 91 | TEST_CASE("text_diff - old text is a suffix of new text") {
 92 |   Text old_text{u"ghi\njkl\n"};
 93 |   Text new_text{u"abc\ndef\nghi\njkl\n"};
 94 | 
 95 |   Patch patch = text_diff(old_text, new_text);
 96 | 
 97 |   REQUIRE(patch.get_changes() == vector<Change>({
 98 |     Change{
 99 |       Point{0, 0}, Point{0, 0},
100 |       Point{0, 0}, Point{2, 0},
101 |       get_text(u"").get(), get_text(u"abc\ndef\n").get(),
102 |       0, 0, 0
103 |     },
104 |   }));
105 | }
106 | 
107 | TEST_CASE("text_diff - randomized changes") {
108 |   auto t = time(nullptr);
109 |   for (uint i = 0; i < 100; i++) {
110 |     uint32_t seed = t * 1000 + i;
111 |     Generator rand(seed);
112 |     cout << "seed: " << seed << "\n";
113 | 
114 |     Text old_text{get_random_string(rand, 100)};
115 |     Text new_text = old_text;
116 | 
117 |     // cout << "extent: " << new_text.extent() << " text:\n" << new_text << "\n\n";
118 | 
119 |     for (uint j = 0; j < 1 + rand() % 10; j++) {
120 |       // cout << "j: " << j << "\n";
121 | 
122 |       Range deleted_range = get_random_range(rand, new_text);
123 |       Text inserted_text{get_random_string(rand, 3)};
124 | 
125 |       new_text.splice(deleted_range.start, deleted_range.extent(), inserted_text);
126 | 
127 |       // cout << "replace " << deleted_range << " with " << inserted_text << "\n\n";
128 |       // cout << "extent: " << new_text.extent() << " text:\n" << new_text << "\n\n";
129 |     }
130 | 
131 |     Patch patch = text_diff(old_text, new_text);
132 | 
133 |     for (const Change &change : patch.get_changes()) {
134 |       REQUIRE(
135 |         *change.new_text ==
136 |         Text(TextSlice(new_text).slice(Range{change.new_start, change.new_end}))
137 |       );
138 | 
139 |       old_text.splice(
140 |         change.new_start,
141 |         change.old_end.traversal(change.old_start),
142 |         *change.new_text
143 |       );
144 |     }
145 | 
146 |     REQUIRE(old_text == new_text);
147 |   }
148 | }


--------------------------------------------------------------------------------
/test/native/text-test.cc:
--------------------------------------------------------------------------------
  1 | #include "test-helpers.h"
  2 | #include "text.h"
  3 | #include "text-slice.h"
  4 | 
  5 | TEST_CASE("Text::split") {
  6 |   Text text {u"abc\ndef\r\nghi"};
  7 |   TextSlice base_slice {text};
  8 | 
  9 |   {
 10 |     auto slices = base_slice.split({0, 2});
 11 |     REQUIRE(Text(slices.first) == Text(u"ab"));
 12 |     REQUIRE(Text(slices.second) == Text(u"c\ndef\r\nghi"));
 13 |   }
 14 | 
 15 |   {
 16 |     auto slices = base_slice.split({1, 2});
 17 |     REQUIRE(Text(slices.first) == Text(u"abc\nde"));
 18 |     REQUIRE(Text(slices.second) == Text(u"f\r\nghi"));
 19 |   }
 20 | 
 21 |   {
 22 |     auto slices = base_slice.split({1, 3});
 23 |     REQUIRE(Text(slices.first) == Text(u"abc\ndef"));
 24 |     REQUIRE(Text(slices.second) == Text(u"\r\nghi"));
 25 |   }
 26 | 
 27 |   {
 28 |     auto slices = base_slice.split({2, 0});
 29 |     REQUIRE(Text(slices.first) == Text(u"abc\ndef\r\n"));
 30 |     REQUIRE(Text(slices.second) == Text(u"ghi"));
 31 |   }
 32 | 
 33 |   {
 34 |     auto slices = base_slice.split({2, 3});
 35 |     REQUIRE(Text(slices.first) == Text(u"abc\ndef\r\nghi"));
 36 |     REQUIRE(Text(slices.second) == Text(u""));
 37 |   }
 38 | }
 39 | 
 40 | TEST_CASE("Text::concat") {
 41 |   Text text {u"abc\ndef\r\nghi"};
 42 |   TextSlice base_slice {text};
 43 | 
 44 |   REQUIRE(Text::concat(base_slice, base_slice) == Text(u"abc\ndef\r\nghiabc\ndef\r\nghi"));
 45 | 
 46 |   {
 47 |     auto prefix = base_slice.prefix({0, 2});
 48 |     auto suffix = base_slice.suffix({2, 2});
 49 |     REQUIRE(Text::concat(prefix, suffix) == Text(u"abi"));
 50 |   }
 51 | 
 52 |   {
 53 |     auto prefix = base_slice.prefix({1, 3});
 54 |     auto suffix = base_slice.suffix({2, 2});
 55 |     REQUIRE(Text::concat(prefix, suffix) == Text(u"abc\ndefi"));
 56 |   }
 57 | 
 58 |   {
 59 |     auto prefix = base_slice.prefix({1, 3});
 60 |     auto suffix = base_slice.suffix({2, 3});
 61 |     REQUIRE(Text::concat(prefix, suffix) == Text(u"abc\ndef"));
 62 |   }
 63 | }
 64 | 
 65 | TEST_CASE("Text::splice") {
 66 |   Text text {u"abc\ndef\r\nghi\njkl"};
 67 |   text.splice({1, 2}, {1, 1}, Text {u"mno\npq\r\nst"});
 68 |   REQUIRE(text == Text {u"abc\ndemno\npq\r\nsthi\njkl"});
 69 |   text.splice({2, 1}, {2, 1}, Text {u""});
 70 |   REQUIRE(text == Text {u"abc\ndemno\npkl"});
 71 |   text.splice({1, 1}, {0, 0}, Text {u"uvw"});
 72 |   REQUIRE(text == Text {u"abc\nduvwemno\npkl"});
 73 |   text.splice(text.extent(), {0, 0}, Text {u"\nxyz\r\nabc"});
 74 |   REQUIRE(text == Text {u"abc\nduvwemno\npkl\nxyz\r\nabc"});
 75 |   text.splice({0, 0}, {0, 0}, Text {u"def\nghi"});
 76 |   REQUIRE(text == Text {u"def\nghiabc\nduvwemno\npkl\nxyz\r\nabc"});
 77 | }
 78 | 
 79 | TEST_CASE("Text::offset_for_position - basic") {
 80 |   Text text {u"abc\ndefg\r\nhijkl"};
 81 | 
 82 |   REQUIRE(text.offset_for_position({0, 2}) == 2);
 83 |   REQUIRE(text.offset_for_position({0, 3}) == 3);
 84 |   REQUIRE(text.offset_for_position({0, 4}) == 3);
 85 |   REQUIRE(text.offset_for_position({0, 8}) == 3);
 86 | 
 87 |   REQUIRE(text.offset_for_position({1, 1}) == 5);
 88 |   REQUIRE(text.offset_for_position({1, 4}) == 8);
 89 |   REQUIRE(text.offset_for_position({1, 5}) == 8);
 90 |   REQUIRE(text.offset_for_position({1, 8}) == 8);
 91 | 
 92 |   REQUIRE(text.offset_for_position({2, 0}) == 10);
 93 |   REQUIRE(text.offset_for_position({2, 1}) == 11);
 94 |   REQUIRE(text.offset_for_position({2, 5}) == 15);
 95 |   REQUIRE(text.offset_for_position({2, 6}) == 15);
 96 | }
 97 | 
 98 | TEST_CASE("Text::offset_for_position - empty lines") {
 99 |   Text text {u"a\n\nb\r\rc"};
100 |   TextSlice slice(text);
101 | 
102 |   REQUIRE(text.offset_for_position({0, 1}) == 1);
103 |   REQUIRE(text.offset_for_position({0, 2}) == 1);
104 |   REQUIRE(text.offset_for_position({0, UINT32_MAX}) == 1);
105 |   REQUIRE(text.offset_for_position({1, 0}) == 2);
106 |   REQUIRE(slice.position_for_offset(1) == Point(0, 1));
107 |   REQUIRE(text.offset_for_position({1, 1}) == 2);
108 |   REQUIRE(text.offset_for_position({1, UINT32_MAX}) == 2);
109 |   REQUIRE(slice.position_for_offset(2) == Point(1, 0));
110 | }
111 | 


--------------------------------------------------------------------------------
/vendor/libcxx/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | ==============================================================================
 2 | libc++ License
 3 | ==============================================================================
 4 | 
 5 | The libc++ library is dual licensed under both the University of Illinois
 6 | "BSD-Like" license and the MIT license.  As a user of this code you may choose
 7 | to use it under either license.  As a contributor, you agree to allow your code
 8 | to be used under both.
 9 | 
10 | Full text of the relevant licenses is included below.
11 | 
12 | ==============================================================================
13 | 
14 | University of Illinois/NCSA
15 | Open Source License
16 | 
17 | Copyright (c) 2009-2017 by the contributors listed in CREDITS.TXT
18 | 
19 | All rights reserved.
20 | 
21 | Developed by:
22 | 
23 |     LLVM Team
24 | 
25 |     University of Illinois at Urbana-Champaign
26 | 
27 |     http://llvm.org
28 | 
29 | Permission is hereby granted, free of charge, to any person obtaining a copy of
30 | this software and associated documentation files (the "Software"), to deal with
31 | the Software without restriction, including without limitation the rights to
32 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
33 | of the Software, and to permit persons to whom the Software is furnished to do
34 | so, subject to the following conditions:
35 | 
36 |     * Redistributions of source code must retain the above copyright notice,
37 |       this list of conditions and the following disclaimers.
38 | 
39 |     * Redistributions in binary form must reproduce the above copyright notice,
40 |       this list of conditions and the following disclaimers in the
41 |       documentation and/or other materials provided with the distribution.
42 | 
43 |     * Neither the names of the LLVM Team, University of Illinois at
44 |       Urbana-Champaign, nor the names of its contributors may be used to
45 |       endorse or promote products derived from this Software without specific
46 |       prior written permission.
47 | 
48 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
49 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
50 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
51 | CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
52 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
53 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
54 | SOFTWARE.
55 | 
56 | ==============================================================================
57 | 
58 | Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT
59 | 
60 | Permission is hereby granted, free of charge, to any person obtaining a copy
61 | of this software and associated documentation files (the "Software"), to deal
62 | in the Software without restriction, including without limitation the rights
63 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
64 | copies of the Software, and to permit persons to whom the Software is
65 | furnished to do so, subject to the following conditions:
66 | 
67 | The above copyright notice and this permission notice shall be included in
68 | all copies or substantial portions of the Software.
69 | 
70 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
71 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
72 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
73 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
74 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
75 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
76 | THE SOFTWARE.


--------------------------------------------------------------------------------
/vendor/pcre/10.23/AUTHORS:
--------------------------------------------------------------------------------
 1 | THE MAIN PCRE2 LIBRARY CODE
 2 | ---------------------------
 3 | 
 4 | Written by:       Philip Hazel
 5 | Email local part: ph10
 6 | Email domain:     cam.ac.uk
 7 | 
 8 | University of Cambridge Computing Service,
 9 | Cambridge, England.
10 | 
11 | Copyright (c) 1997-2017 University of Cambridge
12 | All rights reserved
13 | 
14 | 
15 | PCRE2 JUST-IN-TIME COMPILATION SUPPORT
16 | --------------------------------------
17 | 
18 | Written by:       Zoltan Herczeg
19 | Email local part: hzmester
20 | Emain domain:     freemail.hu
21 | 
22 | Copyright(c) 2010-2017 Zoltan Herczeg
23 | All rights reserved.
24 | 
25 | 
26 | STACK-LESS JUST-IN-TIME COMPILER
27 | --------------------------------
28 | 
29 | Written by:       Zoltan Herczeg
30 | Email local part: hzmester
31 | Emain domain:     freemail.hu
32 | 
33 | Copyright(c) 2009-2017 Zoltan Herczeg
34 | All rights reserved.
35 | 
36 | ####
37 | 


--------------------------------------------------------------------------------
/vendor/pcre/10.23/COPYING:
--------------------------------------------------------------------------------
1 | PCRE2 LICENCE
2 | 
3 | Please see the file LICENCE in the PCRE2 distribution for licensing details.
4 | 
5 | End
6 | 


--------------------------------------------------------------------------------
/vendor/pcre/10.23/LICENCE:
--------------------------------------------------------------------------------
 1 | PCRE2 LICENCE
 2 | -------------
 3 | 
 4 | PCRE2 is a library of functions to support regular expressions whose syntax
 5 | and semantics are as close as possible to those of the Perl 5 language.
 6 | 
 7 | Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as
 8 | specified below. The documentation for PCRE2, supplied in the "doc"
 9 | directory, is distributed under the same terms as the software itself. The data
10 | in the testdata directory is not copyrighted and is in the public domain.
11 | 
12 | The basic library functions are written in C and are freestanding. Also
13 | included in the distribution is a just-in-time compiler that can be used to
14 | optimize pattern matching. This is an optional feature that can be omitted when
15 | the library is built.
16 | 
17 | 
18 | THE BASIC LIBRARY FUNCTIONS
19 | ---------------------------
20 | 
21 | Written by:       Philip Hazel
22 | Email local part: ph10
23 | Email domain:     cam.ac.uk
24 | 
25 | University of Cambridge Computing Service,
26 | Cambridge, England.
27 | 
28 | Copyright (c) 1997-2017 University of Cambridge
29 | All rights reserved.
30 | 
31 | 
32 | PCRE2 JUST-IN-TIME COMPILATION SUPPORT
33 | --------------------------------------
34 | 
35 | Written by:       Zoltan Herczeg
36 | Email local part: hzmester
37 | Emain domain:     freemail.hu
38 | 
39 | Copyright(c) 2010-2017 Zoltan Herczeg
40 | All rights reserved.
41 | 
42 | 
43 | STACK-LESS JUST-IN-TIME COMPILER
44 | --------------------------------
45 | 
46 | Written by:       Zoltan Herczeg
47 | Email local part: hzmester
48 | Emain domain:     freemail.hu
49 | 
50 | Copyright(c) 2009-2017 Zoltan Herczeg
51 | All rights reserved.
52 | 
53 | 
54 | THE "BSD" LICENCE
55 | -----------------
56 | 
57 | Redistribution and use in source and binary forms, with or without
58 | modification, are permitted provided that the following conditions are met:
59 | 
60 |     * Redistributions of source code must retain the above copyright notice,
61 |       this list of conditions and the following disclaimer.
62 | 
63 |     * Redistributions in binary form must reproduce the above copyright
64 |       notice, this list of conditions and the following disclaimer in the
65 |       documentation and/or other materials provided with the distribution.
66 | 
67 |     * Neither the name of the University of Cambridge nor the names of any
68 |       contributors may be used to endorse or promote products derived from this
69 |       software without specific prior written permission.
70 | 
71 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
72 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
73 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
74 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
75 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
76 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
77 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
78 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
79 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
80 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
81 | POSSIBILITY OF SUCH DAMAGE.
82 | 
83 | End
84 | 


--------------------------------------------------------------------------------
/vendor/pcre/10.23/src/pcre2_find_bracket.c:
--------------------------------------------------------------------------------
  1 | /*************************************************
  2 | *      Perl-Compatible Regular Expressions       *
  3 | *************************************************/
  4 | 
  5 | /* PCRE is a library of functions to support regular expressions whose syntax
  6 | and semantics are as close as possible to those of the Perl 5 language.
  7 | 
  8 |                        Written by Philip Hazel
  9 |      Original API code Copyright (c) 1997-2012 University of Cambridge
 10 |          New API code Copyright (c) 2016 University of Cambridge
 11 | 
 12 | -----------------------------------------------------------------------------
 13 | Redistribution and use in source and binary forms, with or without
 14 | modification, are permitted provided that the following conditions are met:
 15 | 
 16 |     * Redistributions of source code must retain the above copyright notice,
 17 |       this list of conditions and the following disclaimer.
 18 | 
 19 |     * Redistributions in binary form must reproduce the above copyright
 20 |       notice, this list of conditions and the following disclaimer in the
 21 |       documentation and/or other materials provided with the distribution.
 22 | 
 23 |     * Neither the name of the University of Cambridge nor the names of its
 24 |       contributors may be used to endorse or promote products derived from
 25 |       this software without specific prior written permission.
 26 | 
 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 37 | POSSIBILITY OF SUCH DAMAGE.
 38 | -----------------------------------------------------------------------------
 39 | */
 40 | 
 41 | 
 42 | /* This module contains a single function that scans through a compiled pattern
 43 | until it finds a capturing bracket with the given number, or, if the number is
 44 | negative, an instance of OP_REVERSE for a lookbehind. The function is called
 45 | from pcre2_compile.c and also from pcre2_study.c when finding the minimum
 46 | matching length. */
 47 | 
 48 | 
 49 | #ifdef HAVE_CONFIG_H
 50 | #include "config.h"
 51 | #endif
 52 | 
 53 | #include "pcre2_internal.h"
 54 | 
 55 | 
 56 | /*************************************************
 57 | *    Scan compiled regex for specific bracket    *
 58 | *************************************************/
 59 | 
 60 | /*
 61 | Arguments:
 62 |   code        points to start of expression
 63 |   utf         TRUE in UTF mode
 64 |   number      the required bracket number or negative to find a lookbehind
 65 | 
 66 | Returns:      pointer to the opcode for the bracket, or NULL if not found
 67 | */
 68 | 
/* Steps through the compiled code one item at a time. Each pass of the loop
either returns (OP_END, a matching OP_REVERSE, or a capturing bracket whose
number equals "number") or advances "code" past the current item. */

PCRE2_SPTR
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
{
for (;;)
  {
  PCRE2_UCHAR c = *code;

  /* OP_END was reached without finding the requested item. */

  if (c == OP_END) return NULL;

  /* XCLASS is used for classes that cannot be represented just by a bit map.
  This includes negated single high-valued characters. CALLOUT_STR is used for
  callouts with string arguments. In both cases the length in the table is
  zero; the actual length is stored in the compiled code. */

  if (c == OP_XCLASS) code += GET(code, 1);
    else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);

  /* Handle lookbehind */

  else if (c == OP_REVERSE)
    {
    /* A negative number asks for a lookbehind, so the first OP_REVERSE met is
    the answer; otherwise it is skipped like any fixed-length item. */
    if (number < 0) return (PCRE2_UCHAR *)code;
    code += PRIV(OP_lengths)[c];
    }

  /* Handle capturing bracket */

  else if (c == OP_CBRA || c == OP_SCBRA ||
           c == OP_CBRAPOS || c == OP_SCBRAPOS)
    {
    /* The bracket number is read from the code at offset 1+LINK_SIZE. */
    int n = (int)GET2(code, 1+LINK_SIZE);
    if (n == number) return (PCRE2_UCHAR *)code;
    code += PRIV(OP_lengths)[c];
    }

  /* Otherwise, we can get the item's length from the table, except that for
  repeated character types, we have to test for \p and \P, which have an extra
  two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
  must add in its length. */

  else
    {
    switch(c)
      {
      case OP_TYPESTAR:
      case OP_TYPEMINSTAR:
      case OP_TYPEPLUS:
      case OP_TYPEMINPLUS:
      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      case OP_TYPEPOSQUERY:
      /* The character type follows the opcode directly. */
      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
      break;

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEEXACT:
      case OP_TYPEPOSUPTO:
      /* Here the character type is tested at offset 1 + IMM2_SIZE. */
      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
        code += 2;
      break;

      case OP_MARK:
      case OP_PRUNE_ARG:
      case OP_SKIP_ARG:
      case OP_THEN_ARG:
      /* code[1] is added as the length of the argument. */
      code += code[1];
      break;
      }

    /* Add in the fixed length from the table */

    code += PRIV(OP_lengths)[c];

  /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
  followed by a multi-byte character. The length in the table is a minimum, so
  we have to arrange to skip the extra bytes. */

#ifdef MAYBE_UTF_MULTI
    if (utf) switch(c)
      {
      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      case OP_EXACT:
      case OP_EXACTI:
      case OP_NOTEXACT:
      case OP_NOTEXACTI:
      case OP_UPTO:
      case OP_UPTOI:
      case OP_NOTUPTO:
      case OP_NOTUPTOI:
      case OP_MINUPTO:
      case OP_MINUPTOI:
      case OP_NOTMINUPTO:
      case OP_NOTMINUPTOI:
      case OP_POSUPTO:
      case OP_POSUPTOI:
      case OP_NOTPOSUPTO:
      case OP_NOTPOSUPTOI:
      case OP_STAR:
      case OP_STARI:
      case OP_NOTSTAR:
      case OP_NOTSTARI:
      case OP_MINSTAR:
      case OP_MINSTARI:
      case OP_NOTMINSTAR:
      case OP_NOTMINSTARI:
      case OP_POSSTAR:
      case OP_POSSTARI:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSSTARI:
      case OP_PLUS:
      case OP_PLUSI:
      case OP_NOTPLUS:
      case OP_NOTPLUSI:
      case OP_MINPLUS:
      case OP_MINPLUSI:
      case OP_NOTMINPLUS:
      case OP_NOTMINPLUSI:
      case OP_POSPLUS:
      case OP_POSPLUSI:
      case OP_NOTPOSPLUS:
      case OP_NOTPOSPLUSI:
      case OP_QUERY:
      case OP_QUERYI:
      case OP_NOTQUERY:
      case OP_NOTQUERYI:
      case OP_MINQUERY:
      case OP_MINQUERYI:
      case OP_NOTMINQUERY:
      case OP_NOTMINQUERYI:
      case OP_POSQUERY:
      case OP_POSQUERYI:
      case OP_NOTPOSQUERY:
      case OP_NOTPOSQUERYI:
      /* "code" was already advanced by the table length above, so code[-1] is
      the last code unit that was skipped. */
      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
      break;
      }
#else
    (void)(utf);  /* Keep compiler happy by referencing function argument */
#endif  /* MAYBE_UTF_MULTI */
    }
  }
}
217 | 
218 | /* End of pcre2_find_bracket.c */
219 | 


--------------------------------------------------------------------------------
/vendor/pcre/10.23/src/pcre2_jit_match.c:
--------------------------------------------------------------------------------
  1 | /*************************************************
  2 | *      Perl-Compatible Regular Expressions       *
  3 | *************************************************/
  4 | 
  5 | /* PCRE is a library of functions to support regular expressions whose syntax
  6 | and semantics are as close as possible to those of the Perl 5 language.
  7 | 
  8 |                        Written by Philip Hazel
  9 |      Original API code Copyright (c) 1997-2012 University of Cambridge
 10 |          New API code Copyright (c) 2016 University of Cambridge
 11 | 
 12 | -----------------------------------------------------------------------------
 13 | Redistribution and use in source and binary forms, with or without
 14 | modification, are permitted provided that the following conditions are met:
 15 | 
 16 |     * Redistributions of source code must retain the above copyright notice,
 17 |       this list of conditions and the following disclaimer.
 18 | 
 19 |     * Redistributions in binary form must reproduce the above copyright
 20 |       notice, this list of conditions and the following disclaimer in the
 21 |       documentation and/or other materials provided with the distribution.
 22 | 
 23 |     * Neither the name of the University of Cambridge nor the names of its
 24 |       contributors may be used to endorse or promote products derived from
 25 |       this software without specific prior written permission.
 26 | 
 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 37 | POSSIBILITY OF SUCH DAMAGE.
 38 | -----------------------------------------------------------------------------
 39 | */
 40 | 
 41 | #ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
 42 | #error This file must be included from pcre2_jit_compile.c.
 43 | #endif
 44 | 
 45 | #ifdef SUPPORT_JIT
 46 | 
 47 | static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func)
 48 | {
 49 | sljit_u8 local_space[MACHINE_STACK_SIZE];
 50 | struct sljit_stack local_stack;
 51 | 
 52 | local_stack.top = (sljit_sw)&local_space;
 53 | local_stack.base = local_stack.top;
 54 | local_stack.limit = local_stack.base + MACHINE_STACK_SIZE;
 55 | local_stack.max_limit = local_stack.limit;
 56 | arguments->stack = &local_stack;
 57 | return executable_func(arguments);
 58 | }
 59 | 
 60 | #endif
 61 | 
 62 | 
 63 | /*************************************************
 64 | *              Do a JIT pattern match            *
 65 | *************************************************/
 66 | 
 67 | /* This function runs a JIT pattern match.
 68 | 
 69 | Arguments:
 70 |   code            points to the compiled expression
 71 |   subject         points to the subject string
 72 |   length          length of subject string (may contain binary zeros)
 73 |   start_offset    where to start in the subject string
 74 |   options         option bits
 75 |   match_data      points to a match_data block
 76 |   mcontext        points to a match context
  77 |                   (the JIT stack, if any, is obtained through mcontext)
 78 | 
 79 | Returns:          > 0 => success; value is the number of ovector pairs filled
 80 |                   = 0 => success, but ovector is not big enough
  81 |                    -1 => failed to match (PCRE2_ERROR_NOMATCH)
 82 |                  < -1 => some kind of unexpected problem
 83 | */
 84 | 
 85 | PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 86 | pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
 87 |   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
 88 |   pcre2_match_context *mcontext)
 89 | {
 90 | #ifndef SUPPORT_JIT
 91 | 
 92 | (void)code;
 93 | (void)subject;
 94 | (void)length;
 95 | (void)start_offset;
 96 | (void)options;
 97 | (void)match_data;
 98 | (void)mcontext;
 99 | return PCRE2_ERROR_JIT_BADOPTION;
100 | 
101 | #else  /* SUPPORT_JIT */
102 | 
103 | pcre2_real_code *re = (pcre2_real_code *)code;
104 | executable_functions *functions = (executable_functions *)re->executable_jit;
105 | pcre2_jit_stack *jit_stack;
106 | uint32_t oveccount = match_data->oveccount;
107 | uint32_t max_oveccount;
108 | union {
109 |    void *executable_func;
110 |    jit_function call_executable_func;
111 | } convert_executable_func;
112 | jit_arguments arguments;
113 | int rc;
114 | int index = 0;
115 | 
116 | if ((options & PCRE2_PARTIAL_HARD) != 0)
117 |   index = 2;
118 | else if ((options & PCRE2_PARTIAL_SOFT) != 0)
119 |   index = 1;
120 | 
121 | if (functions->executable_funcs[index] == NULL)
122 |   return PCRE2_ERROR_JIT_BADOPTION;
123 | 
124 | /* Sanity checks should be handled by pcre_exec. */
125 | arguments.str = subject + start_offset;
126 | arguments.begin = subject;
127 | arguments.end = subject + length;
128 | arguments.match_data = match_data;
129 | arguments.startchar_ptr = subject;
130 | arguments.mark_ptr = NULL;
131 | arguments.options = options;
132 | 
133 | if (mcontext != NULL)
134 |   {
135 |   arguments.callout = mcontext->callout;
136 |   arguments.callout_data = mcontext->callout_data;
137 |   arguments.offset_limit = mcontext->offset_limit;
138 |   arguments.limit_match = (mcontext->match_limit < re->limit_match)?
139 |     mcontext->match_limit : re->limit_match;
140 |   if (mcontext->jit_callback != NULL)
141 |     jit_stack = mcontext->jit_callback(mcontext->jit_callback_data);
142 |   else
143 |     jit_stack = (pcre2_jit_stack *)mcontext->jit_callback_data;
144 |   }
145 | else
146 |   {
147 |   arguments.callout = NULL;
148 |   arguments.callout_data = NULL;
149 |   arguments.offset_limit = PCRE2_UNSET;
150 |   arguments.limit_match = (MATCH_LIMIT < re->limit_match)?
151 |     MATCH_LIMIT : re->limit_match;
152 |   jit_stack = NULL;
153 |   }
154 | 
155 | /* JIT only need two offsets for each ovector entry. Hence
156 |    the last 1/3 of the ovector will never be touched. */
157 | 
158 | max_oveccount = functions->top_bracket;
159 | if (oveccount > max_oveccount)
160 |   oveccount = max_oveccount;
161 | arguments.oveccount = oveccount << 1;
162 | 
163 | 
164 | convert_executable_func.executable_func = functions->executable_funcs[index];
165 | if (jit_stack != NULL)
166 |   {
167 |   arguments.stack = (struct sljit_stack *)(jit_stack->stack);
168 |   rc = convert_executable_func.call_executable_func(&arguments);
169 |   }
170 | else
171 |   rc = jit_machine_stack_exec(&arguments, convert_executable_func.call_executable_func);
172 | 
173 | if (rc > (int)oveccount)
174 |   rc = 0;
175 | match_data->code = re;
176 | match_data->subject = subject;
177 | match_data->rc = rc;
178 | match_data->startchar = arguments.startchar_ptr - subject;
179 | match_data->leftchar = 0;
180 | match_data->rightchar = 0;
181 | match_data->mark = arguments.mark_ptr;
182 | match_data->matchedby = PCRE2_MATCHEDBY_JIT;
183 | 
184 | return match_data->rc;
185 | 
186 | #endif  /* SUPPORT_JIT */
187 | }
188 | 
189 | /* End of pcre2_jit_match.c */
190 | 


--------------------------------------------------------------------------------
/vendor/pcre/10.23/src/pcre2_jit_misc.c:
--------------------------------------------------------------------------------
  1 | /*************************************************
  2 | *      Perl-Compatible Regular Expressions       *
  3 | *************************************************/
  4 | 
  5 | /* PCRE is a library of functions to support regular expressions whose syntax
  6 | and semantics are as close as possible to those of the Perl 5 language.
  7 | 
  8 |                        Written by Philip Hazel
  9 |      Original API code Copyright (c) 1997-2012 University of Cambridge
 10 |          New API code Copyright (c) 2016 University of Cambridge
 11 | 
 12 | -----------------------------------------------------------------------------
 13 | Redistribution and use in source and binary forms, with or without
 14 | modification, are permitted provided that the following conditions are met:
 15 | 
 16 |     * Redistributions of source code must retain the above copyright notice,
 17 |       this list of conditions and the following disclaimer.
 18 | 
 19 |     * Redistributions in binary form must reproduce the above copyright
 20 |       notice, this list of conditions and the following disclaimer in the
 21 |       documentation and/or other materials provided with the distribution.
 22 | 
 23 |     * Neither the name of the University of Cambridge nor the names of its
 24 |       contributors may be used to endorse or promote products derived from
 25 |       this software without specific prior written permission.
 26 | 
 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 37 | POSSIBILITY OF SUCH DAMAGE.
 38 | -----------------------------------------------------------------------------
 39 | */
 40 | 
 41 | 
 42 | #ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
 43 | #error This file must be included from pcre2_jit_compile.c.
 44 | #endif
 45 | 
 46 | 
 47 | 
 48 | /*************************************************
 49 | *           Free JIT read-only data              *
 50 | *************************************************/
 51 | 
 52 | void
 53 | PRIV(jit_free_rodata)(void *current, void *allocator_data)
 54 | {
 55 | #ifndef SUPPORT_JIT
 56 | (void)current;
 57 | (void)allocator_data;
 58 | #else  /* SUPPORT_JIT */
 59 | void *next;
 60 | 
 61 | SLJIT_UNUSED_ARG(allocator_data);
 62 | 
 63 | while (current != NULL)
 64 |   {
 65 |   next = *(void**)current;
 66 |   SLJIT_FREE(current, allocator_data);
 67 |   current = next;
 68 |   }
 69 | 
 70 | #endif /* SUPPORT_JIT */
 71 | }
 72 | 
 73 | /*************************************************
 74 | *           Free JIT compiled code               *
 75 | *************************************************/
 76 | 
 77 | void
 78 | PRIV(jit_free)(void *executable_jit, pcre2_memctl *memctl)
 79 | {
 80 | #ifndef SUPPORT_JIT
 81 | (void)executable_jit;
 82 | (void)memctl;
 83 | #else  /* SUPPORT_JIT */
 84 | 
 85 | executable_functions *functions = (executable_functions *)executable_jit;
 86 | void *allocator_data = memctl;
 87 | int i;
 88 | 
 89 | for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
 90 |   {
 91 |   if (functions->executable_funcs[i] != NULL)
 92 |     sljit_free_code(functions->executable_funcs[i]);
 93 |   PRIV(jit_free_rodata)(functions->read_only_data_heads[i], allocator_data);
 94 |   }
 95 | 
 96 | SLJIT_FREE(functions, allocator_data);
 97 | 
 98 | #endif /* SUPPORT_JIT */
 99 | }
100 | 
101 | 
102 | /*************************************************
103 | *            Free unused JIT memory              *
104 | *************************************************/
105 | 
106 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
107 | pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
108 | {
109 | #ifndef SUPPORT_JIT
110 | (void)gcontext;     /* Suppress warning */
111 | #else  /* SUPPORT_JIT */
112 | SLJIT_UNUSED_ARG(gcontext);
113 | sljit_free_unused_memory_exec();
114 | #endif  /* SUPPORT_JIT */
115 | }
116 | 
117 | 
118 | 
119 | /*************************************************
120 | *            Allocate a JIT stack                *
121 | *************************************************/
122 | 
123 | PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
124 | pcre2_jit_stack_create(size_t startsize, size_t maxsize,
125 |   pcre2_general_context *gcontext)
126 | {
127 | #ifndef SUPPORT_JIT
128 | 
129 | (void)gcontext;
130 | (void)startsize;
131 | (void)maxsize;
132 | return NULL;
133 | 
134 | #else  /* SUPPORT_JIT */
135 | 
136 | pcre2_jit_stack *jit_stack;
137 | 
138 | if (startsize < 1 || maxsize < 1)
139 |   return NULL;
140 | if (startsize > maxsize)
141 |   startsize = maxsize;
142 | startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
143 | maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
144 | 
145 | jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext);
146 | if (jit_stack == NULL) return NULL;
147 | jit_stack->stack = sljit_allocate_stack(startsize, maxsize, &jit_stack->memctl);
148 | return jit_stack;
149 | 
150 | #endif
151 | }
152 | 
153 | 
154 | /*************************************************
155 | *         Assign a JIT stack to a pattern        *
156 | *************************************************/
157 | 
158 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
159 | pcre2_jit_stack_assign(pcre2_match_context *mcontext, pcre2_jit_callback callback,
160 |   void *callback_data)
161 | {
162 | #ifndef SUPPORT_JIT
163 | (void)mcontext;
164 | (void)callback;
165 | (void)callback_data;
166 | #else  /* SUPPORT_JIT */
167 | 
168 | if (mcontext == NULL) return;
169 | mcontext->jit_callback = callback;
170 | mcontext->jit_callback_data = callback_data;
171 | 
172 | #endif  /* SUPPORT_JIT */
173 | }
174 | 
175 | 
176 | /*************************************************
177 | *               Free a JIT stack                 *
178 | *************************************************/
179 | 
180 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
181 | pcre2_jit_stack_free(pcre2_jit_stack *jit_stack)
182 | {
183 | #ifndef SUPPORT_JIT
184 | (void)jit_stack;
185 | #else  /* SUPPORT_JIT */
186 | if (jit_stack != NULL)
187 |   {
188 |   sljit_free_stack((struct sljit_stack *)(jit_stack->stack), &jit_stack->memctl);
189 |   jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
190 |   }
191 | #endif  /* SUPPORT_JIT */
192 | }
193 | 
194 | 
195 | /*************************************************
196 | *               Get target CPU type              *
197 | *************************************************/
198 | 
199 | const char*
200 | PRIV(jit_get_target)(void)
201 | {
202 | #ifndef SUPPORT_JIT
203 | return "JIT is not supported";
204 | #else  /* SUPPORT_JIT */
205 | return sljit_get_platform_name();
206 | #endif  /* SUPPORT_JIT */
207 | }
208 | 
209 | 
210 | /*************************************************
211 | *              Get size of JIT code              *
212 | *************************************************/
213 | 
214 | size_t
215 | PRIV(jit_get_size)(void *executable_jit)
216 | {
217 | #ifndef SUPPORT_JIT
218 | (void)executable_jit;
219 | return 0;
220 | #else  /* SUPPORT_JIT */
221 | sljit_uw *executable_sizes = ((executable_functions *)executable_jit)->executable_sizes;
222 | SLJIT_COMPILE_ASSERT(JIT_NUMBER_OF_COMPILE_MODES == 3, number_of_compile_modes_changed);
223 | return executable_sizes[0] + executable_sizes[1] + executable_sizes[2];
224 | #endif
225 | }
226 | 
227 | /* End of pcre2_jit_misc.c */
228 | 


--------------------------------------------------------------------------------
/vendor/pcre/10.23/src/pcre2_maketables.c:
--------------------------------------------------------------------------------
  1 | /*************************************************
  2 | *      Perl-Compatible Regular Expressions       *
  3 | *************************************************/
  4 | 
  5 | /* PCRE is a library of functions to support regular expressions whose syntax
  6 | and semantics are as close as possible to those of the Perl 5 language.
  7 | 
  8 |                        Written by Philip Hazel
  9 |      Original API code Copyright (c) 1997-2012 University of Cambridge
 10 |          New API code Copyright (c) 2016 University of Cambridge
 11 | 
 12 | -----------------------------------------------------------------------------
 13 | Redistribution and use in source and binary forms, with or without
 14 | modification, are permitted provided that the following conditions are met:
 15 | 
 16 |     * Redistributions of source code must retain the above copyright notice,
 17 |       this list of conditions and the following disclaimer.
 18 | 
 19 |     * Redistributions in binary form must reproduce the above copyright
 20 |       notice, this list of conditions and the following disclaimer in the
 21 |       documentation and/or other materials provided with the distribution.
 22 | 
 23 |     * Neither the name of the University of Cambridge nor the names of its
 24 |       contributors may be used to endorse or promote products derived from
 25 |       this software without specific prior written permission.
 26 | 
 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 37 | POSSIBILITY OF SUCH DAMAGE.
 38 | -----------------------------------------------------------------------------
 39 | */
 40 | 
 41 | 
 42 | /* This module contains the external function pcre2_maketables(), which builds
 43 | character tables for PCRE2 in the current locale. The file is compiled on its
 44 | own as part of the PCRE2 library. However, it is also included in the
 45 | compilation of dftables.c, in which case the macro DFTABLES is defined. */
 46 | 
 47 | #ifndef DFTABLES
 48 | #  ifdef HAVE_CONFIG_H
 49 | #  include "config.h"
 50 | #  endif
 51 | #  include "pcre2_internal.h"
 52 | #endif
 53 | 
 54 | 
 55 | 
 56 | /*************************************************
 57 | *           Create PCRE2 character tables        *
 58 | *************************************************/
 59 | 
 60 | /* This function builds a set of character tables for use by PCRE2 and returns
  61 | a pointer to them. They are built using the ctype functions, and consequently
 62 | their contents will depend upon the current locale setting. When compiled as
 63 | part of the library, the store is obtained via a general context malloc, if
 64 | supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
 65 | program) malloc() is used, and the function has a different name so as not to
 66 | clash with the prototype in pcre2.h.
 67 | 
 68 | Arguments:   none when DFTABLES is defined
 69 |              else a PCRE2 general context or NULL
 70 | Returns:     pointer to the contiguous block of data
 71 | */
 72 | 
 73 | #ifdef DFTABLES  /* Included in freestanding dftables.c program */
 74 | static const uint8_t *maketables(void)
 75 | {
 76 | uint8_t *yield = (uint8_t *)malloc(tables_length);
 77 | 
 78 | #else  /* Not DFTABLES, compiling the library */
 79 | PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
 80 | pcre2_maketables(pcre2_general_context *gcontext)
 81 | {
 82 | uint8_t *yield = (uint8_t *)((gcontext != NULL)?
 83 |   gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) :
 84 |   malloc(tables_length));
 85 | #endif  /* DFTABLES */
 86 | 
 87 | int i;
 88 | uint8_t *p;
 89 | 
 90 | if (yield == NULL) return NULL;
 91 | p = yield;
 92 | 
 93 | /* First comes the lower casing table */
 94 | 
 95 | for (i = 0; i < 256; i++) *p++ = tolower(i);
 96 | 
 97 | /* Next the case-flipping table */
 98 | 
 99 | for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
100 | 
101 | /* Then the character class tables. Don't try to be clever and save effort on
102 | exclusive ones - in some locales things may be different.
103 | 
104 | Note that the table for "space" includes everything "isspace" gives, including
105 | VT in the default locale. This makes it work for the POSIX class [:space:].
106 | From release 8.34 is is also correct for Perl space, because Perl added VT at
107 | release 5.18.
108 | 
109 | Note also that it is possible for a character to be alnum or alpha without
110 | being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
111 | fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
112 | test for alnum specially. */
113 | 
114 | memset(p, 0, cbit_length);
115 | for (i = 0; i < 256; i++)
116 |   {
117 |   if (isdigit(i)) p[cbit_digit  + i/8] |= 1 << (i&7);
118 |   if (isupper(i)) p[cbit_upper  + i/8] |= 1 << (i&7);
119 |   if (islower(i)) p[cbit_lower  + i/8] |= 1 << (i&7);
120 |   if (isalnum(i)) p[cbit_word   + i/8] |= 1 << (i&7);
121 |   if (i == '_')   p[cbit_word   + i/8] |= 1 << (i&7);
122 |   if (isspace(i)) p[cbit_space  + i/8] |= 1 << (i&7);
123 |   if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
124 |   if (isgraph(i)) p[cbit_graph  + i/8] |= 1 << (i&7);
125 |   if (isprint(i)) p[cbit_print  + i/8] |= 1 << (i&7);
126 |   if (ispunct(i)) p[cbit_punct  + i/8] |= 1 << (i&7);
127 |   if (iscntrl(i)) p[cbit_cntrl  + i/8] |= 1 << (i&7);
128 |   }
129 | p += cbit_length;
130 | 
131 | /* Finally, the character type table. In this, we used to exclude VT from the
132 | white space chars, because Perl didn't recognize it as such for \s and for
133 | comments within regexes. However, Perl changed at release 5.18, so PCRE changed
134 | at release 8.34. */
135 | 
136 | for (i = 0; i < 256; i++)
137 |   {
138 |   int x = 0;
139 |   if (isspace(i)) x += ctype_space;
140 |   if (isalpha(i)) x += ctype_letter;
141 |   if (isdigit(i)) x += ctype_digit;
142 |   if (isxdigit(i)) x += ctype_xdigit;
143 |   if (isalnum(i) || i == '_') x += ctype_word;
144 | 
145 |   /* Note: strchr includes the terminating zero in the characters it considers.
146 |   In this instance, that is ok because we want binary zero to be flagged as a
147 |   meta-character, which in this sense is any character that terminates a run
148 |   of data characters. */
149 | 
150 |   if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
151 |   *p++ = x;
152 |   }
153 | 
154 | return yield;
155 | }
156 | 
157 | /* End of pcre2_maketables.c */
158 | 


--------------------------------------------------------------------------------
/vendor/pcre/10.23/src/pcre2_match_data.c:
--------------------------------------------------------------------------------
  1 | /*************************************************
  2 | *      Perl-Compatible Regular Expressions       *
  3 | *************************************************/
  4 | 
  5 | /* PCRE is a library of functions to support regular expressions whose syntax
  6 | and semantics are as close as possible to those of the Perl 5 language.
  7 | 
  8 |                        Written by Philip Hazel
  9 |      Original API code Copyright (c) 1997-2012 University of Cambridge
 10 |          New API code Copyright (c) 2016 University of Cambridge
 11 | 
 12 | -----------------------------------------------------------------------------
 13 | Redistribution and use in source and binary forms, with or without
 14 | modification, are permitted provided that the following conditions are met:
 15 | 
 16 |     * Redistributions of source code must retain the above copyright notice,
 17 |       this list of conditions and the following disclaimer.
 18 | 
 19 |     * Redistributions in binary form must reproduce the above copyright
 20 |       notice, this list of conditions and the following disclaimer in the
 21 |       documentation and/or other materials provided with the distribution.
 22 | 
 23 |     * Neither the name of the University of Cambridge nor the names of its
 24 |       contributors may be used to endorse or promote products derived from
 25 |       this software without specific prior written permission.
 26 | 
 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 37 | POSSIBILITY OF SUCH DAMAGE.
 38 | -----------------------------------------------------------------------------
 39 | */
 40 | 
 41 | 
 42 | #ifdef HAVE_CONFIG_H
 43 | #include "config.h"
 44 | #endif
 45 | 
 46 | #include "pcre2_internal.h"
 47 | 
 48 | 
 49 | 
 50 | /*************************************************
 51 | *  Create a match data block given ovector size  *
 52 | *************************************************/
 53 | 
 54 | /* A minimum of 1 is imposed on the number of ovector triplets. */
 55 | 
 56 | PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
 57 | pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
 58 | {
 59 | pcre2_match_data *yield;
 60 | if (oveccount < 1) oveccount = 1;
 61 | yield = PRIV(memctl_malloc)(
 62 |   sizeof(pcre2_match_data) + 3*oveccount*sizeof(PCRE2_SIZE),
 63 |   (pcre2_memctl *)gcontext);
 64 | if (yield == NULL) return NULL;
 65 | yield->oveccount = oveccount;
 66 | return yield;
 67 | }
 68 | 
 69 | 
 70 | 
 71 | /*************************************************
 72 | *  Create a match data block using pattern data  *
 73 | *************************************************/
 74 | 
 75 | /* If no context is supplied, use the memory allocator from the code. */
 76 | 
 77 | PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
 78 | pcre2_match_data_create_from_pattern(const pcre2_code *code,
 79 |   pcre2_general_context *gcontext)
 80 | {
 81 | if (gcontext == NULL) gcontext = (pcre2_general_context *)code;
 82 | return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
 83 |   gcontext);
 84 | }
 85 | 
 86 | 
 87 | 
 88 | /*************************************************
 89 | *            Free a match data block             *
 90 | *************************************************/
 91 | 
 92 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
 93 | pcre2_match_data_free(pcre2_match_data *match_data)
 94 | {
 95 | if (match_data != NULL)
 96 |   match_data->memctl.free(match_data, match_data->memctl.memory_data);
 97 | }
 98 | 
 99 | 
100 | 
101 | /*************************************************
102 | *         Get last mark in match                 *
103 | *************************************************/
104 | 
105 | PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION
106 | pcre2_get_mark(pcre2_match_data *match_data)
107 | {
108 | return match_data->mark;
109 | }
110 | 
111 | 
112 | 
113 | /*************************************************
114 | *          Get pointer to ovector                *
115 | *************************************************/
116 | 
117 | PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
118 | pcre2_get_ovector_pointer(pcre2_match_data *match_data)
119 | {
120 | return match_data->ovector;
121 | }
122 | 
123 | 
124 | 
125 | /*************************************************
126 | *          Get number of ovector slots           *
127 | *************************************************/
128 | 
129 | PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION
130 | pcre2_get_ovector_count(pcre2_match_data *match_data)
131 | {
132 | return match_data->oveccount;
133 | }
134 | 
135 | 
136 | 
137 | /*************************************************
138 | *         Get starting code unit in match        *
139 | *************************************************/
140 | 
141 | PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
142 | pcre2_get_startchar(pcre2_match_data *match_data)
143 | {
144 | return match_data->startchar;
145 | }
146 | 
147 | /* End of pcre2_match_data.c */
148 | 


--------------------------------------------------------------------------------
/vendor/pcre/10.23/src/pcre2_newline.c:
--------------------------------------------------------------------------------
  1 | /*************************************************
  2 | *      Perl-Compatible Regular Expressions       *
  3 | *************************************************/
  4 | 
  5 | /* PCRE is a library of functions to support regular expressions whose syntax
  6 | and semantics are as close as possible to those of the Perl 5 language.
  7 | 
  8 |                        Written by Philip Hazel
  9 |      Original API code Copyright (c) 1997-2012 University of Cambridge
 10 |          New API code Copyright (c) 2016 University of Cambridge
 11 | 
 12 | -----------------------------------------------------------------------------
 13 | Redistribution and use in source and binary forms, with or without
 14 | modification, are permitted provided that the following conditions are met:
 15 | 
 16 |     * Redistributions of source code must retain the above copyright notice,
 17 |       this list of conditions and the following disclaimer.
 18 | 
 19 |     * Redistributions in binary form must reproduce the above copyright
 20 |       notice, this list of conditions and the following disclaimer in the
 21 |       documentation and/or other materials provided with the distribution.
 22 | 
 23 |     * Neither the name of the University of Cambridge nor the names of its
 24 |       contributors may be used to endorse or promote products derived from
 25 |       this software without specific prior written permission.
 26 | 
 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 37 | POSSIBILITY OF SUCH DAMAGE.
 38 | -----------------------------------------------------------------------------
 39 | */
 40 | 
 41 | 
 42 | /* This module contains internal functions for testing newlines when more than
 43 | one kind of newline is to be recognized. When a newline is found, its length is
 44 | returned. In principle, we could implement several newline "types", each
 45 | referring to a different set of newline characters. At present, PCRE2 supports
 46 | only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
 47 | and NLTYPE_ANY. The full list of Unicode newline characters is taken from
 48 | http://unicode.org/unicode/reports/tr18/. */
 49 | 
 50 | 
 51 | #ifdef HAVE_CONFIG_H
 52 | #include "config.h"
 53 | #endif
 54 | 
 55 | #include "pcre2_internal.h"
 56 | 
 57 | 
 58 | 
 59 | /*************************************************
 60 | *      Check for newline at given position       *
 61 | *************************************************/
 62 | 
 63 | /* This function is called only via the IS_NEWLINE macro, which does so only
 64 | when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
 65 | newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
 66 | pointed to by ptr is less than the end of the string.
 67 | 
 68 | Arguments:
 69 |   ptr          pointer to possible newline
 70 |   type         the newline type
 71 |   endptr       pointer to the end of the string
 72 |   lenptr       where to return the length
 73 |   utf          TRUE if in utf mode
 74 | 
 75 | Returns:       TRUE or FALSE
 76 | */
 77 | 
 78 | BOOL
 79 | PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
 80 |   uint32_t *lenptr, BOOL utf)
 81 | {
 82 | uint32_t c;
 83 | 
 84 | #ifdef SUPPORT_UNICODE
 85 | if (utf) { GETCHAR(c, ptr); } else c = *ptr;  /* UTF mode reads via GETCHAR (presumably a whole character); otherwise one code unit */
 86 | #else
 87 | (void)utf;  /* utf is not used in this configuration */
 88 | c = *ptr;
 89 | #endif  /* SUPPORT_UNICODE */
 90 | 
 91 | if (type == NLTYPE_ANYCRLF) switch(c)  /* ANYCRLF: only LF, CR and CR LF count as newlines */
 92 |   {
 93 |   case CHAR_LF:
 94 |   *lenptr = 1;
 95 |   return TRUE;
 96 | 
 97 |   case CHAR_CR:
 98 |   *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;  /* CR LF is one newline of length 2; ptr[1] is read only when it lies before endptr */
 99 |   return TRUE;
100 | 
101 |   default:
102 |   return FALSE;
103 |   }
104 | 
105 | /* NLTYPE_ANY */
106 | 
107 | else switch(c)  /* ANY: LF, VT, FF, CR, CR LF, NEL and, outside EBCDIC, LS and PS */
108 |   {
109 | #ifdef EBCDIC
110 |   case CHAR_NEL:
111 | #endif
112 |   case CHAR_LF:
113 |   case CHAR_VT:
114 |   case CHAR_FF:
115 |   *lenptr = 1;
116 |   return TRUE;
117 | 
118 |   case CHAR_CR:
119 |   *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;  /* same CR / CR LF handling as in the ANYCRLF case */
120 |   return TRUE;
121 | 
122 | #ifndef EBCDIC
123 | #if PCRE2_CODE_UNIT_WIDTH == 8
124 |   case CHAR_NEL:
125 |   *lenptr = utf? 2 : 1;  /* 8-bit units: NEL is 2 code units in UTF mode, 1 otherwise */
126 |   return TRUE;
127 | 
128 |   case 0x2028:   /* LS */
129 |   case 0x2029:   /* PS */
130 |   *lenptr = 3;  /* 8-bit units: LS and PS are reported as 3 code units */
131 |   return TRUE;
132 | 
133 | #else  /* 16-bit or 32-bit code units */
134 |   case CHAR_NEL:
135 |   case 0x2028:   /* LS */
136 |   case 0x2029:   /* PS */
137 |   *lenptr = 1;  /* wider code units: each of these is a single code unit */
138 |   return TRUE;
139 | #endif
140 | #endif /* Not EBCDIC */
141 | 
142 |   default:
143 |   return FALSE;
144 |   }
145 | }
146 | 
147 | 
148 | 
149 | /*************************************************
150 | *     Check for newline at previous position     *
151 | *************************************************/
152 | 
153 | /* This function is called only via the WAS_NEWLINE macro, which does so only
154 | when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
155 | newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
156 | value of ptr is greater than the start of the string that is being processed.
157 | 
158 | Arguments:
159 |   ptr          pointer to possible newline
160 |   type         the newline type
161 |   startptr     pointer to the start of the string
162 |   lenptr       where to return the length
163 |   utf          TRUE if in utf mode
164 | 
165 | Returns:       TRUE or FALSE
166 | */
167 | 
168 | BOOL
169 | PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
170 |   uint32_t *lenptr, BOOL utf)
171 | {
172 | uint32_t c;
173 | ptr--;  /* step back to the code unit just before the given position */
174 | 
175 | #ifdef SUPPORT_UNICODE
176 | if (utf)
177 |   {
178 |   BACKCHAR(ptr);  /* presumably rewinds ptr to the first code unit of a multi-unit character */
179 |   GETCHAR(c, ptr);
180 |   }
181 | else c = *ptr;
182 | #else
183 | (void)utf;  /* utf is not used in this configuration */
184 | c = *ptr;
185 | #endif  /* SUPPORT_UNICODE */
186 | 
187 | if (type == NLTYPE_ANYCRLF) switch(c)  /* ANYCRLF: only LF, CR and CR LF count as newlines */
188 |   {
189 |   case CHAR_LF:
190 |   *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;  /* LF preceded by CR is a CR LF of length 2; ptr[-1] is read only when ptr is past startptr */
191 |   return TRUE;
192 | 
193 |   case CHAR_CR:
194 |   *lenptr = 1;
195 |   return TRUE;
196 | 
197 |   default:
198 |   return FALSE;
199 |   }
200 | 
201 | /* NLTYPE_ANY */
202 | 
203 | else switch(c)  /* ANY: LF, CR LF, VT, FF, CR, NEL and, outside EBCDIC, LS and PS */
204 |   {
205 |   case CHAR_LF:
206 |   *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;  /* same CR LF look-behind as in the ANYCRLF case */
207 |   return TRUE;
208 | 
209 | #ifdef EBCDIC
210 |   case CHAR_NEL:
211 | #endif
212 |   case CHAR_VT:
213 |   case CHAR_FF:
214 |   case CHAR_CR:
215 |   *lenptr = 1;
216 |   return TRUE;
217 | 
218 | #ifndef EBCDIC
219 | #if PCRE2_CODE_UNIT_WIDTH == 8
220 |   case CHAR_NEL:
221 |   *lenptr = utf? 2 : 1;  /* 8-bit units: NEL is 2 code units in UTF mode, 1 otherwise */
222 |   return TRUE;
223 | 
224 |   case 0x2028:   /* LS */
225 |   case 0x2029:   /* PS */
226 |   *lenptr = 3;  /* 8-bit units: LS and PS are reported as 3 code units */
227 |   return TRUE;
228 | 
229 | #else /* 16-bit or 32-bit code units */
230 |   case CHAR_NEL:
231 |   case 0x2028:   /* LS */
232 |   case 0x2029:   /* PS */
233 |   *lenptr = 1;  /* wider code units: each of these is a single code unit */
234 |   return TRUE;
235 | #endif
236 | #endif /* Not EBCDIC */
237 | 
238 |   default:
239 |   return FALSE;
240 |   }
241 | }
242 | 
243 | /* End of pcre2_newline.c */
244 | 


--------------------------------------------------------------------------------
/vendor/pcre/10.23/src/pcre2_ord2utf.c:
--------------------------------------------------------------------------------
  1 | /*************************************************
  2 | *      Perl-Compatible Regular Expressions       *
  3 | *************************************************/
  4 | 
  5 | /* PCRE is a library of functions to support regular expressions whose syntax
  6 | and semantics are as close as possible to those of the Perl 5 language.
  7 | 
  8 |                        Written by Philip Hazel
  9 |      Original API code Copyright (c) 1997-2012 University of Cambridge
 10 |          New API code Copyright (c) 2016 University of Cambridge
 11 | 
 12 | -----------------------------------------------------------------------------
 13 | Redistribution and use in source and binary forms, with or without
 14 | modification, are permitted provided that the following conditions are met:
 15 | 
 16 |     * Redistributions of source code must retain the above copyright notice,
 17 |       this list of conditions and the following disclaimer.
 18 | 
 19 |     * Redistributions in binary form must reproduce the above copyright
 20 |       notice, this list of conditions and the following disclaimer in the
 21 |       documentation and/or other materials provided with the distribution.
 22 | 
 23 |     * Neither the name of the University of Cambridge nor the names of its
 24 |       contributors may be used to endorse or promote products derived from
 25 |       this software without specific prior written permission.
 26 | 
 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 37 | POSSIBILITY OF SUCH DAMAGE.
 38 | -----------------------------------------------------------------------------
 39 | */
 40 | 
 41 | 
 42 | /* This file contains a function that converts a Unicode character code point
 43 | into a UTF string. The behaviour is different for each code unit width. */
 44 | 
 45 | 
 46 | #ifdef HAVE_CONFIG_H
 47 | #include "config.h"
 48 | #endif
 49 | 
 50 | #include "pcre2_internal.h"
 51 | 
 52 | 
 53 | /* If SUPPORT_UNICODE is not defined, this function will never be called.
 54 | Supply a dummy function because some compilers do not like empty source
 55 | modules. */
 56 | 
 57 | #ifndef SUPPORT_UNICODE
 58 | unsigned int
 59 | PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
 60 | {
 61 | (void)(cvalue);  /* both parameters are deliberately unused in the dummy */
 62 | (void)(buffer);
 63 | return 0;  /* nothing is written to the buffer */
 64 | }
 65 | #else  /* SUPPORT_UNICODE */
 66 | 
 67 | 
 68 | /*************************************************
 69 | *          Convert code point to UTF             *
 70 | *************************************************/
 71 | 
 72 | /*
 73 | Arguments:
 74 |   cvalue     the character value
 75 |   buffer     pointer to buffer for result
 76 | 
 77 | Returns:     number of code units placed in the buffer
 78 | */
 79 | 
 80 | unsigned int
 81 | PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
 82 | {
 83 | /* Convert to UTF-8 */
 84 | 
 85 | #if PCRE2_CODE_UNIT_WIDTH == 8
 86 | int i, j;
 87 | for (i = 0; i < PRIV(utf8_table1_size); i++)  /* find the first utf8_table1 limit that cvalue does not exceed */
 88 |   if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
 89 | buffer += i;  /* i bytes follow the first one; they are filled in from last to first */
 90 | for (j = i; j > 0; j--)
 91 |  {
 92 |  *buffer-- = 0x80 | (cvalue & 0x3f);  /* 0x80 marker plus the low 6 bits */
 93 |  cvalue >>= 6;  /* drop the 6 bits just stored */
 94 |  }
 95 | *buffer = PRIV(utf8_table2)[i] | cvalue;  /* first byte: utf8_table2 entry (presumably the length marker) ORed with the remaining bits */
 96 | return i + 1;
 97 | 
 98 | /* Convert to UTF-16 */
 99 | 
100 | #elif PCRE2_CODE_UNIT_WIDTH == 16
101 | if (cvalue <= 0xffff)  /* fits in a single 16-bit code unit */
102 |   {
103 |   *buffer = (PCRE2_UCHAR)cvalue;
104 |   return 1;
105 |   }
106 | cvalue -= 0x10000;  /* otherwise split the offset into two units of 10 bits each */
107 | *buffer++ = 0xd800 | (cvalue >> 10);  /* first unit: 0xd800 plus the bits above the low 10 */
108 | *buffer = 0xdc00 | (cvalue & 0x3ff);  /* second unit: 0xdc00 plus the low 10 bits */
109 | return 2;
110 | 
111 | /* Convert to UTF-32 */
112 | 
113 | #else
114 | *buffer = (PCRE2_UCHAR)cvalue;  /* stored unchanged as one code unit */
115 | return 1;
116 | #endif
117 | }
118 | #endif  /* SUPPORT_UNICODE */
119 | 
120 | /* End of pcre2_ord2utf.c */
121 | 


--------------------------------------------------------------------------------
/vendor/pcre/10.23/src/pcre2_string_utils.c:
--------------------------------------------------------------------------------
  1 | /*************************************************
  2 | *      Perl-Compatible Regular Expressions       *
  3 | *************************************************/
  4 | 
  5 | /* PCRE is a library of functions to support regular expressions whose syntax
  6 | and semantics are as close as possible to those of the Perl 5 language.
  7 | 
  8 |                        Written by Philip Hazel
  9 |      Original API code Copyright (c) 1997-2012 University of Cambridge
 10 |          New API code Copyright (c) 2016 University of Cambridge
 11 | 
 12 | -----------------------------------------------------------------------------
 13 | Redistribution and use in source and binary forms, with or without
 14 | modification, are permitted provided that the following conditions are met:
 15 | 
 16 |     * Redistributions of source code must retain the above copyright notice,
 17 |       this list of conditions and the following disclaimer.
 18 | 
 19 |     * Redistributions in binary form must reproduce the above copyright
 20 |       notice, this list of conditions and the following disclaimer in the
 21 |       documentation and/or other materials provided with the distribution.
 22 | 
 23 |     * Neither the name of the University of Cambridge nor the names of its
 24 |       contributors may be used to endorse or promote products derived from
 25 |       this software without specific prior written permission.
 26 | 
 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 37 | POSSIBILITY OF SUCH DAMAGE.
 38 | -----------------------------------------------------------------------------
 39 | */
 40 | 
 41 | /* This module contains internal functions for comparing and finding the length
 42 | of strings. These are used instead of strcmp() etc because the standard
 43 | functions work only on 8-bit data. */
 44 | 
 45 | 
 46 | #ifdef HAVE_CONFIG_H
 47 | #include "config.h"
 48 | #endif
 49 | 
 50 | #include "pcre2_internal.h"
 51 | 
 52 | 
 53 | /*************************************************
 54 | *    Compare two zero-terminated PCRE2 strings   *
 55 | *************************************************/
 56 | 
 57 | /*
 58 | Arguments:
 59 |   str1        first string
 60 |   str2        second string
 61 | 
 62 | Returns:      0, 1, or -1
 63 | */
 64 | 
 65 | int
 66 | PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2)
 67 | {
 68 | PCRE2_UCHAR a, b;
 69 | for (;;)   /* walk both strings in step, one code unit at a time */
 70 |   {
 71 |   a = *str1++;
 72 |   b = *str2++;
 73 |   if (a != b) return (a > b)? 1 : -1;   /* first difference decides */
 74 |   if (a == 0) return 0;   /* equal and zero: both strings ended together */
 75 |   }
 76 | }
 77 | 
 78 | 
 79 | /*************************************************
 80 | *  Compare zero-terminated PCRE2 & 8-bit strings *
 81 | *************************************************/
 82 | 
 83 | /* As the 8-bit string is almost always a literal, its type is specified as
 84 | const char *.
 85 | 
 86 | Arguments:
 87 |   str1        first string
 88 |   str2        second string
 89 | 
 90 | Returns:      0, 1, or -1
 91 | */
 92 | 
 93 | int
 94 | PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
 95 | {
 96 | PCRE2_UCHAR a, b;
 97 | for (;;)   /* walk both strings in step, one code unit at a time */
 98 |   {
 99 |   a = *str1++;
100 |   b = *str2++;   /* the 8-bit character is converted to a PCRE2 code unit */
101 |   if (a != b) return (a > b)? 1 : -1;   /* first difference decides */
102 |   if (a == 0) return 0;   /* equal and zero: both strings ended together */
103 |   }
104 | }
105 | 
106 | 
107 | /*************************************************
108 | *    Compare two PCRE2 strings, given a length   *
109 | *************************************************/
110 | 
111 | /*
112 | Arguments:
113 |   str1        first string
114 |   str2        second string
115 |   len         the length
116 | 
117 | Returns:      0, 1, or -1
118 | */
119 | 
120 | int
121 | PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
122 | {
123 | size_t i;
124 | for (i = 0; i < len; i++)   /* compare exactly len code units */
125 |   {
126 |   PCRE2_UCHAR a = str1[i];
127 |   PCRE2_UCHAR b = str2[i];
128 |   if (a != b) return (a > b)? 1 : -1;   /* first difference decides */
129 |   }
130 | return 0;   /* no difference within len code units */
131 | }
132 | 
133 | 
134 | /*************************************************
135 | * Compare PCRE2 string to 8-bit string by length *
136 | *************************************************/
137 | 
138 | /* As the 8-bit string is almost always a literal, its type is specified as
139 | const char *.
140 | 
141 | Arguments:
142 |   str1        first string
143 |   str2        second string
144 |   len         the length
145 | 
146 | Returns:      0, 1, or -1
147 | */
148 | 
149 | int
150 | PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
151 | {
152 | size_t i;
153 | for (i = 0; i < len; i++)   /* compare exactly len code units */
154 |   {
155 |   PCRE2_UCHAR a = str1[i];
156 |   PCRE2_UCHAR b = str2[i];   /* the 8-bit character is converted to a PCRE2 code unit */
157 |   if (a != b) return (a > b)? 1 : -1;   /* first difference decides */
158 |   }
159 | return 0;   /* no difference within len code units */
160 | }
161 | 
162 | 
163 | /*************************************************
164 | *        Find the length of a PCRE2 string       *
165 | *************************************************/
166 | 
167 | /*
168 | Argument:    the string
169 | Returns:     the length
170 | */
171 | 
172 | PCRE2_SIZE
173 | PRIV(strlen)(PCRE2_SPTR str)
174 | {
175 | PCRE2_SPTR end = str;
176 | while (*end != 0) end++;   /* advance to the terminating zero code unit */
177 | return (PCRE2_SIZE)(end - str);   /* code units before the terminator */
178 | }
179 | 
180 | 
181 | /*************************************************
182 | * Copy 8-bit 0-terminated string to PCRE2 string *
183 | *************************************************/
184 | 
185 | /* Arguments:
186 |   str1     buffer to receive the string
187 |   str2     8-bit string to be copied
188 | 
189 | Returns:   the number of code units used (excluding trailing zero)
190 | */
191 | 
192 | PCRE2_SIZE
193 | PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
194 | {
195 | PCRE2_SIZE n = 0;
196 | for (; str2[n] != 0; n++) str1[n] = str2[n];   /* copy up to, not including, the zero */
197 | str1[n] = 0;   /* terminate the destination */
198 | return n;   /* code units written, excluding the terminator */
199 | }
200 | 
201 | /* End of pcre2_string_utils.c */
202 | 


--------------------------------------------------------------------------------
/vendor/pcre/10.23/src/sljit/sljitConfig.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *    Stack-less Just-In-Time compiler
  3 |  *
  4 |  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
  5 |  *
  6 |  * Redistribution and use in source and binary forms, with or without modification, are
  7 |  * permitted provided that the following conditions are met:
  8 |  *
  9 |  *   1. Redistributions of source code must retain the above copyright notice, this list of
 10 |  *      conditions and the following disclaimer.
 11 |  *
 12 |  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 13 |  *      of conditions and the following disclaimer in the documentation and/or other materials
 14 |  *      provided with the distribution.
 15 |  *
 16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 17 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 18 |  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 19 |  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 20 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 21 |  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 22 |  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 23 |  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 24 |  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef _SLJIT_CONFIG_H_
 28 | #define _SLJIT_CONFIG_H_
 29 | 
 30 | /* --------------------------------------------------------------------- */
 31 | /*  Custom defines                                                       */
 32 | /* --------------------------------------------------------------------- */
 33 | 
 34 | /* Put your custom defines here. This empty section will never change
 35 |    which helps maintaining patches (with diff / patch utilities). */
 36 | 
 37 | /* --------------------------------------------------------------------- */
 38 | /*  Architecture                                                         */
 39 | /* --------------------------------------------------------------------- */
 40 | 
 41 | /* Architecture selection. */
 42 | /* #define SLJIT_CONFIG_X86_32 1 */
 43 | /* #define SLJIT_CONFIG_X86_64 1 */
 44 | /* #define SLJIT_CONFIG_ARM_V5 1 */
 45 | /* #define SLJIT_CONFIG_ARM_V7 1 */
 46 | /* #define SLJIT_CONFIG_ARM_THUMB2 1 */
 47 | /* #define SLJIT_CONFIG_ARM_64 1 */
 48 | /* #define SLJIT_CONFIG_PPC_32 1 */
 49 | /* #define SLJIT_CONFIG_PPC_64 1 */
 50 | /* #define SLJIT_CONFIG_MIPS_32 1 */
 51 | /* #define SLJIT_CONFIG_MIPS_64 1 */
 52 | /* #define SLJIT_CONFIG_SPARC_32 1 */
 53 | /* #define SLJIT_CONFIG_TILEGX 1 */
 54 | 
 55 | /* #define SLJIT_CONFIG_AUTO 1 */
 56 | /* #define SLJIT_CONFIG_UNSUPPORTED 1 */
 57 | 
 58 | /* --------------------------------------------------------------------- */
 59 | /*  Utilities                                                            */
 60 | /* --------------------------------------------------------------------- */
 61 | 
 62 | /* Useful for thread-safe compiling of global functions. */
 63 | #ifndef SLJIT_UTIL_GLOBAL_LOCK
 64 | /* Enabled by default */
 65 | #define SLJIT_UTIL_GLOBAL_LOCK 1
 66 | #endif
 67 | 
 68 | /* Implements a stack like data structure (by using mmap / VirtualAlloc). */
 69 | #ifndef SLJIT_UTIL_STACK
 70 | /* Enabled by default */
 71 | #define SLJIT_UTIL_STACK 1
 72 | #endif
 73 | 
 74 | /* Single threaded application. Does not require any locks. */
 75 | #ifndef SLJIT_SINGLE_THREADED
 76 | /* Disabled by default. */
 77 | #define SLJIT_SINGLE_THREADED 0
 78 | #endif
 79 | 
 80 | /* --------------------------------------------------------------------- */
 81 | /*  Configuration                                                        */
 82 | /* --------------------------------------------------------------------- */
 83 | 
 84 | /* If SLJIT_STD_MACROS_DEFINED is not defined, the application should
 85 |    define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMCPY, and NULL. */
 86 | #ifndef SLJIT_STD_MACROS_DEFINED
 87 | /* Disabled by default. */
 88 | #define SLJIT_STD_MACROS_DEFINED 0
 89 | #endif
 90 | 
 91 | /* Executable code allocation:
 92 |    If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should
 93 |    define SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_EXEC_OFFSET. */
 94 | #ifndef SLJIT_EXECUTABLE_ALLOCATOR
 95 | /* Enabled by default. */
 96 | #define SLJIT_EXECUTABLE_ALLOCATOR 1
 97 | 
 98 | /* When SLJIT_PROT_EXECUTABLE_ALLOCATOR is enabled SLJIT uses
 99 |    an allocator which does not set writable and executable
100 |    permission flags at the same time. The trade-off is increased
101 |    memory consumption and disabled dynamic code modifications. */
102 | #ifndef SLJIT_PROT_EXECUTABLE_ALLOCATOR
103 | /* Disabled by default. */
104 | #define SLJIT_PROT_EXECUTABLE_ALLOCATOR 0
105 | #endif
106 | 
107 | #endif
108 | 
109 | /* Force cdecl calling convention even if a better calling
110 |    convention (e.g. fastcall) is supported by the C compiler.
111 |    If this option is enabled, C functions without
112 |    SLJIT_CALL can also be called from JIT code. */
113 | #ifndef SLJIT_USE_CDECL_CALLING_CONVENTION
114 | /* Disabled by default */
115 | #define SLJIT_USE_CDECL_CALLING_CONVENTION 0
116 | #endif
117 | 
118 | /* Return with error when an invalid argument is passed. */
119 | #ifndef SLJIT_ARGUMENT_CHECKS
120 | /* Disabled by default */
121 | #define SLJIT_ARGUMENT_CHECKS 0
122 | #endif
123 | 
124 | /* Debug checks (assertions, etc.). */
125 | #ifndef SLJIT_DEBUG
126 | /* Enabled by default */
127 | #define SLJIT_DEBUG 1
128 | #endif
129 | 
130 | /* Verbose operations. */
131 | #ifndef SLJIT_VERBOSE
132 | /* Enabled by default */
133 | #define SLJIT_VERBOSE 1
134 | #endif
135 | 
136 | /*
137 |   SLJIT_IS_FPU_AVAILABLE
138 |     The availability of the FPU can be controlled by SLJIT_IS_FPU_AVAILABLE.
139 |       zero value - FPU is NOT present.
140 |       nonzero value - FPU is present.
141 | */
142 | 
143 | /* For further configurations, see the beginning of sljitConfigInternal.h */
144 | 
145 | #endif
146 | 


--------------------------------------------------------------------------------
/vendor/pcre/README.md:
--------------------------------------------------------------------------------
1 | Superstring contains a copy of the [PCRE](http://www.pcre.org/) regex engine.
2 | 
3 | The `10.23` directory contains a subset of the PCRE distribution obtained from [here](https://ftp.pcre.org/pub/pcre/pcre2-10.23.zip).
4 | 
5 | The file that `superstring` uses to compile PCRE, `pcre.gyp`, was created based on PCRE's `NON-AUTOTOOLS-BUILD` instructions.


--------------------------------------------------------------------------------
/vendor/pcre/pcre.gyp:
--------------------------------------------------------------------------------
 1 | {
 2 |     "targets": [
 3 |         {
 4 |             "target_name": "pcre",
 5 |             "type": "static_library",
 6 |             "sources": [
 7 |                 "pcre2_chartables.c",  # the only source listed outside the 10.23/src directory
 8 |                 "10.23/src/pcre2_auto_possess.c",
 9 |                 "10.23/src/pcre2_compile.c",
10 |                 "10.23/src/pcre2_config.c",
11 |                 "10.23/src/pcre2_context.c",
12 |                 "10.23/src/pcre2_dfa_match.c",
13 |                 "10.23/src/pcre2_error.c",
14 |                 "10.23/src/pcre2_find_bracket.c",
15 |                 "10.23/src/pcre2_jit_compile.c",
16 |                 "10.23/src/pcre2_maketables.c",
17 |                 "10.23/src/pcre2_match.c",
18 |                 "10.23/src/pcre2_match_data.c",
19 |                 "10.23/src/pcre2_newline.c",
20 |                 "10.23/src/pcre2_ord2utf.c",
21 |                 "10.23/src/pcre2_pattern_info.c",
22 |                 "10.23/src/pcre2_serialize.c",
23 |                 "10.23/src/pcre2_string_utils.c",
24 |                 "10.23/src/pcre2_study.c",
25 |                 "10.23/src/pcre2_substitute.c",
26 |                 "10.23/src/pcre2_substring.c",
27 |                 "10.23/src/pcre2_tables.c",
28 |                 "10.23/src/pcre2_ucd.c",
29 |                 "10.23/src/pcre2_valid_utf.c",
30 |                 "10.23/src/pcre2_xclass.c",
31 |             ],
32 |             "include_dirs": [
33 |                 "include",
34 |                 "10.23/src"
35 |             ],
36 |             "defines": [
37 |                 "HAVE_CONFIG_H",  # makes the PCRE2 sources include config.h
38 |                 "PCRE2_CODE_UNIT_WIDTH=16",  # selects the 16-bit code unit branches in the sources
39 |                 "SUPPORT_JIT",
40 |             ],
41 |             "cflags": [
42 |                 "-Wno-unused-function"
43 |             ],
44 |             'xcode_settings': {  # repeats the same warning suppression for Xcode builds
45 |                 'OTHER_CFLAGS': [
46 |                     '-Wno-unused-function'
47 |                 ],
48 |             },
49 |             "direct_dependent_settings": {  # settings passed on to targets that depend directly on this one
50 |                 "include_dirs": [
51 |                     "include"
52 |                 ],
53 |                 "defines": [
54 |                     "PCRE2_CODE_UNIT_WIDTH=16",  # same code unit width as the library build above
55 |                 ]
56 |             }
57 |         }
58 |     ]
59 | }


--------------------------------------------------------------------------------