├── .cargo
    ├── release-windows-ms.toml
    └── release.toml
├── .github
    └── workflows
    │   ├── ci.yml
    │   └── winget.yml
├── .gitignore
├── .pipelines
    ├── release.yml
    └── tsa.json
├── .vscode
    ├── launch.json
    └── tasks.json
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── README.md
├── SECURITY.md
├── assets
    ├── Microsoft_logo_(1980).svg
    ├── com.microsoft.edit.desktop
    ├── edit.svg
    ├── edit_hero_image.png
    ├── editing-traces
    │   ├── README.md
    │   └── rustcode.json.zst
    ├── manpage
    │   └── edit.1
    ├── microsoft.png
    └── microsoft.sixel
├── benches
    └── lib.rs
├── build.rs
├── rust-toolchain.toml
├── rustfmt.toml
├── src
    ├── apperr.rs
    ├── arena
    │   ├── debug.rs
    │   ├── mod.rs
    │   ├── release.rs
    │   ├── scratch.rs
    │   └── string.rs
    ├── base64.rs
    ├── bin
    │   └── edit
    │   │   ├── documents.rs
    │   │   ├── draw_editor.rs
    │   │   ├── draw_filepicker.rs
    │   │   ├── draw_menubar.rs
    │   │   ├── draw_statusbar.rs
    │   │   ├── edit.exe.manifest
    │   │   ├── localization.rs
    │   │   ├── main.rs
    │   │   └── state.rs
    ├── buffer
    │   ├── gap_buffer.rs
    │   ├── line_cache.rs
    │   ├── mod.rs
    │   └── navigation.rs
    ├── cell.rs
    ├── document.rs
    ├── framebuffer.rs
    ├── fuzzy.rs
    ├── hash.rs
    ├── helpers.rs
    ├── icu.rs
    ├── input.rs
    ├── lib.rs
    ├── oklab.rs
    ├── path.rs
    ├── simd
    │   ├── lines_bwd.rs
    │   ├── lines_fwd.rs
    │   ├── memchr2.rs
    │   ├── memset.rs
    │   └── mod.rs
    ├── sys
    │   ├── mod.rs
    │   ├── unix.rs
    │   └── windows.rs
    ├── tui.rs
    ├── unicode
    │   ├── measurement.rs
    │   ├── mod.rs
    │   ├── tables.rs
    │   └── utf8.rs
    └── vt.rs
└── tools
    └── grapheme-table-gen
        ├── Cargo.lock
        ├── Cargo.toml
        ├── README.md
        └── src
            ├── main.rs
            └── rules.rs


/.cargo/release-windows-ms.toml:
--------------------------------------------------------------------------------
 1 | # vvv The following parts are identical to release.toml vvv
 2 | 
 3 | # Avoid linking with vcruntime140.dll by statically linking everything,
 4 | # and then explicitly linking with ucrtbase.dll dynamically.
 5 | # We do this, because vcruntime140.dll is an optional Windows component.
 6 | [target.'cfg(all(target_os = "windows", target_env = "msvc"))']
 7 | rustflags = [
 8 |     "-Ctarget-feature=+crt-static",
 9 |     "-Clink-args=/DEFAULTLIB:ucrt.lib",
10 |     "-Clink-args=/NODEFAULTLIB:vcruntime.lib",
11 |     "-Clink-args=/NODEFAULTLIB:msvcrt.lib",
12 |     "-Clink-args=/NODEFAULTLIB:libucrt.lib",
13 | ]
14 | 
15 | # The backtrace code for panics in Rust is almost as large as the entire editor.
16 | # = Huge reduction in binary size by removing all that.
17 | [unstable]
18 | build-std = ["std", "panic_abort"]
19 | build-std-features = ["panic_immediate_abort", "optimize_for_size"]
20 | 
21 | # vvv The following parts are specific to official Windows builds. vvv
22 | # (The use of internal registries, security features, etc., are mandatory.)
23 | 
24 | # Enable shadow stacks: https://learn.microsoft.com/en-us/cpp/build/reference/cetcompat
25 | [target.'cfg(all(target_os = "windows", any(target_arch = "x86", target_arch = "x86_64")))']
26 | rustflags = ["-Clink-args=/DYNAMICBASE", "-Clink-args=/CETCOMPAT"]
27 | 
28 | [registries.Edit_PublicPackages]
29 | index = "sparse+https://pkgs.dev.azure.com/microsoft/Dart/_packaging/Edit_PublicPackages/Cargo/index/"
30 | 
31 | [source.crates-io]
32 | replace-with = "Edit_PublicPackages"
33 | 


--------------------------------------------------------------------------------
/.cargo/release.toml:
--------------------------------------------------------------------------------
 1 | # The following is not used by default via .cargo/config.toml,
 2 | # because `build-std-features` cannot be keyed by profile.
 3 | # This breaks the bench profile which doesn't support panic=abort.
 4 | # See: https://github.com/rust-lang/cargo/issues/11214
 5 | # See: https://github.com/rust-lang/cargo/issues/13894
 6 | 
 7 | # Avoid linking with vcruntime140.dll by statically linking everything,
 8 | # and then explicitly linking with ucrtbase.dll dynamically.
 9 | # We do this, because vcruntime140.dll is an optional Windows component.
10 | [target.'cfg(all(target_os = "windows", target_env = "msvc"))']
11 | rustflags = [
12 |     "-Ctarget-feature=+crt-static",
13 |     "-Clink-args=/DEFAULTLIB:ucrt.lib",
14 |     "-Clink-args=/NODEFAULTLIB:vcruntime.lib",
15 |     "-Clink-args=/NODEFAULTLIB:msvcrt.lib",
16 |     "-Clink-args=/NODEFAULTLIB:libucrt.lib",
17 | ]
18 | 
19 | # The backtrace code for panics in Rust is almost as large as the entire editor.
20 | # = Huge reduction in binary size by removing all that.
21 | [unstable]
22 | build-std = ["std", "panic_abort"]
23 | build-std-features = ["panic_immediate_abort", "optimize_for_size"]
24 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 | 
11 | env:
12 |   CARGO_TERM_COLOR: always
13 | 
14 | jobs:
15 |   check:
16 |     runs-on: ${{ matrix.os }}
17 |     strategy:
18 |       fail-fast: false
19 |       matrix:
20 |         os:
21 |           - ubuntu-latest
22 |           - windows-latest
23 |     steps:
24 |       # The Windows runners have autocrlf enabled by default.
25 |       - name: Disable git autocrlf
26 |         run: git config --global core.autocrlf false
27 |         if: matrix.os == 'windows-latest'
28 |       - name: Checkout
29 |         uses: actions/checkout@v4
30 |       # https://github.com/actions/cache/blob/main/examples.md#rust---cargo
31 |       # Depends on `Cargo.lock` --> Has to be after checkout.
32 |       - uses: actions/cache@v4
33 |         with:
34 |           path: |
35 |             ~/.cargo/bin/
36 |             ~/.cargo/registry/index/
37 |             ~/.cargo/registry/cache/
38 |             ~/.cargo/git/db/
39 |             target/
40 |           key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
41 |       - name: Install Rust
42 |         run: rustup toolchain install nightly --no-self-update --profile minimal --component rust-src,rustfmt,clippy
43 |       - name: Check formatting
44 |         run: cargo fmt --all -- --check
45 |       - name: Run tests
46 |         run: cargo test --all-features --all-targets
47 |       - name: Run clippy
48 |         run: cargo clippy --all-features --all-targets -- --deny warnings
49 | 


--------------------------------------------------------------------------------
/.github/workflows/winget.yml:
--------------------------------------------------------------------------------
 1 | name: Submit release to the WinGet community repository
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [published]
 6 | 
 7 | jobs:
 8 |   publish-winget:
 9 |     name: Submit to WinGet repository
10 | 
11 |     # winget-create is only supported on Windows
12 |     runs-on: windows-latest
13 | 
14 |     # Only submit stable releases
15 |     if: ${{ !github.event.release.prerelease }}
16 |     steps:
17 |       - name: Submit package using wingetcreate
18 |         run: |
19 |           # Get installer info from release event
20 |           $assets = '${{ toJSON(github.event.release.assets) }}' | ConvertFrom-Json
21 |           $x64InstallerUrl = $assets | Where-Object -Property name -like '*x86_64-windows.zip' | Select-Object -ExpandProperty browser_download_url
22 |           $arm64InstallerUrl = $assets | Where-Object -Property name -like '*aarch64-windows.zip' | Select-Object -ExpandProperty browser_download_url
23 |           $packageVersion = (${{ toJSON(github.event.release.tag_name) }}).Trim('v')
24 | 
25 |           # Update package using wingetcreate
26 |           curl.exe -JLO https://aka.ms/wingetcreate/latest
27 |           .\wingetcreate.exe update Microsoft.Edit `
28 |             --version $packageVersion `
29 |             --urls $x64InstallerUrl $arm64InstallerUrl `
30 |             --token "${{ secrets.WINGET_TOKEN }}" `
31 |             --submit
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | .vs
3 | *.profraw
4 | lcov.info
5 | target
6 | 


--------------------------------------------------------------------------------
/.pipelines/release.yml:
--------------------------------------------------------------------------------
  1 | # Documentation:  https://aka.ms/obpipelines
  2 | 
  3 | trigger: none
  4 | 
  5 | parameters:
  6 |   - name: debug
  7 |     displayName: Enable debug output
  8 |     type: boolean
  9 |     default: false
 10 |   - name: official
 11 |     displayName: Whether to build Official or NonOfficial
 12 |     type: string
 13 |     default: NonOfficial
 14 |     values:
 15 |       - NonOfficial
 16 |       - Official
 17 |   - name: createvpack
 18 |     displayName: Enable vpack creation
 19 |     type: boolean
 20 |     default: false
 21 |   - name: buildPlatforms
 22 |     type: object
 23 |     default:
 24 |       - x86_64-pc-windows-msvc
 25 |       - aarch64-pc-windows-msvc
 26 | 
 27 | variables:
 28 |   system.debug: ${{parameters.debug}}
 29 |   WindowsContainerImage: onebranch.azurecr.io/windows/ltsc2022/vse2022:latest
 30 |   # CDP_DEFINITION_BUILD_COUNT is needed for onebranch.pipeline.version task.
 31 |   # See: https://aka.ms/obpipelines/versioning
 32 |   CDP_DEFINITION_BUILD_COUNT: $[counter('', 0)]
 33 |   # LOAD BEARING - the vpack task fails without these
 34 |   ROOT: $(Build.SourcesDirectory)
 35 |   REPOROOT: $(Build.SourcesDirectory)
 36 |   OUTPUTROOT: $(REPOROOT)\out
 37 |   NUGET_XMLDOC_MODE: none
 38 | 
 39 | resources:
 40 |   repositories:
 41 |     - repository: GovernedTemplates
 42 |       type: git
 43 |       name: OneBranch.Pipelines/GovernedTemplates
 44 |       ref: refs/heads/main
 45 | 
 46 | extends:
 47 |   template: v2/Microsoft.${{parameters.official}}.yml@GovernedTemplates
 48 |   parameters:
 49 |     featureFlags:
 50 |       WindowsHostVersion:
 51 |         Version: 2022
 52 |         Network: R1
 53 |     platform:
 54 |       name: windows_undocked
 55 |       product: edit
 56 |     # https://aka.ms/obpipelines/cloudvault
 57 |     cloudvault:
 58 |       enabled: false
 59 |     # https://aka.ms/obpipelines/sdl
 60 |     globalSdl:
 61 |       binskim:
 62 |         # > Due to some legacy reasons, 1ES PT is scanning full sources directory
 63 |         # > for BinSkim tool instead of just scanning the output directory [...]
 64 |         scanOutputDirectoryOnly: true
 65 |       isNativeCode: true
 66 |       tsa:
 67 |         enabled: ${{eq(parameters.official, 'Official')}}
 68 |         configFile: "$(Build.SourcesDirectory)/.pipelines/tsa.json"
 69 |     stages:
 70 |       # Our Build stage will build every target in buildPlatforms in one job, so we
 71 |       # don't need to repeat most of the boilerplate work in a separate job per target.
 72 |       - stage: Build
 73 |         jobs:
 74 |           - job: Windows
 75 |             pool:
 76 |               type: windows
 77 |             variables:
 78 |               # Binaries will go here.
 79 |               # More settings at https://aka.ms/obpipelines/yaml/jobs
 80 |               ob_outputDirectory: "$(Build.SourcesDirectory)/out"
 81 |               # The vPack gets created from stuff in here.
 82 |               # It will have a structure like:
 83 |               # .../vpack/
 84 |               #      - amd64/
 85 |               #         - edit.exe
 86 |               #      - i386/
 87 |               #         - edit.exe
 88 |               #      - arm64/
 89 |               #         - edit.exe
 90 |               ob_createvpack_enabled: ${{parameters.createvpack}}
 91 |               ob_createvpack_vpackdirectory: "$(ob_outputDirectory)/vpack"
 92 |               ob_createvpack_packagename: "windows_edit.$(Build.SourceBranchName)"
 93 |               ob_createvpack_owneralias: lhecker@microsoft.com
 94 |               ob_createvpack_description: Microsoft Edit
 95 |               ob_createvpack_targetDestinationDirectory: "$(Destination)"
 96 |               ob_createvpack_propsFile: false
 97 |               ob_createvpack_provData: true
 98 |               ob_createvpack_versionAs: string
 99 |               ob_createvpack_version: "$(EditVersion)-$(CDP_DEFINITION_BUILD_COUNT)"
100 |               ob_createvpack_metadata: "$(Build.SourceVersion)"
101 |               ob_createvpack_topLevelRetries: 0
102 |               ob_createvpack_failOnStdErr: true
103 |               ob_createvpack_verbose: ${{ parameters.debug }}
104 |               # For details on this cargo_target_dir setting, see:
105 |               # https://eng.ms/docs/more/rust/topics/onebranch-workaround
106 |               CARGO_TARGET_DIR: C:\cargo_target_dir
107 |               # msrustup only supports stable toolchains, but this project requires nightly.
108 |               # We were told RUSTC_BOOTSTRAP=1 is a supported workaround.
109 |               RUSTC_BOOTSTRAP: 1
110 |             steps:
111 |               # NOTE: Step objects have ordered keys and you MUST have "task" as the first key.
112 |               # Objects with ordered keys... lol
113 |               - task: RustInstaller@1
114 |                 displayName: Install Rust toolchain
115 |                 inputs:
116 |                   rustVersion: ms-stable
117 |                   additionalTargets: ${{ join(' ', parameters.buildPlatforms) }}
118 |                   # URL of an Azure Artifacts feed configured with a crates.io upstream. Must be within the current ADO collection.
119 |                   # NOTE: Azure Artifacts support for Rust is not yet public, but it is enabled for internal ADO organizations.
120 |                   # https://learn.microsoft.com/en-us/azure/devops/artifacts/how-to/set-up-upstream-sources?view=azure-devops
121 |                   cratesIoFeedOverride: sparse+https://pkgs.dev.azure.com/microsoft/Dart/_packaging/Edit_PublicPackages/Cargo/index/
122 |                   # URL of an Azure Artifacts NuGet feed configured with the mscodehub Rust feed as an upstream.
123 |                   # * The feed must be within the current ADO collection.
124 |                   # * The CI account, usually "Project Collection Build Service (org-name)", must have at least "Collaborator" permission.
125 |                   # When setting up the upstream NuGet feed, use following Azure Artifacts feed locator:
126 |                   #   azure-feed://mscodehub/Rust/Rust@Release
127 |                   toolchainFeed: https://pkgs.dev.azure.com/microsoft/_packaging/RustTools/nuget/v3/index.json
128 |               - task: CargoAuthenticate@0
129 |                 displayName: Authenticate with Azure Artifacts
130 |                 inputs:
131 |                   configFile: ".cargo/release-windows-ms.toml"
132 |               # We recommend making a separate `cargo fetch` step, as some build systems perform
133 |               # fetching entirely prior to the build, and perform the build with the network disabled.
134 |               - script: cargo fetch --config .cargo/release-windows-ms.toml
135 |                 displayName: Fetch crates
136 |               - ${{ each platform in parameters.buildPlatforms }}:
137 |                   - script: cargo build --config .cargo/release-windows-ms.toml --frozen --release --target ${{platform}}
138 |                     displayName: Build ${{platform}} Release
139 |                   - task: CopyFiles@2
140 |                     displayName: Copy files to vpack (${{platform}})
141 |                     inputs:
142 |                       sourceFolder: "$(CARGO_TARGET_DIR)/${{platform}}/release"
143 |                       ${{ if eq(platform, 'i686-pc-windows-msvc') }}:
144 |                         targetFolder: "$(ob_createvpack_vpackdirectory)/i386"
145 |                       ${{ elseif eq(platform, 'x86_64-pc-windows-msvc') }}:
146 |                         targetFolder: "$(ob_createvpack_vpackdirectory)/amd64"
147 |                       ${{ else }}: # aarch64-pc-windows-msvc
148 |                         targetFolder: "$(ob_createvpack_vpackdirectory)/arm64"
149 |                       contents: |
150 |                         *.exe
151 |                         *.pdb
152 |               # Extract the version for `ob_createvpack_version`.
153 |               - script: |-
154 |                   @echo off
155 |                   for /f "tokens=3 delims=- " %%x in ('findstr /c:"version = " Cargo.toml') do (
156 |                       echo ##vso[task.setvariable variable=EditVersion]%%~x
157 |                       goto :EOF
158 |                   )
159 |                 displayName: "Set EditVersion"
160 |               - task: onebranch.pipeline.signing@1
161 |                 displayName: "Sign files"
162 |                 inputs:
163 |                   command: "sign"
164 |                   signing_profile: "external_distribution"
165 |                   files_to_sign: "**/edit.exe"
166 |                   search_root: "$(ob_createvpack_vpackdirectory)"
167 |                   use_testsign: false
168 |                   in_container: true
169 | 
170 |               - ${{ each platform in parameters.buildPlatforms }}:
171 |                 - pwsh: |-
172 |                     $Dest = New-Item -Type Directory "_staging/${env:RELEASE_NAME}"
173 |                     Write-Host "Staging files from ${env:VPACK_ROOT} at $Dest"
174 |                     Get-ChildItem "${env:VPACK_ROOT}\*" -Include *.exe, *.pdb | Copy-Item -Destination $Dest -Verbose
175 |                     tar.exe -c -v --format=zip -f "$(ob_outputDirectory)\${env:RELEASE_NAME}.zip" -C _staging $env:RELEASE_NAME
176 |                   env:
177 |                     RELEASE_NAME: edit-$(EditVersion)-${{ replace(platform, 'pc-windows-msvc', 'windows') }}
178 |                     ${{ if eq(platform, 'i686-pc-windows-msvc') }}:
179 |                       VPACK_ROOT: "$(ob_createvpack_vpackdirectory)/i386"
180 |                     ${{ elseif eq(platform, 'x86_64-pc-windows-msvc') }}:
181 |                       VPACK_ROOT: "$(ob_createvpack_vpackdirectory)/amd64"
182 |                     ${{ else }}: # aarch64-pc-windows-msvc
183 |                       VPACK_ROOT: "$(ob_createvpack_vpackdirectory)/arm64"
184 |                   displayName: Produce ${{platform}} release archive
185 | 


--------------------------------------------------------------------------------
/.pipelines/tsa.json:
--------------------------------------------------------------------------------
1 | {
2 |     "instanceUrl": "https://microsoft.visualstudio.com",
3 |     "projectName": "OS",
4 |     "areaPath": "OS\\Windows Client and Services\\WinPD\\DFX-Developer Fundamentals and Experiences\\DEFT\\SHINE\\Commandline Tooling",
5 |     "notificationAliases": ["condev@microsoft.com", "duhowett@microsoft.com"],
6 |     "template": "VSTS_Microsoft_OSGS"
7 | }
8 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "0.2.0",
 3 |     "configurations": [
 4 |         {
 5 |             "name": "Launch Debug (Windows)",
 6 |             "preLaunchTask": "rust: cargo build",
 7 |             "type": "cppvsdbg",
 8 |             "request": "launch",
 9 |             "console": "externalTerminal",
10 |             "program": "${workspaceFolder}/target/debug/edit",
11 |             "cwd": "${workspaceFolder}",
12 |             "args": [
13 |                 "${workspaceFolder}/src/bin/edit/main.rs"
14 |             ],
15 |         },
16 |         {
17 |             "name": "Launch Debug (GDB/LLDB)",
18 |             "preLaunchTask": "rust: cargo build",
19 |             "type": "cppdbg",
20 |             "request": "launch",
21 |             "program": "${workspaceFolder}/target/debug/edit",
22 |             "cwd": "${workspaceFolder}",
23 |             "args": [
24 |                 "${workspaceFolder}/src/bin/edit/main.rs"
25 |             ],
26 |         }
27 |     ]
28 | }
29 | 


--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "2.0.0",
 3 |     "tasks": [
 4 |         {
 5 |             "label": "rust: cargo build",
 6 |             "type": "process",
 7 |             "command": "cargo",
 8 |             "args": [
 9 |                 "build",
10 |                 "--package",
11 |                 "edit",
12 |                 "--features",
13 |                 "debug-latency"
14 |             ],
15 |             "group": {
16 |                 "kind": "build",
17 |                 "isDefault": true
18 |             },
19 |             "problemMatcher": [
20 |                 "$rustc"
21 |             ]
22 |         }
23 |     ]
24 | }
25 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Microsoft Open Source Code of Conduct
 2 | 
 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
 4 | 
 5 | Resources:
 6 | 
 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 | - Employees can reach out at [aka.ms/opensource/moderation-support](https://aka.ms/opensource/moderation-support)
11 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | ## Translation improvements
 4 | 
 5 | You can find our translations in [`src/bin/edit/localization.rs`](./src/bin/edit/localization.rs).
 6 | Please feel free to open a pull request with your changes at any time.
 7 | If you'd like to discuss your changes first, please feel free to open an issue.
 8 | 
 9 | ## Bug reports
10 | 
11 | If you find any bugs, we gladly accept pull requests without prior discussion.
12 | Otherwise, you can of course always open an issue for us to look into.
13 | 
14 | ## Feature requests
15 | 
16 | Please open a new issue for any feature requests you have in mind.
17 | Keeping the binary size of the editor small is a priority for us and so we may need to discuss any new features first until we have support for plugins.
18 | 
19 | ## Code changes
20 | 
21 | The project has a focus on a small binary size and sufficient (good) performance.
22 | As such, we generally do not accept pull requests that introduce dependencies (there are always exceptions of course).
23 | Otherwise, you can consider this project a playground for trying out any cool ideas you have.
24 | 
25 | The overall architecture of the project can be summarized as follows:
26 | * The underlying text buffer in `src/buffer` doesn't keep track of line breaks in the document.
27 |   This is a crucial design aspect that permeates throughout the entire codebase.
28 | 
29 |   To oversimplify, the *only* state that is kept is the current cursor position.
30 |   When the user asks to move to another line, the editor will `O(n)` seek through the underlying document until it has found the corresponding number of line breaks.
31 |   * As a result, `src/simd` contains crucial `memchr2` functions to quickly find the next or previous line break (runs at up to >100GB/s).
32 |   * Furthermore, `src/unicode` implements a `Utf8Chars` iterator which transparently inserts U+FFFD replacements during iteration (runs at up to 4GB/s).
33 |   * Furthermore, `src/unicode` also implements grapheme cluster segmentation and cluster width measurement via its `MeasurementConfig` (runs at up to 600MB/s).
34 |   * If word wrap is disabled, `memchr2` is used for all navigation across lines, allowing us to breeze through 1GB large files as if they were 1MB.
35 |   * Even if word-wrap is enabled, it's still sufficiently smooth thanks to `MeasurementConfig`. This is only possible because these base functions are heavily optimized.
36 | * `src/framebuffer.rs` implements a "framebuffer" like in video games.
37 |   It allows us to draw the UI output into an intermediate buffer first, accumulating all changes and handling things like color blending.
38 |   Then, it can compare the accumulated output with the previous frame and only send the necessary changes to the terminal.
39 | * `src/tui.rs` implements an immediate mode UI. Its module implementation gives an overview of how it works and I recommend reading it.
40 | * `src/vt.rs` implements our VT parser.
41 | * `src/sys` contains our platform abstractions.
42 | * Finally, `src/bin/edit` ties everything together.
43 |   It's roughly 90% UI code and business logic.
44 |   It contains a little bit of VT logic in `setup_terminal`.
45 | 
46 | If you have an issue with your terminal, the places of interest are the aforementioned:
47 | * VT parser in `src/vt.rs`
48 | * Platform specific code in `src/sys`
49 | * And the `setup_terminal` function in `src/bin/edit/main.rs`
50 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "edit"
 3 | version = "1.1.0"
 4 | edition = "2024"
 5 | rust-version = "1.87"
 6 | readme = "README.md"
 7 | repository = "https://github.com/microsoft/edit"
 8 | homepage = "https://github.com/microsoft/edit"
 9 | license = "MIT"
10 | categories = ["text-editors"]
11 | 
12 | [[bench]]
13 | name = "lib"
14 | harness = false
15 | 
16 | [features]
17 | debug-latency = []
18 | 
19 | # We use `opt-level = "s"` as it significantly reduces binary size.
20 | # We could then use the `#[optimize(speed)]` attribute for spot optimizations.
21 | # Unfortunately, that attribute currently doesn't work on intrinsics such as memset.
22 | [profile.release]
23 | codegen-units = 1           # reduces binary size by ~2%
24 | debug = "full"              # No one needs an undebuggable release binary
25 | lto = true                  # reduces binary size by ~14%
26 | opt-level = "s"             # reduces binary size by ~25%
27 | panic = "abort"             # reduces binary size by ~50% in combination with -Zbuild-std-features=panic_immediate_abort
28 | split-debuginfo = "packed"  # generates a separate *.dwp/*.dSYM so the binary can get stripped
29 | strip = "symbols"           # See split-debuginfo - allows us to drop the size by ~65%
30 | incremental = true          # Improves re-compile times
31 | 
32 | [profile.bench]
33 | codegen-units = 16          # Make compiling criterion faster (16 is the default, but profile.release sets it to 1)
34 | lto = "thin"                # Similarly, speed up linking by a ton
35 | 
36 | [dependencies]
37 | 
38 | [target.'cfg(unix)'.dependencies]
39 | libc = "0.2"
40 | 
41 | [target.'cfg(windows)'.build-dependencies]
42 | winresource = "0.1.22"
43 | 
44 | [target.'cfg(windows)'.dependencies.windows-sys]
45 | version = "0.59"
46 | features = [
47 |     "Win32_Globalization",
48 |     "Win32_Security",
49 |     "Win32_Storage_FileSystem",
50 |     "Win32_System_Console",
51 |     "Win32_System_Diagnostics_Debug",
52 |     "Win32_System_IO",
53 |     "Win32_System_LibraryLoader",
54 |     "Win32_System_Memory",
55 |     "Win32_System_Threading",
56 | ]
57 | 
58 | [dev-dependencies]
59 | criterion = { version = "0.6", features = ["html_reports"] }
60 | serde = { version = "1.0", features = ["derive"] }
61 | serde_json = { version = "1.0" }
62 | zstd = { version = "0.13", default-features = false }
63 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) Microsoft Corporation. All rights reserved.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ![Application Icon for Edit](./assets/edit.svg) Edit
 2 | 
 3 | A simple editor for simple needs.
 4 | 
 5 | This editor pays homage to the classic [MS-DOS Editor](https://en.wikipedia.org/wiki/MS-DOS_Editor), but with a modern interface and input controls similar to VS Code. The goal is to provide an accessible editor that even users largely unfamiliar with terminals can easily use.
 6 | 
 7 | ![Screenshot of Edit with the About dialog in the foreground](./assets/edit_hero_image.png)
 8 | 
 9 | ## Installation
10 | 
11 | [![Packaging status](https://repology.org/badge/vertical-allrepos/microsoft-edit.svg?exclude_unsupported=1)](https://repology.org/project/microsoft-edit/versions)
12 | 
13 | You can also download binaries from [our Releases page](https://github.com/microsoft/edit/releases/latest).
14 | 
15 | ### Windows
16 | 
17 | You can install the latest version with WinGet:
18 | ```powershell
19 | winget install Microsoft.Edit
20 | ```
21 | 
22 | ### Notes to Package Maintainers
23 | 
24 | The canonical executable name is "edit" and the alternative name is "msedit".
25 | 
26 | We're aware of the potential conflict of "edit" with existing commands and as such recommend naming packages and executables "msedit".
27 | Names such as "ms-edit" should be avoided.
28 | Assigning an "edit" alias is recommended if possible.
29 | 
30 | ## Build Instructions
31 | 
32 | * [Install Rust](https://www.rust-lang.org/tools/install)
33 | * Install the nightly toolchain: `rustup install nightly`
34 |   * Alternatively, set the environment variable `RUSTC_BOOTSTRAP=1`
35 | * Clone the repository
36 | * For a release build, run: `cargo build --config .cargo/release.toml --release`
37 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | <!-- BEGIN MICROSOFT SECURITY.MD V0.0.9 BLOCK -->
 2 | 
 3 | ## Security
 4 | 
 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).
 6 | 
 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.
 8 | 
 9 | ## Reporting Security Issues
10 | 
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 | 
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).
14 | 
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com).  If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).
16 | 
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
18 | 
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 | 
21 |   * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 |   * Full paths of source file(s) related to the manifestation of the issue
23 |   * The location of the affected source code (tag/branch/commit or direct URL)
24 |   * Any special configuration required to reproduce the issue
25 |   * Step-by-step instructions to reproduce the issue
26 |   * Proof-of-concept or exploit code (if possible)
27 |   * Impact of the issue, including how an attacker might exploit the issue
28 | 
29 | This information will help us triage your report more quickly.
30 | 
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.
32 | 
33 | ## Preferred Languages
34 | 
35 | We prefer all communications to be in English.
36 | 
37 | ## Policy
38 | 
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd).
40 | 
41 | <!-- END MICROSOFT SECURITY.MD BLOCK -->
42 | 


--------------------------------------------------------------------------------
/assets/Microsoft_logo_(1980).svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 2 | <!-- Source: https://commons.wikimedia.org/wiki/File:Microsoft_logo_(1980).svg -->
 3 | <!-- License: Public domain -->
 4 | <svg xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" id="svg8" version="1.1" viewBox="0 0 264.58333 52.916669" height="200" width="1000">
 5 |   <defs id="defs2"/>
 6 |   <metadata id="metadata5">
 7 |     <rdf:RDF>
 8 |       <cc:Work rdf:about="">
 9 |         <dc:format>image/svg+xml</dc:format>
10 |         <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
11 |         <dc:title/>
12 |       </cc:Work>
13 |     </rdf:RDF>
14 |   </metadata>
15 |   <g id="layer2">
16 |     <path style="fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" d="M 0,52.916667 33.602084,20.902084 V 34.925001 L 48.418751,20.902084 v 13.758334 h 8.73125 V 0.26458334 L 42.333334,15.08125 V 0.26458334 L 0,42.597917 Z" id="path847"/>
17 |     <path style="fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" d="M 67.468752,0.26458334 58.737501,9.2604169 V 34.660418 h 8.731251 z" id="path849"/>
18 |     <path transform="scale(0.26458334)" d="m 301.16016,1 c -21.9507,4.4255933 -39.58425,23.383151 -45.24024,48 H 255 V 53.673828 78.277344 82 h 0.69727 c 5.39479,25.07886 23.17116,44.48439 45.38085,49 H 343 v -30 h -20 v -0.004 C 322.83335,100.999 322.66667,101 322.5,101 303.44618,101 288,85.553824 288,66.5 288,47.446176 303.44618,32 322.5,32 H 342 L 372,1 Z" style="fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.567005;stop-color:#000000" id="path848"/>
19 |     <path transform="scale(0.26458334)" d="m 383,1 -33,34 v 96 h 33 V 33 h 18.5 c 9.66498,0 17.5,7.835017 17.5,17.5 0,9.664983 -7.83502,17.5 -17.5,17.5 H 387 L 521,199 V 157 L 487,123 443.33594,78.365234 A 47.000001,50 0 0 0 451,51 47.000001,50 0 0 0 405.00977,1.0117188 L 405,1 Z" style="display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.999999px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" id="path864"/>
20 |     <path transform="scale(0.26458334)" d="M 525.86523,1 A 68,66.499996 0 0 0 458,67.5 68,66.499996 0 0 0 526,134 68,66.499996 0 0 0 594,67.5 68,66.499996 0 0 0 526,1 68,66.499996 0 0 0 525.86523,1 Z m -1.60546,31 A 36.499998,36.000002 0 0 1 524.5,32 36.499998,36.000002 0 0 1 561,68 36.499998,36.000002 0 0 1 524.5,104 36.499998,36.000002 0 0 1 488,68 36.499998,36.000002 0 0 1 524.25977,32 Z" style="display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.567001;stop-color:#000000" id="path881"/>
21 |     <path transform="scale(0.26458334)" d="m 620.5,1 c -22.36753,-5.5e-7 -40.5,18.132467 -40.5,40.5 0,22.367533 18.13247,40.500001 40.5,40.5 h 2.5 c 11.59798,0 21,4.477153 21,10 0,5.522847 -9.40202,10 -21,10 h -40 v 29 h 62.99999 c 18.43887,-4.06734 31.56367,-20.13433 31.56446,-38.605479 C 677.56392,78.310576 669.87456,65.292316 657.38281,58.226562 640.78385,50.357003 632.38254,48.035667 620.15625,48 615.01343,46.201489 612.00162,43.42696 612,40.486328 611.9995,36.896878 616.47115,33.613784 623.55859,32 H 677 C 686.99999,16.999999 695.99998,9 709,1 h -84 z" style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.567001;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;stop-color:#000000;stop-opacity:1" id="rect942"/>
22 |     <path transform="scale(0.26458334)" d="M 743,0 A 68.999999,68.500001 0 0 0 674,68.5 68.999999,68.500001 0 0 0 743,137 68.999999,68.500001 0 0 0 812,68.5 68.999999,68.500001 0 0 0 743,0 Z m 0.5,32 A 37.499999,36.500002 0 0 1 781,68.5 37.499999,36.500002 0 0 1 743.5,105 37.499999,36.500002 0 0 1 706,68.5 37.499999,36.500002 0 0 1 743.5,32 Z" style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.567001;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;stop-color:#000000;stop-opacity:1" id="path881-7"/>
23 |     <path id="path1086" d="m 232.03959,22.754167 v -8.73125 h -8.46667 V 8.9958336 h 9.26042 l 8.73125,-8.73125026 H 223.83751 L 214.84167,9.2604169 V 52.652085 l 8.73125,-7.672917 V 22.754167 Z" style="fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"/>
24 |     <path style="fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" d="m 251.88334,8.9958335 3.70414,-2e-7 8.7313,-8.73125014 h -20.10839 l -8.73122,8.73125034 h 7.67292 V 34.660417 l 8.7312,-7.672917 z" id="path1086-2"/>
25 |   </g>
26 | </svg>
27 | 


--------------------------------------------------------------------------------
/assets/com.microsoft.edit.desktop:
--------------------------------------------------------------------------------
 1 | [Desktop Entry]
 2 | Type=Application
 3 | Name=Microsoft Edit
 4 | GenericName=Text Editor
 5 | Comment=A simple editor for simple needs
 6 | Icon=edit
 7 | Exec=edit %U
 8 | Terminal=true
 9 | MimeType=text/plain;
10 | Keywords=text;editor;
11 | 


--------------------------------------------------------------------------------
/assets/edit.svg:
--------------------------------------------------------------------------------
 1 | <svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
 2 | <g clip-path="url(#clip0_2349_313)">
 3 | <path fill-rule="evenodd" clip-rule="evenodd" d="M14.0918 19.0947L22.0855 15.0979C23.2589 14.5112 24.0001 13.3119 24.0001 12C24.0001 10.6881 23.2589 9.48882 22.0855 8.90213C22.6071 9.16293 22.4986 9.86016 21.977 10.121L15.5293 13.3448L14.0918 19.0947Z" fill="#D9D9D9"/>
 4 | <path fill-rule="evenodd" clip-rule="evenodd" d="M14.0918 19.0947L22.0855 15.0979C23.2589 14.5112 24.0001 13.3119 24.0001 12C24.0001 10.6881 23.2589 9.48882 22.0855 8.90213C22.6071 9.16293 22.4986 9.86016 21.977 10.121L15.5293 13.3448L14.0918 19.0947Z" fill="url(#paint0_linear_2349_313)"/>
 5 | <path fill-rule="evenodd" clip-rule="evenodd" d="M14.0918 19.0947L22.0855 15.0979C23.2589 14.5112 24.0001 13.3119 24.0001 12C24.0001 10.6881 23.2589 9.48882 22.0855 8.90213C22.6071 9.16293 22.4986 9.86016 21.977 10.121L15.5293 13.3448L14.0918 19.0947Z" fill="url(#paint1_linear_2349_313)"/>
 6 | <path fill-rule="evenodd" clip-rule="evenodd" d="M14.0918 19.0947L22.0855 15.0979C23.2589 14.5112 24.0001 13.3119 24.0001 12C24.0001 10.6881 23.2589 9.48882 22.0855 8.90213C22.6071 9.16293 22.4986 9.86016 21.977 10.121L15.5293 13.3448L14.0918 19.0947Z" fill="url(#paint2_linear_2349_313)"/>
 7 | <path fill-rule="evenodd" clip-rule="evenodd" d="M8.47085 10.6552L9.90833 4.90526L1.91459 8.90213C0.741205 9.48882 0 10.6881 0 12C0 13.3119 0.741183 14.5112 1.91457 15.0979C1.39297 14.8371 1.50149 14.1398 2.02309 13.879L8.47085 10.6552Z" fill="#D9D9D9"/>
 8 | <path fill-rule="evenodd" clip-rule="evenodd" d="M8.47085 10.6552L9.90833 4.90526L1.91459 8.90213C0.741205 9.48882 0 10.6881 0 12C0 13.3119 0.741183 14.5112 1.91457 15.0979C1.39297 14.8371 1.50149 14.1398 2.02309 13.879L8.47085 10.6552Z" fill="url(#paint3_linear_2349_313)"/>
 9 | <path fill-rule="evenodd" clip-rule="evenodd" d="M8.47085 10.6552L9.90833 4.90526L1.91459 8.90213C0.741205 9.48882 0 10.6881 0 12C0 13.3119 0.741183 14.5112 1.91457 15.0979C1.39297 14.8371 1.50149 14.1398 2.02309 13.879L8.47085 10.6552Z" fill="url(#paint4_linear_2349_313)"/>
10 | <path fill-rule="evenodd" clip-rule="evenodd" d="M6.75517 17.5181L7.87321 13.046L5.78126 12L2.02316 13.879C1.50156 14.1398 1.39302 14.8371 1.91462 15.0979L6.07921 17.1802L6.75517 17.5181Z" fill="#D9D9D9"/>
11 | <path fill-rule="evenodd" clip-rule="evenodd" d="M6.75517 17.5181L7.87321 13.046L5.78126 12L2.02316 13.879C1.50156 14.1398 1.39302 14.8371 1.91462 15.0979L6.07921 17.1802L6.75517 17.5181Z" fill="url(#paint5_linear_2349_313)"/>
12 | <path fill-rule="evenodd" clip-rule="evenodd" d="M6.75517 17.5181L7.87321 13.046L5.78126 12L2.02316 13.879C1.50156 14.1398 1.39302 14.8371 1.91462 15.0979L6.07921 17.1802L6.75517 17.5181Z" fill="url(#paint6_linear_2349_313)"/>
13 | <path fill-rule="evenodd" clip-rule="evenodd" d="M16.127 10.9541L18.2189 12L21.977 10.121C22.4986 9.86017 22.6071 9.16294 22.0855 8.90214L17.9209 6.81985L17.245 6.48189L16.127 10.9541Z" fill="#D9D9D9"/>
14 | <path fill-rule="evenodd" clip-rule="evenodd" d="M16.127 10.9541L18.2189 12L21.977 10.121C22.4986 9.86017 22.6071 9.16294 22.0855 8.90214L17.9209 6.81985L17.245 6.48189L16.127 10.9541Z" fill="url(#paint7_linear_2349_313)"/>
15 | <path fill-rule="evenodd" clip-rule="evenodd" d="M16.127 10.9541L18.2189 12L21.977 10.121C22.4986 9.86017 22.6071 9.16294 22.0855 8.90214L17.9209 6.81985L17.245 6.48189L16.127 10.9541Z" fill="url(#paint8_linear_2349_313)"/>
16 | <path fill-rule="evenodd" clip-rule="evenodd" d="M16.127 10.9541L18.2189 12L21.977 10.121C22.4986 9.86017 22.6071 9.16294 22.0855 8.90214L17.9209 6.81985L17.245 6.48189L16.127 10.9541Z" fill="url(#paint9_linear_2349_313)"/>
17 | <path d="M11.9878 19.9576L10.0194 23.5133C9.85317 23.8136 9.53698 24 9.19374 24C8.67253 24 8.25 23.5775 8.25 23.0563V18.6577C8.25 18.2209 8.30357 17.7857 8.40951 17.362L12.3366 1.65341C12.5796 0.68169 13.4527 0 14.4543 0C15.8744 0 16.9164 1.33455 16.5719 2.71223L12.7344 18.0622C12.569 18.7238 12.318 19.361 11.9878 19.9576Z" fill="url(#paint10_linear_2349_313)"/>
18 | <path d="M11.9878 19.9576L10.0194 23.5133C9.85317 23.8136 9.53698 24 9.19374 24C8.67253 24 8.25 23.5775 8.25 23.0563V18.6577C8.25 18.2209 8.30357 17.7857 8.40951 17.362L12.3366 1.65341C12.5796 0.68169 13.4527 0 14.4543 0C15.8744 0 16.9164 1.33455 16.5719 2.71223L12.7344 18.0622C12.569 18.7238 12.318 19.361 11.9878 19.9576Z" fill="url(#paint11_linear_2349_313)"/>
19 | </g>
20 | <defs>
21 | <linearGradient id="paint0_linear_2349_313" x1="22.2355" y1="13.1286" x2="15.8564" y2="16.1088" gradientUnits="userSpaceOnUse">
22 | <stop stop-color="#3DCBFF"/>
23 | <stop offset="1" stop-color="#0091EB"/>
24 | </linearGradient>
25 | <linearGradient id="paint1_linear_2349_313" x1="22.2355" y1="13.1286" x2="15.8564" y2="16.1088" gradientUnits="userSpaceOnUse">
26 | <stop stop-color="#3BD5FF"/>
27 | <stop offset="1" stop-color="#3DCBFF"/>
28 | </linearGradient>
29 | <linearGradient id="paint2_linear_2349_313" x1="24.0001" y1="12.1487" x2="15.1349" y2="17.5577" gradientUnits="userSpaceOnUse">
30 | <stop stop-color="#76EB95"/>
31 | <stop offset="1" stop-color="#309C61"/>
32 | </linearGradient>
33 | <linearGradient id="paint3_linear_2349_313" x1="8.14375" y1="9.13175" x2="1.76459" y2="12.1119" gradientUnits="userSpaceOnUse">
34 | <stop stop-color="#3BD5FF"/>
35 | <stop offset="1" stop-color="#3DCBFF"/>
36 | </linearGradient>
37 | <linearGradient id="paint4_linear_2349_313" x1="9.90833" y1="8.15188" x2="1.04312" y2="13.5609" gradientUnits="userSpaceOnUse">
38 | <stop stop-color="#309C61"/>
39 | <stop offset="1" stop-color="#76EB95"/>
40 | </linearGradient>
41 | <linearGradient id="paint5_linear_2349_313" x1="7.1966" y1="16.0181" x2="3.19388" y2="16.6496" gradientUnits="userSpaceOnUse">
42 | <stop stop-color="#3DCBFF"/>
43 | <stop offset="1" stop-color="#0FAFFF"/>
44 | </linearGradient>
45 | <linearGradient id="paint6_linear_2349_313" x1="7.87321" y1="16.2896" x2="5.53862" y2="11.3533" gradientUnits="userSpaceOnUse">
46 | <stop stop-color="#52D17C"/>
47 | <stop offset="1" stop-color="#1E794A"/>
48 | </linearGradient>
49 | <linearGradient id="paint7_linear_2349_313" x1="21.75" y1="10.5" x2="17.7473" y2="11.1315" gradientUnits="userSpaceOnUse">
50 | <stop stop-color="#0078D4"/>
51 | <stop offset="1" stop-color="#0FAFFF"/>
52 | </linearGradient>
53 | <linearGradient id="paint8_linear_2349_313" x1="21.75" y1="10.5" x2="17.7473" y2="11.1315" gradientUnits="userSpaceOnUse">
54 | <stop stop-color="#0FAFFF"/>
55 | <stop offset="1" stop-color="#3DCBFF"/>
56 | </linearGradient>
57 | <linearGradient id="paint9_linear_2349_313" x1="22.4266" y1="10.7714" x2="20.0921" y2="5.83518" gradientUnits="userSpaceOnUse">
58 | <stop stop-color="#1E794A"/>
59 | <stop offset="1" stop-color="#52D17C"/>
60 | </linearGradient>
61 | <linearGradient id="paint10_linear_2349_313" x1="11.25" y1="10.5" x2="15.5195" y2="11.6079" gradientUnits="userSpaceOnUse">
62 | <stop stop-color="#0FAFFF"/>
63 | <stop offset="0.245" stop-color="#3BD5FF"/>
64 | <stop offset="1" stop-color="#0078D4"/>
65 | </linearGradient>
66 | <linearGradient id="paint11_linear_2349_313" x1="14.1714" y1="12.5" x2="10.4649" y2="11.6493" gradientUnits="userSpaceOnUse">
67 | <stop offset="0.137772" stop-color="#52D17C"/>
68 | <stop offset="0.75" stop-color="#B6F6C7"/>
69 | <stop offset="1" stop-color="#76EB95"/>
70 | </linearGradient>
71 | <clipPath id="clip0_2349_313">
72 | <rect width="24" height="24" fill="white"/>
73 | </clipPath>
74 | </defs>
75 | </svg>
76 | 


--------------------------------------------------------------------------------
/assets/edit_hero_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/edit/5c5471e9443353b0f60c5380e515c4559e0316b1/assets/edit_hero_image.png


--------------------------------------------------------------------------------
/assets/editing-traces/README.md:
--------------------------------------------------------------------------------
1 | # editing-traces
2 | 
3 | This directory contains Seph Gentle's ASCII-only `rustcode` editing trace from: https://github.com/josephg/editing-traces
4 | 
5 | The trace was provided under the [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) license.
6 | 


--------------------------------------------------------------------------------
/assets/editing-traces/rustcode.json.zst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/edit/5c5471e9443353b0f60c5380e515c4559e0316b1/assets/editing-traces/rustcode.json.zst


--------------------------------------------------------------------------------
/assets/manpage/edit.1:
--------------------------------------------------------------------------------
 1 | .TH EDIT 1 "May 2025" "version 1.0"
 2 | .SH NAME
 3 | edit \- a simple text editor
 4 | .SH SYNOPSIS
 5 | \fBedit\fP [\fIOPTIONS\fP]... [\fIARGUMENTS\fP]...
 6 | .SH DESCRIPTION
 7 | edit is a simple text editor inspired by MS-DOS edit.
 8 | .SH EDITING
 9 | Edit is an interactive mode-less editor. Use Alt-F to access the menus.
10 | .SH ARGUMENTS
11 | .TP
12 | \fIFILE[:LINE[:COLUMN]]\fP
13 | The file to open, optionally with line and column (e.g., \fBfoo.txt:123:45\fP).
14 | .SH OPTIONS
15 | .TP
16 | \fB\-h\fP, \fB\-\-help\fP
17 | Print the help message.
18 | .TP
19 | \fB\-v\fP, \fB\-\-version\fP
20 | Print the version number.
21 | .SH COPYRIGHT
22 | Copyright (c) Microsoft Corporation.
23 | .br
24 | Licensed under the MIT License.
25 | .SH SEE ALSO
26 | https://github.com/microsoft/edit
27 | 


--------------------------------------------------------------------------------
/assets/microsoft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/edit/5c5471e9443353b0f60c5380e515c4559e0316b1/assets/microsoft.png


--------------------------------------------------------------------------------
/assets/microsoft.sixel:
--------------------------------------------------------------------------------
1 | P;1q"1;1;300;60#0;2;100;100;100#0!42?_ow{}!12?_ow{}!6?_ow{}}!5?_ow{{}}}!17~^NFbpw{}!8~!4}{wwo_!12?_oow{{{!4}!6~!4}{{wwo__!4?_ow{{}}}!23~^Nfrxw{{}}}!9~!4}{{woo_!12?_ow{}!15~^NFbpw{}!17~^NFB@-!36?_ow{}!6~!6?_ow{}!6~??w{}!7~?o{}!10~^^!10NFBpw{}!6~!8N^!9~{_!4?_o{}!8~^^!9N^^!9~{w}!8~^!18NFbx{}!9~^^!8N^^!9~}{o???ow{}!6~!11NFB@GKM!5N!10~!4NFB@-!30?_ow{}!12~_ow{}!12~??!20~FB@!15?!10~!10?r!9~???{!8~NB@!15?@FN!16~!4{!4wooo__!5?_}!8~^FB!16?@F^!8~{o!10~!9o!13?!10~-!24?_ow{}!35~??!19~x!18?!10~?CK[!4{}!9~^B??N!8~x!21?!10~N^^!18~}{o!10~!22?!29~!13?!10~-!18?_ow{}!8~^NFB@?!11~^NFB@?!10~??!10~F!9~}{wo__!12?!10~!5?@BFN^!9~}{wof^!7~}wo__!11?__o{!9~N@!7?!6@Bb!10~N!9~{o__!12?__o{}!8~F@!10~!9B!13?!10~-!12?_ow{}!8~^NFB@!7?!5~^NFB@!7?!10~??!10~??@FN^!20~??!10~!11?@BFN^!23~!7}!10~^NFB~!12}!12~^NB??BFN^!9~!10}!9~^NF@???!10~!22?!5~^NFB@-!6?_ow{}!8~^NFB@!13?FFB@!13?!10F??!10F!7?@@BB!15F??!10F!17?@BFN^!10~|zrfFF!10NFFFBB@@!5?!21FBB@!11?@BBFFNNN!10^NNNFFBB@!8?!10~!22?NFB@-_ow{}!8~^NFB@!119?@BFN^!9~}{wo!88?!10~-!7~^NFB@!131?@BFN^!7~!88?!7~^NF-~^NFB@!143?@BFN^~!88?~^NFB@\
2 | 


--------------------------------------------------------------------------------
/benches/lib.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | use std::hint::black_box;
  5 | use std::io::Cursor;
  6 | use std::{mem, vec};
  7 | 
  8 | use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
  9 | use edit::helpers::*;
 10 | use edit::simd::MemsetSafe;
 11 | use edit::{arena, buffer, hash, oklab, simd, unicode};
 12 | use serde::Deserialize;
 13 | /// One patch as `(offset, delete count, inserted text)`; see how `bench_buffer` consumes it.
 14 | #[derive(Deserialize)]
 15 | pub struct EditingTracePatch(pub usize, pub usize, pub String);
 16 | /// One transaction of the editing trace: a list of patches.
 17 | #[derive(Deserialize)]
 18 | pub struct EditingTraceTransaction {
 19 |     pub patches: Vec<EditingTracePatch>,
 20 | }
 21 | 
 22 | /// A full trace: applying `txns` to `start_content` must yield `end_content`.
 23 | #[derive(Deserialize)]
 24 | #[serde(rename_all = "camelCase")]
 25 | pub struct EditingTraceData {
 26 |     pub start_content: String,
 27 |     pub end_content: String,
 28 |     pub txns: Vec<EditingTraceTransaction>,
 29 | }
 30 | /// Replays the bundled `rustcode` editing trace through `GapBuffer` and `TextBuffer`.
 31 | fn bench_buffer(c: &mut Criterion) {
 32 |     let data = include_bytes!("../assets/editing-traces/rustcode.json.zst");
 33 |     let data = zstd::decode_all(Cursor::new(data)).unwrap();
 34 |     let data: EditingTraceData = serde_json::from_slice(&data).unwrap();
 35 |     let mut patches_with_coords = Vec::new();
 36 |     // Pre-pass: replay the trace once and record each patch's logical cursor position.
 37 |     {
 38 |         let mut tb = buffer::TextBuffer::new(false).unwrap();
 39 |         tb.set_crlf(false);
 40 |         tb.write(data.start_content.as_bytes(), true);
 41 | 
 42 |         for t in &data.txns {
 43 |             for p in &t.patches {
 44 |                 tb.cursor_move_to_offset(p.0);
 45 |                 let beg = tb.cursor_logical_pos();
 46 | 
 47 |                 tb.delete(buffer::CursorMovement::Grapheme, p.1 as CoordType);
 48 | 
 49 |                 tb.write(p.2.as_bytes(), true);
 50 |                 patches_with_coords.push((beg, p.1 as CoordType, p.2.clone()));
 51 |             }
 52 |         }
 53 |         // The pre-pass itself must already reproduce the trace's expected end content.
 54 |         let mut actual = String::new();
 55 |         tb.save_as_string(&mut actual);
 56 |         assert_eq!(actual, data.end_content);
 57 |     }
 58 |     // Variant 1: apply every patch as an offset-range replacement on a `GapBuffer`.
 59 |     let bench_gap_buffer = || {
 60 |         let mut buf = buffer::GapBuffer::new(false).unwrap();
 61 |         buf.replace(0..usize::MAX, data.start_content.as_bytes());
 62 | 
 63 |         for t in &data.txns {
 64 |             for p in &t.patches {
 65 |                 buf.replace(p.0..p.0 + p.1, p.2.as_bytes());
 66 |             }
 67 |         }
 68 | 
 69 |         buf
 70 |     };
 71 |     // Variant 2: apply the recorded patches via cursor movement, delete and write.
 72 |     let bench_text_buffer = || {
 73 |         let mut tb = buffer::TextBuffer::new(false).unwrap();
 74 |         tb.set_crlf(false);
 75 |         tb.write(data.start_content.as_bytes(), true);
 76 | 
 77 |         for p in &patches_with_coords {
 78 |             tb.cursor_move_to_logical(p.0);
 79 |             tb.delete(buffer::CursorMovement::Grapheme, p.1);
 80 |             tb.write(p.2.as_bytes(), true);
 81 |         }
 82 | 
 83 |         tb
 84 |     };
 85 | 
 86 |     // Sanity check: If this fails, the implementation is incorrect.
 87 |     {
 88 |         let buf = bench_gap_buffer();
 89 |         let mut actual = Vec::new();
 90 |         buf.extract_raw(0..usize::MAX, &mut actual, 0);
 91 |         assert_eq!(actual, data.end_content.as_bytes());
 92 |     }
 93 |     {
 94 |         let mut tb = bench_text_buffer();
 95 |         let mut actual = String::new();
 96 |         tb.save_as_string(&mut actual);
 97 |         assert_eq!(actual, data.end_content);
 98 |     }
 99 | 
100 |     c.benchmark_group("buffer")
101 |         .bench_function(BenchmarkId::new("GapBuffer", "rustcode"), |b| {
102 |             b.iter(bench_gap_buffer);
103 |         })
104 |         .bench_function(BenchmarkId::new("TextBuffer", "rustcode"), |b| {
105 |             b.iter(bench_text_buffer);
106 |         });
107 | }
108 | /// Measures `hash::hash` on zero-filled inputs of 8, 16 and 1024 bytes.
109 | fn bench_hash(c: &mut Criterion) {
110 |     let mut group = c.benchmark_group("hash");
111 |     let (small, medium, large) = ([0u8; 8], [0u8; 16], [0u8; 1024]);
112 |     group.throughput(Throughput::Bytes(8));
113 |     group.bench_function(BenchmarkId::new("hash", 8), |b| {
114 |         b.iter(|| hash::hash(0, black_box(&small)))
115 |     });
116 | 
117 |     group.throughput(Throughput::Bytes(16));
118 |     group.bench_function(BenchmarkId::new("hash", 16), |b| {
119 |         b.iter(|| hash::hash(0, black_box(&medium)))
120 |     });
121 | 
122 |     group.throughput(Throughput::Bytes(1024));
123 |     group.bench_function(BenchmarkId::new("hash", 1024), |b| {
124 |         b.iter(|| hash::hash(0, black_box(&large)))
125 |     });
126 | }
127 | /// Measures `oklab::srgb_to_oklab` and `oklab::oklab_blend` on fixed inputs.
128 | fn bench_oklab(c: &mut Criterion) {
129 |     let mut g = c.benchmark_group("oklab");
130 |     g.bench_function("srgb_to_oklab", |b| b.iter(|| oklab::srgb_to_oklab(black_box(0xff212cbe))));
131 |     g.bench_function("oklab_blend", |b| {
132 |         b.iter(|| oklab::oklab_blend(black_box(0x7f212cbe), black_box(0x7f3aae3f)))
133 |     });
134 | }
135 | /// Benchmarks `simd::lines_fwd` for line counts ranging from 1 to `128 * MEBI`.
136 | fn bench_simd_lines_fwd(c: &mut Criterion) {
137 |     let mut group = c.benchmark_group("simd");
138 |     // The input consists solely of `\n` bytes, and throughput is
139 |     // reported as one byte per requested line.
140 |     let newlines = vec![b'\n'; 128 * MEBI];
141 | 
142 |     for &count in &[1, 8, 128, KIBI, 128 * KIBI, 128 * MEBI] {
143 |         let id = BenchmarkId::new("lines_fwd", count);
144 |         group.throughput(Throughput::Bytes(count as u64));
145 |         group.bench_with_input(id, &count, |b, &n| {
146 |             b.iter(|| simd::lines_fwd(black_box(&newlines), 0, 0, n as CoordType));
147 |         });
148 |     }
149 | }
150 | /// Benchmarks `simd::memchr2` on a haystack whose only `\n` sits at index `bytes`.
151 | fn bench_simd_memchr2(c: &mut Criterion) {
152 |     let mut group = c.benchmark_group("simd");
153 |     // Fill the haystack once: Criterion calls the closure given to `bench_with_input`
154 |     // repeatedly, so refilling the whole buffer in there would be wasted setup work.
155 |     let mut buf = vec![b'a'; 128 * MEBI + KIBI];
156 | 
157 |     // For small sizes we add a small offset of +8,
158 |     // to ensure we also benchmark the non-SIMD tail handling.
159 |     // For large sizes, its relative impact is negligible.
160 |     for &bytes in &[8usize, 128 + 8, KIBI, 128 * KIBI, 128 * MEBI] {
161 |         buf[bytes] = b'\n';
162 |         let id = BenchmarkId::new("memchr2", bytes);
163 |         group.throughput(Throughput::Bytes(bytes as u64 + 1));
164 |         group.bench_with_input(id, &bytes, |b, _| {
165 |             b.iter(|| simd::memchr2(b'\n', b'\r', black_box(&buf), 0));
166 |         });
167 |         buf[bytes] = b'a'; // Undo the marker so the next size sees a single `\n` again.
168 |     }
169 | }
170 | /// Benchmarks `simd::memset` on `T` slices covering 8 bytes up to `128 * MEBI` bytes.
171 | fn bench_simd_memset<T: MemsetSafe + Copy + Default>(c: &mut Criterion) {
172 |     let mut group = c.benchmark_group("simd");
173 |     let name = format!("memset<{}>", std::any::type_name::<T>());
174 |     let size = mem::size_of::<T>();
175 |     let mut buf: Vec<T> = vec![Default::default(); 128 * MEBI / size];
176 | 
177 |     // For small sizes we add a small offset of +8,
178 |     // to ensure we also benchmark the non-SIMD tail handling.
179 |     // For large sizes, its relative impact is negligible.
180 |     for &bytes in &[8usize, 128 + 8, KIBI, 128 * KIBI, 128 * MEBI] {
181 |         group.throughput(Throughput::Bytes(bytes as u64)).bench_with_input(
182 |             BenchmarkId::new(&name, bytes),
183 |             &bytes,
184 |             |b, &bytes| {
185 |                 let slice = &mut buf[..bytes / size]; // Never exceeds `buf.len()`, no `unsafe` needed.
186 |                 b.iter(|| simd::memset(black_box(slice), Default::default()));
187 |             },
188 |         );
189 |     }
190 | }
191 | /// Benchmarks text measurement and UTF-8 decoding; every input passes through `black_box`.
192 | fn bench_unicode(c: &mut Criterion) {
193 |     let reference = concat!(
194 |         "In the quiet twilight, dreams unfold, soft whispers of a story untold.\n",
195 |         "月明かりが静かに照らし出し、夢を見る心の奥で詩が静かに囁かれる\n",
196 |         "Stars collide in the early light of hope, echoing the silent call of the night.\n",
197 |         "夜の静寂、希望と孤独が混ざり合うその中で詩が永遠に続く\n",
198 |     );
199 |     let buffer = reference.repeat(10);
200 |     let bytes = buffer.as_bytes();
201 |     c.benchmark_group("unicode::MeasurementConfig::goto_logical")
202 |         .throughput(Throughput::Bytes(bytes.len() as u64))
203 |         .bench_function("basic", |b| {
204 |             b.iter(|| unicode::MeasurementConfig::new(black_box(&bytes)).goto_logical(Point::MAX))
205 |         })
206 |         .bench_function("word_wrap", |b| {
207 |             b.iter(|| {
208 |                 unicode::MeasurementConfig::new(black_box(&bytes))
209 |                     .with_word_wrap_column(50)
210 |                     .goto_logical(Point::MAX)
211 |             })
212 |         });
213 | 
214 |     c.benchmark_group("unicode::Utf8Chars")
215 |         .throughput(Throughput::Bytes(bytes.len() as u64))
216 |         .bench_function("next", |b| {
217 |             b.iter(|| {
218 |                 unicode::Utf8Chars::new(black_box(bytes), 0)
219 |                     .fold(0u32, |acc, ch| acc.wrapping_add(ch as u32))
220 |             })
221 |         });
222 | }
223 | /// Entry point handed to `criterion_group!`: sets up the arena, then runs each benchmark.
224 | fn bench(c: &mut Criterion) {
225 |     arena::init(128 * MEBI).unwrap();
226 | 
227 |     bench_buffer(c);
228 |     bench_hash(c);
229 |     bench_oklab(c);
230 |     bench_simd_lines_fwd(c);
231 |     bench_simd_memchr2(c);
232 |     bench_simd_memset::<u32>(c);
233 |     bench_simd_memset::<u8>(c);
234 |     bench_unicode(c);
235 | }
236 | 
237 | criterion_group!(benches, bench);
238 | criterion_main!(benches);
239 | 


--------------------------------------------------------------------------------
/build.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Licensed under the MIT License.
 3 | 
 4 | fn main() {
 5 |     // Build scripts run on the host: `cfg(windows)` checks the host, the env var the target.
 6 |     #[cfg(windows)]
 7 |     if std::env::var("CARGO_CFG_TARGET_OS").is_ok_and(|os| os == "windows") {
 8 |         let mut res = winresource::WindowsResource::new();
 9 |         res.set_manifest_file("src/bin/edit/edit.exe.manifest")
10 |             .set("FileDescription", "Microsoft Edit")
11 |             .set("LegalCopyright", "© Microsoft Corporation. All rights reserved.");
12 |         res.compile().unwrap();
13 |     }
14 | }
15 | 


--------------------------------------------------------------------------------
/rust-toolchain.toml:
--------------------------------------------------------------------------------
1 | [toolchain]
2 | channel = "nightly"
3 | 


--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | style_edition = "2024"
2 | use_small_heuristics = "Max"
3 | group_imports = "StdExternalCrate"
4 | imports_granularity = "Module"
5 | format_code_in_doc_comments = true
6 | newline_style = "Unix"
7 | use_field_init_shorthand = true
8 | 


--------------------------------------------------------------------------------
/src/apperr.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Licensed under the MIT License.
 3 | 
 4 | //! Provides a transparent error type for edit.
 5 | 
 6 | use std::{io, result};
 7 | 
 8 | use crate::sys;
 9 | 
10 | pub const APP_ICU_MISSING: Error = Error::new_app(0);
11 | 
12 | /// Edit's transparent `Result` type.
13 | pub type Result<T> = result::Result<T, Error>;
14 | 
15 | /// Edit's transparent `Error` type.
16 | /// Wraps a raw `u32` code tagged as an application (`App`), ICU (`Icu`) or system (`Sys`) error.
17 | #[derive(Debug, Clone, Copy, PartialEq, Eq)]
18 | pub enum Error {
19 |     App(u32),
20 |     Icu(u32),
21 |     Sys(u32),
22 | }
23 | // One `const` constructor per variant, so errors such as `APP_ICU_MISSING` can be constants.
24 | impl Error {
25 |     pub const fn new_app(code: u32) -> Self {
26 |         Self::App(code)
27 |     }
28 | 
29 |     pub const fn new_icu(code: u32) -> Self {
30 |         Self::Icu(code)
31 |     }
32 | 
33 |     pub const fn new_sys(code: u32) -> Self {
34 |         Self::Sys(code)
35 |     }
36 | }
37 | /// Converts an `io::Error` by delegating to `sys::io_error_to_apperr`.
38 | impl From<io::Error> for Error {
39 |     fn from(err: io::Error) -> Self {
40 |         sys::io_error_to_apperr(err)
41 |     }
42 | }
43 | 


--------------------------------------------------------------------------------
/src/arena/debug.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | #![allow(clippy::missing_safety_doc, clippy::mut_from_ref)]
  5 | 
  6 | use std::alloc::{AllocError, Allocator, Layout};
  7 | use std::mem::{self, MaybeUninit};
  8 | use std::ptr::NonNull;
  9 | 
 10 | use super::release;
 11 | use crate::apperr;
 12 | 
 13 | /// A debug wrapper for [`release::Arena`].
 14 | ///
 15 | /// The problem with [`super::ScratchArena`] is that it only "borrows" an underlying
 16 | /// [`release::Arena`]. Once the [`super::ScratchArena`] is dropped it resets the watermark
 17 | /// of the underlying [`release::Arena`], freeing all allocations done since borrowing it.
 18 | ///
 19 | /// It is completely valid for the same [`release::Arena`] to be borrowed multiple times at once,
 20 | /// *as long as* you only use the most recent borrow. Bad example:
 21 | /// ```should_panic
 22 | /// use edit::arena::scratch_arena;
 23 | ///
 24 | /// let mut scratch1 = scratch_arena(None);
 25 | /// let mut scratch2 = scratch_arena(None);
 26 | ///
 27 | /// let foo = scratch1.alloc_uninit::<usize>();
 28 | ///
 29 | /// // This will also reset `scratch1`'s allocation.
 30 | /// drop(scratch2);
 31 | ///
 32 | /// *foo; // BOOM! ...if it wasn't for our debug wrapper.
 33 | /// ```
 34 | ///
 35 | /// To avoid this, this wraps the real [`release::Arena`] in a "debug" one, which pretends as if every
 36 | /// instance of itself is a distinct [`release::Arena`] instance. Then we use this "debug" [`release::Arena`]
 37 | /// for [`super::ScratchArena`] which allows us to track which borrow is the most recent one.
 38 | pub enum Arena {
 39 |     // Delegate is 'static, because release::Arena requires no lifetime
 40 |     // annotations, and so this mere debug helper cannot use them either.
 41 |     Delegated { delegate: &'static release::Arena, borrow: usize }, // `borrow`: 1-based sequence number of this borrow.
 42 |     Owned { arena: release::Arena }, // Stand-alone arena; accessed without any borrow check.
 43 | }
 44 | 
 45 | impl Drop for Arena {
 46 |     /// Ends a delegated borrow; asserts that it is the most recent one.
 47 |     fn drop(&mut self) {
 48 |         let Self::Delegated { delegate, borrow } = self else { return };
 49 |         let outstanding = delegate.borrows.get();
 50 |         assert_eq!(*borrow, outstanding);
 51 |         delegate.borrows.set(outstanding - 1);
 52 |     }
 53 | }
 54 | 
 55 | // `Default` yields the owned, empty arena.
 56 | impl Default for Arena {
 57 |     /// Equivalent to [`Arena::empty`].
 58 |     fn default() -> Arena { Arena::empty() }
 59 | }
 60 | 
 61 | impl Arena {
 62 |     pub const fn empty() -> Self { // Owned variant wrapping `release::Arena::empty()`.
 63 |         Self::Owned { arena: release::Arena::empty() }
 64 |     }
 65 |     /// Creates an owned arena via `release::Arena::new(capacity)`, forwarding its error.
 66 |     pub fn new(capacity: usize) -> apperr::Result<Self> {
 67 |         Ok(Self::Owned { arena: release::Arena::new(capacity)? })
 68 |     }
 69 |     /// Registers a new borrow of `delegate` and remembers its sequence number.
 70 |     pub(super) fn delegated(delegate: &release::Arena) -> Self {
 71 |         let borrow = delegate.borrows.get() + 1;
 72 |         delegate.borrows.set(borrow);
 73 |         Self::Delegated { delegate: unsafe { mem::transmute(delegate) }, borrow }
 74 |     }
 75 |     /// Returns the underlying arena; panics unless this is the newest borrow.
 76 |     #[inline]
 77 |     pub(super) fn delegate_target(&self) -> &release::Arena {
 78 |         match *self {
 79 |             Self::Delegated { delegate, borrow } => {
 80 |                 assert!(
 81 |                     borrow == delegate.borrows.get(),
 82 |                     "Arena already borrowed by a newer ScratchArena"
 83 |                 );
 84 |                 delegate
 85 |             }
 86 |             Self::Owned { ref arena } => arena,
 87 |         }
 88 |     }
 89 |     /// Returns the underlying arena without the newest-borrow check.
 90 |     #[inline]
 91 |     pub(super) fn delegate_target_unchecked(&self) -> &release::Arena {
 92 |         match self {
 93 |             Self::Delegated { delegate, .. } => delegate,
 94 |             Self::Owned { arena } => arena,
 95 |         }
 96 |     }
 97 |     /// Current allocation offset of the underlying arena (borrow-checked).
 98 |     pub fn offset(&self) -> usize {
 99 |         self.delegate_target().offset()
100 |     }
101 |     /// Borrow-checked forward to `release::Arena::reset`; same safety contract.
102 |     pub unsafe fn reset(&self, to: usize) {
103 |         unsafe { self.delegate_target().reset(to) }
104 |     }
105 |     /// Borrow-checked forward to `release::Arena::alloc_uninit`.
106 |     pub fn alloc_uninit<T>(&self) -> &mut MaybeUninit<T> {
107 |         self.delegate_target().alloc_uninit()
108 |     }
109 |     /// Borrow-checked forward to `release::Arena::alloc_uninit_slice`.
110 |     pub fn alloc_uninit_slice<T>(&self, count: usize) -> &mut [MaybeUninit<T>] {
111 |         self.delegate_target().alloc_uninit_slice(count)
112 |     }
113 | }
114 | 
115 | unsafe impl Allocator for Arena {
116 |     fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
117 |         self.delegate_target().alloc_raw(layout.size(), layout.align())
118 |     }
119 | 
120 |     fn allocate_zeroed(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
121 |         self.delegate_target().allocate_zeroed(layout)
122 |     }
123 | 
124 |     // While it is possible to shrink the tail end of the arena, it is
125 |     // not very useful given the existence of scoped scratch arenas.
126 |     unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
127 |         unsafe { self.delegate_target().deallocate(ptr, layout) }
128 |     }
129 | 
130 |     unsafe fn grow(
131 |         &self,
132 |         ptr: NonNull<u8>,
133 |         old_layout: Layout,
134 |         new_layout: Layout,
135 |     ) -> Result<NonNull<[u8]>, AllocError> {
136 |         unsafe { self.delegate_target().grow(ptr, old_layout, new_layout) }
137 |     }
138 | 
139 |     unsafe fn grow_zeroed(
140 |         &self,
141 |         ptr: NonNull<u8>,
142 |         old_layout: Layout,
143 |         new_layout: Layout,
144 |     ) -> Result<NonNull<[u8]>, AllocError> {
145 |         unsafe { self.delegate_target().grow_zeroed(ptr, old_layout, new_layout) }
146 |     }
147 | 
148 |     unsafe fn shrink(
149 |         &self,
150 |         ptr: NonNull<u8>,
151 |         old_layout: Layout,
152 |         new_layout: Layout,
153 |     ) -> Result<NonNull<[u8]>, AllocError> {
154 |         unsafe { self.delegate_target().shrink(ptr, old_layout, new_layout) }
155 |     }
156 | }
157 | 


--------------------------------------------------------------------------------
/src/arena/mod.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Licensed under the MIT License.
 3 | 
 4 | //! Arena allocators. Small and fast.
 5 | 
 6 | #[cfg(debug_assertions)]
 7 | mod debug;
 8 | mod release;
 9 | mod scratch;
10 | mod string;
11 | 
12 | #[cfg(all(not(doc), debug_assertions))]
13 | pub use self::debug::Arena;
14 | #[cfg(any(doc, not(debug_assertions)))]
15 | pub use self::release::Arena;
16 | pub use self::scratch::{ScratchArena, init, scratch_arena};
17 | pub use self::string::ArenaString;
18 | 


--------------------------------------------------------------------------------
/src/arena/release.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | #![allow(clippy::mut_from_ref)]
  5 | 
  6 | use std::alloc::{AllocError, Allocator, Layout};
  7 | use std::cell::Cell;
  8 | use std::hint::cold_path;
  9 | use std::mem::MaybeUninit;
 10 | use std::ptr::{self, NonNull};
 11 | use std::{mem, slice};
 12 | 
 13 | use crate::helpers::*;
 14 | use crate::{apperr, sys};
 15 | 
 16 | const ALLOC_CHUNK_SIZE: usize = 64 * KIBI;
 17 | 
 18 | /// An arena allocator.
 19 | ///
 20 | /// If you have never used an arena allocator before, think of it as
 21 | /// allocating objects on the stack, but the stack is *really* big.
 22 | /// Each time you allocate, memory gets pushed at the end of the stack,
 23 | /// each time you deallocate, memory gets popped from the end of the stack.
 24 | ///
 25 | /// One reason you'd want to use this is obviously performance: It's very simple
 26 | /// and so it's also very fast, >10x faster than your system allocator.
 27 | ///
 28 | /// However, modern allocators such as `mimalloc` are just as fast, so why not use them?
 29 | /// Because their performance comes at the cost of binary size and we can't have that.
 30 | ///
 31 | /// The biggest benefit though is that it sometimes massively simplifies lifetime
 32 | /// and memory management. This can best be seen by this project's UI code, which
 33 | /// uses an arena to allocate a tree of UI nodes. This is infamously difficult
 34 | /// to do in Rust, but not so when you have an arena allocator:
 35 | /// All nodes have the same lifetime, so you can just use references.
 36 | ///
 37 | /// # Safety
 38 | ///
 39 | /// **Do not** push objects into the arena that require destructors.
 40 | /// Destructors are not executed. Use a pool allocator for that.
 41 | pub struct Arena {
 42 |     base: NonNull<u8>, // Start of the reserved address range (dangling for `empty()`).
 43 |     capacity: usize, // Reserved size in bytes, rounded up to ALLOC_CHUNK_SIZE (0 for `empty()`).
 44 |     commit: Cell<usize>, // Bytes from `base` committed so far; grown by `alloc_raw_bump()`.
 45 |     offset: Cell<usize>, // Bump offset: bytes from `base` currently handed out.
 46 | 
 47 |     /// See [`super::debug`], which uses this for borrow tracking.
 48 |     #[cfg(debug_assertions)]
 49 |     pub(super) borrows: Cell<usize>,
 50 | }
 51 | 
 52 | impl Arena {
 53 |     pub const fn empty() -> Self {
 54 |         Self {
 55 |             base: NonNull::dangling(),
 56 |             capacity: 0,
 57 |             commit: Cell::new(0),
 58 |             offset: Cell::new(0),
 59 | 
 60 |             #[cfg(debug_assertions)]
 61 |             borrows: Cell::new(0),
 62 |         }
 63 |     }
 64 | 
 65 |     pub fn new(capacity: usize) -> apperr::Result<Self> {
 66 |         let capacity = (capacity.max(1) + ALLOC_CHUNK_SIZE - 1) & !(ALLOC_CHUNK_SIZE - 1);
 67 |         let base = unsafe { sys::virtual_reserve(capacity)? };
 68 | 
 69 |         Ok(Self {
 70 |             base,
 71 |             capacity,
 72 |             commit: Cell::new(0),
 73 |             offset: Cell::new(0),
 74 | 
 75 |             #[cfg(debug_assertions)]
 76 |             borrows: Cell::new(0),
 77 |         })
 78 |     }
 79 | 
 80 |     pub fn offset(&self) -> usize {
 81 |         self.offset.get()
 82 |     }
 83 | 
 84 |     /// "Deallocates" the memory in the arena down to the given offset.
 85 |     ///
 86 |     /// # Safety
 87 |     ///
 88 |     /// Obviously, this is GIGA UNSAFE. It runs no destructors and does not check
 89 |     /// whether the offset is valid. You better take care when using this function.
 90 |     pub unsafe fn reset(&self, to: usize) {
 91 |         // Fill the deallocated memory with 0xDD to aid debugging.
 92 |         if cfg!(debug_assertions) && self.offset.get() > to {
 93 |             let commit = self.commit.get();
 94 |             let len = (self.offset.get() + 128).min(commit) - to;
 95 |             unsafe { slice::from_raw_parts_mut(self.base.add(to).as_ptr(), len).fill(0xDD) };
 96 |         }
 97 | 
 98 |         self.offset.replace(to);
 99 |     }
100 | 
101 |     #[inline] // Hot path: bump-allocates `bytes` bytes at `alignment` out of committed memory.
102 |     pub(super) fn alloc_raw(
103 |         &self,
104 |         bytes: usize,
105 |         alignment: usize,
106 |     ) -> Result<NonNull<[u8]>, AllocError> {
107 |         let commit = self.commit.get();
108 |         let offset = self.offset.get();
109 |         // Round `offset` up to `alignment`; the mask trick assumes a power of two.
110 |         let beg = (offset + alignment - 1) & !(alignment - 1);
111 |         let end = beg + bytes;
112 |         // The request does not fit into committed memory: let the cold path commit more.
113 |         if end > commit {
114 |             return self.alloc_raw_bump(beg, end);
115 |         }
116 |         // Debug builds fill the range, plus up to 128 bytes beyond `end`, with 0xCD.
117 |         if cfg!(debug_assertions) {
118 |             let ptr = unsafe { self.base.add(offset) };
119 |             let len = (end + 128).min(self.commit.get()) - offset;
120 |             unsafe { slice::from_raw_parts_mut(ptr.as_ptr(), len).fill(0xCD) };
121 |         }
122 | 
123 |         self.offset.replace(end);
124 |         Ok(unsafe { NonNull::slice_from_raw_parts(self.base.add(beg), bytes) })
125 |     }
126 | 
127 |     // Slow path of `alloc_raw()`: commits memory up to `end` (chunk-rounded). Out of line so `alloc_raw()` stays tight.
128 |     #[cold]
129 |     fn alloc_raw_bump(&self, beg: usize, end: usize) -> Result<NonNull<[u8]>, AllocError> {
130 |         let offset = self.offset.get();
131 |         let commit_old = self.commit.get();
132 |         let commit_new = (end + ALLOC_CHUNK_SIZE - 1) & !(ALLOC_CHUNK_SIZE - 1);
133 |         // Fail if the rounded commit exceeds the reservation or the OS refuses to commit.
134 |         if commit_new > self.capacity
135 |             || unsafe {
136 |                 sys::virtual_commit(self.base.add(commit_old), commit_new - commit_old).is_err()
137 |             }
138 |         {
139 |             return Err(AllocError);
140 |         }
141 |         // Poison the range (plus up to 128 bytes) with 0xCD, clamped to the *new* commit size.
142 |         if cfg!(debug_assertions) {
143 |             let ptr = unsafe { self.base.add(offset) };
144 |             let len = (end + 128).min(commit_new) - offset;
145 |             unsafe { slice::from_raw_parts_mut(ptr.as_ptr(), len).fill(0xCD) };
146 |         }
147 | 
148 |         self.commit.replace(commit_new);
149 |         self.offset.replace(end);
150 |         Ok(unsafe { NonNull::slice_from_raw_parts(self.base.add(beg), end - beg) })
151 |     }
152 | 
153 |     /// Allocates uninitialized storage for one `T`; panics if the arena cannot provide it.
154 |     #[allow(clippy::mut_from_ref)]
155 |     pub fn alloc_uninit<T>(&self) -> &mut MaybeUninit<T> {
156 |         let layout = Layout::new::<T>();
157 |         let raw = self.alloc_raw(layout.size(), layout.align()).unwrap();
158 |         unsafe { raw.cast::<MaybeUninit<T>>().as_mut() }
159 |     }
160 | 
161 |     /// Allocates `count` uninitialized `T`s; the byte size is overflow-checked via `Layout::array`.
162 |     #[allow(clippy::mut_from_ref)]
163 |     pub fn alloc_uninit_slice<T>(&self, count: usize) -> &mut [MaybeUninit<T>] {
164 |         let layout = Layout::array::<T>(count).expect("arena slice size overflow");
165 |         let ptr = self.alloc_raw(layout.size(), layout.align()).unwrap();
166 |         unsafe { slice::from_raw_parts_mut(ptr.cast().as_ptr(), count) }
167 |     }
168 | }
169 | 
170 | impl Drop for Arena {
171 |     /// Releases the reservation; `empty()` arenas (dangling base) own nothing.
172 |     fn drop(&mut self) {
173 |         let owns_memory = self.base != NonNull::dangling();
174 |         if owns_memory { unsafe { sys::virtual_release(self.base, self.capacity) }; }
175 |     }
176 | }
177 | 
178 | impl Default for Arena {
179 |     fn default() -> Self {
180 |         Self::empty()
181 |     }
182 | }
183 | 
184 | unsafe impl Allocator for Arena {
185 |     fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
186 |         self.alloc_raw(layout.size(), layout.align())
187 |     }
188 | 
189 |     fn allocate_zeroed(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
190 |         let p = self.alloc_raw(layout.size(), layout.align())?;
191 |         unsafe { p.cast::<u8>().as_ptr().write_bytes(0, p.len()) }
192 |         Ok(p)
193 |     }
194 | 
195 |     // While it is possible to shrink the tail end of the arena, it is
196 |     // not very useful given the existence of scoped scratch arenas.
197 |     unsafe fn deallocate(&self, _: NonNull<u8>, _: Layout) {}
198 | 
199 |     unsafe fn grow(
200 |         &self,
201 |         ptr: NonNull<u8>,
202 |         old_layout: Layout,
203 |         new_layout: Layout,
204 |     ) -> Result<NonNull<[u8]>, AllocError> {
205 |         debug_assert!(new_layout.size() >= old_layout.size());
206 |         debug_assert!(new_layout.align() <= old_layout.align());
207 |         // Grows in place when `ptr` is the last allocation, otherwise allocates anew and copies.
208 |         let new_ptr;
209 | 
210 |         // Growing the given area is possible if it is at the end of the arena.
211 |         if unsafe { ptr.add(old_layout.size()) == self.base.add(self.offset.get()) } {
212 |             new_ptr = ptr;
213 |             let delta = new_layout.size() - old_layout.size();
214 |             // Assuming that the given ptr/length area is at the end of the arena,
215 |             // we can just push more memory to the end of the arena to grow it.
216 |             self.alloc_raw(delta, 1)?;
217 |         } else {
218 |             cold_path();
219 |             // Not the last allocation: the old bytes stay behind, since `deallocate` is a no-op.
220 |             new_ptr = self.allocate(new_layout)?.cast();
221 | 
222 |             // SAFETY: It's weird to me that this doesn't assert new_layout.size() >= old_layout.size(),
223 |             // but neither does the stdlib code at the time of writing.
224 |             // So, assuming that is not needed, this code is safe since it just copies the old data over.
225 |             unsafe {
226 |                 ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_layout.size());
227 |                 self.deallocate(ptr, old_layout);
228 |             }
229 |         }
230 | 
231 |         Ok(NonNull::slice_from_raw_parts(new_ptr, new_layout.size()))
232 |     }
233 | 
234 |     unsafe fn grow_zeroed(
235 |         &self,
236 |         ptr: NonNull<u8>,
237 |         old_layout: Layout,
238 |         new_layout: Layout,
239 |     ) -> Result<NonNull<[u8]>, AllocError> {
240 |         unsafe {
241 |             // SAFETY: Same as grow().
242 |             let ptr = self.grow(ptr, old_layout, new_layout)?;
243 | 
244 |             // SAFETY: At this point, `ptr` must be valid for `new_layout.size()` bytes,
245 |             // allowing us to safely zero out the delta since `old_layout.size()`.
246 |             ptr.cast::<u8>()
247 |                 .add(old_layout.size())
248 |                 .write_bytes(0, new_layout.size() - old_layout.size());
249 | 
250 |             Ok(ptr)
251 |         }
252 |     }
253 | 
254 |     unsafe fn shrink(
255 |         &self,
256 |         ptr: NonNull<u8>,
257 |         old_layout: Layout,
258 |         new_layout: Layout,
259 |     ) -> Result<NonNull<[u8]>, AllocError> {
260 |         debug_assert!(new_layout.size() <= old_layout.size());
261 |         debug_assert!(new_layout.align() <= old_layout.align());
262 |         // `len` is the size reported back; it only changes if the shrink really happens.
263 |         let mut len = old_layout.size();
264 | 
265 |         // Shrinking the given area is possible if it is at the end of the arena.
266 |         if unsafe { ptr.add(len) == self.base.add(self.offset.get()) } {
267 |             self.offset.set(self.offset.get() - len + new_layout.size());
268 |             len = new_layout.size();
269 |         } else {
270 |             debug_assert!(
271 |                 false,
272 |                 "Did you call shrink_to_fit()? Only the last allocation can be shrunk!"
273 |             );
274 |         }
275 |         // Without debug assertions a non-tail shrink is a no-op that reports the old size.
276 |         Ok(NonNull::slice_from_raw_parts(ptr, len))
277 |     }
278 | }
279 | 


--------------------------------------------------------------------------------
/src/arena/scratch.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | use std::ops::Deref;
  5 | 
  6 | #[cfg(debug_assertions)]
  7 | use super::debug;
  8 | use super::{Arena, release};
  9 | use crate::apperr;
 10 | use crate::helpers::*;
 11 | 
 12 | static mut S_SCRATCH: [release::Arena; 2] =
 13 |     const { [release::Arena::empty(), release::Arena::empty()] };
 14 | 
 15 | /// Initializes both scratch arenas, each via `release::Arena::new(capacity)`.
 16 | /// Call this before using [`scratch_arena`]. Returns the first error that `new` reports.
 17 | pub fn init(capacity: usize) -> apperr::Result<()> {
 18 |     unsafe {
 19 |         for s in &mut S_SCRATCH[..] {
 20 |             *s = release::Arena::new(capacity)?; // Assigning drops the arena previously in this slot.
 21 |         }
 22 |     }
 23 |     Ok(())
 24 | }
 25 | 
 26 | /// Need an arena for temporary allocations? [`scratch_arena`] has got you covered.
 27 | /// Call [`scratch_arena`] and it'll return a [`ScratchArena`] that resets when it goes out of scope.
 28 | ///
 29 | /// ---
 30 | ///
 31 | /// Most methods make just two kinds of allocations:
 32 | /// * Interior: Temporary data that can be deallocated when the function returns.
 33 | /// * Exterior: Data that is returned to the caller and must remain alive until the caller stops using it.
 34 | ///
 35 | /// Such methods only have two lifetimes, for which you consequently also only need two arenas.
 36 | /// ...even if your method calls other methods recursively! This is because the exterior allocations
 37 | /// of a callee are simply interior allocations to the caller, and so on, recursively.
 38 | ///
 39 | /// This works as long as the two arenas flip/flop between being used as interior/exterior allocator
 40 | /// along the callstack. To ensure that is the case, we use a borrow counter in debug builds.
 41 | ///
 42 | /// This approach was described among others at: <https://nullprogram.com/blog/2023/09/27/>
 43 | ///
 44 | /// # Safety
 45 | ///
 46 | /// If your function takes an [`Arena`] argument, you **MUST** pass it to `scratch_arena` as `Some(&arena)`.
 47 | pub fn scratch_arena(conflict: Option<&Arena>) -> ScratchArena<'static> {
 48 |     unsafe {
 49 |         #[cfg(debug_assertions)]
 50 |         let conflict = conflict.map(|a| a.delegate_target_unchecked());
 51 |         // Picks arena 1 if `conflict` is arena 0, else arena 0 (`opt_ptr_eq` presumably compares addresses).
 52 |         let index = opt_ptr_eq(conflict, Some(&S_SCRATCH[0])) as usize;
 53 |         let arena = &mut S_SCRATCH[index];
 54 |         ScratchArena::new(arena)
 55 |     }
 56 | }
 57 | 
 58 | /// Borrows an [`Arena`] for temporary allocations.
 59 | ///
 60 | /// See [`scratch_arena`].
 61 | #[cfg(debug_assertions)]
 62 | pub struct ScratchArena<'a> {
 63 |     arena: debug::Arena,
 64 |     offset: usize,
 65 |     _phantom: std::marker::PhantomData<&'a ()>,
 66 | }
 67 | 
 68 | #[cfg(not(debug_assertions))]
 69 | pub struct ScratchArena<'a> {
 70 |     arena: &'a Arena,
 71 |     offset: usize,
 72 | }
 73 | 
 74 | #[cfg(debug_assertions)]
 75 | impl<'a> ScratchArena<'a> {
 76 |     fn new(arena: &'a release::Arena) -> Self {
 77 |         let offset = arena.offset();
 78 |         ScratchArena { arena: Arena::delegated(arena), _phantom: std::marker::PhantomData, offset }
 79 |     }
 80 | }
 81 | 
 82 | #[cfg(not(debug_assertions))]
 83 | impl<'a> ScratchArena<'a> {
 84 |     fn new(arena: &'a release::Arena) -> Self {
 85 |         let offset = arena.offset();
 86 |         ScratchArena { arena, offset }
 87 |     }
 88 | }
 89 | 
 90 | impl Drop for ScratchArena<'_> {
 91 |     fn drop(&mut self) {
 92 |         unsafe { self.arena.reset(self.offset) };
 93 |     }
 94 | }
 95 | 
 96 | #[cfg(debug_assertions)]
 97 | impl Deref for ScratchArena<'_> {
 98 |     type Target = debug::Arena;
 99 | 
100 |     fn deref(&self) -> &Self::Target {
101 |         &self.arena
102 |     }
103 | }
104 | 
105 | #[cfg(not(debug_assertions))]
106 | impl Deref for ScratchArena<'_> {
107 |     type Target = Arena;
108 | 
109 |     fn deref(&self) -> &Self::Target {
110 |         self.arena
111 |     }
112 | }
113 | 


--------------------------------------------------------------------------------
/src/arena/string.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | use std::fmt;
  5 | use std::ops::{Bound, Deref, DerefMut, RangeBounds};
  6 | 
  7 | use super::Arena;
  8 | use crate::helpers::*;
  9 | 
 10 | /// A custom string type, because `std` lacks allocator support for [`String`].
 11 | ///
 12 | /// To keep things simple, this one is hardcoded to [`Arena`].
 13 | #[derive(Clone)]
 14 | pub struct ArenaString<'a> {
 15 |     vec: Vec<u8, &'a Arena>,
 16 | }
 17 | 
 18 | impl<'a> ArenaString<'a> {
 19 |     /// Creates a new [`ArenaString`] in the given arena.
 20 |     #[must_use]
 21 |     pub const fn new_in(arena: &'a Arena) -> Self {
 22 |         Self { vec: Vec::new_in(arena) }
 23 |     }
 24 | 
 25 |     #[must_use]
 26 |     pub fn with_capacity_in(capacity: usize, arena: &'a Arena) -> Self {
 27 |         Self { vec: Vec::with_capacity_in(capacity, arena) }
 28 |     }
 29 | 
 30 |     /// Turns a [`str`] into an [`ArenaString`].
 31 |     #[must_use]
 32 |     pub fn from_str(arena: &'a Arena, s: &str) -> Self {
 33 |         let mut res = Self::new_in(arena);
 34 |         res.push_str(s);
 35 |         res
 36 |     }
 37 | 
 38 |     /// It says right here that you checked if `bytes` is valid UTF-8
 39 |     /// and you are sure it is. Presto! Here's an `ArenaString`!
 40 |     ///
 41 |     /// # Safety
 42 |     ///
 43 |     /// You fool! It says "unchecked" right there. Now the house is burning.
 44 |     #[inline]
 45 |     #[must_use]
 46 |     pub unsafe fn from_utf8_unchecked(bytes: Vec<u8, &'a Arena>) -> Self {
 47 |         Self { vec: bytes }
 48 |     }
 49 | 
 50 |     /// Checks whether `text` contains only valid UTF-8.
 51 |     /// If the entire string is valid, it returns `Ok(text)`.
 52 |     /// Otherwise, it returns `Err(ArenaString)` with all invalid sequences replaced with U+FFFD.
 53 |     pub fn from_utf8_lossy<'s>(arena: &'a Arena, text: &'s [u8]) -> Result<&'s str, Self> {
 54 |         let mut iter = text.utf8_chunks();
 55 |         let Some(mut chunk) = iter.next() else {
 56 |             return Ok("");
 57 |         };
 58 | 
 59 |         let valid = chunk.valid();
 60 |         if chunk.invalid().is_empty() {
 61 |             debug_assert_eq!(valid.len(), text.len());
 62 |             return Ok(unsafe { str::from_utf8_unchecked(text) });
 63 |         }
 64 | 
 65 |         const REPLACEMENT: &str = "\u{FFFD}";
 66 | 
 67 |         let mut res = Self::new_in(arena);
 68 |         res.reserve(text.len());
 69 | 
 70 |         loop {
 71 |             res.push_str(chunk.valid());
 72 |             if !chunk.invalid().is_empty() {
 73 |                 res.push_str(REPLACEMENT);
 74 |             }
 75 |             chunk = match iter.next() {
 76 |                 Some(chunk) => chunk,
 77 |                 None => break,
 78 |             };
 79 |         }
 80 | 
 81 |         Err(res)
 82 |     }
 83 | 
 84 |     /// Turns a [`Vec<u8>`] into an [`ArenaString`], replacing invalid UTF-8 sequences with U+FFFD.
 85 |     #[must_use]
 86 |     pub fn from_utf8_lossy_owned(v: Vec<u8, &'a Arena>) -> Self {
 87 |         match Self::from_utf8_lossy(v.allocator(), &v) {
 88 |             Ok(..) => unsafe { Self::from_utf8_unchecked(v) },
 89 |             Err(s) => s,
 90 |         }
 91 |     }
 92 | 
 93 |     /// It's empty.
 94 |     pub fn is_empty(&self) -> bool {
 95 |         self.vec.is_empty()
 96 |     }
 97 | 
 98 |     /// It's lengthy.
 99 |     pub fn len(&self) -> usize {
100 |         self.vec.len()
101 |     }
102 | 
103 |     /// It's capacity.
104 |     pub fn capacity(&self) -> usize {
105 |         self.vec.capacity()
106 |     }
107 | 
108 |     /// It's a [`String`], now it's a [`str`]. Wow!
109 |     pub fn as_str(&self) -> &str {
110 |         unsafe { str::from_utf8_unchecked(self.vec.as_slice()) }
111 |     }
112 | 
113 |     /// It's a [`String`], now it's a [`str`]. And it's mutable! WOW!
114 |     pub fn as_mut_str(&mut self) -> &mut str {
115 |         unsafe { str::from_utf8_unchecked_mut(self.vec.as_mut_slice()) }
116 |     }
117 | 
118 |     /// Now it's bytes!
119 |     pub fn as_bytes(&self) -> &[u8] {
120 |         self.vec.as_slice()
121 |     }
122 | 
123 |     /// Returns a mutable reference to the contents of this `String`.
124 |     ///
125 |     /// # Safety
126 |     ///
127 |     /// The underlying `&mut Vec` allows writing bytes which are not valid UTF-8.
128 |     pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<u8, &'a Arena> {
129 |         &mut self.vec
130 |     }
131 | 
132 |     /// Reserves *additional* memory. For you old folks out there (totally not me),
133 |     /// this is different from C++'s `reserve` which reserves a total size.
134 |     pub fn reserve(&mut self, additional: usize) {
135 |         self.vec.reserve(additional)
136 |     }
137 | 
138 |     /// Just like [`ArenaString::reserve`], but it doesn't overallocate.
139 |     pub fn reserve_exact(&mut self, additional: usize) {
140 |         self.vec.reserve_exact(additional)
141 |     }
142 | 
143 |     /// Now it's small! Alarming!
144 |     ///
145 |     /// *Do not* call this unless this string is the last thing on the arena.
146 |     /// Arenas are stacks, they can't deallocate what's in the middle.
147 |     pub fn shrink_to_fit(&mut self) {
148 |         self.vec.shrink_to_fit()
149 |     }
150 | 
151 |     /// To no surprise, this clears the string.
152 |     pub fn clear(&mut self) {
153 |         self.vec.clear()
154 |     }
155 | 
156 |     /// Append some text.
157 |     pub fn push_str(&mut self, string: &str) {
158 |         self.vec.extend_from_slice(string.as_bytes())
159 |     }
160 | 
161 |     /// Append a single character.
162 |     #[inline]
163 |     pub fn push(&mut self, ch: char) {
164 |         match ch.len_utf8() {
165 |             1 => self.vec.push(ch as u8),
166 |             _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
167 |         }
168 |     }
169 | 
170 |     /// Same as `push(char)` but with a specified number of character copies.
171 |     /// Shockingly absent from the standard library.
172 |     pub fn push_repeat(&mut self, ch: char, total_copies: usize) {
173 |         if total_copies == 0 {
174 |             return;
175 |         }
176 | 
177 |         let buf = unsafe { self.as_mut_vec() };
178 | 
179 |         if ch.is_ascii() {
180 |             // Compiles down to `memset()`.
181 |             buf.extend(std::iter::repeat_n(ch as u8, total_copies));
182 |         } else {
183 |             // Pads efficiently by repeatedly doubling the copies that were already written.
184 |             let mut utf8_buf = [0; 4];
185 |             let utf8 = ch.encode_utf8(&mut utf8_buf).as_bytes();
186 |             let initial_len = buf.len();
187 |             let added_len = utf8.len() * total_copies;
188 |             let final_len = initial_len + added_len;
189 | 
190 |             buf.reserve(added_len);
191 |             buf.extend_from_slice(utf8);
192 | 
193 |             while buf.len() != final_len {
194 |                 let end = (final_len - buf.len() + initial_len).min(buf.len());
195 |                 buf.extend_from_within(initial_len..end);
196 |             }
197 |         }
198 |     }
199 | 
200 |     /// Replaces a range of characters with a new string.
201 |     pub fn replace_range<R: RangeBounds<usize>>(&mut self, range: R, replace_with: &str) {
202 |         match range.start_bound() {
203 |             Bound::Included(&n) => assert!(self.is_char_boundary(n)),
204 |             Bound::Excluded(&n) => assert!(self.is_char_boundary(n + 1)),
205 |             Bound::Unbounded => {}
206 |         };
207 |         match range.end_bound() {
208 |             Bound::Included(&n) => assert!(self.is_char_boundary(n + 1)),
209 |             Bound::Excluded(&n) => assert!(self.is_char_boundary(n)),
210 |             Bound::Unbounded => {}
211 |         };
212 |         unsafe { self.as_mut_vec() }.replace_range(range, replace_with.as_bytes());
213 |     }
214 | 
215 |     /// Finds `old` in the string and replaces it with `new`.
216 |     /// Only performs one replacement.
217 |     pub fn replace_once_in_place(&mut self, old: &str, new: &str) {
218 |         if let Some(beg) = self.find(old) {
219 |             unsafe { self.as_mut_vec() }.replace_range(beg..beg + old.len(), new.as_bytes());
220 |         }
221 |     }
222 | }
223 | 
224 | impl fmt::Debug for ArenaString<'_> {
225 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
226 |         fmt::Debug::fmt(&**self, f)
227 |     }
228 | }
229 | 
230 | impl PartialEq<&str> for ArenaString<'_> {
231 |     fn eq(&self, other: &&str) -> bool {
232 |         self.as_str() == *other
233 |     }
234 | }
235 | 
236 | impl Deref for ArenaString<'_> {
237 |     type Target = str;
238 | 
239 |     fn deref(&self) -> &Self::Target {
240 |         self.as_str()
241 |     }
242 | }
243 | 
244 | impl DerefMut for ArenaString<'_> {
245 |     fn deref_mut(&mut self) -> &mut Self::Target {
246 |         self.as_mut_str()
247 |     }
248 | }
249 | 
250 | impl fmt::Display for ArenaString<'_> {
251 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
252 |         f.write_str(self.as_str())
253 |     }
254 | }
255 | 
/// Makes `ArenaString` a target for `write!`/`format_args!` output
/// (this is what `arena_format!` relies on).
impl fmt::Write for ArenaString<'_> {
    /// Appends `s` to the string. Always returns `Ok(())`.
    #[inline]
    fn write_str(&mut self, s: &str) -> fmt::Result {
        self.push_str(s);
        Ok(())
    }

    /// Appends the single character `c`. Always returns `Ok(())`.
    #[inline]
    fn write_char(&mut self, c: char) -> fmt::Result {
        self.push(c);
        Ok(())
    }
}
269 | 
/// `format!`-style macro whose result is an `ArenaString` instead of a `String`.
///
/// The first argument is handed to `ArenaString::new_in`; everything after the
/// comma is forwarded unchanged to `format_args!`.
#[macro_export]
macro_rules! arena_format {
    ($arena:expr, $($arg:tt)*) => {{
        // Brings `write_fmt` into scope without introducing a name at the call site.
        use std::fmt::Write as _;
        let mut formatted = $crate::arena::ArenaString::new_in($arena);
        formatted.write_fmt(format_args!($($arg)*)).unwrap();
        formatted
    }}
}
279 | 


--------------------------------------------------------------------------------
/src/base64.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | //! Base64 facilities.
  5 | 
  6 | use crate::arena::ArenaString;
  7 | 
/// The base64 alphabet used by `encode` (`A-Z`, `a-z`, `0-9`, `+`, `/`),
/// indexed by the 6-bit value to encode.
const CHARSET: [u8; 64] = *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  9 | 
 10 | /// One aspect of base64 is that the encoded length can be
 11 | /// calculated accurately in advance, which is what this returns.
 12 | #[inline]
 13 | pub fn encode_len(src_len: usize) -> usize {
 14 |     src_len.div_ceil(3) * 4
 15 | }
 16 | 
/// Encodes the given bytes as base64 and appends them to the destination string.
///
/// The output uses the alphabet in `CHARSET` and is padded with `=` to a
/// multiple of 4 characters. An empty `src` appends nothing.
pub fn encode(dst: &mut ArenaString, src: &[u8]) {
    unsafe {
        let mut inp = src.as_ptr();
        let mut remaining = src.len();
        let dst = dst.as_mut_vec();

        let out_len = encode_len(src.len());
        // The encoded length is known up front (see `encode_len`),
        // so all the space that is needed can be reserved at once.
        dst.reserve(out_len);

        // SAFETY: Getting a pointer to the reserved space is only safe
        // *after* calling `reserve()` as it may change the pointer.
        let mut out = dst.as_mut_ptr().add(dst.len());

        if remaining != 0 {
            // Translate chunks of 3 source bytes into 4 base64-encoded bytes.
            // The loop stops while 1-3 bytes are still left, so the tail code
            // below always has at least one byte to process.
            while remaining > 3 {
                // SAFETY: Thanks to `remaining > 3`, reading 4 bytes at once is safe.
                // This improves performance massively over a byte-by-byte approach,
                // because it allows us to byte-swap the read and use simple bit-shifts below.
                let val = u32::from_be((inp as *const u32).read_unaligned());
                // Only the first 3 of the 4 bytes read are consumed; the lowest
                // 8 bits of `val` are never looked at below.
                inp = inp.add(3);
                remaining -= 3;

                // Emit the top 24 bits of `val` as four 6-bit groups.
                *out = CHARSET[(val >> 26) as usize];
                out = out.add(1);
                *out = CHARSET[(val >> 20) as usize & 0x3f];
                out = out.add(1);
                *out = CHARSET[(val >> 14) as usize & 0x3f];
                out = out.add(1);
                *out = CHARSET[(val >> 8) as usize & 0x3f];
                out = out.add(1);
            }

            // Convert the remaining 1-3 bytes.
            // `in1`/`in2` stay 0 when the respective byte does not exist.
            let mut in1 = 0;
            let mut in2 = 0;

            // We can simplify the following logic by assuming that there's only 1
            // byte left and writing both padding characters. With 2 bytes left the
            // first '=' is overwritten below, with 3 bytes left both are.
            *out.add(3) = b'=';
            *out.add(2) = b'=';

            if remaining >= 3 {
                in2 = inp.add(2).read() as usize;
                *out.add(3) = CHARSET[in2 & 0x3f];
            }

            if remaining >= 2 {
                in1 = inp.add(1).read() as usize;
                *out.add(2) = CHARSET[(in1 << 2 | in2 >> 6) & 0x3f];
            }

            let in0 = inp.add(0).read() as usize;
            *out.add(1) = CHARSET[(in0 << 4 | in1 >> 4) & 0x3f];
            *out.add(0) = CHARSET[in0 >> 2];
        }

        // Make the bytes written through `out` part of the vector's contents.
        dst.set_len(dst.len() + out_len);
    }
}
 79 | 
 80 | #[cfg(test)]
 81 | mod tests {
 82 |     use super::encode;
 83 |     use crate::arena::{Arena, ArenaString};
 84 | 
 85 |     #[test]
 86 |     fn test_basic() {
 87 |         let arena = Arena::new(4 * 1024).unwrap();
 88 |         let enc = |s: &[u8]| {
 89 |             let mut dst = ArenaString::new_in(&arena);
 90 |             encode(&mut dst, s);
 91 |             dst
 92 |         };
 93 |         assert_eq!(enc(b""), "");
 94 |         assert_eq!(enc(b"a"), "YQ==");
 95 |         assert_eq!(enc(b"ab"), "YWI=");
 96 |         assert_eq!(enc(b"abc"), "YWJj");
 97 |         assert_eq!(enc(b"abcd"), "YWJjZA==");
 98 |         assert_eq!(enc(b"abcde"), "YWJjZGU=");
 99 |         assert_eq!(enc(b"abcdef"), "YWJjZGVm");
100 |         assert_eq!(enc(b"abcdefg"), "YWJjZGVmZw==");
101 |         assert_eq!(enc(b"abcdefgh"), "YWJjZGVmZ2g=");
102 |         assert_eq!(enc(b"abcdefghi"), "YWJjZGVmZ2hp");
103 |         assert_eq!(enc(b"abcdefghij"), "YWJjZGVmZ2hpag==");
104 |         assert_eq!(enc(b"abcdefghijk"), "YWJjZGVmZ2hpams=");
105 |         assert_eq!(enc(b"abcdefghijkl"), "YWJjZGVmZ2hpamts");
106 |         assert_eq!(enc(b"abcdefghijklm"), "YWJjZGVmZ2hpamtsbQ==");
107 |         assert_eq!(enc(b"abcdefghijklmN"), "YWJjZGVmZ2hpamtsbU4=");
108 |         assert_eq!(enc(b"abcdefghijklmNO"), "YWJjZGVmZ2hpamtsbU5P");
109 |         assert_eq!(enc(b"abcdefghijklmNOP"), "YWJjZGVmZ2hpamtsbU5PUA==");
110 |         assert_eq!(enc(b"abcdefghijklmNOPQ"), "YWJjZGVmZ2hpamtsbU5PUFE=");
111 |         assert_eq!(enc(b"abcdefghijklmNOPQR"), "YWJjZGVmZ2hpamtsbU5PUFFS");
112 |         assert_eq!(enc(b"abcdefghijklmNOPQRS"), "YWJjZGVmZ2hpamtsbU5PUFFSUw==");
113 |         assert_eq!(enc(b"abcdefghijklmNOPQRST"), "YWJjZGVmZ2hpamtsbU5PUFFSU1Q=");
114 |         assert_eq!(enc(b"abcdefghijklmNOPQRSTU"), "YWJjZGVmZ2hpamtsbU5PUFFSU1RV");
115 |         assert_eq!(enc(b"abcdefghijklmNOPQRSTUV"), "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVg==");
116 |         assert_eq!(enc(b"abcdefghijklmNOPQRSTUVW"), "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVlc=");
117 |         assert_eq!(enc(b"abcdefghijklmNOPQRSTUVWX"), "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVldY");
118 |         assert_eq!(enc(b"abcdefghijklmNOPQRSTUVWXY"), "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVldYWQ==");
119 |         assert_eq!(enc(b"abcdefghijklmNOPQRSTUVWXYZ"), "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVldYWVo=");
120 |     }
121 | }
122 | 


--------------------------------------------------------------------------------
/src/bin/edit/documents.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | use std::collections::LinkedList;
  5 | use std::ffi::OsStr;
  6 | use std::fs::File;
  7 | use std::path::{Path, PathBuf};
  8 | 
  9 | use edit::buffer::{RcTextBuffer, TextBuffer};
 10 | use edit::helpers::{CoordType, Point};
 11 | use edit::{apperr, path, sys};
 12 | 
 13 | use crate::state::DisplayablePathBuf;
 14 | 
/// One open document: a text buffer plus what is known about the file behind it.
pub struct Document {
    /// Shared handle to the text buffer holding the document's contents.
    pub buffer: RcTextBuffer,
    /// Path of the backing file. `None` for documents created by `add_untitled`
    /// that have not been saved under a name yet.
    pub path: Option<PathBuf>,
    /// Parent directory of `path`, in displayable form. Set together with `path`.
    pub dir: Option<DisplayablePathBuf>,
    /// File name component of `path`, or the generated `Untitled-N.txt` name.
    pub filename: String,
    /// Identity of the backing file as reported by `sys::file_id`.
    /// `add_file_path` compares it to detect files that are already open.
    pub file_id: Option<sys::FileId>,
    /// The `N` used for the generated `Untitled-N.txt` name.
    /// Documents created by `add_file_path` start out with 0.
    pub new_file_counter: usize,
}
 23 | 
 24 | impl Document {
 25 |     pub fn save(&mut self, new_path: Option<PathBuf>) -> apperr::Result<()> {
 26 |         let path = new_path.as_deref().unwrap_or_else(|| self.path.as_ref().unwrap().as_path());
 27 |         let mut file = DocumentManager::open_for_writing(path)?;
 28 | 
 29 |         {
 30 |             let mut tb = self.buffer.borrow_mut();
 31 |             tb.write_file(&mut file)?;
 32 |         }
 33 | 
 34 |         if let Ok(id) = sys::file_id(None, path) {
 35 |             self.file_id = Some(id);
 36 |         }
 37 | 
 38 |         if let Some(path) = new_path {
 39 |             self.set_path(path);
 40 |         }
 41 | 
 42 |         Ok(())
 43 |     }
 44 | 
 45 |     pub fn reread(&mut self, encoding: Option<&'static str>) -> apperr::Result<()> {
 46 |         let path = self.path.as_ref().unwrap().as_path();
 47 |         let mut file = DocumentManager::open_for_reading(path)?;
 48 | 
 49 |         {
 50 |             let mut tb = self.buffer.borrow_mut();
 51 |             tb.read_file(&mut file, encoding)?;
 52 |         }
 53 | 
 54 |         if let Ok(id) = sys::file_id(None, path) {
 55 |             self.file_id = Some(id);
 56 |         }
 57 | 
 58 |         Ok(())
 59 |     }
 60 | 
 61 |     fn set_path(&mut self, path: PathBuf) {
 62 |         let filename = path.file_name().unwrap_or_default().to_string_lossy().into_owned();
 63 |         let dir = path.parent().map(ToOwned::to_owned).unwrap_or_default();
 64 |         self.filename = filename;
 65 |         self.dir = Some(DisplayablePathBuf::from_path(dir));
 66 |         self.path = Some(path);
 67 |         self.update_file_mode();
 68 |     }
 69 | 
 70 |     fn update_file_mode(&mut self) {
 71 |         let mut tb = self.buffer.borrow_mut();
 72 |         tb.set_ruler(if self.filename == "COMMIT_EDITMSG" { 72 } else { 0 });
 73 |     }
 74 | }
 75 | 
/// Owns all open documents.
#[derive(Default)]
pub struct DocumentManager {
    /// The open documents. The front element is the active document:
    /// `active()` returns it and newly added documents are pushed there.
    list: LinkedList<Document>,
}
 80 | 
 81 | impl DocumentManager {
 82 |     #[inline]
 83 |     pub fn len(&self) -> usize {
 84 |         self.list.len()
 85 |     }
 86 | 
 87 |     #[inline]
 88 |     pub fn active(&self) -> Option<&Document> {
 89 |         self.list.front()
 90 |     }
 91 | 
 92 |     #[inline]
 93 |     pub fn active_mut(&mut self) -> Option<&mut Document> {
 94 |         self.list.front_mut()
 95 |     }
 96 | 
 97 |     #[inline]
 98 |     pub fn update_active<F: FnMut(&Document) -> bool>(&mut self, mut func: F) -> bool {
 99 |         let mut cursor = self.list.cursor_front_mut();
100 |         while let Some(doc) = cursor.current() {
101 |             if func(doc) {
102 |                 let list = cursor.remove_current_as_list().unwrap();
103 |                 self.list.cursor_front_mut().splice_before(list);
104 |                 return true;
105 |             }
106 |             cursor.move_next();
107 |         }
108 |         false
109 |     }
110 | 
111 |     pub fn remove_active(&mut self) {
112 |         self.list.pop_front();
113 |     }
114 | 
115 |     pub fn add_untitled(&mut self) -> apperr::Result<&mut Document> {
116 |         let buffer = Self::create_buffer()?;
117 |         let mut doc = Document {
118 |             buffer,
119 |             path: None,
120 |             dir: Default::default(),
121 |             filename: Default::default(),
122 |             file_id: None,
123 |             new_file_counter: 0,
124 |         };
125 |         self.gen_untitled_name(&mut doc);
126 | 
127 |         self.list.push_front(doc);
128 |         Ok(self.list.front_mut().unwrap())
129 |     }
130 | 
131 |     pub fn gen_untitled_name(&self, doc: &mut Document) {
132 |         let mut new_file_counter = 0;
133 |         for doc in &self.list {
134 |             new_file_counter = new_file_counter.max(doc.new_file_counter);
135 |         }
136 |         new_file_counter += 1;
137 | 
138 |         doc.filename = format!("Untitled-{new_file_counter}.txt");
139 |         doc.new_file_counter = new_file_counter;
140 |     }
141 | 
142 |     pub fn add_file_path(&mut self, path: &Path) -> apperr::Result<&mut Document> {
143 |         let (path, goto) = Self::parse_filename_goto(path);
144 |         let path = path::normalize(path);
145 | 
146 |         let mut file = match Self::open_for_reading(&path) {
147 |             Ok(file) => Some(file),
148 |             Err(err) if sys::apperr_is_not_found(err) => None,
149 |             Err(err) => return Err(err),
150 |         };
151 | 
152 |         let file_id = if file.is_some() { Some(sys::file_id(file.as_ref(), &path)?) } else { None };
153 | 
154 |         // Check if the file is already open.
155 |         if file_id.is_some() && self.update_active(|doc| doc.file_id == file_id) {
156 |             let doc = self.active_mut().unwrap();
157 |             if let Some(goto) = goto {
158 |                 doc.buffer.borrow_mut().cursor_move_to_logical(goto);
159 |             }
160 |             return Ok(doc);
161 |         }
162 | 
163 |         let buffer = Self::create_buffer()?;
164 |         {
165 |             if let Some(file) = &mut file {
166 |                 let mut tb = buffer.borrow_mut();
167 |                 tb.read_file(file, None)?;
168 | 
169 |                 if let Some(goto) = goto
170 |                     && goto != Default::default()
171 |                 {
172 |                     tb.cursor_move_to_logical(goto);
173 |                 }
174 |             }
175 |         }
176 | 
177 |         let mut doc = Document {
178 |             buffer,
179 |             path: None,
180 |             dir: None,
181 |             filename: Default::default(),
182 |             file_id,
183 |             new_file_counter: 0,
184 |         };
185 |         doc.set_path(path);
186 | 
187 |         if let Some(active) = self.active()
188 |             && active.path.is_none()
189 |             && active.file_id.is_none()
190 |             && !active.buffer.borrow().is_dirty()
191 |         {
192 |             // If the current document is a pristine Untitled document with no
193 |             // name and no ID, replace it with the new document.
194 |             self.remove_active();
195 |         }
196 | 
197 |         self.list.push_front(doc);
198 |         Ok(self.list.front_mut().unwrap())
199 |     }
200 | 
201 |     pub fn reflow_all(&self) {
202 |         for doc in &self.list {
203 |             let mut tb = doc.buffer.borrow_mut();
204 |             tb.reflow();
205 |         }
206 |     }
207 | 
208 |     pub fn open_for_reading(path: &Path) -> apperr::Result<File> {
209 |         File::open(path).map_err(apperr::Error::from)
210 |     }
211 | 
212 |     pub fn open_for_writing(path: &Path) -> apperr::Result<File> {
213 |         File::create(path).map_err(apperr::Error::from)
214 |     }
215 | 
216 |     fn create_buffer() -> apperr::Result<RcTextBuffer> {
217 |         let buffer = TextBuffer::new_rc(false)?;
218 |         {
219 |             let mut tb = buffer.borrow_mut();
220 |             tb.set_insert_final_newline(!cfg!(windows)); // As mandated by POSIX.
221 |             tb.set_margin_enabled(true);
222 |             tb.set_line_highlight_enabled(true);
223 |         }
224 |         Ok(buffer)
225 |     }
226 | 
227 |     // Parse a filename in the form of "filename:line:char".
228 |     // Returns the position of the first colon and the line/char coordinates.
229 |     fn parse_filename_goto(path: &Path) -> (&Path, Option<Point>) {
230 |         fn parse(s: &[u8]) -> Option<CoordType> {
231 |             if s.is_empty() {
232 |                 return None;
233 |             }
234 | 
235 |             let mut num: CoordType = 0;
236 |             for &b in s {
237 |                 if !b.is_ascii_digit() {
238 |                     return None;
239 |                 }
240 |                 let digit = (b - b'0') as CoordType;
241 |                 num = num.checked_mul(10)?.checked_add(digit)?;
242 |             }
243 |             Some(num)
244 |         }
245 | 
246 |         fn find_colon_rev(bytes: &[u8], offset: usize) -> Option<usize> {
247 |             (0..offset.min(bytes.len())).rev().find(|&i| bytes[i] == b':')
248 |         }
249 | 
250 |         let bytes = path.as_os_str().as_encoded_bytes();
251 |         let colend = match find_colon_rev(bytes, bytes.len()) {
252 |             // Reject filenames that would result in an empty filename after stripping off the :line:char suffix.
253 |             // For instance, a filename like ":123:456" will not be processed by this function.
254 |             Some(colend) if colend > 0 => colend,
255 |             _ => return (path, None),
256 |         };
257 | 
258 |         let last = match parse(&bytes[colend + 1..]) {
259 |             Some(last) => last,
260 |             None => return (path, None),
261 |         };
262 |         let last = (last - 1).max(0);
263 |         let mut len = colend;
264 |         let mut goto = Point { x: 0, y: last };
265 | 
266 |         if let Some(colbeg) = find_colon_rev(bytes, colend) {
267 |             // Same here: Don't allow empty filenames.
268 |             if colbeg != 0
269 |                 && let Some(first) = parse(&bytes[colbeg + 1..colend])
270 |             {
271 |                 let first = (first - 1).max(0);
272 |                 len = colbeg;
273 |                 goto = Point { x: last, y: first };
274 |             }
275 |         }
276 | 
277 |         // Strip off the :line:char suffix.
278 |         let path = &bytes[..len];
279 |         let path = unsafe { OsStr::from_encoded_bytes_unchecked(path) };
280 |         let path = Path::new(path);
281 |         (path, Some(goto))
282 |     }
283 | }
284 | 
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_last_numbers() {
        fn parse(s: &str) -> (&str, Option<Point>) {
            let (p, g) = DocumentManager::parse_filename_goto(Path::new(s));
            (p.to_str().unwrap(), g)
        }

        fn at(x: CoordType, y: CoordType) -> Option<Point> {
            Some(Point { x, y })
        }

        // (input, expected filename, expected position)
        let cases = [
            ("123", "123", None),
            ("abc", "abc", None),
            (":123", ":123", None),
            ("abc:123", "abc", at(0, 122)),
            ("45:123", "45", at(0, 122)),
            (":45:123", ":45", at(0, 122)),
            ("abc:45:123", "abc", at(122, 44)),
            ("abc:def:123", "abc:def", at(0, 122)),
            ("1:2:3", "1", at(2, 1)),
            ("::3", ":", at(0, 2)),
            ("1::3", "1:", at(0, 2)),
            ("", "", None),
            (":", ":", None),
            ("::", "::", None),
            ("a:1", "a", at(0, 0)),
            ("1:a", "1:a", None),
            ("file.txt:10", "file.txt", at(0, 9)),
            ("file.txt:10:5", "file.txt", at(4, 9)),
        ];

        for (input, filename, goto) in cases {
            assert_eq!(parse(input), (filename, goto));
        }
    }
}
316 | 


--------------------------------------------------------------------------------
/src/bin/edit/draw_filepicker.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | use std::cmp::Ordering;
  5 | use std::fs;
  6 | use std::path::{Path, PathBuf};
  7 | 
  8 | use edit::framebuffer::IndexedColor;
  9 | use edit::helpers::*;
 10 | use edit::input::vk;
 11 | use edit::tui::*;
 12 | use edit::{icu, path};
 13 | 
 14 | use crate::localization::*;
 15 | use crate::state::*;
 16 | 
/// Draws the file picker modal used for "Open" and "Save As", plus the
/// overwrite confirmation that may follow it, and performs the resulting
/// open/save once the user has settled on a file.
///
/// All dialog state lives in the `file_picker_*` fields of `state`. When the
/// dialog is finished (closed, or the file got opened/saved successfully),
/// `wants_file_picker` is reset to `StateFilePicker::None`.
pub fn draw_file_picker(ctx: &mut Context, state: &mut State) {
    // The save dialog is pre-filled with the current document filename.
    // Switching to `SaveAsShown` makes sure this happens only once.
    if state.wants_file_picker == StateFilePicker::SaveAs {
        state.wants_file_picker = StateFilePicker::SaveAsShown;

        if state.file_picker_pending_name.as_os_str().is_empty() {
            state.file_picker_pending_name =
                state.documents.active().map_or("Untitled.txt", |doc| doc.filename.as_str()).into();
        }
    }

    let width = (ctx.size().width - 20).max(10);
    let height = (ctx.size().height - 10).max(10);
    // `doit`: the file to open or save to, once the user has confirmed one.
    let mut doit = None;
    // `done`: whether the dialog should be closed at the end of this function.
    let mut done = false;

    ctx.modal_begin(
        "file-picker",
        if state.wants_file_picker == StateFilePicker::Open {
            loc(LocId::FileOpen)
        } else {
            loc(LocId::FileSaveAs)
        },
    );
    ctx.attr_intrinsic_size(Size { width, height });
    {
        // Set when the user confirmed the current name, be it via
        // Enter in the name field or by activating a list entry.
        let mut activated = false;

        ctx.table_begin("path");
        ctx.table_set_columns(&[0, COORD_TYPE_SAFE_MAX]);
        ctx.table_set_cell_gap(Size { width: 1, height: 0 });
        ctx.attr_padding(Rect::two(1, 1));
        ctx.inherit_focus();
        {
            ctx.table_next_row();

            ctx.label("dir-label", loc(LocId::SaveAsDialogPathLabel));
            ctx.label("dir", state.file_picker_pending_dir.as_str());
            ctx.attr_overflow(Overflow::TruncateMiddle);

            ctx.table_next_row();
            ctx.inherit_focus();

            ctx.label("name-label", loc(LocId::SaveAsDialogNameLabel));
            ctx.editline("name", &mut state.file_picker_pending_name);
            ctx.inherit_focus();
            if ctx.is_focused() && ctx.consume_shortcut(vk::RETURN) {
                activated = true;
            }
        }
        ctx.table_end();

        // The directory listing is cached and only (re)built when it
        // was reset, which happens whenever the directory changes.
        if state.file_picker_entries.is_none() {
            draw_dialog_saveas_refresh_files(state);
        }

        let files = state.file_picker_entries.as_ref().unwrap();

        ctx.scrollarea_begin(
            "directory",
            Size {
                width: 0,
                // -1 for the label (top)
                // -1 for the label (bottom)
                // -1 for the editline (bottom)
                height: height - 3,
            },
        );
        ctx.attr_background_rgba(ctx.indexed_alpha(IndexedColor::Black, 1, 4));
        ctx.next_block_id_mixin(state.file_picker_pending_dir_revision);
        {
            ctx.list_begin("files");
            ctx.inherit_focus();
            for entry in files {
                match ctx.list_item(false, entry.as_str()) {
                    ListSelection::Unchanged => {}
                    // Selecting an entry copies its name into the name field.
                    ListSelection::Selected => {
                        state.file_picker_pending_name = entry.as_path().into()
                    }
                    ListSelection::Activated => activated = true,
                }
                ctx.attr_overflow(Overflow::TruncateMiddle);
            }
            ctx.list_end();

            // Backspace inside the list navigates to the parent directory.
            if ctx.contains_focus() && ctx.consume_shortcut(vk::BACK) {
                state.file_picker_pending_name = "..".into();
                activated = true;
            }
        }
        ctx.scrollarea_end();

        if activated {
            // This yields a path only if the name doesn't refer to a
            // directory. Otherwise the dialog navigates into it instead.
            doit = draw_file_picker_update_path(state);

            // Check if the file already exists and show an overwrite warning in that case.
            // The path is parked in `file_picker_overwrite_warning` until the user decides.
            if state.wants_file_picker != StateFilePicker::Open
                && let Some(path) = doit.as_deref()
                && path.exists()
            {
                state.file_picker_overwrite_warning = doit.take();
            }
        }
    }
    if ctx.modal_end() {
        done = true;
    }

    if state.file_picker_overwrite_warning.is_some() {
        // Whether the user agreed to overwrite the file.
        let mut save;

        ctx.modal_begin("overwrite", loc(LocId::FileOverwriteWarning));
        ctx.attr_background_rgba(ctx.indexed(IndexedColor::Red));
        ctx.attr_foreground_rgba(ctx.indexed(IndexedColor::BrightWhite));
        {
            let contains_focus = ctx.contains_focus();

            ctx.label("description", loc(LocId::FileOverwriteWarningDescription));
            ctx.attr_overflow(Overflow::TruncateTail);
            ctx.attr_padding(Rect::three(1, 2, 1));

            ctx.table_begin("choices");
            ctx.inherit_focus();
            ctx.attr_padding(Rect::three(0, 2, 1));
            ctx.attr_position(Position::Center);
            ctx.table_set_cell_gap(Size { width: 2, height: 0 });
            {
                ctx.table_next_row();
                ctx.inherit_focus();

                save = ctx.button("yes", loc(LocId::Yes), ButtonStyle::default());
                ctx.inherit_focus();

                if ctx.button("no", loc(LocId::No), ButtonStyle::default()) {
                    state.file_picker_overwrite_warning = None;
                }
            }
            ctx.table_end();

            // Y and N work as keyboard equivalents of the two buttons.
            if contains_focus {
                save |= ctx.consume_shortcut(vk::Y);
                if ctx.consume_shortcut(vk::N) {
                    state.file_picker_overwrite_warning = None;
                }
            }
        }
        if ctx.modal_end() {
            state.file_picker_overwrite_warning = None;
        }

        // If the warning was dismissed above, this takes `None` and nothing is saved.
        if save {
            doit = state.file_picker_overwrite_warning.take();
        }
    }

    if let Some(path) = doit {
        let res = if state.wants_file_picker == StateFilePicker::Open {
            state.documents.add_file_path(&path).map(|_| ())
        } else if let Some(doc) = state.documents.active_mut() {
            doc.save(Some(path))
        } else {
            Ok(())
        };
        match res {
            Ok(..) => {
                ctx.needs_rerender();
                done = true;
            }
            // On failure the dialog stays open and the error is reported.
            Err(err) => error_log_add(ctx, state, err),
        }
    }

    // Reset the dialog state. The pending directory is intentionally left alone.
    if done {
        state.wants_file_picker = StateFilePicker::None;
        state.file_picker_pending_name = Default::default();
        state.file_picker_entries = Default::default();
        state.file_picker_overwrite_warning = Default::default();
    }
}
196 | 
197 | // Returns Some(path) if the path refers to a file.
198 | fn draw_file_picker_update_path(state: &mut State) -> Option<PathBuf> {
199 |     let old_path = state.file_picker_pending_dir.as_path();
200 |     let path = old_path.join(&state.file_picker_pending_name);
201 |     let path = path::normalize(&path);
202 | 
203 |     let (dir, name) = if path.is_dir() {
204 |         // If the current path is C:\ and the user selects "..", we want to
205 |         // navigate to the drive picker. Since `path::normalize` will turn C:\.. into C:\,
206 |         // we can detect this by checking if the length of the path didn't change.
207 |         let dir = if cfg!(windows)
208 |             && state.file_picker_pending_name == Path::new("..")
209 |             // It's unnecessary to check the contents of the paths.
210 |             && old_path.as_os_str().len() == path.as_os_str().len()
211 |         {
212 |             Path::new("")
213 |         } else {
214 |             path.as_path()
215 |         };
216 |         (dir, PathBuf::new())
217 |     } else {
218 |         let dir = path.parent().unwrap_or(&path);
219 |         let name = path.file_name().map_or(Default::default(), |s| s.into());
220 |         (dir, name)
221 |     };
222 |     if dir != state.file_picker_pending_dir.as_path() {
223 |         state.file_picker_pending_dir = DisplayablePathBuf::from_path(dir.to_path_buf());
224 |         state.file_picker_entries = None;
225 |     }
226 | 
227 |     state.file_picker_pending_name = name;
228 |     if state.file_picker_pending_name.as_os_str().is_empty() { None } else { Some(path) }
229 | }
230 | 
231 | fn draw_dialog_saveas_refresh_files(state: &mut State) {
232 |     let dir = state.file_picker_pending_dir.as_path();
233 |     let mut files = Vec::new();
234 |     let mut off = 0;
235 | 
236 |     #[cfg(windows)]
237 |     if dir.as_os_str().is_empty() {
238 |         // If the path is empty, we are at the drive picker.
239 |         // Add all drives as entries.
240 |         for drive in edit::sys::drives() {
241 |             files.push(DisplayablePathBuf::from_string(format!("{drive}:\\")));
242 |         }
243 | 
244 |         state.file_picker_entries = Some(files);
245 |         return;
246 |     }
247 | 
248 |     if cfg!(windows) || dir.parent().is_some() {
249 |         files.push(DisplayablePathBuf::from(".."));
250 |         off = 1;
251 |     }
252 | 
253 |     if let Ok(iter) = fs::read_dir(dir) {
254 |         for entry in iter.flatten() {
255 |             if let Ok(metadata) = entry.metadata() {
256 |                 let mut name = entry.file_name();
257 |                 if metadata.is_dir()
258 |                     || (metadata.is_symlink()
259 |                         && fs::metadata(entry.path()).is_ok_and(|m| m.is_dir()))
260 |                 {
261 |                     name.push("/");
262 |                 }
263 |                 files.push(DisplayablePathBuf::from(name));
264 |             }
265 |         }
266 |     }
267 | 
268 |     // Sort directories first, then by name, case-insensitive.
269 |     files[off..].sort_by(|a, b| {
270 |         let a = a.as_bytes();
271 |         let b = b.as_bytes();
272 | 
273 |         let a_is_dir = a.last() == Some(&b'/');
274 |         let b_is_dir = b.last() == Some(&b'/');
275 | 
276 |         match b_is_dir.cmp(&a_is_dir) {
277 |             Ordering::Equal => icu::compare_strings(a, b),
278 |             other => other,
279 |         }
280 |     });
281 | 
282 |     state.file_picker_entries = Some(files);
283 | }
284 | 


--------------------------------------------------------------------------------
/src/bin/edit/draw_menubar.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | use edit::arena_format;
  5 | use edit::helpers::*;
  6 | use edit::input::{kbmod, vk};
  7 | use edit::tui::*;
  8 | 
  9 | use crate::localization::*;
 10 | use crate::state::*;
 11 | 
/// Draws the menubar with its File, Edit, View and Help menus.
///
/// The Edit menu is only offered while a document is open.
pub fn draw_menubar(ctx: &mut Context, state: &mut State) {
    ctx.menubar_begin();
    ctx.attr_background_rgba(state.menubar_color_bg);
    ctx.attr_foreground_rgba(state.menubar_color_fg);
    {
        // Captured before any menu is drawn.
        let contains_focus = ctx.contains_focus();

        if ctx.menubar_menu_begin(loc(LocId::File), 'F') {
            draw_menu_file(ctx, state);
        }
        // F10 moves the focus to the menubar, unless the focus is already there.
        // NOTE(review): `steal_focus` presumably applies to the most recently
        // drawn element, i.e. the File menu, which would be why this check
        // sits right after it. Confirm against `tui.rs`.
        if !contains_focus && ctx.consume_shortcut(vk::F10) {
            ctx.steal_focus();
        }
        if state.documents.active().is_some() && ctx.menubar_menu_begin(loc(LocId::Edit), 'E') {
            draw_menu_edit(ctx, state);
        }
        if ctx.menubar_menu_begin(loc(LocId::View), 'V') {
            draw_menu_view(ctx, state);
        }
        if ctx.menubar_menu_begin(loc(LocId::Help), 'H') {
            draw_menu_help(ctx, state);
        }
    }
    ctx.menubar_end();
}
 37 | 
/// Draws the contents of the File menu.
///
/// Except for "New", the entries don't act right away. They only set the
/// corresponding `wants_*` field in `state`. Save, Save As and Close are
/// only offered while a document is open.
fn draw_menu_file(ctx: &mut Context, state: &mut State) {
    if ctx.menubar_menu_button(loc(LocId::FileNew), 'N', kbmod::CTRL | vk::N) {
        draw_add_untitled_document(ctx, state);
    }
    if ctx.menubar_menu_button(loc(LocId::FileOpen), 'O', kbmod::CTRL | vk::O) {
        state.wants_file_picker = StateFilePicker::Open;
    }
    if state.documents.active().is_some() {
        if ctx.menubar_menu_button(loc(LocId::FileSave), 'S', kbmod::CTRL | vk::S) {
            state.wants_save = true;
        }
        // Save As is the only entry that passes `vk::NULL` instead of a key combination.
        if ctx.menubar_menu_button(loc(LocId::FileSaveAs), 'A', vk::NULL) {
            state.wants_file_picker = StateFilePicker::SaveAs;
        }
        if ctx.menubar_menu_button(loc(LocId::FileClose), 'C', kbmod::CTRL | vk::W) {
            state.wants_close = true;
        }
    }
    if ctx.menubar_menu_button(loc(LocId::FileExit), 'X', kbmod::CTRL | vk::Q) {
        state.wants_exit = true;
    }
    ctx.menubar_menu_end();
}
 61 | 
 62 | fn draw_menu_edit(ctx: &mut Context, state: &mut State) {
 63 |     let doc = state.documents.active().unwrap();
 64 |     let mut tb = doc.buffer.borrow_mut();
 65 | 
 66 |     if ctx.menubar_menu_button(loc(LocId::EditUndo), 'U', kbmod::CTRL | vk::Z) {
 67 |         tb.undo();
 68 |         ctx.needs_rerender();
 69 |     }
 70 |     if ctx.menubar_menu_button(loc(LocId::EditRedo), 'R', kbmod::CTRL | vk::Y) {
 71 |         tb.redo();
 72 |         ctx.needs_rerender();
 73 |     }
 74 |     if ctx.menubar_menu_button(loc(LocId::EditCut), 'T', kbmod::CTRL | vk::X) {
 75 |         ctx.set_clipboard(tb.extract_selection(true));
 76 |     }
 77 |     if ctx.menubar_menu_button(loc(LocId::EditCopy), 'C', kbmod::CTRL | vk::C) {
 78 |         ctx.set_clipboard(tb.extract_selection(false));
 79 |     }
 80 |     if ctx.menubar_menu_button(loc(LocId::EditPaste), 'P', kbmod::CTRL | vk::V) {
 81 |         tb.write(ctx.clipboard(), true);
 82 |         ctx.needs_rerender();
 83 |     }
 84 |     if state.wants_search.kind != StateSearchKind::Disabled {
 85 |         if ctx.menubar_menu_button(loc(LocId::EditFind), 'F', kbmod::CTRL | vk::F) {
 86 |             state.wants_search.kind = StateSearchKind::Search;
 87 |             state.wants_search.focus = true;
 88 |         }
 89 |         if ctx.menubar_menu_button(loc(LocId::EditReplace), 'L', kbmod::CTRL | vk::R) {
 90 |             state.wants_search.kind = StateSearchKind::Replace;
 91 |             state.wants_search.focus = true;
 92 |         }
 93 |     }
 94 |     if ctx.menubar_menu_button(loc(LocId::EditSelectAll), 'A', kbmod::CTRL | vk::A) {
 95 |         tb.select_all();
 96 |         ctx.needs_rerender();
 97 |     }
 98 |     ctx.menubar_menu_end();
 99 | }
100 | 
101 | fn draw_menu_view(ctx: &mut Context, state: &mut State) {
102 |     if ctx.menubar_menu_button(loc(LocId::ViewFocusStatusbar), 'S', vk::NULL) {
103 |         state.wants_statusbar_focus = true;
104 |     }
105 | 
106 |     if let Some(doc) = state.documents.active() {
107 |         let mut tb = doc.buffer.borrow_mut();
108 |         let word_wrap = tb.is_word_wrap_enabled();
109 | 
110 |         if ctx.menubar_menu_button(loc(LocId::ViewDocumentPicker), 'P', kbmod::CTRL | vk::P) {
111 |             state.wants_document_picker = true;
112 |         }
113 |         if ctx.menubar_menu_button(loc(LocId::FileGoto), 'G', kbmod::CTRL | vk::G) {
114 |             state.wants_goto = true;
115 |         }
116 |         if ctx.menubar_menu_checkbox(loc(LocId::ViewWordWrap), 'W', kbmod::ALT | vk::Z, word_wrap) {
117 |             tb.set_word_wrap(!word_wrap);
118 |             ctx.needs_rerender();
119 |         }
120 |     }
121 | 
122 |     ctx.menubar_menu_end();
123 | }
124 | 
125 | fn draw_menu_help(ctx: &mut Context, state: &mut State) {
126 |     if ctx.menubar_menu_button(loc(LocId::HelpAbout), 'A', vk::NULL) {
127 |         state.wants_about = true;
128 |     }
129 |     ctx.menubar_menu_end();
130 | }
131 | 
132 | pub fn draw_dialog_about(ctx: &mut Context, state: &mut State) {
133 |     ctx.modal_begin("about", loc(LocId::AboutDialogTitle));
134 |     {
135 |         ctx.block_begin("content");
136 |         ctx.inherit_focus();
137 |         ctx.attr_padding(Rect::three(1, 2, 1));
138 |         {
139 |             ctx.label("description", "Microsoft Edit");
140 |             ctx.attr_overflow(Overflow::TruncateTail);
141 |             ctx.attr_position(Position::Center);
142 | 
143 |             ctx.label(
144 |                 "version",
145 |                 &arena_format!(
146 |                     ctx.arena(),
147 |                     "{}{}",
148 |                     loc(LocId::AboutDialogVersion),
149 |                     env!("CARGO_PKG_VERSION")
150 |                 ),
151 |             );
152 |             ctx.attr_overflow(Overflow::TruncateHead);
153 |             ctx.attr_position(Position::Center);
154 | 
155 |             ctx.label("copyright", "Copyright (c) Microsoft Corp 2025");
156 |             ctx.attr_overflow(Overflow::TruncateTail);
157 |             ctx.attr_position(Position::Center);
158 | 
159 |             ctx.block_begin("choices");
160 |             ctx.inherit_focus();
161 |             ctx.attr_padding(Rect::three(1, 2, 0));
162 |             ctx.attr_position(Position::Center);
163 |             {
164 |                 if ctx.button("ok", loc(LocId::Ok), ButtonStyle::default()) {
165 |                     state.wants_about = false;
166 |                 }
167 |                 ctx.inherit_focus();
168 |             }
169 |             ctx.block_end();
170 |         }
171 |         ctx.block_end();
172 |     }
173 |     if ctx.modal_end() {
174 |         state.wants_about = false;
175 |     }
176 | }
177 | 


--------------------------------------------------------------------------------
/src/bin/edit/edit.exe.manifest:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 2 | <assembly
 3 |     xmlns="urn:schemas-microsoft-com:asm.v1"
 4 |     xmlns:asmv3="urn:schemas-microsoft-com:asm.v3"
 5 |     xmlns:cv1="urn:schemas-microsoft-com:compatibility.v1"
 6 |     xmlns:ws2="http://schemas.microsoft.com/SMI/2016/WindowsSettings"
 7 |     xmlns:ws3="http://schemas.microsoft.com/SMI/2019/WindowsSettings"
 8 |     xmlns:ws4="http://schemas.microsoft.com/SMI/2020/WindowsSettings"
 9 |     manifestVersion="1.0">
10 |     <asmv3:application>
11 |         <windowsSettings>
12 |             <ws2:longPathAware>true</ws2:longPathAware>
13 |             <ws3:activeCodePage>UTF-8</ws3:activeCodePage>
14 |             <ws4:heapType>SegmentHeap</ws4:heapType>
15 |         </windowsSettings>
16 |     </asmv3:application>
17 |     <cv1:compatibility>
18 |         <application>
19 |             <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
20 |         </application>
21 |     </cv1:compatibility>
22 | </assembly>
23 | 


--------------------------------------------------------------------------------
/src/bin/edit/state.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | use std::borrow::Cow;
  5 | use std::ffi::{OsStr, OsString};
  6 | use std::mem;
  7 | use std::path::{Path, PathBuf};
  8 | 
  9 | use edit::framebuffer::IndexedColor;
 10 | use edit::helpers::*;
 11 | use edit::tui::*;
 12 | use edit::{apperr, buffer, icu, sys};
 13 | 
 14 | use crate::documents::DocumentManager;
 15 | use crate::localization::*;
 16 | 
 17 | #[repr(transparent)]
 18 | pub struct FormatApperr(apperr::Error);
 19 | 
 20 | impl From<apperr::Error> for FormatApperr {
 21 |     fn from(err: apperr::Error) -> Self {
 22 |         Self(err)
 23 |     }
 24 | }
 25 | 
 26 | impl std::fmt::Display for FormatApperr {
 27 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 28 |         match self.0 {
 29 |             apperr::APP_ICU_MISSING => f.write_str(loc(LocId::ErrorIcuMissing)),
 30 |             apperr::Error::App(code) => write!(f, "Unknown app error code: {code}"),
 31 |             apperr::Error::Icu(code) => icu::apperr_format(f, code),
 32 |             apperr::Error::Sys(code) => sys::apperr_format(f, code),
 33 |         }
 34 |     }
 35 | }
 36 | 
 37 | pub struct DisplayablePathBuf {
 38 |     value: PathBuf,
 39 |     str: Cow<'static, str>,
 40 | }
 41 | 
 42 | impl DisplayablePathBuf {
 43 |     #[allow(dead_code, reason = "only used on Windows")]
 44 |     pub fn from_string(string: String) -> Self {
 45 |         let str = Cow::Borrowed(string.as_str());
 46 |         let str = unsafe { mem::transmute::<Cow<'_, str>, Cow<'_, str>>(str) };
 47 |         let value = PathBuf::from(string);
 48 |         Self { value, str }
 49 |     }
 50 | 
 51 |     pub fn from_path(value: PathBuf) -> Self {
 52 |         let str = value.to_string_lossy();
 53 |         let str = unsafe { mem::transmute::<Cow<'_, str>, Cow<'_, str>>(str) };
 54 |         Self { value, str }
 55 |     }
 56 | 
 57 |     pub fn as_path(&self) -> &Path {
 58 |         &self.value
 59 |     }
 60 | 
 61 |     pub fn as_str(&self) -> &str {
 62 |         &self.str
 63 |     }
 64 | 
 65 |     pub fn as_bytes(&self) -> &[u8] {
 66 |         self.value.as_os_str().as_encoded_bytes()
 67 |     }
 68 | }
 69 | 
 70 | impl Default for DisplayablePathBuf {
 71 |     fn default() -> Self {
 72 |         Self { value: Default::default(), str: Cow::Borrowed("") }
 73 |     }
 74 | }
 75 | 
 76 | impl Clone for DisplayablePathBuf {
 77 |     fn clone(&self) -> Self {
 78 |         Self::from_path(self.value.clone())
 79 |     }
 80 | }
 81 | 
 82 | impl From<OsString> for DisplayablePathBuf {
 83 |     fn from(s: OsString) -> Self {
 84 |         Self::from_path(PathBuf::from(s))
 85 |     }
 86 | }
 87 | 
 88 | impl<T: ?Sized + AsRef<OsStr>> From<&T> for DisplayablePathBuf {
 89 |     fn from(s: &T) -> Self {
 90 |         Self::from_path(PathBuf::from(s))
 91 |     }
 92 | }
 93 | 
/// Requested state of the search UI.
pub struct StateSearch {
    /// Which search mode is requested (or whether search is hidden / disabled).
    pub kind: StateSearchKind,
    /// Set to `true` by the Edit menu's Find / Replace entries together with `kind`.
    pub focus: bool,
}
 98 | 
/// The modes `StateSearch::kind` can be in.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum StateSearchKind {
    /// Initial value used by `State::new`.
    Hidden,
    /// While in this state the Edit menu omits its Find and Replace entries.
    Disabled,
    /// Set by the Edit menu's "Find" entry.
    Search,
    /// Set by the Edit menu's "Replace" entry.
    Replace,
}
106 | 
/// The modes `State::wants_file_picker` can be in.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum StateFilePicker {
    /// Initial value used by `State::new`.
    None,
    /// Set by the File menu's "Open" entry.
    Open,
    /// Set by the File menu's "Save As" entry.
    SaveAs,

    SaveAsShown, // Transitioned from SaveAs
}
115 | 
/// The values `State::wants_encoding_change` can take.
/// `State::new` starts out with `None`.
// NOTE(review): judging by the names, `Convert` re-encodes the current content while
// `Reopen` re-reads the file with another encoding - confirm against the users.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum StateEncodingChange {
    None,
    Convert,
    Reopen,
}
122 | 
/// The application's UI state, shared by the various `draw_*` functions.
///
/// Most `wants_*` fields are requests: the menus set them and other parts of
/// the application are expected to act on them.
pub struct State {
    /// Background color (RGBA) that `draw_menubar` applies to the menubar.
    pub menubar_color_bg: u32,
    /// Foreground color (RGBA) that `draw_menubar` applies to the menubar.
    pub menubar_color_fg: u32,

    /// The document manager; `documents.active()` yields the current document, if any.
    pub documents: DocumentManager,

    // A ring buffer of the last 10 errors.
    pub error_log: [String; 10],
    /// Slot of `error_log` that the next error will be written to.
    pub error_log_index: usize,
    /// Number of valid entries in `error_log`; reset to 0 when the error dialog is dismissed.
    pub error_log_count: usize,

    /// Set by the File menu's "Open" and "Save As" entries.
    pub wants_file_picker: StateFilePicker,
    pub file_picker_pending_dir: DisplayablePathBuf,
    pub file_picker_pending_dir_revision: u64, // Bumped every time `file_picker_pending_dir` changes.
    pub file_picker_pending_name: PathBuf,
    pub file_picker_entries: Option<Vec<DisplayablePathBuf>>,
    pub file_picker_overwrite_warning: Option<PathBuf>, // The path the warning is about.

    /// Set by the Edit menu's "Find" and "Replace" entries.
    pub wants_search: StateSearch,
    pub search_needle: String,
    pub search_replacement: String,
    pub search_options: buffer::SearchOptions,
    pub search_success: bool,

    pub wants_encoding_picker: bool,
    pub encoding_picker_needle: String,
    pub encoding_picker_results: Option<Vec<icu::Encoding>>,

    /// Set by the File menu's "Save" entry.
    pub wants_save: bool,
    /// Set by the View menu's "Focus statusbar" entry.
    pub wants_statusbar_focus: bool,
    pub wants_encoding_change: StateEncodingChange,
    pub wants_indentation_picker: bool,
    /// Set by the View menu's document picker entry.
    pub wants_document_picker: bool,
    /// Set by the Help menu's "About" entry; cleared when the About dialog is dismissed.
    pub wants_about: bool,
    /// Set by the File menu's "Close" entry.
    pub wants_close: bool,
    /// Set by the File menu's "Exit" entry.
    pub wants_exit: bool,
    /// Set by the View menu's go-to entry.
    pub wants_goto: bool,
    pub goto_target: String,
    pub goto_invalid: bool,

    pub osc_title_filename: String,
    pub osc_clipboard_seen_generation: u32,
    pub osc_clipboard_send_generation: u32,
    pub osc_clipboard_always_send: bool,
    pub exit: bool,
}
169 | 
170 | impl State {
171 |     pub fn new() -> apperr::Result<Self> {
172 |         Ok(Self {
173 |             menubar_color_bg: 0,
174 |             menubar_color_fg: 0,
175 | 
176 |             documents: Default::default(),
177 | 
178 |             error_log: [const { String::new() }; 10],
179 |             error_log_index: 0,
180 |             error_log_count: 0,
181 | 
182 |             wants_file_picker: StateFilePicker::None,
183 |             file_picker_pending_dir: Default::default(),
184 |             file_picker_pending_dir_revision: 0,
185 |             file_picker_pending_name: Default::default(),
186 |             file_picker_entries: None,
187 |             file_picker_overwrite_warning: None,
188 | 
189 |             wants_search: StateSearch { kind: StateSearchKind::Hidden, focus: false },
190 |             search_needle: Default::default(),
191 |             search_replacement: Default::default(),
192 |             search_options: Default::default(),
193 |             search_success: true,
194 | 
195 |             wants_encoding_picker: false,
196 |             encoding_picker_needle: Default::default(),
197 |             encoding_picker_results: Default::default(),
198 | 
199 |             wants_save: false,
200 |             wants_statusbar_focus: false,
201 |             wants_encoding_change: StateEncodingChange::None,
202 |             wants_indentation_picker: false,
203 |             wants_document_picker: false,
204 |             wants_about: false,
205 |             wants_close: false,
206 |             wants_exit: false,
207 |             wants_goto: false,
208 |             goto_target: Default::default(),
209 |             goto_invalid: false,
210 | 
211 |             osc_title_filename: Default::default(),
212 |             osc_clipboard_seen_generation: 0,
213 |             osc_clipboard_send_generation: 0,
214 |             osc_clipboard_always_send: false,
215 |             exit: false,
216 |         })
217 |     }
218 | }
219 | 
220 | pub fn draw_add_untitled_document(ctx: &mut Context, state: &mut State) {
221 |     if let Err(err) = state.documents.add_untitled() {
222 |         error_log_add(ctx, state, err);
223 |     }
224 | }
225 | 
226 | pub fn error_log_add(ctx: &mut Context, state: &mut State, err: apperr::Error) {
227 |     let msg = format!("{}", FormatApperr::from(err));
228 |     if !msg.is_empty() {
229 |         state.error_log[state.error_log_index] = msg;
230 |         state.error_log_index = (state.error_log_index + 1) % state.error_log.len();
231 |         state.error_log_count = state.error_log.len().min(state.error_log_count + 1);
232 |         ctx.needs_rerender();
233 |     }
234 | }
235 | 
236 | pub fn draw_error_log(ctx: &mut Context, state: &mut State) {
237 |     ctx.modal_begin("error", loc(LocId::ErrorDialogTitle));
238 |     ctx.attr_background_rgba(ctx.indexed(IndexedColor::Red));
239 |     ctx.attr_foreground_rgba(ctx.indexed(IndexedColor::BrightWhite));
240 |     {
241 |         ctx.block_begin("content");
242 |         ctx.attr_padding(Rect::three(0, 2, 1));
243 |         {
244 |             let off = state.error_log_index + state.error_log.len() - state.error_log_count;
245 | 
246 |             for i in 0..state.error_log_count {
247 |                 let idx = (off + i) % state.error_log.len();
248 |                 let msg = &state.error_log[idx][..];
249 | 
250 |                 if !msg.is_empty() {
251 |                     ctx.next_block_id_mixin(i as u64);
252 |                     ctx.label("error", msg);
253 |                     ctx.attr_overflow(Overflow::TruncateTail);
254 |                 }
255 |             }
256 |         }
257 |         ctx.block_end();
258 | 
259 |         if ctx.button("ok", loc(LocId::Ok), ButtonStyle::default()) {
260 |             state.error_log_count = 0;
261 |         }
262 |         ctx.attr_position(Position::Center);
263 |         ctx.inherit_focus();
264 |     }
265 |     if ctx.modal_end() {
266 |         state.error_log_count = 0;
267 |     }
268 | }
269 | 


--------------------------------------------------------------------------------
/src/buffer/line_cache.rs:
--------------------------------------------------------------------------------
  1 | use std::ops::Range;
  2 | 
  3 | use crate::{document::ReadableDocument, simd::memchr2};
  4 | 
/// Cache a line/offset pair every CACHE_EVERY lines to speed up line/offset calculations
const CACHE_EVERY: usize = 1024 * 64;

/// A single cached line/offset pair.
#[derive(Clone)]
pub struct CachePoint {
    /// Byte offset into the document. `LineCache::from_document` stores
    /// the offset of a `\n` byte here.
    pub index: usize,
    /// Number of `\n` bytes that `LineCache::from_document` counted before `index`.
    pub line: usize,
    // pub snapshot: ParserSnapshot
}

/// A list of `CachePoint`s for a document.
/// `nearest_offset` binary-searches the list by line, which requires it to stay sorted.
pub struct LineCache {
    cache: Vec<CachePoint>,
}
 18 | 
 19 | impl LineCache {
 20 |     pub fn new() -> Self {
 21 |         Self { cache: vec![] }
 22 |     }
 23 | 
 24 |     pub fn from_document<T: ReadableDocument>(&mut self, document: &T) {
 25 |         self.cache.clear();
 26 | 
 27 |         let mut offset = 0;
 28 |         let mut line = 0;
 29 |         loop {
 30 |             let text = document.read_forward(offset);
 31 |             if text.is_empty() { return; }
 32 |             
 33 |             let mut off = 0;
 34 |             loop {
 35 |                 off = memchr2(b'\n', b'\n', text, off);
 36 |                 if off == text.len() { break; }
 37 | 
 38 |                 if line % CACHE_EVERY == 0 {
 39 |                     self.cache.push(CachePoint { index: offset+off, line });
 40 |                 }
 41 |                 line += 1;
 42 |                 off += 1;
 43 |             }
 44 | 
 45 |             offset += text.len();
 46 |         }
 47 |     }
 48 | 
 49 |     /// Updates the cache after a deletion.
 50 |     /// `range` is the deleted byte range, and `text` is the content that was deleted.
 51 |     pub fn delete(&mut self, range: Range<usize>, text: &Vec<u8>) {
 52 |         let mut newlines = 0;
 53 |         for c in text {
 54 |             if *c == b'\n' {
 55 |                 newlines += 1;
 56 |             }
 57 |         }
 58 | 
 59 |         let mut beg_del = None;
 60 |         let mut end_del = None;
 61 |         for (i, point) in self.cache.iter_mut().enumerate() {
 62 |             if point.index >= range.start {
 63 |                 if point.index < range.end {
 64 |                     // cache point is within the deleted range
 65 |                     if beg_del.is_none() { beg_del = Some(i); }
 66 |                     end_del = Some(i + 1);
 67 |                 }
 68 |                 else {
 69 |                     point.index -= text.len();
 70 |                     point.line -= newlines;
 71 |                 }
 72 |             }
 73 |         }
 74 | 
 75 |         if let (Some(beg), Some(end)) = (beg_del, end_del) {
 76 |             self.cache.drain(beg..end);
 77 |         }
 78 |     }
 79 | 
 80 |     /// Updates the cache after an insertion.
 81 |     /// `offset` is where the insertion occurs, and `text` is the inserted content.
 82 |     pub fn insert(&mut self, offset: usize, text: &[u8]) {
 83 |         // Count how many newlines were inserted
 84 |         let mut newlines = 0;
 85 |         for c in text {
 86 |             if *c == b'\n' {
 87 |                 newlines += 1;
 88 |             }
 89 |         }
 90 | 
 91 |         let len = text.len();
 92 |         for point in &mut self.cache {
 93 |             if point.index > offset {
 94 |                 point.index += len;
 95 |                 point.line += newlines;
 96 |             }
 97 |         }
 98 | 
 99 |         // TODO: This also needs to insert new cache points
100 |     }
101 | 
102 |     /// Finds the nearest cached line-offset pair relative to a target line.
103 |     /// If `reverse` is false, it returns the closest *before* the target.
104 |     /// If `reverse` is true, it returns the closest *after or at* the target.
105 |     pub fn nearest_offset(&self, target_count: usize, reverse: bool) -> Option<CachePoint> {
106 |         match self.cache.binary_search_by_key(&target_count, |p| p.line) {
107 |             Ok(i) => Some(self.cache[i].clone()),
108 |             Err(i) => {
109 |                 if i == 0 || i == self.cache.len() { None }  // target < lowest cache point || target > highest cache point
110 |                 else {
111 |                     Some(self.cache[ if reverse {i} else {i-1} ].clone())
112 |                 }
113 |             }
114 |         }
115 |     }
116 | }
117 | 


--------------------------------------------------------------------------------
/src/buffer/navigation.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | use std::ops::Range;
  5 | 
  6 | use crate::document::ReadableDocument;
  7 | 
/// The class of a single byte, as used by the word navigation and selection logic.
#[derive(Clone, Copy, PartialEq, Eq)]
enum CharClass {
    /// Space and tab.
    Whitespace,
    /// `\n` and `\r`.
    Newline,
    /// The ASCII separators passed to `construct_classifier`.
    Separator,
    /// Every other byte.
    Word,
}
 15 | 
 16 | const fn construct_classifier(separators: &[u8]) -> [CharClass; 256] {
 17 |     let mut classifier = [CharClass::Word; 256];
 18 | 
 19 |     classifier[b' ' as usize] = CharClass::Whitespace;
 20 |     classifier[b'\t' as usize] = CharClass::Whitespace;
 21 |     classifier[b'\n' as usize] = CharClass::Newline;
 22 |     classifier[b'\r' as usize] = CharClass::Newline;
 23 | 
 24 |     let mut i = 0;
 25 |     let len = separators.len();
 26 |     while i < len {
 27 |         let ch = separators[i];
 28 |         assert!(ch < 128, "Only ASCII separators are supported.");
 29 |         classifier[ch as usize] = CharClass::Separator;
 30 |         i += 1;
 31 |     }
 32 | 
 33 |     classifier
 34 | }
 35 | 
/// Lookup table that maps every possible byte value to its [`CharClass`].
/// The string lists the ASCII punctuation characters that count as separators.
const WORD_CLASSIFIER: [CharClass; 256] =
    construct_classifier(br#"`~!@#$%^&*()-=+[{]}\|;:'",.<>/?"#);
 38 | 
 39 | /// Finds the next word boundary given a document cursor offset.
 40 | /// Returns the offset of the next word boundary.
 41 | pub fn word_forward(doc: &dyn ReadableDocument, offset: usize) -> usize {
 42 |     word_navigation(WordForward { doc, offset, chunk: &[], chunk_off: 0 })
 43 | }
 44 | 
 45 | /// The backward version of `word_forward`.
 46 | pub fn word_backward(doc: &dyn ReadableDocument, offset: usize) -> usize {
 47 |     word_navigation(WordBackward { doc, offset, chunk: &[], chunk_off: 0 })
 48 | }
 49 | 
 50 | /// Word navigation implementation. Matches the behavior of VS Code.
 51 | fn word_navigation<T: WordNavigation>(mut nav: T) -> usize {
 52 |     // First, fill `self.chunk` with at least 1 grapheme.
 53 |     nav.read();
 54 | 
 55 |     // Skip one newline, if any.
 56 |     nav.skip_newline();
 57 | 
 58 |     // Skip any whitespace.
 59 |     nav.skip_class(CharClass::Whitespace);
 60 | 
 61 |     // Skip one word or separator and take note of the class.
 62 |     let class = nav.peek(CharClass::Whitespace);
 63 |     if matches!(class, CharClass::Separator | CharClass::Word) {
 64 |         nav.next();
 65 | 
 66 |         let off = nav.offset();
 67 | 
 68 |         // Continue skipping the same class.
 69 |         nav.skip_class(class);
 70 | 
 71 |         // If the class was a separator and we only moved one character,
 72 |         // continue skipping characters of the word class.
 73 |         if off == nav.offset() && class == CharClass::Separator {
 74 |             nav.skip_class(CharClass::Word);
 75 |         }
 76 |     }
 77 | 
 78 |     nav.offset()
 79 | }
 80 | 
/// The primitive cursor operations that `word_navigation` is built from.
/// Implemented once per direction by `WordForward` and `WordBackward`.
trait WordNavigation {
    /// Loads the chunk of the document at the current offset.
    fn read(&mut self);
    /// Steps over a newline, if the cursor is right at one.
    fn skip_newline(&mut self);
    /// Steps over all consecutive characters of the given `class`,
    /// loading further chunks as needed.
    fn skip_class(&mut self, class: CharClass);
    /// Returns the class of the next character in the direction of travel,
    /// or `default` if the current chunk has no such character.
    fn peek(&self, default: CharClass) -> CharClass;
    /// Steps over a single byte.
    fn next(&mut self);
    /// Returns the cursor's current offset within the document.
    fn offset(&self) -> usize;
}
 89 | 
 90 | struct WordForward<'a> {
 91 |     doc: &'a dyn ReadableDocument,
 92 |     offset: usize,
 93 |     chunk: &'a [u8],
 94 |     chunk_off: usize,
 95 | }
 96 | 
 97 | impl WordNavigation for WordForward<'_> {
 98 |     fn read(&mut self) {
 99 |         self.chunk = self.doc.read_forward(self.offset);
100 |         self.chunk_off = 0;
101 |     }
102 | 
103 |     fn skip_newline(&mut self) {
104 |         // We can rely on the fact that the document does not split graphemes across chunks.
105 |         // = If there's a newline it's wholly contained in this chunk.
106 |         // Unlike with `WordBackward`, we can't check for CR and LF separately as only a CR followed
107 |         // by a LF is a newline. A lone CR in the document is just a regular control character.
108 |         self.chunk_off += match self.chunk.get(self.chunk_off) {
109 |             Some(&b'\n') => 1,
110 |             Some(&b'\r') if self.chunk.get(self.chunk_off + 1) == Some(&b'\n') => 2,
111 |             _ => 0,
112 |         }
113 |     }
114 | 
115 |     fn skip_class(&mut self, class: CharClass) {
116 |         while !self.chunk.is_empty() {
117 |             while self.chunk_off < self.chunk.len() {
118 |                 if WORD_CLASSIFIER[self.chunk[self.chunk_off] as usize] != class {
119 |                     return;
120 |                 }
121 |                 self.chunk_off += 1;
122 |             }
123 | 
124 |             self.offset += self.chunk.len();
125 |             self.chunk = self.doc.read_forward(self.offset);
126 |             self.chunk_off = 0;
127 |         }
128 |     }
129 | 
130 |     fn peek(&self, default: CharClass) -> CharClass {
131 |         if self.chunk_off < self.chunk.len() {
132 |             WORD_CLASSIFIER[self.chunk[self.chunk_off] as usize]
133 |         } else {
134 |             default
135 |         }
136 |     }
137 | 
138 |     fn next(&mut self) {
139 |         self.chunk_off += 1;
140 |     }
141 | 
142 |     fn offset(&self) -> usize {
143 |         self.offset + self.chunk_off
144 |     }
145 | }
146 | 
147 | struct WordBackward<'a> {
148 |     doc: &'a dyn ReadableDocument,
149 |     offset: usize,
150 |     chunk: &'a [u8],
151 |     chunk_off: usize,
152 | }
153 | 
154 | impl WordNavigation for WordBackward<'_> {
155 |     fn read(&mut self) {
156 |         self.chunk = self.doc.read_backward(self.offset);
157 |         self.chunk_off = self.chunk.len();
158 |     }
159 | 
160 |     fn skip_newline(&mut self) {
161 |         // We can rely on the fact that the document does not split graphemes across chunks.
162 |         // = If there's a newline it's wholly contained in this chunk.
163 |         if self.chunk_off > 0 && self.chunk[self.chunk_off - 1] == b'\n' {
164 |             self.chunk_off -= 1;
165 |         }
166 |         if self.chunk_off > 0 && self.chunk[self.chunk_off - 1] == b'\r' {
167 |             self.chunk_off -= 1;
168 |         }
169 |     }
170 | 
171 |     fn skip_class(&mut self, class: CharClass) {
172 |         while !self.chunk.is_empty() {
173 |             while self.chunk_off > 0 {
174 |                 if WORD_CLASSIFIER[self.chunk[self.chunk_off - 1] as usize] != class {
175 |                     return;
176 |                 }
177 |                 self.chunk_off -= 1;
178 |             }
179 | 
180 |             self.offset -= self.chunk.len();
181 |             self.chunk = self.doc.read_backward(self.offset);
182 |             self.chunk_off = self.chunk.len();
183 |         }
184 |     }
185 | 
186 |     fn peek(&self, default: CharClass) -> CharClass {
187 |         if self.chunk_off > 0 {
188 |             WORD_CLASSIFIER[self.chunk[self.chunk_off - 1] as usize]
189 |         } else {
190 |             default
191 |         }
192 |     }
193 | 
194 |     fn next(&mut self) {
195 |         self.chunk_off -= 1;
196 |     }
197 | 
198 |     fn offset(&self) -> usize {
199 |         self.offset - self.chunk.len() + self.chunk_off
200 |     }
201 | }
202 | 
/// Returns the offset range of the "word" at the given offset.
/// Does not cross newlines. Works similar to VS Code.
///
/// The "word" is the run of bytes around `offset` that share one character class.
/// The class is taken from the byte at `offset`, or from the byte before it if
/// `offset` is at the end of the document or at a newline. If neither yields a
/// non-newline class, the returned range is empty (`offset..offset`).
pub fn word_select(doc: &dyn ReadableDocument, offset: usize) -> Range<usize> {
    let mut beg = offset;
    let mut end = offset;
    // `Newline` doubles as the "class not determined yet" marker.
    let mut class = CharClass::Newline;

    // Step 1: Extend `end` forward over the run of the same class.
    let mut chunk = doc.read_forward(end);
    if !chunk.is_empty() {
        // Not at the end of the document? Great!
        // We default to using the next char as the class, because in terminals
        // the cursor is usually always to the left of the cell you clicked on.
        class = WORD_CLASSIFIER[chunk[0] as usize];

        let mut chunk_off = 0;

        // Select the word, unless we hit a newline.
        if class != CharClass::Newline {
            loop {
                chunk_off += 1;
                end += 1;

                // Ran off the end of this chunk: fetch the next one.
                if chunk_off >= chunk.len() {
                    chunk = doc.read_forward(end);
                    chunk_off = 0;
                    if chunk.is_empty() {
                        break;
                    }
                }

                if WORD_CLASSIFIER[chunk[chunk_off] as usize] != class {
                    break;
                }
            }
        }
    }

    // Step 2: Extend `beg` backward over the run of the same class.
    let mut chunk = doc.read_backward(beg);
    if !chunk.is_empty() {
        let mut chunk_off = chunk.len();

        // If we failed to determine the class, because we hit the end of the document
        // or a newline, we fall back to using the previous character, of course.
        if class == CharClass::Newline {
            class = WORD_CLASSIFIER[chunk[chunk_off - 1] as usize];
        }

        // Select the word, unless we hit a newline.
        if class != CharClass::Newline {
            loop {
                if WORD_CLASSIFIER[chunk[chunk_off - 1] as usize] != class {
                    break;
                }

                chunk_off -= 1;
                beg -= 1;

                // Ran off the start of this chunk: fetch the previous one.
                if chunk_off == 0 {
                    chunk = doc.read_backward(beg);
                    chunk_off = chunk.len();
                    if chunk.is_empty() {
                        break;
                    }
                }
            }
        }
    }

    beg..end
}
273 | 
#[cfg(test)]
mod test {
    use super::*;

    /// Table-driven checks for `word_forward` and `word_backward`.
    /// Each row is `(text, starting offset, expected resulting offset)`.
    #[test]
    fn test_word_navigation() {
        let forward_cases: [(&str, usize, usize); 4] = [
            ("Hello World", 0, 5),
            ("Hello,World", 0, 5),
            ("   Hello", 0, 8),
            ("\n\nHello", 0, 1),
        ];
        for (text, start, expected) in forward_cases {
            assert_eq!(word_forward(&text.as_bytes(), start), expected);
        }

        let backward_cases: [(&str, usize, usize); 4] = [
            ("Hello World", 11, 6),
            ("Hello,World", 10, 6),
            ("Hello   ", 7, 0),
            ("Hello\n\n", 7, 6),
        ];
        for (text, start, expected) in backward_cases {
            assert_eq!(word_backward(&text.as_bytes(), start), expected);
        }
    }
}
291 | 


--------------------------------------------------------------------------------
/src/cell.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Licensed under the MIT License.
 3 | 
 4 | //! [`std::cell::RefCell`], but without runtime checks in release builds.
 5 | 
 6 | #[cfg(debug_assertions)]
 7 | pub use debug::*;
 8 | #[cfg(not(debug_assertions))]
 9 | pub use release::*;
10 | 
/// Debug builds: `SemiRefCell` and its guards are plain aliases for the
/// `std::cell::RefCell` family, which keeps the runtime borrow checks.
#[allow(unused)]
#[cfg(debug_assertions)]
mod debug {
    pub type SemiRefCell<T> = std::cell::RefCell<T>;
    pub type Ref<'b, T> = std::cell::Ref<'b, T>;
    pub type RefMut<'b, T> = std::cell::RefMut<'b, T>;
}
18 | 
#[cfg(not(debug_assertions))]
mod release {
    use std::cell::UnsafeCell;
    use std::ops::{Deref, DerefMut};

    /// Release-build cell: hands out references to the wrapped value
    /// without tracking outstanding borrows.
    #[derive(Default)]
    #[repr(transparent)]
    pub struct SemiRefCell<T>(UnsafeCell<T>);

    impl<T> SemiRefCell<T> {
        /// Wraps `value` in a new cell.
        #[inline(always)]
        pub const fn new(value: T) -> Self {
            Self(UnsafeCell::new(value))
        }

        /// Returns a raw pointer to the wrapped value.
        #[inline(always)]
        pub const fn as_ptr(&self) -> *mut T {
            self.0.get()
        }

        /// Returns a shared handle to the value. No borrow check is performed.
        #[inline(always)]
        pub const fn borrow(&self) -> Ref<'_, T> {
            let shared: &T = unsafe { &*self.as_ptr() };
            Ref(shared)
        }

        /// Returns a mutable handle to the value. No borrow check is performed.
        #[inline(always)]
        pub const fn borrow_mut(&self) -> RefMut<'_, T> {
            let exclusive: &mut T = unsafe { &mut *self.as_ptr() };
            RefMut(exclusive)
        }
    }

    /// Shared handle returned by [`SemiRefCell::borrow`]; a thin wrapper
    /// around a plain reference.
    #[repr(transparent)]
    pub struct Ref<'b, T>(&'b T);

    impl<'b, T> Ref<'b, T> {
        /// Duplicates the handle. An associated function rather than a
        /// method, so it is called as `Ref::clone(&r)`.
        #[inline(always)]
        pub fn clone(orig: &Self) -> Self {
            Self(orig.0)
        }
    }

    impl<T> Deref for Ref<'_, T> {
        type Target = T;

        #[inline(always)]
        fn deref(&self) -> &T {
            self.0
        }
    }

    /// Mutable handle returned by [`SemiRefCell::borrow_mut`]; a thin
    /// wrapper around a plain mutable reference.
    #[repr(transparent)]
    pub struct RefMut<'b, T>(&'b mut T);

    impl<T> Deref for RefMut<'_, T> {
        type Target = T;

        #[inline(always)]
        fn deref(&self) -> &T {
            self.0
        }
    }

    impl<T> DerefMut for RefMut<'_, T> {
        #[inline(always)]
        fn deref_mut(&mut self) -> &mut T {
            self.0
        }
    }
}
85 | 


--------------------------------------------------------------------------------
/src/document.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | //! Abstractions over reading/writing arbitrary text containers.
  5 | 
  6 | use std::ffi::OsString;
  7 | use std::mem;
  8 | use std::ops::Range;
  9 | use std::path::PathBuf;
 10 | 
 11 | use crate::arena::{ArenaString, scratch_arena};
 12 | use crate::helpers::ReplaceRange as _;
 13 | 
/// An abstraction over reading from text containers.
///
/// Contents are handed out as borrowed byte chunks (`&[u8]`), addressed by
/// absolute offsets. A single call may return only part of the remaining
/// data ("some bytes"), so callers are expected to call again with an
/// advanced offset until an empty slice comes back.
pub trait ReadableDocument {
    /// Read some bytes starting at (including) the given absolute offset.
    ///
    /// # Warning
    ///
    /// * Be lenient on inputs:
    ///   * The given offset may be out of bounds and you MUST clamp it.
    ///   * You should not assume that offsets are at grapheme cluster boundaries.
    /// * Be strict on outputs:
    ///   * You MUST NOT break grapheme clusters across chunks.
    ///   * You MUST NOT return an empty slice unless the offset is at or beyond the end.
    fn read_forward(&self, off: usize) -> &[u8];

    /// Read some bytes before (but not including) the given absolute offset.
    ///
    /// # Warning
    ///
    /// * Be lenient on inputs:
    ///   * The given offset may be out of bounds and you MUST clamp it.
    ///   * You should not assume that offsets are at grapheme cluster boundaries.
    /// * Be strict on outputs:
    ///   * You MUST NOT break grapheme clusters across chunks.
    ///   * You MUST NOT return an empty slice unless the offset is zero.
    fn read_backward(&self, off: usize) -> &[u8];
}
 40 | 
/// An abstraction over writing to text containers.
///
/// Every writeable document is also a [`ReadableDocument`].
pub trait WriteableDocument: ReadableDocument {
    /// Replace the given range with the given bytes.
    ///
    /// An empty `range` makes this an insertion and an empty `replacement`
    /// makes it a deletion.
    ///
    /// # Warning
    ///
    /// * The given range may be out of bounds and you MUST clamp it.
    /// * The replacement may not be valid UTF-8.
    fn replace(&mut self, range: Range<usize>, replacement: &[u8]);
}
 51 | 
 52 | impl ReadableDocument for &[u8] {
 53 |     fn read_forward(&self, off: usize) -> &[u8] {
 54 |         let s = *self;
 55 |         &s[off.min(s.len())..]
 56 |     }
 57 | 
 58 |     fn read_backward(&self, off: usize) -> &[u8] {
 59 |         let s = *self;
 60 |         &s[..off.min(s.len())]
 61 |     }
 62 | }
 63 | 
 64 | impl ReadableDocument for String {
 65 |     fn read_forward(&self, off: usize) -> &[u8] {
 66 |         let s = self.as_bytes();
 67 |         &s[off.min(s.len())..]
 68 |     }
 69 | 
 70 |     fn read_backward(&self, off: usize) -> &[u8] {
 71 |         let s = self.as_bytes();
 72 |         &s[..off.min(s.len())]
 73 |     }
 74 | }
 75 | 
 76 | impl WriteableDocument for String {
 77 |     fn replace(&mut self, range: Range<usize>, replacement: &[u8]) {
 78 |         // `replacement` is not guaranteed to be valid UTF-8, so we need to sanitize it.
 79 |         let scratch = scratch_arena(None);
 80 |         let utf8 = ArenaString::from_utf8_lossy(&scratch, replacement);
 81 |         let src = match &utf8 {
 82 |             Ok(s) => s,
 83 |             Err(s) => s.as_str(),
 84 |         };
 85 | 
 86 |         // SAFETY: `range` is guaranteed to be on codepoint boundaries.
 87 |         unsafe { self.as_mut_vec() }.replace_range(range, src.as_bytes());
 88 |     }
 89 | }
 90 | 
 91 | impl ReadableDocument for PathBuf {
 92 |     fn read_forward(&self, off: usize) -> &[u8] {
 93 |         let s = self.as_os_str().as_encoded_bytes();
 94 |         &s[off.min(s.len())..]
 95 |     }
 96 | 
 97 |     fn read_backward(&self, off: usize) -> &[u8] {
 98 |         let s = self.as_os_str().as_encoded_bytes();
 99 |         &s[..off.min(s.len())]
100 |     }
101 | }
102 | 
103 | impl WriteableDocument for PathBuf {
104 |     fn replace(&mut self, range: Range<usize>, replacement: &[u8]) {
105 |         let mut vec = mem::take(self).into_os_string().into_encoded_bytes();
106 |         vec.replace_range(range, replacement);
107 |         *self = unsafe { Self::from(OsString::from_encoded_bytes_unchecked(vec)) };
108 |     }
109 | }
110 | 


--------------------------------------------------------------------------------
/src/fuzzy.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | //! Fuzzy search algorithm based on the one used in VS Code (`/src/vs/base/common/fuzzyScorer.ts`).
  5 | //! Other algorithms exist, such as Sublime Text's, or the one used in `fzf`,
  6 | //! but I figured that this one is what lots of people may be familiar with.
  7 | 
  8 | use std::vec;
  9 | 
 10 | use crate::arena::{Arena, scratch_arena};
 11 | use crate::icu;
 12 | 
// Doubles as the "no score" return value and as the "no match" marker
// inside the `matches` matrix.
const NO_MATCH: i32 = 0;

/// Scores how well `needle` fuzzy-matches `haystack`.
///
/// # Parameters
///
/// * `arena`: allocator for the returned positions vector.
/// * `haystack`: the text to search in (the "target").
/// * `needle`: the text to search for (the "query").
/// * `allow_non_contiguous_matches`: if `false`, a match for the first
///   query character is only accepted where the whole case-folded query
///   occurs contiguously in the case-folded target.
///
/// # Returns
///
/// A tuple of the final score (bottom-right cell of the score matrix) and
/// the matched positions. Positions are indices into the `char` sequence of
/// `haystack` (not byte offsets), in ascending order.
/// Returns `(NO_MATCH, [])` if either input is empty or if `needle` has
/// more `char`s than `haystack`.
pub fn score_fuzzy<'a>(
    arena: &'a Arena,
    haystack: &str,
    needle: &str,
    allow_non_contiguous_matches: bool,
) -> (i32, Vec<usize, &'a Arena>) {
    if haystack.is_empty() || needle.is_empty() {
        // return early if target or query are empty
        return (NO_MATCH, Vec::new_in(arena));
    }

    // All temporaries live in a scratch arena; only `positions` below is
    // allocated in the caller's arena.
    let scratch = scratch_arena(Some(arena));
    let target = map_chars(&scratch, haystack);
    let query = map_chars(&scratch, needle);

    if target.len() < query.len() {
        // impossible for query to be contained in target
        return (NO_MATCH, Vec::new_in(arena));
    }

    // NOTE(review): the folded vectors are indexed with the same indices as
    // `target`/`query` below. This assumes case folding preserves the number
    // of chars — confirm against `icu::fold_case`.
    let target_lower = icu::fold_case(&scratch, haystack);
    let query_lower = icu::fold_case(&scratch, needle);
    let target_lower = map_chars(&scratch, &target_lower);
    let query_lower = map_chars(&scratch, &query_lower);

    // Both matrices are stored row-major: one row per query char,
    // one column per target char.
    let area = query.len() * target.len();
    let mut scores = vec::from_elem_in(0, area, &*scratch);
    let mut matches = vec::from_elem_in(0, area, &*scratch);

    //
    // Build Scorer Matrix:
    //
    // The matrix is composed of query q and target t. For each index we score
    // q[i] with t[i] and compare that with the previous score. If the score is
    // equal or larger, we keep the match. In addition to the score, we also keep
    // the length of the consecutive matches to use as boost for the score.
    //
    //      t   a   r   g   e   t
    //  q
    //  u
    //  e
    //  r
    //  y
    //
    for query_index in 0..query.len() {
        let query_index_offset = query_index * target.len();
        let query_index_previous_offset =
            if query_index > 0 { (query_index - 1) * target.len() } else { 0 };

        for target_index in 0..target.len() {
            let current_index = query_index_offset + target_index;
            // The cell up and to the left: previous query char, previous target char.
            let diag_index = if query_index > 0 && target_index > 0 {
                query_index_previous_offset + target_index - 1
            } else {
                0
            };
            let left_score = if target_index > 0 { scores[current_index - 1] } else { 0 };
            let diag_score =
                if query_index > 0 && target_index > 0 { scores[diag_index] } else { 0 };
            let matches_sequence_len =
                if query_index > 0 && target_index > 0 { matches[diag_index] } else { 0 };

            // If we are not matching on the first query character anymore, we only produce a
            // score if we had a score previously for the last query index (by looking at the diagScore).
            // This makes sure that the query always matches in sequence on the target. For example
            // given a target of "ede" and a query of "de", we would otherwise produce a wrong high score
            // for query[1] ("e") matching on target[0] ("e") because of the "beginning of word" boost.
            let score = if diag_score == 0 && query_index != 0 {
                0
            } else {
                compute_char_score(
                    query[query_index],
                    query_lower[query_index],
                    if target_index != 0 { Some(target[target_index - 1]) } else { None },
                    target[target_index],
                    target_lower[target_index],
                    matches_sequence_len,
                )
            };

            // We have a score and it's equal or larger than the left score
            // Match: sequence continues growing from previous diag value
            // Score: increases by diag score value
            let is_valid_score = score != 0 && diag_score + score >= left_score;
            if is_valid_score
                && (
                    // We don't need to check if it's contiguous if we allow non-contiguous matches
                    allow_non_contiguous_matches ||
                        // We must be looking for a contiguous match.
                        // Looking at an index above 0 in the query means we must have already
                        // found out this is contiguous otherwise there wouldn't have been a score
                        query_index > 0 ||
                        // lastly check if the query is completely contiguous at this index in the target
                        target_lower[target_index..].starts_with(&query_lower)
                )
            {
                matches[current_index] = matches_sequence_len + 1;
                scores[current_index] = diag_score + score;
            } else {
                // We either have no score or the score is lower than the left score
                // Match: reset to 0
                // Score: pick up from left hand side
                matches[current_index] = NO_MATCH;
                scores[current_index] = left_score;
            }
        }
    }

    // Restore Positions (starting from bottom right of matrix)
    let mut positions = Vec::new_in(arena);

    // Both are non-empty here thanks to the early returns above;
    // the check guards the `len() - 1` subtractions below.
    if !query.is_empty() && !target.is_empty() {
        let mut query_index = query.len() - 1;
        let mut target_index = target.len() - 1;

        loop {
            let current_index = query_index * target.len() + target_index;
            if matches[current_index] == NO_MATCH {
                if target_index == 0 {
                    break;
                }
                target_index -= 1; // go left
            } else {
                positions.push(target_index);

                // go up and left
                if query_index == 0 || target_index == 0 {
                    break;
                }
                query_index -= 1;
                target_index -= 1;
            }
        }

        // Positions were collected back to front.
        positions.reverse();
    }

    (scores[area - 1], positions)
}
154 | 
155 | fn compute_char_score(
156 |     query: char,
157 |     query_lower: char,
158 |     target_prev: Option<char>,
159 |     target_curr: char,
160 |     target_curr_lower: char,
161 |     matches_sequence_len: i32,
162 | ) -> i32 {
163 |     let mut score = 0;
164 | 
165 |     if !consider_as_equal(query_lower, target_curr_lower) {
166 |         return score; // no match of characters
167 |     }
168 | 
169 |     // Character match bonus
170 |     score += 1;
171 | 
172 |     // Consecutive match bonus
173 |     if matches_sequence_len > 0 {
174 |         score += matches_sequence_len * 5;
175 |     }
176 | 
177 |     // Same case bonus
178 |     if query == target_curr {
179 |         score += 1;
180 |     }
181 | 
182 |     if let Some(target_prev) = target_prev {
183 |         // After separator bonus
184 |         let separator_bonus = score_separator_at_pos(target_prev);
185 |         if separator_bonus > 0 {
186 |             score += separator_bonus;
187 |         }
188 |         // Inside word upper case bonus (camel case). We only give this bonus if we're not in a contiguous sequence.
189 |         // For example:
190 |         // NPE => NullPointerException = boost
191 |         // HTTP => HTTP = not boost
192 |         else if target_curr != target_curr_lower && matches_sequence_len == 0 {
193 |             score += 2;
194 |         }
195 |     } else {
196 |         // Start of word bonus
197 |         score += 8;
198 |     }
199 | 
200 |     score
201 | }
202 | 
/// Compares two characters, treating `/` and `\` as interchangeable so that
/// path separators match regardless of platform.
fn consider_as_equal(a: char, b: char) -> bool {
    matches!((a, b), ('/', '\\') | ('\\', '/')) || a == b
}
207 | 
/// Returns the bonus for a match that directly follows `ch`:
/// 5 after a path separator, 4 after any other separator, 0 otherwise.
fn score_separator_at_pos(ch: char) -> i32 {
    // Path separators are preferred...
    if matches!(ch, '/' | '\\') {
        return 5;
    }
    // ...over other separators.
    if matches!(ch, '_' | '-' | '.' | ' ' | '\'' | '"' | ':') {
        return 4;
    }
    0
}
215 | 
216 | fn map_chars<'a>(arena: &'a Arena, s: &str) -> Vec<char, &'a Arena> {
217 |     let mut chars = Vec::with_capacity_in(s.len(), arena);
218 |     chars.extend(s.chars());
219 |     chars.shrink_to_fit();
220 |     chars
221 | }
222 | 


--------------------------------------------------------------------------------
/src/hash.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Licensed under the MIT License.
 3 | 
 4 | //! Provides fast, non-cryptographic hash functions.
 5 | 
/// The venerable wyhash hash function.
///
/// It's fast, has good statistical properties, and is in the public domain.
/// See: <https://github.com/wangyi-fudan/wyhash>
/// If you visit the link, you'll find that it was superseded by "rapidhash",
/// but that's not particularly interesting for this project. rapidhash results
/// in way larger assembly and isn't faster when hashing small amounts of data.
///
/// Hashes `data` with the given `seed` and returns a 64-bit digest.
/// Multi-byte reads use native byte order (see `wyr4`/`wyr8`).
pub fn hash(mut seed: u64, data: &[u8]) -> u64 {
    unsafe {
        const S0: u64 = 0xa0761d6478bd642f;
        const S1: u64 = 0xe7037ed1a0b428db;
        const S2: u64 = 0x8ebc6af09c88c6e3;
        const S3: u64 = 0x589965cc75374cc3;

        let len = data.len();
        let mut p = data.as_ptr();
        // The final two words that get mixed into the result.
        let a;
        let b;

        seed ^= S0;

        if len <= 16 {
            if len >= 4 {
                // 4..=16 bytes: four (possibly overlapping) 4-byte reads, two
                // anchored at the start and two anchored at the end.
                // `(len >> 3) << 2` is 0 for len < 8, 4 for 8..=15 and 8 for 16.
                a = (wyr4(p) << 32) | wyr4(p.add((len >> 3) << 2));
                b = (wyr4(p.add(len - 4)) << 32) | wyr4(p.add(len - 4 - ((len >> 3) << 2)));
            } else if len > 0 {
                // 1..=3 bytes: first, middle and last byte.
                a = wyr3(p, len);
                b = 0;
            } else {
                a = 0;
                b = 0;
            }
        } else {
            // `i` counts the bytes remaining at `p`.
            let mut i = len;
            if i > 48 {
                // Consume 48-byte blocks using three independent lanes.
                let mut seed1 = seed;
                let mut seed2 = seed;
                // `while { body; condition } {}` is a do-while loop: the body
                // runs at least once and repeats while more than 48 bytes remain.
                while {
                    seed = wymix(wyr8(p) ^ S1, wyr8(p.add(8)) ^ seed);
                    seed1 = wymix(wyr8(p.add(16)) ^ S2, wyr8(p.add(24)) ^ seed1);
                    seed2 = wymix(wyr8(p.add(32)) ^ S3, wyr8(p.add(40)) ^ seed2);
                    p = p.add(48);
                    i -= 48;
                    i > 48
                } {}
                seed ^= seed1 ^ seed2;
            }
            // Consume 16-byte blocks while more than 16 bytes remain.
            while i > 16 {
                seed = wymix(wyr8(p) ^ S1, wyr8(p.add(8)) ^ seed);
                i -= 16;
                p = p.add(16);
            }
            // `p + i` is the end of the data, so these read the last 16 bytes
            // of the input. With `i` in 1..=16 the offsets may be negative,
            // i.e. overlap bytes that were already consumed above. That stays
            // in bounds because `len > 16` in this branch.
            a = wyr8(p.offset(i as isize - 16));
            b = wyr8(p.offset(i as isize - 8));
        }

        wymix(S1 ^ (len as u64), wymix(a ^ S1, b ^ seed))
    }
}
65 | 
/// Packs the first, middle (`k / 2`) and last (`k - 1`) byte at `p` into
/// the low 24 bits of the result.
///
/// # Safety
///
/// `p` must be valid for reads of `k` bytes and `k` must be at least 1.
unsafe fn wyr3(p: *const u8, k: usize) -> u64 {
    unsafe {
        let first = u64::from(p.read());
        let middle = u64::from(p.add(k / 2).read());
        let last = u64::from(p.add(k - 1).read());
        (first << 16) | (middle << 8) | last
    }
}
72 | 
/// Reads 4 bytes at `p` as a native-endian `u32`, zero-extended to `u64`.
/// `p` doesn't need to be aligned.
///
/// # Safety
///
/// `p` must be valid for reads of 4 bytes.
unsafe fn wyr4(p: *const u8) -> u64 {
    let word = unsafe { p.cast::<u32>().read_unaligned() };
    u64::from(word)
}
76 | 
/// Reads 8 bytes at `p` as a native-endian `u64`.
/// `p` doesn't need to be aligned.
///
/// # Safety
///
/// `p` must be valid for reads of 8 bytes.
unsafe fn wyr8(p: *const u8) -> u64 {
    let word = p.cast::<u64>();
    unsafe { word.read_unaligned() }
}
80 | 
// This is a weak mix function on its own. It may be worth considering
// replacing external uses of this function with a stronger one.
// On the other hand, it's very fast.
//
// Multiplies both inputs as 128-bit integers and folds the product
// by XORing its upper and lower 64 bits.
pub fn wymix(lhs: u64, rhs: u64) -> u64 {
    let product = u128::from(lhs) * u128::from(rhs);
    let high = (product >> 64) as u64;
    let low = product as u64;
    high ^ low
}
90 | 
91 | pub fn hash_str(seed: u64, s: &str) -> u64 {
92 |     hash(seed, s.as_bytes())
93 | }
94 | 


--------------------------------------------------------------------------------
/src/helpers.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | //! Random assortment of helpers I didn't know where to put.
  5 | 
  6 | use std::alloc::Allocator;
  7 | use std::cmp::Ordering;
  8 | use std::io::Read;
  9 | use std::mem::{self, MaybeUninit};
 10 | use std::ops::{Bound, Range, RangeBounds};
 11 | use std::{fmt, ptr, slice, str};
 12 | 
 13 | use crate::apperr;
 14 | 
 15 | pub const KILO: usize = 1000;
 16 | pub const MEGA: usize = 1000 * 1000;
 17 | pub const GIGA: usize = 1000 * 1000 * 1000;
 18 | 
 19 | pub const KIBI: usize = 1024;
 20 | pub const MEBI: usize = 1024 * 1024;
 21 | pub const GIBI: usize = 1024 * 1024 * 1024;
 22 | 
 23 | pub struct MetricFormatter<T>(pub T);
 24 | 
 25 | impl fmt::Display for MetricFormatter<usize> {
 26 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 27 |         let mut value = self.0;
 28 |         let mut suffix = "B";
 29 |         if value >= GIGA {
 30 |             value /= GIGA;
 31 |             suffix = "GB";
 32 |         } else if value >= MEGA {
 33 |             value /= MEGA;
 34 |             suffix = "MB";
 35 |         } else if value >= KILO {
 36 |             value /= KILO;
 37 |             suffix = "kB";
 38 |         }
 39 |         write!(f, "{value}{suffix}")
 40 |     }
 41 | }
 42 | 
 43 | /// A viewport coordinate type used throughout the application.
 44 | pub type CoordType = isize;
 45 | 
 46 | /// To avoid overflow issues because you're adding two [`CoordType::MAX`]
 47 | /// values together, you can use [`COORD_TYPE_SAFE_MAX`] instead.
 48 | ///
 49 | /// It equates to half the bits contained in [`CoordType`], which
 50 | /// for instance is 32767 (0x7FFF) when [`CoordType`] is a [`i32`].
 51 | pub const COORD_TYPE_SAFE_MAX: CoordType = (1 << (CoordType::BITS / 2 - 1)) - 1;
 52 | 
 53 | /// A 2D point. Uses [`CoordType`].
 54 | #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
 55 | pub struct Point {
 56 |     pub x: CoordType,
 57 |     pub y: CoordType,
 58 | }
 59 | 
 60 | impl Point {
 61 |     pub const MIN: Self = Self { x: CoordType::MIN, y: CoordType::MIN };
 62 |     pub const MAX: Self = Self { x: CoordType::MAX, y: CoordType::MAX };
 63 | }
 64 | 
 65 | impl PartialOrd<Self> for Point {
 66 |     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
 67 |         Some(self.cmp(other))
 68 |     }
 69 | }
 70 | 
 71 | impl Ord for Point {
 72 |     fn cmp(&self, other: &Self) -> Ordering {
 73 |         self.y.cmp(&other.y).then(self.x.cmp(&other.x))
 74 |     }
 75 | }
 76 | 
 77 | /// A 2D size. Uses [`CoordType`].
 78 | #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
 79 | pub struct Size {
 80 |     pub width: CoordType,
 81 |     pub height: CoordType,
 82 | }
 83 | 
 84 | impl Size {
 85 |     pub fn as_rect(&self) -> Rect {
 86 |         Rect { left: 0, top: 0, right: self.width, bottom: self.height }
 87 |     }
 88 | }
 89 | 
 90 | /// A 2D rectangle. Uses [`CoordType`].
 91 | #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
 92 | pub struct Rect {
 93 |     pub left: CoordType,
 94 |     pub top: CoordType,
 95 |     pub right: CoordType,
 96 |     pub bottom: CoordType,
 97 | }
 98 | 
 99 | impl Rect {
100 |     /// Mimics CSS's `padding` property where `padding: a` is `a a a a`.
101 |     pub fn one(value: CoordType) -> Self {
102 |         Self { left: value, top: value, right: value, bottom: value }
103 |     }
104 | 
105 |     /// Mimics CSS's `padding` property where `padding: a b` is `a b a b`,
106 |     /// and `a` is top/bottom and `b` is left/right.
107 |     pub fn two(top_bottom: CoordType, left_right: CoordType) -> Self {
108 |         Self { left: left_right, top: top_bottom, right: left_right, bottom: top_bottom }
109 |     }
110 | 
111 |     /// Mimics CSS's `padding` property where `padding: a b c` is `a b c b`,
112 |     /// and `a` is top, `b` is left/right, and `c` is bottom.
113 |     pub fn three(top: CoordType, left_right: CoordType, bottom: CoordType) -> Self {
114 |         Self { left: left_right, top, right: left_right, bottom }
115 |     }
116 | 
117 |     /// Is the rectangle empty?
118 |     pub fn is_empty(&self) -> bool {
119 |         self.left >= self.right || self.top >= self.bottom
120 |     }
121 | 
122 |     /// Width of the rectangle.
123 |     pub fn width(&self) -> CoordType {
124 |         self.right - self.left
125 |     }
126 | 
127 |     /// Height of the rectangle.
128 |     pub fn height(&self) -> CoordType {
129 |         self.bottom - self.top
130 |     }
131 | 
132 |     /// Check if it contains a point.
133 |     pub fn contains(&self, point: Point) -> bool {
134 |         point.x >= self.left && point.x < self.right && point.y >= self.top && point.y < self.bottom
135 |     }
136 | 
137 |     /// Intersect two rectangles.
138 |     pub fn intersect(&self, rhs: Self) -> Self {
139 |         let l = self.left.max(rhs.left);
140 |         let t = self.top.max(rhs.top);
141 |         let r = self.right.min(rhs.right);
142 |         let b = self.bottom.min(rhs.bottom);
143 | 
144 |         // Ensure that the size is non-negative. This avoids bugs,
145 |         // because some height/width is negative all of a sudden.
146 |         let r = l.max(r);
147 |         let b = t.max(b);
148 | 
149 |         Self { left: l, top: t, right: r, bottom: b }
150 |     }
151 | }
152 | 
/// Returns the two values as `[smaller, larger]`. If neither is smaller,
/// the original order is kept.
///
/// Exists because [`std::cmp::minmax`] is unstable.
pub fn minmax<T: Ord>(v1: T, v2: T) -> [T; 2] {
    if v2 < v1 {
        return [v2, v1];
    }
    [v1, v2]
}
160 | 
/// Reinterprets an optional reference as a raw pointer.
#[inline(always)]
#[allow(clippy::ptr_eq)]
fn opt_ptr<T>(a: Option<&T>) -> *const T {
    unsafe { mem::transmute::<Option<&T>, *const T>(a) }
}

/// Compares two optional references by address: the `Option<&T>` flavor of
/// `ptr::eq`, which Rust doesn't offer out of the box.
/// Relies on `unsafe` to keep the cost low in debug builds.
#[inline(always)]
#[allow(clippy::ptr_eq)]
pub fn opt_ptr_eq<T>(a: Option<&T>, b: Option<&T>) -> bool {
    let lhs = opt_ptr(a);
    let rhs = opt_ptr(b);
    lhs == rhs
}
174 | 
/// Builds a `&str` out of a pointer and a byte length.
/// A stand-in for the unstable `std::str::from_raw_parts`.
///
/// # Safety
///
/// * `ptr` must point to `len` bytes of valid UTF-8.
/// * That memory must outlive the returned reference.
#[inline]
#[must_use]
pub const unsafe fn str_from_raw_parts<'a>(ptr: *const u8, len: usize) -> &'a str {
    unsafe {
        let bytes = slice::from_raw_parts(ptr, len);
        str::from_utf8_unchecked(bytes)
    }
}
187 | 
/// Copies as many elements from `src` to `dst` as fit into both and returns
/// that count.
///
/// Unlike [`<[T]>::copy_from_slice`] this doesn't panic if the two slices
/// have different lengths.
pub fn slice_copy_safe<T: Copy>(dst: &mut [T], src: &[T]) -> usize {
    let count = dst.len().min(src.len());
    dst[..count].copy_from_slice(&src[..count]);
    count
}
195 | 
196 | /// [`Vec::splice`] results in really bad assembly.
197 | /// This doesn't. Don't use [`Vec::splice`].
198 | pub trait ReplaceRange<T: Copy> {
199 |     fn replace_range<R: RangeBounds<usize>>(&mut self, range: R, src: &[T]);
200 | }
201 | 
202 | impl<T: Copy, A: Allocator> ReplaceRange<T> for Vec<T, A> {
203 |     fn replace_range<R: RangeBounds<usize>>(&mut self, range: R, src: &[T]) {
204 |         let start = match range.start_bound() {
205 |             Bound::Included(&start) => start,
206 |             Bound::Excluded(start) => start + 1,
207 |             Bound::Unbounded => 0,
208 |         };
209 |         let end = match range.end_bound() {
210 |             Bound::Included(end) => end + 1,
211 |             Bound::Excluded(&end) => end,
212 |             Bound::Unbounded => usize::MAX,
213 |         };
214 |         vec_replace_impl(self, start..end, src);
215 |     }
216 | }
217 | 
218 | fn vec_replace_impl<T: Copy, A: Allocator>(dst: &mut Vec<T, A>, range: Range<usize>, src: &[T]) {
219 |     unsafe {
220 |         let dst_len = dst.len();
221 |         let src_len = src.len();
222 |         let off = range.start.min(dst_len);
223 |         let del_len = range.end.saturating_sub(off).min(dst_len - off);
224 | 
225 |         if del_len == 0 && src_len == 0 {
226 |             return; // nothing to do
227 |         }
228 | 
229 |         let tail_len = dst_len - off - del_len;
230 |         let new_len = dst_len - del_len + src_len;
231 | 
232 |         if src_len > del_len {
233 |             dst.reserve(src_len - del_len);
234 |         }
235 | 
236 |         // NOTE: drop_in_place() is not needed here, because T is constrained to Copy.
237 | 
238 |         // SAFETY: as_mut_ptr() must called after reserve() to ensure that the pointer is valid.
239 |         let ptr = dst.as_mut_ptr().add(off);
240 | 
241 |         // Shift the tail.
242 |         if tail_len > 0 && src_len != del_len {
243 |             ptr::copy(ptr.add(del_len), ptr.add(src_len), tail_len);
244 |         }
245 | 
246 |         // Copy in the replacement.
247 |         ptr::copy_nonoverlapping(src.as_ptr(), ptr, src_len);
248 |         dst.set_len(new_len);
249 |     }
250 | }
251 | 
252 | /// [`Read`] but with [`MaybeUninit<u8>`] buffers.
253 | pub fn file_read_uninit<T: Read>(
254 |     file: &mut T,
255 |     buf: &mut [MaybeUninit<u8>],
256 | ) -> apperr::Result<usize> {
257 |     unsafe {
258 |         let buf_slice = slice::from_raw_parts_mut(buf.as_mut_ptr() as *mut u8, buf.len());
259 |         let n = file.read(buf_slice)?;
260 |         Ok(n)
261 |     }
262 | }
263 | 
/// Turns a [`&[T]`] into a [`&[MaybeUninit<T>]`]
/// covering the same elements.
#[inline(always)]
pub const fn slice_as_uninit_ref<T>(slice: &[T]) -> &[MaybeUninit<T>] {
    let len = slice.len();
    let data = slice.as_ptr().cast::<MaybeUninit<T>>();
    unsafe { slice::from_raw_parts(data, len) }
}
269 | 
/// Turns a [`&mut [T]`] into a [`&mut [MaybeUninit<T>]`]
/// covering the same elements.
#[inline(always)]
pub const fn slice_as_uninit_mut<T>(slice: &mut [T]) -> &mut [MaybeUninit<T>] {
    let len = slice.len();
    let data = slice.as_mut_ptr().cast::<MaybeUninit<T>>();
    unsafe { slice::from_raw_parts_mut(data, len) }
}
275 | 
/// ASCII-only string comparison helpers.
pub trait AsciiStringHelpers {
    /// Returns `true` if the string starts with `prefix`,
    /// ignoring differences in ASCII case.
    ///
    /// The name is long, but it says what it does: it's [`str::starts_with`]
    /// combined with [`str::eq_ignore_ascii_case`].
    fn starts_with_ignore_ascii_case(&self, prefix: &str) -> bool;
}

impl AsciiStringHelpers for str {
    fn starts_with_ignore_ascii_case(&self, prefix: &str) -> bool {
        // Working on bytes avoids any UTF-8 boundary checks. That's fine,
        // because only ASCII letters are treated case-insensitively.
        let needle = prefix.as_bytes();
        match self.as_bytes().get(..needle.len()) {
            Some(head) => head.eq_ignore_ascii_case(needle),
            // `self` is shorter than the prefix.
            None => false,
        }
    }
}
294 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Licensed under the MIT License.
 3 | 
 4 | #![feature(
 5 |     allocator_api,
 6 |     breakpoint,
 7 |     cold_path,
 8 |     let_chains,
 9 |     linked_list_cursors,
10 |     maybe_uninit_fill,
11 |     maybe_uninit_slice,
12 |     maybe_uninit_uninit_array_transpose
13 | )]
14 | #![allow(clippy::missing_transmute_annotations, clippy::new_without_default, stable_features)]
15 | 
16 | #[macro_use]
17 | pub mod arena;
18 | 
19 | pub mod apperr;
20 | pub mod base64;
21 | pub mod buffer;
22 | pub mod cell;
23 | pub mod document;
24 | pub mod framebuffer;
25 | pub mod fuzzy;
26 | pub mod hash;
27 | pub mod helpers;
28 | pub mod icu;
29 | pub mod input;
30 | pub mod oklab;
31 | pub mod path;
32 | pub mod simd;
33 | pub mod sys;
34 | pub mod tui;
35 | pub mod unicode;
36 | pub mod vt;
37 | 


--------------------------------------------------------------------------------
/src/oklab.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | //! Oklab colorspace conversions.
  5 | //!
  6 | //! Implements Oklab as defined at: <https://bottosson.github.io/posts/oklab/>
  7 | 
  8 | #![allow(clippy::excessive_precision)]
  9 | 
 10 | /// An Oklab color with alpha, see <https://bottosson.github.io/posts/oklab/>.
 11 | pub struct Lab {
 12 |     pub l: f32, // Perceived lightness.
 13 |     pub a: f32, // Green-red axis.
 14 |     pub b: f32, // Blue-yellow axis.
 15 |     pub alpha: f32, // Opacity; `srgb_to_oklab` produces values in 0.0..=1.0.
 16 | }
 17 | 
 18 | /// Transforms a packed 32-bit sRGB color into its Oklab representation.
 19 | ///
 20 | /// Red is read from the lowest byte, followed by green, blue and alpha.
 21 | pub fn srgb_to_oklab(color: u32) -> Lab {
 22 |     // In little-endian byte order the channels come out as R, G, B, A.
 23 |     let [red, green, blue, opacity] = color.to_le_bytes();
 24 |     let decode = |channel: u8| SRGB_TO_RGB_LUT[channel as usize];
 25 |     let (lin_r, lin_g, lin_b) = (decode(red), decode(green), decode(blue));
 26 | 
 27 |     // Linear RGB -> LMS, followed by the (estimated) cube root of each component.
 28 |     let long = cbrtf_est(0.4122214708 * lin_r + 0.5363325363 * lin_g + 0.0514459929 * lin_b);
 29 |     let medium = cbrtf_est(0.2119034982 * lin_r + 0.6806995451 * lin_g + 0.1073969566 * lin_b);
 30 |     let short = cbrtf_est(0.0883024619 * lin_r + 0.2817188376 * lin_g + 0.6299787005 * lin_b);
 31 | 
 32 |     // Non-linear LMS -> Lab. Alpha is merely rescaled from 0..=255 to 0.0..=1.0.
 33 |     let l = 0.2104542553 * long + 0.7936177850 * medium - 0.0040720468 * short;
 34 |     let a = 1.9779984951 * long - 2.4285922050 * medium + 0.4505937099 * short;
 35 |     let b = 0.0259040371 * long + 0.7827717662 * medium - 0.8086757660 * short;
 36 |     let alpha = opacity as f32 * (1.0 / 255.0);
 37 | 
 38 |     Lab { l, a, b, alpha }
 39 | }
 40 | 
 41 | /// Transforms an Oklab color back into a packed 32-bit sRGB color.
 42 | ///
 43 | /// Out-of-gamut channels and alpha are clamped to 0.0..=1.0. Alpha is
 44 | /// scaled to 0..=255 with a truncating cast, i.e. it is not rounded.
 45 | pub fn oklab_to_srgb(c: Lab) -> u32 {
 46 |     let cube = |v: f32| v * v * v;
 47 | 
 48 |     // Lab -> non-linear LMS -> LMS (undoing the cube root).
 49 |     let long = cube(c.l + 0.3963377774 * c.a + 0.2158037573 * c.b);
 50 |     let medium = cube(c.l - 0.1055613458 * c.a - 0.0638541728 * c.b);
 51 |     let short = cube(c.l - 0.0894841775 * c.a - 1.2914855480 * c.b);
 52 | 
 53 |     // LMS -> linear RGB.
 54 |     let lin_r = 4.0767416621 * long - 3.3077115913 * medium + 0.2309699292 * short;
 55 |     let lin_g = -1.2684380046 * long + 2.6097574011 * medium - 0.3413193965 * short;
 56 |     let lin_b = -0.0041960863 * long - 0.7034186147 * medium + 1.7076147010 * short;
 57 | 
 58 |     // Clamp into gamut and quantize each channel to 8 bits.
 59 |     let red = linear_to_srgb(lin_r.clamp(0.0, 1.0));
 60 |     let green = linear_to_srgb(lin_g.clamp(0.0, 1.0));
 61 |     let blue = linear_to_srgb(lin_b.clamp(0.0, 1.0));
 62 |     let opacity = (c.alpha.clamp(0.0, 1.0) * 255.0) as u32;
 63 | 
 64 |     // Red ends up in the lowest byte and alpha in the highest.
 65 |     red | (green << 8) | (blue << 16) | (opacity << 24)
 66 | }
 67 | 
 68 | /// Blends `src` on top of `dst` in the Oklab color space; both are 32-bit sRGB colors.
 69 | pub fn oklab_blend(dst: u32, src: u32) -> u32 {
 70 |     let below = srgb_to_oklab(dst);
 71 |     let above = srgb_to_oklab(src);
 72 |     // `src` is added as is, while `dst` is weighted by the transparency of `src`.
 73 |     let weight = 1.0 - above.alpha;
 74 |     oklab_to_srgb(Lab {
 75 |         l: above.l + below.l * weight,
 76 |         a: above.a + below.a * weight,
 77 |         b: above.b + below.b * weight,
 78 |         alpha: above.alpha + below.alpha * weight,
 79 |     })
 80 | }
 81 | 
 82 | /// Encodes a linear-light channel as an sRGB value, scaled by 255 and truncated.
 83 | fn linear_to_srgb(c: f32) -> u32 {
 84 |     if c > 0.0031308 {
 85 |         return (255.0 * 1.055 * c.powf(1.0 / 2.4) - 255.0 * 0.055) as u32;
 86 |     }
 87 |     (255.0 * 12.92 * c) as u32
 88 | }
 89 | 
 90 | #[inline]
 91 | fn cbrtf_est(a: f32) -> f32 {
 92 |     // Estimates the cube root of `a`. An initial guess is derived from the bit pattern:
 93 |     //   f32_as_uint32_t / 3 + 709921077
 94 |     // See http://metamerist.com/cbrt/cbrt.htm. It's similar to the well known
 95 |     // "fast inverse square root" trick. Lots of numbers around 709921077 perform at least
 96 |     // equally well, and it is unknown how and why 709921077 was chosen specifically.
 97 |     let u: u32 = f32::to_bits(a); // evil floating point bit level hacking
 98 |     let u = u / 3 + 709921077; // what the fuck?
 99 |     let x: f32 = f32::from_bits(u);
100 |     // `x` is only a rough first guess; the Newton step below refines it.
101 |     // One round of Newton's method. It follows the Wikipedia article at
102 |     //   https://en.wikipedia.org/wiki/Cube_root#Numerical_methods
103 |     // For `a`s in the range between 0 and 1, this results in a maximum error of
104 |     // less than 6.7e-4f, which is not good, but good enough for us, because
105 |     // we're not an image editor. The benefit is that it's really fast.
106 |     (1.0 / 3.0) * (a / (x * x) + (x + x)) // 1st iteration
107 | }
108 | 
109 | #[rustfmt::skip]
110 | #[allow(clippy::excessive_precision)]
111 | const SRGB_TO_RGB_LUT: [f32; 256] = [ // Indexed by an 8-bit sRGB channel value; entries run from 0.0 to 1.0.
112 |     0.0000000000, 0.0003035270, 0.0006070540, 0.0009105810, 0.0012141080, 0.0015176350, 0.0018211619, 0.0021246888, 0.0024282159, 0.0027317430, 0.0030352699, 0.0033465356, 0.0036765069, 0.0040247170, 0.0043914421, 0.0047769533,
113 |     0.0051815170, 0.0056053917, 0.0060488326, 0.0065120910, 0.0069954102, 0.0074990317, 0.0080231922, 0.0085681248, 0.0091340570, 0.0097212177, 0.0103298230, 0.0109600937, 0.0116122449, 0.0122864870, 0.0129830306, 0.0137020806,
114 |     0.0144438436, 0.0152085144, 0.0159962922, 0.0168073755, 0.0176419523, 0.0185002182, 0.0193823613, 0.0202885624, 0.0212190095, 0.0221738834, 0.0231533647, 0.0241576303, 0.0251868572, 0.0262412224, 0.0273208916, 0.0284260381,
115 |     0.0295568332, 0.0307134409, 0.0318960287, 0.0331047624, 0.0343398079, 0.0356013142, 0.0368894450, 0.0382043645, 0.0395462364, 0.0409151986, 0.0423114114, 0.0437350273, 0.0451862030, 0.0466650836, 0.0481718220, 0.0497065634,
116 |     0.0512694679, 0.0528606549, 0.0544802807, 0.0561284944, 0.0578054339, 0.0595112406, 0.0612460710, 0.0630100295, 0.0648032799, 0.0666259527, 0.0684781820, 0.0703601092, 0.0722718611, 0.0742135793, 0.0761853904, 0.0781874284,
117 |     0.0802198276, 0.0822827145, 0.0843762159, 0.0865004659, 0.0886556059, 0.0908417329, 0.0930589810, 0.0953074843, 0.0975873619, 0.0998987406, 0.1022417471, 0.1046164930, 0.1070231125, 0.1094617173, 0.1119324341, 0.1144353822,
118 |     0.1169706732, 0.1195384338, 0.1221387982, 0.1247718409, 0.1274376959, 0.1301364899, 0.1328683347, 0.1356333494, 0.1384316236, 0.1412633061, 0.1441284865, 0.1470272839, 0.1499598026, 0.1529261619, 0.1559264660, 0.1589608639,
119 |     0.1620294005, 0.1651322246, 0.1682693958, 0.1714410931, 0.1746473908, 0.1778884083, 0.1811642349, 0.1844749898, 0.1878207624, 0.1912016720, 0.1946178079, 0.1980693042, 0.2015562356, 0.2050787061, 0.2086368501, 0.2122307271,
120 |     0.2158605307, 0.2195262313, 0.2232279778, 0.2269658893, 0.2307400703, 0.2345506549, 0.2383976579, 0.2422811985, 0.2462013960, 0.2501583695, 0.2541521788, 0.2581829131, 0.2622507215, 0.2663556635, 0.2704978585, 0.2746773660,
121 |     0.2788943350, 0.2831487954, 0.2874408960, 0.2917706966, 0.2961383164, 0.3005438447, 0.3049873710, 0.3094689548, 0.3139887452, 0.3185468316, 0.3231432438, 0.3277781308, 0.3324515820, 0.3371636569, 0.3419144452, 0.3467040956,
122 |     0.3515326977, 0.3564002514, 0.3613068759, 0.3662526906, 0.3712377846, 0.3762622178, 0.3813261092, 0.3864295185, 0.3915725648, 0.3967553079, 0.4019778669, 0.4072403014, 0.4125427008, 0.4178851545, 0.4232677519, 0.4286905527,
123 |     0.4341537058, 0.4396572411, 0.4452012479, 0.4507858455, 0.4564110637, 0.4620770514, 0.4677838385, 0.4735315442, 0.4793202281, 0.4851499796, 0.4910208881, 0.4969330430, 0.5028865933, 0.5088814497, 0.5149177909, 0.5209956765,
124 |     0.5271152258, 0.5332764983, 0.5394796133, 0.5457245708, 0.5520114899, 0.5583404899, 0.5647116303, 0.5711249113, 0.5775805116, 0.5840784907, 0.5906189084, 0.5972018838, 0.6038274169, 0.6104956269, 0.6172066331, 0.6239604354,
125 |     0.6307572126, 0.6375969648, 0.6444797516, 0.6514056921, 0.6583748460, 0.6653873324, 0.6724432111, 0.6795425415, 0.6866854429, 0.6938719153, 0.7011020184, 0.7083759308, 0.7156936526, 0.7230552435, 0.7304608822, 0.7379105687,
126 |     0.7454043627, 0.7529423237, 0.7605246305, 0.7681512833, 0.7758223414, 0.7835379243, 0.7912980318, 0.7991028428, 0.8069523573, 0.8148466945, 0.8227858543, 0.8307699561, 0.8387991190, 0.8468732834, 0.8549926877, 0.8631572723,
127 |     0.8713672161, 0.8796223402, 0.8879231811, 0.8962693810, 0.9046613574, 0.9130986929, 0.9215820432, 0.9301108718, 0.9386858940, 0.9473065734, 0.9559735060, 0.9646862745, 0.9734454751, 0.9822505713, 0.9911022186, 1.0000000000,
128 | ];
129 | 


--------------------------------------------------------------------------------
/src/path.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Licensed under the MIT License.
 3 | 
 4 | //! Path related helpers.
 5 | 
 6 | use std::ffi::{OsStr, OsString};
 7 | use std::path::{Component, MAIN_SEPARATOR_STR, Path, PathBuf};
 8 | 
 9 | /// Normalizes a given path by removing redundant components.
10 | ///
11 | /// `.` components are dropped and each `..` removes the component before it,
12 | /// unless that would climb above the root. This is purely lexical: the
13 | /// file system is never consulted, so symbolic links are not resolved.
14 | ///
15 | /// The given path must be absolute (e.g. by joining it with the current working directory).
16 | pub fn normalize(path: &Path) -> PathBuf {
17 |     debug_assert!(path.is_absolute());
18 | 
19 |     let capacity = path.as_os_str().as_encoded_bytes().len();
20 |     let mut res = PathBuf::from(OsString::with_capacity(capacity));
21 |     // Encoded length of `res` right after the root separator was pushed.
22 |     let mut root_len = 0;
23 | 
24 |     for component in path.components() {
25 |         match component {
26 |             Component::Prefix(p) => res.push(p.as_os_str()),
27 |             Component::RootDir => {
28 |                 res.push(OsStr::new(MAIN_SEPARATOR_STR));
29 |                 root_len = res.as_os_str().as_encoded_bytes().len();
30 |             }
31 |             Component::CurDir => {}
32 |             Component::ParentDir => {
33 |                 // Drop the last component, but never the root itself (`/..` stays `/`).
34 |                 // `PathBuf::pop` truncates `res` to its parent in place.
35 |                 let keeps_root = res
36 |                     .parent()
37 |                     .is_some_and(|p| p.as_os_str().as_encoded_bytes().len() >= root_len);
38 |                 if keeps_root {
39 |                     res.pop();
40 |                 }
41 |             }
42 |             Component::Normal(p) => res.push(p),
43 |         }
44 |     }
45 | 
46 |     res
47 | }
48 | 
49 | #[cfg(test)]
50 | mod tests {
51 |     use std::ffi::OsString;
52 |     use std::path::Path;
53 | 
54 |     use super::*;
55 |     /// Normalizes `s` and returns the result as an `OsString` for comparison with literals.
56 |     fn norm(s: &str) -> OsString {
57 |         normalize(Path::new(s)).into_os_string()
58 |     }
59 |     /// Covers `.` and `..` handling, redundant separators and `..` at the root on Unix.
60 |     #[cfg(unix)]
61 |     #[test]
62 |     fn test_unix() {
63 |         assert_eq!(norm("/a/b/c"), "/a/b/c");
64 |         assert_eq!(norm("/a/b/c/"), "/a/b/c");
65 |         assert_eq!(norm("/a/./b"), "/a/b");
66 |         assert_eq!(norm("/a/b/../c"), "/a/c");
67 |         assert_eq!(norm("/../../a"), "/a");
68 |         assert_eq!(norm("/../"), "/");
69 |         assert_eq!(norm("/a//b/c"), "/a/b/c");
70 |         assert_eq!(norm("/a/b/c/../../../../d"), "/d");
71 |         assert_eq!(norm("//"), "/");
72 |     }
73 |     /// The Windows counterpart, which also covers drive prefixes, mixed separators and UNC paths.
74 |     #[cfg(windows)]
75 |     #[test]
76 |     fn test_windows() {
77 |         assert_eq!(norm(r"C:\a\b\c"), r"C:\a\b\c");
78 |         assert_eq!(norm(r"C:\a\b\c\"), r"C:\a\b\c");
79 |         assert_eq!(norm(r"C:\a\.\b"), r"C:\a\b");
80 |         assert_eq!(norm(r"C:\a\b\..\c"), r"C:\a\c");
81 |         assert_eq!(norm(r"C:\..\..\a"), r"C:\a");
82 |         assert_eq!(norm(r"C:\..\"), r"C:\");
83 |         assert_eq!(norm(r"C:\a\\b\c"), r"C:\a\b\c");
84 |         assert_eq!(norm(r"C:/a\b/c"), r"C:\a\b\c");
85 |         assert_eq!(norm(r"C:\a\b\c\..\..\..\..\d"), r"C:\d");
86 |         assert_eq!(norm(r"\\server\share\path"), r"\\server\share\path");
87 |     }
88 | }
89 | 


--------------------------------------------------------------------------------
/src/simd/lines_bwd.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | use std::ptr;
  5 | 
  6 | use crate::helpers::CoordType;
  7 | 
  8 | /// Seeks backwards through `haystack`, starting at `offset`, which is
  9 | /// taken to be on line index `line`, to the start of line `line_stop`.
 10 | /// Returns the new offset together with the line index at that point.
 11 | ///
 12 | /// Unlike `lines_fwd`, this still seeks backwards if `line` is already at
 13 | /// `line_stop`, which lets you ensure (or test) that `offset` is at a line start.
 14 | /// The returned offset is *past* a newline and thus at the start of a line.
 15 | /// An `offset` beyond the end of `haystack` is clamped to its length.
 16 | pub fn lines_bwd(
 17 |     haystack: &[u8],
 18 |     offset: usize,
 19 |     line: CoordType,
 20 |     line_stop: CoordType,
 21 | ) -> (usize, CoordType) {
 22 |     let start = haystack.as_ptr();
 23 |     let clamped = offset.min(haystack.len());
 24 |     // SAFETY: `clamped <= haystack.len()`, so the pointer passed in stays in bounds.
 25 |     unsafe {
 26 |         let (stop, line) = lines_bwd_raw(start, start.add(clamped), line, line_stop);
 27 |         (stop.offset_from_unsigned(start), line)
 28 |     }
 29 | }
 30 | 
 31 | unsafe fn lines_bwd_raw(
 32 |     beg: *const u8,
 33 |     end: *const u8,
 34 |     line: CoordType,
 35 |     line_stop: CoordType,
 36 | ) -> (*const u8, CoordType) { // Forwards to the implementation for the target architecture.
 37 |     #[cfg(target_arch = "x86_64")]
 38 |     return unsafe { LINES_BWD_DISPATCH(beg, end, line, line_stop) };
 39 |     // aarch64 calls its NEON implementation directly, without a dispatch pointer.
 40 |     #[cfg(target_arch = "aarch64")]
 41 |     return unsafe { lines_bwd_neon(beg, end, line, line_stop) };
 42 |     // Scalar fallback for every other architecture; unreachable on the two above.
 43 |     #[allow(unreachable_code)]
 44 |     return unsafe { lines_bwd_fallback(beg, end, line, line_stop) };
 45 | }
 46 | 
 47 | unsafe fn lines_bwd_fallback(
 48 |     beg: *const u8,
 49 |     mut end: *const u8,
 50 |     mut line: CoordType,
 51 |     line_stop: CoordType,
 52 | ) -> (*const u8, CoordType) {
 53 |     // Scalar loop: inspect the byte in front of the cursor before stepping over it.
 54 |     while !ptr::eq(end, beg) {
 55 |         let prev = unsafe { end.sub(1) };
 56 |         if unsafe { *prev } == b'\n' {
 57 |             // Stop right behind the newline that starts the target line.
 58 |             if line <= line_stop {
 59 |                 break;
 60 |             }
 61 |             line -= 1;
 62 |         }
 63 |         end = prev;
 64 |     }
 65 |     (end, line)
 66 | }
 67 | 
 68 | #[cfg(target_arch = "x86_64")]
 69 | static mut LINES_BWD_DISPATCH: unsafe fn( // NOTE(review): accessed without synchronization; confirm callers are single-threaded.
 70 |     beg: *const u8,
 71 |     end: *const u8,
 72 |     line: CoordType,
 73 |     line_stop: CoordType,
 74 | ) -> (*const u8, CoordType) = lines_bwd_dispatch; // Starts out as the resolver, which then replaces it.
 75 | 
 76 | #[cfg(target_arch = "x86_64")]
 77 | unsafe fn lines_bwd_dispatch(
 78 |     beg: *const u8,
 79 |     end: *const u8,
 80 |     line: CoordType,
 81 |     line_stop: CoordType,
 82 | ) -> (*const u8, CoordType) { // Resolver: picks the AVX2 version if the CPU supports it.
 83 |     let func = if is_x86_feature_detected!("avx2") { lines_bwd_avx2 } else { lines_bwd_fallback };
 84 |     unsafe { LINES_BWD_DISPATCH = func }; // Later calls go straight to `func`.
 85 |     unsafe { func(beg, end, line, line_stop) } // Serve the current call as well.
 86 | }
 87 | 
 88 | #[cfg(target_arch = "x86_64")]
 89 | #[target_feature(enable = "avx2")]
 90 | unsafe fn lines_bwd_avx2(
 91 |     beg: *const u8,
 92 |     mut end: *const u8,
 93 |     mut line: CoordType,
 94 |     line_stop: CoordType,
 95 | ) -> (*const u8, CoordType) {
 96 |     unsafe {
 97 |         use std::arch::x86_64::*;
 98 |         // Adds up the four 64-bit lanes of `v`.
 99 |         #[inline(always)]
100 |         unsafe fn horizontal_sum_i64(v: __m256i) -> i64 {
101 |             unsafe {
102 |                 let hi = _mm256_extracti128_si256::<1>(v);
103 |                 let lo = _mm256_castsi256_si128(v);
104 |                 let sum = _mm_add_epi64(lo, hi);
105 |                 let shuf = _mm_shuffle_epi32::<0b11_10_11_10>(sum);
106 |                 let sum = _mm_add_epi64(sum, shuf);
107 |                 _mm_cvtsi128_si64(sum)
108 |             }
109 |         }
110 |         // AVX2 variant of `lines_bwd_fallback`: skips whole chunks that cannot contain the stop.
111 |         let lf = _mm256_set1_epi8(b'\n' as i8);
112 |         let line_stop = line_stop.min(line); // The target is never past the current line.
113 |         let mut remaining = end.offset_from_unsigned(beg);
114 |         // Process 128 bytes (four vectors) per iteration for as long as that many remain.
115 |         while remaining >= 128 {
116 |             let chunk_start = end.sub(128);
117 | 
118 |             let v1 = _mm256_loadu_si256(chunk_start.add(0) as *const _);
119 |             let v2 = _mm256_loadu_si256(chunk_start.add(32) as *const _);
120 |             let v3 = _mm256_loadu_si256(chunk_start.add(64) as *const _);
121 |             let v4 = _mm256_loadu_si256(chunk_start.add(96) as *const _);
122 |             // Each match is 0xff (-1), so subtracting it adds 1 to that byte of `sum`.
123 |             let mut sum = _mm256_setzero_si256();
124 |             sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v1, lf));
125 |             sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v2, lf));
126 |             sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v3, lf));
127 |             sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v4, lf));
128 |             // Add up all byte counters to get the number of newlines in this chunk.
129 |             let sum = _mm256_sad_epu8(sum, _mm256_setzero_si256());
130 |             let sum = horizontal_sum_i64(sum);
131 |             // The seek steps over `line - line_stop` newlines and stops behind the next one,
132 |             let line_next = line - sum as CoordType;
133 |             if line_next < line_stop {
134 |                 break;
135 |             }
136 |             // so a chunk with at most that many newlines (even none) is skipped as a whole.
137 |             end = chunk_start;
138 |             remaining -= 128;
139 |             line = line_next;
140 |         }
141 |         // Same as above, but with a single 32 byte vector per iteration.
142 |         while remaining >= 32 {
143 |             let chunk_start = end.sub(32);
144 |             let v = _mm256_loadu_si256(chunk_start as *const _);
145 |             let c = _mm256_cmpeq_epi8(v, lf);
146 |             // Turn each 0xff match into a 1 and sum them up to count the newlines.
147 |             let ones = _mm256_and_si256(c, _mm256_set1_epi8(0x01));
148 |             let sum = _mm256_sad_epu8(ones, _mm256_setzero_si256());
149 |             let sum = horizontal_sum_i64(sum);
150 | 
151 |             let line_next = line - sum as CoordType;
152 |             if line_next < line_stop {
153 |                 break;
154 |             }
155 | 
156 |             end = chunk_start;
157 |             remaining -= 32;
158 |             line = line_next;
159 |         }
160 |         // The scalar loop finds the exact line start within the remaining bytes.
161 |         lines_bwd_fallback(beg, end, line, line_stop)
162 |     }
163 | }
164 | 
165 | #[cfg(target_arch = "aarch64")]
166 | unsafe fn lines_bwd_neon(
167 |     beg: *const u8,
168 |     mut end: *const u8,
169 |     mut line: CoordType,
170 |     line_stop: CoordType,
171 | ) -> (*const u8, CoordType) {
172 |     unsafe {
173 |         use std::arch::aarch64::*;
174 |         // NEON variant of `lines_bwd_fallback`: skips whole chunks that cannot contain the stop.
175 |         let lf = vdupq_n_u8(b'\n');
176 |         let line_stop = line_stop.min(line); // The target is never past the current line.
177 |         let mut remaining = end.offset_from_unsigned(beg);
178 |         // Process 64 bytes (four vectors) per iteration for as long as that many remain.
179 |         while remaining >= 64 {
180 |             let chunk_start = end.sub(64);
181 | 
182 |             let v1 = vld1q_u8(chunk_start.add(0));
183 |             let v2 = vld1q_u8(chunk_start.add(16));
184 |             let v3 = vld1q_u8(chunk_start.add(32));
185 |             let v4 = vld1q_u8(chunk_start.add(48));
186 |             // Each match is 0xff (-1), so subtracting it adds 1 to that byte of `sum`.
187 |             let mut sum = vdupq_n_u8(0);
188 |             sum = vsubq_u8(sum, vceqq_u8(v1, lf));
189 |             sum = vsubq_u8(sum, vceqq_u8(v2, lf));
190 |             sum = vsubq_u8(sum, vceqq_u8(v3, lf));
191 |             sum = vsubq_u8(sum, vceqq_u8(v4, lf));
192 |             // At most 4 per lane and 16 lanes, so the `u8` total cannot overflow.
193 |             let sum = vaddvq_u8(sum);
194 |             // The seek steps over `line - line_stop` newlines and stops behind the next one,
195 |             let line_next = line - sum as CoordType;
196 |             if line_next < line_stop {
197 |                 break;
198 |             }
199 |             // so a chunk with at most that many newlines (even none) is skipped as a whole.
200 |             end = chunk_start;
201 |             remaining -= 64;
202 |             line = line_next;
203 |         }
204 |         // Same as above, but with a single 16 byte vector per iteration.
205 |         while remaining >= 16 {
206 |             let chunk_start = end.sub(16);
207 |             let v = vld1q_u8(chunk_start);
208 |             let c = vceqq_u8(v, lf);
209 |             let c = vandq_u8(c, vdupq_n_u8(0x01));
210 |             let sum = vaddvq_u8(c);
211 | 
212 |             let line_next = line - sum as CoordType;
213 |             if line_next < line_stop {
214 |                 break;
215 |             }
216 | 
217 |             end = chunk_start;
218 |             remaining -= 16;
219 |             line = line_next;
220 |         }
221 |         // The scalar loop finds the exact line start within the remaining bytes.
222 |         lines_bwd_fallback(beg, end, line, line_stop)
223 |     }
224 | }
225 | 
226 | #[cfg(test)]
227 | mod test {
228 |     use super::*;
229 |     use crate::helpers::CoordType;
230 |     use crate::simd::test::*;
231 |     /// Compares `lines_bwd` with `reference_lines_bwd` for 1000 generated offsets and line indices.
232 |     #[test]
233 |     fn pseudo_fuzz() {
234 |         let text = generate_random_text(1024);
235 |         let lines = count_lines(&text);
236 |         let mut offset_rng = make_rng();
237 |         let mut line_rng = make_rng();
238 |         let mut line_distance_rng = make_rng();
239 | 
240 |         for _ in 0..1000 {
241 |             let offset = offset_rng() % (text.len() + 1);
242 |             let line_stop = line_distance_rng() % (lines + 1);
243 |             let line = line_stop + line_rng() % 100;
244 |             // `line` is never below `line_stop` here.
245 |             let line = line as CoordType;
246 |             let line_stop = line_stop as CoordType;
247 | 
248 |             let expected = reference_lines_bwd(text.as_bytes(), offset, line, line_stop);
249 |             let actual = lines_bwd(text.as_bytes(), offset, line, line_stop);
250 | 
251 |             assert_eq!(expected, actual);
252 |         }
253 |     }
254 |     /// Byte-by-byte reference implementation; it only seeks if `line >= line_stop`.
255 |     fn reference_lines_bwd(
256 |         haystack: &[u8],
257 |         mut offset: usize,
258 |         mut line: CoordType,
259 |         line_stop: CoordType,
260 |     ) -> (usize, CoordType) {
261 |         if line >= line_stop {
262 |             while offset > 0 {
263 |                 let c = haystack[offset - 1];
264 |                 if c == b'\n' {
265 |                     if line == line_stop {
266 |                         break;
267 |                     }
268 |                     line -= 1;
269 |                 }
270 |                 offset -= 1;
271 |             }
272 |         }
273 |         (offset, line)
274 |     }
275 |     #[test]
276 |     fn seeks_to_start() {
277 |         for i in 6..=11 {
278 |             let (off, line) = lines_bwd(b"Hello\nWorld\n", i, 123, 456);
279 |             assert_eq!(off, 6); // After "Hello\n"
280 |             assert_eq!(line, 123); // Still on the same line
281 |         }
282 |     }
283 | }
284 | 


--------------------------------------------------------------------------------
/src/simd/lines_fwd.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | use std::ptr;
  5 | 
  6 | use crate::helpers::CoordType;
  7 | 
  8 | /// Seeks forward through `haystack`, starting at `offset`, which is taken
  9 | /// to be on line index `line`, until line `line_stop` has been reached.
 10 | /// Returns the new offset together with the line index at that point.
 11 | ///
 12 | /// The returned offset is *past* the newline. If `line` is already at or past
 13 | /// `line_stop`, this returns immediately. An `offset` beyond the end is clamped.
 14 | pub fn lines_fwd(
 15 |     haystack: &[u8],
 16 |     offset: usize,
 17 |     line: CoordType,
 18 |     line_stop: CoordType,
 19 | ) -> (usize, CoordType) {
 20 |     let bounds = haystack.as_ptr_range();
 21 |     let clamped = offset.min(haystack.len());
 22 |     // SAFETY: `clamped <= haystack.len()`, so the start pointer stays in bounds.
 23 |     unsafe {
 24 |         let from = bounds.start.add(clamped);
 25 |         let (stop, line) = lines_fwd_raw(from, bounds.end, line, line_stop);
 26 |         (stop.offset_from_unsigned(bounds.start), line)
 27 |     }
 28 | }
 28 | 
 29 | unsafe fn lines_fwd_raw(
 30 |     beg: *const u8,
 31 |     end: *const u8,
 32 |     line: CoordType,
 33 |     line_stop: CoordType,
 34 | ) -> (*const u8, CoordType) { // Forwards to the implementation for the target architecture.
 35 |     #[cfg(target_arch = "x86_64")]
 36 |     return unsafe { LINES_FWD_DISPATCH(beg, end, line, line_stop) };
 37 |     // aarch64 calls its NEON implementation directly, without a dispatch pointer.
 38 |     #[cfg(target_arch = "aarch64")]
 39 |     return unsafe { lines_fwd_neon(beg, end, line, line_stop) };
 40 |     // Scalar fallback for every other architecture; unreachable on the two above.
 41 |     #[allow(unreachable_code)]
 42 |     return unsafe { lines_fwd_fallback(beg, end, line, line_stop) };
 43 | }
 44 | 
 45 | unsafe fn lines_fwd_fallback(
 46 |     mut beg: *const u8,
 47 |     end: *const u8,
 48 |     mut line: CoordType,
 49 |     line_stop: CoordType,
 50 | ) -> (*const u8, CoordType) {
 51 |     if line >= line_stop {
 52 |         return (beg, line);
 53 |     }
 54 |     while !ptr::eq(beg, end) {
 55 |         // Scalar scan: consume the byte first so that `beg` ends up *past* a newline.
 56 |         let byte = unsafe { *beg };
 57 |         beg = unsafe { beg.add(1) };
 58 |         if byte == b'\n' {
 59 |             line += 1;
 60 |             if line == line_stop {
 61 |                 break;
 62 |             }
 63 |         }
 64 |     }
 65 |     (beg, line)
 66 | }
 67 | 
 68 | #[cfg(target_arch = "x86_64")]
 69 | static mut LINES_FWD_DISPATCH: unsafe fn( // NOTE(review): accessed without synchronization; confirm callers are single-threaded.
 70 |     beg: *const u8,
 71 |     end: *const u8,
 72 |     line: CoordType,
 73 |     line_stop: CoordType,
 74 | ) -> (*const u8, CoordType) = lines_fwd_dispatch; // Starts out as the resolver, which then replaces it.
 75 | 
 76 | #[cfg(target_arch = "x86_64")]
 77 | unsafe fn lines_fwd_dispatch(
 78 |     beg: *const u8,
 79 |     end: *const u8,
 80 |     line: CoordType,
 81 |     line_stop: CoordType,
 82 | ) -> (*const u8, CoordType) { // Resolver: picks the AVX2 version if the CPU supports it.
 83 |     let func = if is_x86_feature_detected!("avx2") { lines_fwd_avx2 } else { lines_fwd_fallback };
 84 |     unsafe { LINES_FWD_DISPATCH = func }; // Later calls go straight to `func`.
 85 |     unsafe { func(beg, end, line, line_stop) } // Serve the current call as well.
 86 | }
 87 | 
 88 | #[cfg(target_arch = "x86_64")]
 89 | #[target_feature(enable = "avx2")]
 90 | unsafe fn lines_fwd_avx2(
 91 |     mut beg: *const u8,
 92 |     end: *const u8,
 93 |     mut line: CoordType,
 94 |     line_stop: CoordType,
 95 | ) -> (*const u8, CoordType) {
 96 |     unsafe {
 97 |         use std::arch::x86_64::*;
 98 |         // Adds up the four 64-bit lanes of `v`.
 99 |         #[inline(always)]
100 |         unsafe fn horizontal_sum_i64(v: __m256i) -> i64 {
101 |             unsafe {
102 |                 let hi = _mm256_extracti128_si256::<1>(v);
103 |                 let lo = _mm256_castsi256_si128(v);
104 |                 let sum = _mm_add_epi64(lo, hi);
105 |                 let shuf = _mm_shuffle_epi32::<0b11_10_11_10>(sum);
106 |                 let sum = _mm_add_epi64(sum, shuf);
107 |                 _mm_cvtsi128_si64(sum)
108 |             }
109 |         }
110 |         // AVX2 variant of `lines_fwd_fallback`: skips whole chunks that cannot contain the stop.
111 |         let lf = _mm256_set1_epi8(b'\n' as i8);
112 |         let mut remaining = end.offset_from_unsigned(beg);
113 |         // If the target line was reached already, only the fallback runs and returns at once.
114 |         if line < line_stop {
115 |             // Unrolling the loop by 4x speeds things up by >3x.
116 |             // It allows us to accumulate matches before doing a single `vpsadbw`.
117 |             while remaining >= 128 {
118 |                 let v1 = _mm256_loadu_si256(beg.add(0) as *const _);
119 |                 let v2 = _mm256_loadu_si256(beg.add(32) as *const _);
120 |                 let v3 = _mm256_loadu_si256(beg.add(64) as *const _);
121 |                 let v4 = _mm256_loadu_si256(beg.add(96) as *const _);
122 | 
123 |                 // `vpcmpeqb` leaves each comparison result byte as 0 or -1 (0xff).
124 |                 // This allows us to accumulate the comparisons by subtracting them.
125 |                 let mut sum = _mm256_setzero_si256();
126 |                 sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v1, lf));
127 |                 sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v2, lf));
128 |                 sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v3, lf));
129 |                 sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v4, lf));
130 | 
131 |                 // Calculate the total number of matches in this chunk.
132 |                 let sum = _mm256_sad_epu8(sum, _mm256_setzero_si256());
133 |                 let sum = horizontal_sum_i64(sum);
134 |                 // A chunk that reaches `line_stop` is left to the slower loops below.
135 |                 let line_next = line + sum as CoordType;
136 |                 if line_next >= line_stop {
137 |                     break;
138 |                 }
139 | 
140 |                 beg = beg.add(128);
141 |                 remaining -= 128;
142 |                 line = line_next;
143 |             }
144 |             // Same as above, but with a single 32 byte vector per iteration.
145 |             while remaining >= 32 {
146 |                 let v = _mm256_loadu_si256(beg as *const _);
147 |                 let c = _mm256_cmpeq_epi8(v, lf);
148 | 
149 |                 // If you ask an LLM, the best way to do this is
150 |                 // to do a `vpmovmskb` followed by `popcnt`.
151 |                 // On contemporary hardware that's a bad idea though.
152 |                 let ones = _mm256_and_si256(c, _mm256_set1_epi8(0x01));
153 |                 let sum = _mm256_sad_epu8(ones, _mm256_setzero_si256());
154 |                 let sum = horizontal_sum_i64(sum);
155 | 
156 |                 let line_next = line + sum as CoordType;
157 |                 if line_next >= line_stop {
158 |                     break;
159 |                 }
160 | 
161 |                 beg = beg.add(32);
162 |                 remaining -= 32;
163 |                 line = line_next;
164 |             }
165 |         }
166 |         // The scalar loop finds the exact position within the remaining bytes.
167 |         lines_fwd_fallback(beg, end, line, line_stop)
168 |     }
169 | }
170 | 
171 | #[cfg(target_arch = "aarch64")]
172 | unsafe fn lines_fwd_neon(
173 |     mut beg: *const u8,
174 |     end: *const u8,
175 |     mut line: CoordType,
176 |     line_stop: CoordType,
177 | ) -> (*const u8, CoordType) {
178 |     unsafe {
179 |         use std::arch::aarch64::*;
180 |         // NEON variant of `lines_fwd_fallback`: skips whole chunks that cannot contain the stop.
181 |         let lf = vdupq_n_u8(b'\n');
182 |         let mut remaining = end.offset_from_unsigned(beg);
183 |         // If the target line was reached already, only the fallback runs and returns at once.
184 |         if line < line_stop {
185 |             while remaining >= 64 {
186 |                 let v1 = vld1q_u8(beg.add(0));
187 |                 let v2 = vld1q_u8(beg.add(16));
188 |                 let v3 = vld1q_u8(beg.add(32));
189 |                 let v4 = vld1q_u8(beg.add(48));
190 | 
191 |                 // `vceqq_u8` leaves each comparison result byte as 0 or -1 (0xff).
192 |                 // This allows us to accumulate the comparisons by subtracting them.
193 |                 let mut sum = vdupq_n_u8(0);
194 |                 sum = vsubq_u8(sum, vceqq_u8(v1, lf));
195 |                 sum = vsubq_u8(sum, vceqq_u8(v2, lf));
196 |                 sum = vsubq_u8(sum, vceqq_u8(v3, lf));
197 |                 sum = vsubq_u8(sum, vceqq_u8(v4, lf));
198 |                 // At most 4 per lane and 16 lanes, so the `u8` total cannot overflow.
199 |                 let sum = vaddvq_u8(sum);
200 |                 // A chunk that reaches `line_stop` is left to the slower loops below.
201 |                 let line_next = line + sum as CoordType;
202 |                 if line_next >= line_stop {
203 |                     break;
204 |                 }
205 | 
206 |                 beg = beg.add(64);
207 |                 remaining -= 64;
208 |                 line = line_next;
209 |             }
210 |             // Same as above, but with a single 16 byte vector per iteration.
211 |             while remaining >= 16 {
212 |                 let v = vld1q_u8(beg);
213 |                 let c = vceqq_u8(v, lf);
214 |                 let c = vandq_u8(c, vdupq_n_u8(0x01));
215 |                 let sum = vaddvq_u8(c);
216 | 
217 |                 let line_next = line + sum as CoordType;
218 |                 if line_next >= line_stop {
219 |                     break;
220 |                 }
221 | 
222 |                 beg = beg.add(16);
223 |                 remaining -= 16;
224 |                 line = line_next;
225 |             }
226 |         }
227 |         // The scalar loop finds the exact position within the remaining bytes.
228 |         lines_fwd_fallback(beg, end, line, line_stop)
229 |     }
230 | }
231 | 
232 | #[cfg(test)]
233 | mod test {
234 |     use super::*;
235 |     use crate::helpers::CoordType;
236 |     use crate::simd::test::*;
237 |     /// Compares `lines_fwd` with `reference_lines_fwd` for 1000 generated offsets and line indices.
238 |     #[test]
239 |     fn pseudo_fuzz() {
240 |         let text = generate_random_text(1024);
241 |         let lines = count_lines(&text);
242 |         let mut offset_rng = make_rng();
243 |         let mut line_rng = make_rng();
244 |         let mut line_distance_rng = make_rng();
245 | 
246 |         for _ in 0..1000 {
247 |             let offset = offset_rng() % (text.len() + 1);
248 |             let line = line_rng() % 100;
249 |             let line_stop = line + line_distance_rng() % (lines + 1);
250 |             // `line_stop` is never below `line` here.
251 |             let line = line as CoordType;
252 |             let line_stop = line_stop as CoordType;
253 | 
254 |             let expected = reference_lines_fwd(text.as_bytes(), offset, line, line_stop);
255 |             let actual = lines_fwd(text.as_bytes(), offset, line, line_stop);
256 | 
257 |             assert_eq!(expected, actual);
258 |         }
259 |     }
260 |     /// Byte-by-byte reference implementation that `pseudo_fuzz` compares against.
261 |     fn reference_lines_fwd(
262 |         haystack: &[u8],
263 |         mut offset: usize,
264 |         mut line: CoordType,
265 |         line_stop: CoordType,
266 |     ) -> (usize, CoordType) {
267 |         if line < line_stop {
268 |             while offset < haystack.len() {
269 |                 let c = haystack[offset];
270 |                 offset += 1;
271 |                 if c == b'\n' {
272 |                     line += 1;
273 |                     if line == line_stop {
274 |                         break;
275 |                     }
276 |                 }
277 |             }
278 |         }
279 |         (offset, line)
280 |     }
281 | }
282 | 


--------------------------------------------------------------------------------
/src/simd/memchr2.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | //! `memchr`, but with two needles.
  5 | 
  6 | use std::ptr;
  7 | 
  8 | /// `memchr`, but with two needles.
  9 | ///
 10 | /// Searches `haystack` from index `offset` onwards and returns the index of
 11 | /// the first byte equal to `needle1` or `needle2`. An `offset` beyond the end
 12 | /// is clamped; if nothing matches, `haystack.len()` is returned.
 13 | pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8], offset: usize) -> usize {
 14 |     let bounds = haystack.as_ptr_range();
 15 |     let start = offset.min(haystack.len());
 16 |     // SAFETY: `start <= haystack.len()`, so `bounds.start.add(start)` cannot leave the slice.
 17 |     unsafe {
 18 |         let hit = memchr2_raw(needle1, needle2, bounds.start.add(start), bounds.end);
 19 |         hit.offset_from_unsigned(bounds.start)
 20 |     }
 21 | }
 22 | 
 23 | unsafe fn memchr2_raw(needle1: u8, needle2: u8, beg: *const u8, end: *const u8) -> *const u8 {
 24 |     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 25 |     return unsafe { MEMCHR2_DISPATCH(needle1, needle2, beg, end) };
 26 |     // On aarch64 the NEON implementation is called directly, without a dispatch pointer.
 27 |     #[cfg(target_arch = "aarch64")]
 28 |     return unsafe { memchr2_neon(needle1, needle2, beg, end) };
 29 |     // Only reached when neither of the `cfg`-gated returns above is compiled in.
 30 |     #[allow(unreachable_code)]
 31 |     return unsafe { memchr2_fallback(needle1, needle2, beg, end) };
 32 | }
 33 | 
 34 | /// Portable scalar implementation, also used to finish off SIMD tails.
 35 | ///
 36 | /// Returns a pointer to the first byte in `beg..end` matching a needle, or `end`.
 37 | unsafe fn memchr2_fallback(
 38 |     needle1: u8,
 39 |     needle2: u8,
 40 |     mut beg: *const u8,
 41 |     end: *const u8,
 42 | ) -> *const u8 {
 43 |     unsafe {
 44 |         // `beg` is only dereferenced after it was confirmed to differ from `end`.
 45 |         while !ptr::eq(beg, end) && *beg != needle1 && *beg != needle2 {
 46 |             beg = beg.add(1);
 47 |         }
 48 |         beg
 49 |     }
 50 | }
 51 | 
 52 | // In order to make `memchr2_raw` slim and fast, we use a function pointer that updates
 53 | // itself to the correct implementation on the first call. This reduces binary size.
 54 | // It would also reduce branches if we had >2 implementations (a jump still needs to be predicted).
 55 | // NOTE that this ONLY works if Control Flow Guard is disabled on Windows.
 56 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 57 | static mut MEMCHR2_DISPATCH: unsafe fn(
 58 |     needle1: u8,
 59 |     needle2: u8,
 60 |     beg: *const u8,
 61 |     end: *const u8,
 62 | ) -> *const u8 = memchr2_dispatch;
 63 | 
 64 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 65 | unsafe fn memchr2_dispatch(needle1: u8, needle2: u8, beg: *const u8, end: *const u8) -> *const u8 {
 66 |     let func = if is_x86_feature_detected!("avx2") { memchr2_avx2 } else { memchr2_fallback };
 67 |     unsafe { MEMCHR2_DISPATCH = func }; // subsequent calls go straight to `func`
 68 |     unsafe { func(needle1, needle2, beg, end) } // the current call still needs its result
 69 | }
 70 | 
 71 | // FWIW, I found that adding support for AVX512 was not useful at the time,
 72 | // as it only marginally improved file load performance by <5%.
 73 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 74 | #[target_feature(enable = "avx2")]
 75 | unsafe fn memchr2_avx2(needle1: u8, needle2: u8, mut beg: *const u8, end: *const u8) -> *const u8 {
 76 |     unsafe {
 77 |         #[cfg(target_arch = "x86")]
 78 |         use std::arch::x86::*;
 79 |         #[cfg(target_arch = "x86_64")]
 80 |         use std::arch::x86_64::*;
 81 | 
 82 |         let n1 = _mm256_set1_epi8(needle1 as i8);
 83 |         let n2 = _mm256_set1_epi8(needle2 as i8);
 84 |         let mut remaining = end.offset_from_unsigned(beg);
 85 |         // Scan 32 bytes per iteration for as long as a full vector is left.
 86 |         while remaining >= 32 {
 87 |             let v = _mm256_loadu_si256(beg as *const _);
 88 |             let a = _mm256_cmpeq_epi8(v, n1);
 89 |             let b = _mm256_cmpeq_epi8(v, n2);
 90 |             let c = _mm256_or_si256(a, b);
 91 |             let m = _mm256_movemask_epi8(c) as u32;
 92 |             // Bit `i` of `m` is set if byte `i` of the vector equals either needle.
 93 |             if m != 0 {
 94 |                 return beg.add(m.trailing_zeros() as usize);
 95 |             }
 96 | 
 97 |             beg = beg.add(32);
 98 |             remaining -= 32;
 99 |         }
100 |         // Fewer than 32 bytes remain: let the scalar loop handle the tail.
101 |         memchr2_fallback(needle1, needle2, beg, end)
102 |     }
103 | }
104 | 
105 | #[cfg(target_arch = "aarch64")]
106 | unsafe fn memchr2_neon(needle1: u8, needle2: u8, mut beg: *const u8, end: *const u8) -> *const u8 {
107 |     unsafe {
108 |         use std::arch::aarch64::*;
109 | 
110 |         if end.offset_from_unsigned(beg) >= 16 {
111 |             let n1 = vdupq_n_u8(needle1);
112 |             let n2 = vdupq_n_u8(needle2);
113 | 
114 |             loop {
115 |                 let v = vld1q_u8(beg as *const _);
116 |                 let a = vceqq_u8(v, n1);
117 |                 let b = vceqq_u8(v, n2);
118 |                 let c = vorrq_u8(a, b);
119 | 
120 |                 // https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
121 |                 let m = vreinterpretq_u16_u8(c);
122 |                 let m = vshrn_n_u16(m, 4);
123 |                 let m = vreinterpret_u64_u8(m);
124 |                 let m = vget_lane_u64(m, 0);
125 |                 // `m` holds 4 bits per input byte, hence the `>> 2` to get a byte index.
126 |                 if m != 0 {
127 |                     return beg.add(m.trailing_zeros() as usize >> 2);
128 |                 }
129 | 
130 |                 beg = beg.add(16);
131 |                 if end.offset_from_unsigned(beg) < 16 {
132 |                     break;
133 |                 }
134 |             }
135 |         }
136 |         // Fewer than 16 bytes remain: let the scalar loop handle the tail.
137 |         memchr2_fallback(needle1, needle2, beg, end)
138 |     }
139 | }
140 | 
141 | #[cfg(test)]
142 | mod tests {
143 |     use std::slice;
144 | 
145 |     use super::*;
146 |     use crate::sys;
147 | 
148 |     #[test]
149 |     fn test_empty() {
150 |         assert_eq!(memchr2(b'a', b'b', b"", 0), 0);
151 |     }
152 | 
153 |     #[test]
154 |     fn test_basic() {
155 |         let haystack = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
156 |         let haystack = &haystack[..43];
157 |         // 43 is not a multiple of 16 or 32, so the SIMD paths end in the scalar tail.
158 |         assert_eq!(memchr2(b'a', b'z', haystack, 0), 0);
159 |         assert_eq!(memchr2(b'p', b'q', haystack, 0), 15);
160 |         assert_eq!(memchr2(b'Q', b'Z', haystack, 0), 42);
161 |         assert_eq!(memchr2(b'0', b'9', haystack, 0), haystack.len());
162 |     }
163 | 
164 |     // Matches before `offset` must be ignored; an out-of-range `offset` is clamped.
165 |     #[test]
166 |     fn test_with_offset() {
167 |         let haystack = b"abcdefghabcdefghabcdefghabcdefghabcdefgh";
168 |         // `abcdefgh` repeated 5 times (40 bytes), so `a`/`b` sit at 0/1, 8/9, 16/17, ...
169 |         assert_eq!(memchr2(b'a', b'b', haystack, 0), 0);
170 |         assert_eq!(memchr2(b'a', b'b', haystack, 1), 1);
171 |         assert_eq!(memchr2(b'a', b'b', haystack, 2), 8);
172 |         assert_eq!(memchr2(b'a', b'b', haystack, 9), 9);
173 |         assert_eq!(memchr2(b'a', b'b', haystack, 16), 16);
174 |         assert_eq!(memchr2(b'a', b'b', haystack, 41), 40);
175 |     }
176 | 
177 |     // Test memory access safety at page boundaries.
178 |     // The test is a success if it doesn't segfault.
179 |     #[test]
180 |     fn test_page_boundary() {
181 |         let page = unsafe {
182 |             const PAGE_SIZE: usize = 64 * 1024; // 64 KiB to cover many architectures.
183 | 
184 |             // 3 pages: uncommitted, committed, uncommitted
185 |             let ptr = sys::virtual_reserve(PAGE_SIZE * 3).unwrap();
186 |             sys::virtual_commit(ptr.add(PAGE_SIZE), PAGE_SIZE).unwrap();
187 |             slice::from_raw_parts_mut(ptr.add(PAGE_SIZE).as_ptr(), PAGE_SIZE)
188 |         };
189 | 
190 |         page.fill(b'a');
191 | 
192 |         // Test if it seeks beyond the page boundary.
193 |         assert_eq!(memchr2(b'\0', b'\0', &page[page.len() - 40..], 0), 40);
194 |         // Test if it seeks before the page boundary for the masked/partial load.
195 |         assert_eq!(memchr2(b'\0', b'\0', &page[..10], 0), 10);
196 |     }
197 | }
198 | 


--------------------------------------------------------------------------------
/src/simd/mod.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Licensed under the MIT License.
 3 | 
 4 | //! Provides various high-throughput utilities.
 5 | 
 6 | pub mod lines_bwd;
 7 | pub mod lines_fwd;
 8 | mod memchr2;
 9 | mod memset;
10 | 
11 | pub use lines_bwd::*;
12 | pub use lines_fwd::*;
13 | pub use memchr2::*;
14 | pub use memset::*;
15 | 
16 | #[cfg(test)]
17 | mod test {
18 |     /// Creates a deterministic pseudo-random number generator.
19 |     ///
20 |     /// This is Knuth's MMIX LCG; every generator starts from the same seed.
21 |     pub fn make_rng() -> impl FnMut() -> usize {
22 |         const MULTIPLIER: u64 = 6364136223846793005;
23 |         const INCREMENT: u64 = 1442695040888963407;
24 | 
25 |         let mut state = INCREMENT;
26 |         move || {
27 |             state = state.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);
28 |             state as usize
29 |         }
30 |     }
31 | 
32 |     /// Produces `len` characters drawn from hex digits and `\n` (4 of 20 alphabet slots).
33 |     pub fn generate_random_text(len: usize) -> String {
34 |         const ALPHABET: &[u8; 20] = b"0123456789abcdef\n\n\n\n";
35 |         let mut rng = make_rng();
36 |         (0..len).map(|_| ALPHABET[rng() % ALPHABET.len()] as char).collect()
37 |     }
38 | 
39 |     /// Number of lines in `text`, as defined by [`str::lines`].
40 |     pub fn count_lines(text: &str) -> usize {
41 |         text.lines().count()
42 |     }
43 | }
44 | 


--------------------------------------------------------------------------------
/src/sys/mod.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Licensed under the MIT License.
 3 | 
 4 | //! Platform abstractions.
 5 | 
 6 | #[cfg(unix)]
 7 | mod unix;
 8 | #[cfg(windows)]
 9 | mod windows;
10 | 
11 | #[cfg(not(windows))]
12 | pub use std::fs::canonicalize;
13 | 
14 | #[cfg(unix)]
15 | pub use unix::*;
16 | #[cfg(windows)]
17 | pub use windows::*;
18 | 


--------------------------------------------------------------------------------
/src/unicode/mod.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Licensed under the MIT License.
 3 | 
 4 | //! Everything related to Unicode lives here.
 5 | 
 6 | mod measurement;
 7 | mod tables;
 8 | mod utf8;
 9 | 
10 | pub use measurement::*;
11 | pub use utf8::*;
12 | 


--------------------------------------------------------------------------------
/src/unicode/utf8.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Licensed under the MIT License.
  3 | 
  4 | use std::{hint, iter};
  5 | 
  6 | /// An iterator over UTF-8 encoded characters.
  7 | ///
  8 | /// This differs from [`std::str::Chars`] in that it works on unsanitized
  9 | /// byte slices and transparently replaces invalid UTF-8 sequences with U+FFFD.
 10 | ///
 11 | /// This follows ICU's bitmask approach for `U8_NEXT_OR_FFFD` relatively
 12 | /// closely. This is important for compatibility, because it implements the
 13 | /// WHATWG recommendation for UTF-8 error recovery. It's also helpful, because
 14 | /// the excellent folks at ICU have probably spent a lot of time optimizing it.
 15 | #[derive(Clone, Copy)]
 16 | pub struct Utf8Chars<'a> {
 17 |     source: &'a [u8], // The bytes being decoded; need not be valid UTF-8.
 18 |     offset: usize,    // Byte index into `source` of the next byte to decode.
 19 | }
 20 | 
 21 | impl<'a> Utf8Chars<'a> {
 22 |     /// Creates a new `Utf8Chars` iterator starting at the given `offset`.
 23 |     pub fn new(source: &'a [u8], offset: usize) -> Self {
 24 |         Self { source, offset }
 25 |     }
 26 | 
 27 |     /// Returns the byte slice this iterator was created with.
 28 |     pub fn source(&self) -> &'a [u8] {
 29 |         self.source
 30 |     }
 31 | 
 32 |     /// Checks if the source is empty (this ignores the current offset).
 33 |     pub fn is_empty(&self) -> bool {
 34 |         self.source.is_empty()
 35 |     }
 36 | 
 37 |     /// Returns the length of the source in bytes (this ignores the current offset).
 38 |     pub fn len(&self) -> usize {
 39 |         self.source.len()
 40 |     }
 41 | 
 42 |     /// Returns the current offset in the byte slice.
 43 |     ///
 44 |     /// This will be past the last returned character.
 45 |     pub fn offset(&self) -> usize {
 46 |         self.offset
 47 |     }
 48 | 
 49 |     /// Sets the offset to continue iterating from. The value is not range-checked.
 50 |     pub fn seek(&mut self, offset: usize) {
 51 |         self.offset = offset;
 52 |     }
 53 | 
 54 |     /// Returns true if `next` will return another character.
 55 |     pub fn has_next(&self) -> bool {
 56 |         self.offset < self.source.len()
 57 |     }
 58 | 
 59 |     // I found that on mixed 50/50 English/Non-English text,
 60 |     // performance actually suffers when this gets inlined.
 61 |     #[cold]
 62 |     fn next_slow(&mut self, c: u8) -> char {
 63 |         if self.offset >= self.source.len() {
 64 |             return Self::fffd();
 65 |         }
 66 |         // From here on `self.offset` indexes the byte following `c`, which is known to exist.
 67 |         let mut cp = c as u32;
 68 | 
 69 |         if cp < 0xE0 {
 70 |             // UTF8-2 = %xC2-DF UTF8-tail
 71 | 
 72 |             if cp < 0xC2 {
 73 |                 return Self::fffd();
 74 |             }
 75 | 
 76 |             // The lead byte is 110xxxxx
 77 |             // -> Strip off the 110 prefix
 78 |             cp &= !0xE0;
 79 |         } else if cp < 0xF0 {
 80 |             // UTF8-3 =
 81 |             //   %xE0    %xA0-BF   UTF8-tail
 82 |             //   %xE1-EC UTF8-tail UTF8-tail
 83 |             //   %xED    %x80-9F   UTF8-tail
 84 |             //   %xEE-EF UTF8-tail UTF8-tail
 85 | 
 86 |             // This is a pretty neat approach seen in ICU4C, because it's a 1:1 translation of the RFC.
 87 |             // I don't understand why others don't do the same thing. It's rather performant.
 88 |             const BITS_80_9F: u8 = 1 << 0b100; // 0x80-9F, aka 0b100xxxxx
 89 |             const BITS_A0_BF: u8 = 1 << 0b101; // 0xA0-BF, aka 0b101xxxxx
 90 |             const BITS_BOTH: u8 = BITS_80_9F | BITS_A0_BF;
 91 |             const LEAD_TRAIL1_BITS: [u8; 16] = [
 92 |                 //             v-- lead byte
 93 |                 BITS_A0_BF, // 0xE0
 94 |                 BITS_BOTH,  // 0xE1
 95 |                 BITS_BOTH,  // 0xE2
 96 |                 BITS_BOTH,  // 0xE3
 97 |                 BITS_BOTH,  // 0xE4
 98 |                 BITS_BOTH,  // 0xE5
 99 |                 BITS_BOTH,  // 0xE6
100 |                 BITS_BOTH,  // 0xE7
101 |                 BITS_BOTH,  // 0xE8
102 |                 BITS_BOTH,  // 0xE9
103 |                 BITS_BOTH,  // 0xEA
104 |                 BITS_BOTH,  // 0xEB
105 |                 BITS_BOTH,  // 0xEC
106 |                 BITS_80_9F, // 0xED
107 |                 BITS_BOTH,  // 0xEE
108 |                 BITS_BOTH,  // 0xEF
109 |             ];
110 | 
111 |             // The lead byte is 1110xxxx
112 |             // -> Strip off the 1110 prefix
113 |             cp &= !0xF0;
114 | 
115 |             let t = self.source[self.offset] as u32;
116 |             if LEAD_TRAIL1_BITS[cp as usize] & (1 << (t >> 5)) == 0 {
117 |                 return Self::fffd();
118 |             }
119 |             cp = (cp << 6) | (t & 0x3F);
120 | 
121 |             self.offset += 1;
122 |             if self.offset >= self.source.len() {
123 |                 return Self::fffd();
124 |             }
125 |         } else {
126 |             // UTF8-4 =
127 |             //   %xF0    %x90-BF   UTF8-tail UTF8-tail
128 |             //   %xF1-F3 UTF8-tail UTF8-tail UTF8-tail
129 |             //   %xF4    %x80-8F   UTF8-tail UTF8-tail
130 | 
131 |             // This is similar to the above, but with the indices flipped:
132 |             // The trail byte is the index and the lead byte mask is the value.
133 |             // This is because the split at 0x90 requires more bits than fit into an u8.
134 |             const TRAIL1_LEAD_BITS: [u8; 16] = [
135 |                 // --------- 0xF4 lead
136 |                 // |         ...
137 |                 // |   +---- 0xF0 lead
138 |                 // v   v
139 |                 0b_00000, //
140 |                 0b_00000, //
141 |                 0b_00000, //
142 |                 0b_00000, //
143 |                 0b_00000, //
144 |                 0b_00000, //
145 |                 0b_00000, // trail bytes:
146 |                 0b_00000, //
147 |                 0b_11110, // 0x80-8F -> 0x80-8F can be preceded by 0xF1-F4
148 |                 0b_01111, // 0x90-9F -v
149 |                 0b_01111, // 0xA0-AF -> 0x90-BF can be preceded by 0xF0-F3
150 |                 0b_01111, // 0xB0-BF -^
151 |                 0b_00000, //
152 |                 0b_00000, //
153 |                 0b_00000, //
154 |                 0b_00000, //
155 |             ];
156 | 
157 |             // The lead byte *may* be 11110xxx, but could also be e.g. 11111xxx.
158 |             // -> Only strip off the 1111 prefix
159 |             cp &= !0xF0;
160 | 
161 |             // Now we can verify if it's actually <= 0xF4.
162 |             // Curiously, this if condition does a lot of heavy lifting for
163 |             // performance (+13%). I think it's just a coincidence though.
164 |             if cp > 4 {
165 |                 return Self::fffd();
166 |             }
167 | 
168 |             let t = self.source[self.offset] as u32;
169 |             if TRAIL1_LEAD_BITS[(t >> 4) as usize] & (1 << cp) == 0 {
170 |                 return Self::fffd();
171 |             }
172 |             cp = (cp << 6) | (t & 0x3F);
173 | 
174 |             self.offset += 1;
175 |             if self.offset >= self.source.len() {
176 |                 return Self::fffd();
177 |             }
178 | 
179 |             // UTF8-tail = %x80-BF
180 |             let t = (self.source[self.offset] as u32).wrapping_sub(0x80);
181 |             if t > 0x3F {
182 |                 return Self::fffd();
183 |             }
184 |             cp = (cp << 6) | t;
185 | 
186 |             self.offset += 1;
187 |             if self.offset >= self.source.len() {
188 |                 return Self::fffd();
189 |             }
190 |         }
191 | 
192 |         // SAFETY: All branches above check for `if self.offset >= self.source.len()`
193 |         // one way or another. This is here because the compiler doesn't get it otherwise.
194 |         unsafe { hint::assert_unchecked(self.offset < self.source.len()) };
195 | 
196 |         // UTF8-tail = %x80-BF
197 |         let t = (self.source[self.offset] as u32).wrapping_sub(0x80);
198 |         if t > 0x3F {
199 |             return Self::fffd();
200 |         }
201 |         cp = (cp << 6) | t;
202 | 
203 |         self.offset += 1;
204 | 
205 |         // SAFETY: If `cp` wasn't a valid codepoint, we already returned U+FFFD above.
206 |         unsafe { char::from_u32_unchecked(cp) }
207 |     }
208 | 
209 |     // This simultaneously serves as a `cold_path` marker.
210 |     // It improves performance by ~5% and reduces code size.
211 |     #[cold]
212 |     #[inline(always)]
213 |     fn fffd() -> char {
214 |         '\u{FFFD}'
215 |     }
216 | }
217 | 
218 | impl Iterator for Utf8Chars<'_> {
219 |     type Item = char;
220 | 
221 |     #[inline]
222 |     fn next(&mut self) -> Option<Self::Item> {
223 |         if self.offset >= self.source.len() {
224 |             return None;
225 |         }
226 | 
227 |         let c = self.source[self.offset];
228 |         self.offset += 1;
229 | 
230 |         // Fast-passing ASCII allows this function to be trivially inlined everywhere,
231 |         // as the full decoder is a little too large for that.
232 |         if (c & 0x80) == 0 {
233 |             // UTF8-1 = %x00-7F
234 |             Some(c as char)
235 |         } else {
236 |             // Weirdly enough, adding a hint here to assert that `next_slow`
237 |             // only returns codepoints >= 0x80 makes `ucd` ~5% slower.
238 |             Some(self.next_slow(c))
239 |         }
240 |     }
241 | 
242 |     #[inline]
243 |     fn size_hint(&self) -> (usize, Option<usize>) {
244 |         // `new`/`seek` accept offsets past the end, so saturate instead of underflowing.
245 |         // Lower bound: all remaining bytes form 4-byte sequences. Upper bound: all are ASCII.
246 |         let remaining = self.source.len().saturating_sub(self.offset);
247 |         (remaining / 4, Some(remaining))
248 |     }
249 | }
250 | 
251 | impl iter::FusedIterator for Utf8Chars<'_> {} // exhausted iterators stay exhausted (absent `seek`)
252 | 
253 | #[cfg(test)]
254 | mod tests {
255 |     use super::*;
256 | 
257 |     #[test]
258 |     fn test_broken_utf8() {
259 |         let bytes = [b'a', 0xED, 0xA0, 0x80, b'b'];
260 |         let mut decoder = Utf8Chars::new(&bytes, 0);
261 |         let mut consumed = 0;
262 |         for chunk in bytes.utf8_chunks() {
263 |             // Valid text must decode identically to the standard library.
264 |             for expected in chunk.valid().chars() {
265 |                 consumed += expected.len_utf8();
266 |                 assert_eq!((decoder.next(), decoder.offset()), (Some(expected), consumed));
267 |             }
268 |             let invalid_len = chunk.invalid().len();
269 |             if invalid_len > 0 {
270 |                 consumed += invalid_len;
271 |                 assert_eq!((decoder.next(), decoder.offset()), (Some('\u{FFFD}'), consumed));
272 |             }
273 |         }
274 |     }
275 | }
276 | 


--------------------------------------------------------------------------------
/tools/grapheme-table-gen/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "grapheme-table-gen"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | 
 6 | [dependencies]
 7 | anyhow = "1.0.95"
 8 | chrono = "0.4.39"
 9 | indoc = "2.0.5"
10 | pico-args = { version = "0.5.0", features = ["eq-separator"] }
11 | rayon = "1.10.0"
12 | roxmltree = { version = "0.20.0", default-features = false, features = ["std"] }
13 | 


--------------------------------------------------------------------------------
/tools/grapheme-table-gen/README.md:
--------------------------------------------------------------------------------
 1 | # Grapheme Table Generator
 2 | 
 3 | This tool processes Unicode Character Database (UCD) XML files to generate efficient, multi-stage trie lookup tables for properties relevant to terminal applications:
 4 | * Grapheme cluster breaking rules
 5 | * Line breaking rules (optional)
 6 | * Character width properties
 7 | 
 8 | ## Usage
 9 | 
10 | * Download and extract [ucd.nounihan.grouped.zip](https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip)
11 | * Run some equivalent of:
12 |   ```sh
13 |   grapheme-table-gen --lang=rust --extended --no-ambiguous --line-breaks path/to/ucd.nounihan.grouped.xml
14 |   ```
15 | * Place the result in `src/unicode/tables.rs`
16 | 


--------------------------------------------------------------------------------