├── .cargo
├── release-windows-ms.toml
└── release.toml
├── .github
└── workflows
│ ├── ci.yml
│ └── winget.yml
├── .gitignore
├── .pipelines
├── release.yml
└── tsa.json
├── .vscode
├── launch.json
└── tasks.json
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── README.md
├── SECURITY.md
├── assets
├── Microsoft_logo_(1980).svg
├── com.microsoft.edit.desktop
├── edit.svg
├── edit_hero_image.png
├── editing-traces
│ ├── README.md
│ └── rustcode.json.zst
├── manpage
│ └── edit.1
├── microsoft.png
└── microsoft.sixel
├── benches
└── lib.rs
├── build.rs
├── rust-toolchain.toml
├── rustfmt.toml
├── src
├── apperr.rs
├── arena
│ ├── debug.rs
│ ├── mod.rs
│ ├── release.rs
│ ├── scratch.rs
│ └── string.rs
├── base64.rs
├── bin
│ └── edit
│ │ ├── documents.rs
│ │ ├── draw_editor.rs
│ │ ├── draw_filepicker.rs
│ │ ├── draw_menubar.rs
│ │ ├── draw_statusbar.rs
│ │ ├── edit.exe.manifest
│ │ ├── localization.rs
│ │ ├── main.rs
│ │ └── state.rs
├── buffer
│ ├── gap_buffer.rs
│ ├── line_cache.rs
│ ├── mod.rs
│ └── navigation.rs
├── cell.rs
├── document.rs
├── framebuffer.rs
├── fuzzy.rs
├── hash.rs
├── helpers.rs
├── icu.rs
├── input.rs
├── lib.rs
├── oklab.rs
├── path.rs
├── simd
│ ├── lines_bwd.rs
│ ├── lines_fwd.rs
│ ├── memchr2.rs
│ ├── memset.rs
│ └── mod.rs
├── sys
│ ├── mod.rs
│ ├── unix.rs
│ └── windows.rs
├── tui.rs
├── unicode
│ ├── measurement.rs
│ ├── mod.rs
│ ├── tables.rs
│ └── utf8.rs
└── vt.rs
└── tools
└── grapheme-table-gen
├── Cargo.lock
├── Cargo.toml
├── README.md
└── src
├── main.rs
└── rules.rs
/.cargo/release-windows-ms.toml:
--------------------------------------------------------------------------------
1 | # vvv The following parts are identical to release.toml vvv
2 |
3 | # Avoid linking with vcruntime140.dll by statically linking everything,
4 | # and then explicitly linking with ucrtbase.dll dynamically.
5 | # We do this, because vcruntime140.dll is an optional Windows component.
6 | [target.'cfg(target_os = "windows")']
7 | rustflags = [
8 | "-Ctarget-feature=+crt-static",
9 | "-Clink-args=/DEFAULTLIB:ucrt.lib",
10 | "-Clink-args=/NODEFAULTLIB:vcruntime.lib",
11 | "-Clink-args=/NODEFAULTLIB:msvcrt.lib",
12 | "-Clink-args=/NODEFAULTLIB:libucrt.lib",
13 | ]
14 |
15 | # The backtrace code for panics in Rust is almost as large as the entire editor.
16 | # = Huge reduction in binary size by removing all that.
17 | [unstable]
18 | build-std = ["std", "panic_abort"]
19 | build-std-features = ["panic_immediate_abort", "optimize_for_size"]
20 |
21 | # vvv The following parts are specific to official Windows builds. vvv
22 | # (The use of internal registries, security features, etc., are mandatory.)
23 |
24 | # Enable shadow stacks: https://learn.microsoft.com/en-us/cpp/build/reference/cetcompat
25 | [target.'cfg(all(target_os = "windows", any(target_arch = "x86", target_arch = "x86_64")))']
26 | rustflags = ["-Clink-args=/DYNAMICBASE", "-Clink-args=/CETCOMPAT"]
27 |
28 | [registries.Edit_PublicPackages]
29 | index = "sparse+https://pkgs.dev.azure.com/microsoft/Dart/_packaging/Edit_PublicPackages/Cargo/index/"
30 |
31 | [source.crates-io]
32 | replace-with = "Edit_PublicPackages"
33 |
--------------------------------------------------------------------------------
/.cargo/release.toml:
--------------------------------------------------------------------------------
1 | # The following is not used by default via .cargo/config.toml,
2 | # because `build-std-features` cannot be keyed by profile.
3 | # This breaks the bench profile which doesn't support panic=abort.
4 | # See: https://github.com/rust-lang/cargo/issues/11214
5 | # See: https://github.com/rust-lang/cargo/issues/13894
6 |
7 | # Avoid linking with vcruntime140.dll by statically linking everything,
8 | # and then explicitly linking with ucrtbase.dll dynamically.
9 | # We do this, because vcruntime140.dll is an optional Windows component.
10 | [target.'cfg(all(target_os = "windows", target_env = "msvc"))']
11 | rustflags = [
12 | "-Ctarget-feature=+crt-static",
13 | "-Clink-args=/DEFAULTLIB:ucrt.lib",
14 | "-Clink-args=/NODEFAULTLIB:vcruntime.lib",
15 | "-Clink-args=/NODEFAULTLIB:msvcrt.lib",
16 | "-Clink-args=/NODEFAULTLIB:libucrt.lib",
17 | ]
18 |
19 | # The backtrace code for panics in Rust is almost as large as the entire editor.
20 | # = Huge reduction in binary size by removing all that.
21 | [unstable]
22 | build-std = ["std", "panic_abort"]
23 | build-std-features = ["panic_immediate_abort", "optimize_for_size"]
24 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 |
11 | env:
12 | CARGO_TERM_COLOR: always
13 |
14 | jobs:
15 | check:
16 | runs-on: ${{ matrix.os }}
17 | strategy:
18 | fail-fast: false
19 | matrix:
20 | os:
21 | - ubuntu-latest
22 | - windows-latest
23 | steps:
24 | # The Windows runners have autocrlf enabled by default.
25 | - name: Disable git autocrlf
26 | run: git config --global core.autocrlf false
27 | if: matrix.os == 'windows-latest'
28 | - name: Checkout
29 | uses: actions/checkout@v4
30 | # https://github.com/actions/cache/blob/main/examples.md#rust---cargo
31 | # Depends on `Cargo.lock` --> Has to be after checkout.
32 | - uses: actions/cache@v4
33 | with:
34 | path: |
35 | ~/.cargo/bin/
36 | ~/.cargo/registry/index/
37 | ~/.cargo/registry/cache/
38 | ~/.cargo/git/db/
39 | target/
40 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
41 | - name: Install Rust
42 | run: rustup toolchain install nightly --no-self-update --profile minimal --component rust-src,rustfmt,clippy
43 | - name: Check formatting
44 | run: cargo fmt --all -- --check
45 | - name: Run tests
46 | run: cargo test --all-features --all-targets
47 | - name: Run clippy
48 | run: cargo clippy --all-features --all-targets -- --deny warnings
49 |
--------------------------------------------------------------------------------
/.github/workflows/winget.yml:
--------------------------------------------------------------------------------
1 | name: Submit release to the WinGet community repository
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | jobs:
8 | publish-winget:
9 | name: Submit to WinGet repository
10 |
11 | # winget-create is only supported on Windows
12 | runs-on: windows-latest
13 |
14 | # Only submit stable releases
15 | if: ${{ !github.event.release.prerelease }}
16 | steps:
17 | - name: Submit package using wingetcreate
18 | run: |
19 | # Get installer info from release event
20 | $assets = '${{ toJSON(github.event.release.assets) }}' | ConvertFrom-Json
21 | $x64InstallerUrl = $assets | Where-Object -Property name -like '*x86_64-windows.zip' | Select-Object -ExpandProperty browser_download_url
22 | $arm64InstallerUrl = $assets | Where-Object -Property name -like '*aarch64-windows.zip' | Select-Object -ExpandProperty browser_download_url
23 | $packageVersion = (${{ toJSON(github.event.release.tag_name) }}).Trim('v')
24 |
25 | # Update package using wingetcreate
26 | curl.exe -JLO https://aka.ms/wingetcreate/latest
27 | .\wingetcreate.exe update Microsoft.Edit `
28 | --version $packageVersion `
29 | --urls $x64InstallerUrl $arm64InstallerUrl `
30 | --token "${{ secrets.WINGET_TOKEN }}" `
31 | --submit
32 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | .vs
3 | *.profraw
4 | lcov.info
5 | target
6 |
--------------------------------------------------------------------------------
/.pipelines/release.yml:
--------------------------------------------------------------------------------
1 | # Documentation: https://aka.ms/obpipelines
2 |
3 | trigger: none
4 |
5 | parameters:
6 | - name: debug
7 | displayName: Enable debug output
8 | type: boolean
9 | default: false
10 | - name: official
11 | displayName: Whether to build Official or NonOfficial
12 | type: string
13 | default: NonOfficial
14 | values:
15 | - NonOfficial
16 | - Official
17 | - name: createvpack
18 | displayName: Enable vpack creation
19 | type: boolean
20 | default: false
21 | - name: buildPlatforms
22 | type: object
23 | default:
24 | - x86_64-pc-windows-msvc
25 | - aarch64-pc-windows-msvc
26 |
27 | variables:
28 | system.debug: ${{parameters.debug}}
29 | WindowsContainerImage: onebranch.azurecr.io/windows/ltsc2022/vse2022:latest
30 | # CDP_DEFINITION_BUILD_COUNT is needed for onebranch.pipeline.version task.
31 | # See: https://aka.ms/obpipelines/versioning
32 | CDP_DEFINITION_BUILD_COUNT: $[counter('', 0)]
33 | # LOAD BEARING - the vpack task fails without these
34 | ROOT: $(Build.SourcesDirectory)
35 | REPOROOT: $(Build.SourcesDirectory)
36 | OUTPUTROOT: $(REPOROOT)\out
37 | NUGET_XMLDOC_MODE: none
38 |
39 | resources:
40 | repositories:
41 | - repository: GovernedTemplates
42 | type: git
43 | name: OneBranch.Pipelines/GovernedTemplates
44 | ref: refs/heads/main
45 |
46 | extends:
47 | template: v2/Microsoft.${{parameters.official}}.yml@GovernedTemplates
48 | parameters:
49 | featureFlags:
50 | WindowsHostVersion:
51 | Version: 2022
52 | Network: R1
53 | platform:
54 | name: windows_undocked
55 | product: edit
56 | # https://aka.ms/obpipelines/cloudvault
57 | cloudvault:
58 | enabled: false
59 | # https://aka.ms/obpipelines/sdl
60 | globalSdl:
61 | binskim:
62 | # > Due to some legacy reasons, 1ES PT is scanning full sources directory
63 | # > for BinSkim tool instead of just scanning the output directory [...]
64 | scanOutputDirectoryOnly: true
65 | isNativeCode: true
66 | tsa:
67 | enabled: ${{eq(parameters.official, 'Official')}}
68 | configFile: "$(Build.SourcesDirectory)/.pipelines/tsa.json"
69 | stages:
70 | # Our Build stage will build all three targets in one job, so we don't need
71 | # to repeat most of the boilerplate work in three separate jobs.
72 | - stage: Build
73 | jobs:
74 | - job: Windows
75 | pool:
76 | type: windows
77 | variables:
78 | # Binaries will go here.
79 | # More settings at https://aka.ms/obpipelines/yaml/jobs
80 | ob_outputDirectory: "$(Build.SourcesDirectory)/out"
81 | # The vPack gets created from stuff in here.
82 | # It will have a structure like:
83 | # .../vpack/
84 | # - amd64/
85 | # - edit.exe
86 | # - i386/
87 | # - edit.exe
88 | # - arm64/
89 | # - edit.exe
90 | ob_createvpack_enabled: ${{parameters.createvpack}}
91 | ob_createvpack_vpackdirectory: "$(ob_outputDirectory)/vpack"
92 | ob_createvpack_packagename: "windows_edit.$(Build.SourceBranchName)"
93 | ob_createvpack_owneralias: lhecker@microsoft.com
94 | ob_createvpack_description: Microsoft Edit
95 | ob_createvpack_targetDestinationDirectory: "$(Destination)"
96 | ob_createvpack_propsFile: false
97 | ob_createvpack_provData: true
98 | ob_createvpack_versionAs: string
99 | ob_createvpack_version: "$(EditVersion)-$(CDP_DEFINITION_BUILD_COUNT)"
100 | ob_createvpack_metadata: "$(Build.SourceVersion)"
101 | ob_createvpack_topLevelRetries: 0
102 | ob_createvpack_failOnStdErr: true
103 | ob_createvpack_verbose: ${{ parameters.debug }}
104 | # For details on this cargo_target_dir setting, see:
105 | # https://eng.ms/docs/more/rust/topics/onebranch-workaround
106 | CARGO_TARGET_DIR: C:\cargo_target_dir
107 | # msrustup only supports stable toolchains, but this project requires nightly.
108 | # We were told RUSTC_BOOTSTRAP=1 is a supported workaround.
109 | RUSTC_BOOTSTRAP: 1
110 | steps:
111 | # NOTE: Step objects have ordered keys and you MUST have "task" as the first key.
112 | # Objects with ordered keys... lol
113 | - task: RustInstaller@1
114 | displayName: Install Rust toolchain
115 | inputs:
116 | rustVersion: ms-stable
117 | additionalTargets: x86_64-pc-windows-msvc aarch64-pc-windows-msvc
118 | # URL of an Azure Artifacts feed configured with a crates.io upstream. Must be within the current ADO collection.
119 | # NOTE: Azure Artifacts support for Rust is not yet public, but it is enabled for internal ADO organizations.
120 | # https://learn.microsoft.com/en-us/azure/devops/artifacts/how-to/set-up-upstream-sources?view=azure-devops
121 | cratesIoFeedOverride: sparse+https://pkgs.dev.azure.com/microsoft/Dart/_packaging/Edit_PublicPackages/Cargo/index/
122 | # URL of an Azure Artifacts NuGet feed configured with the mscodehub Rust feed as an upstream.
123 | # * The feed must be within the current ADO collection.
124 | # * The CI account, usually "Project Collection Build Service (org-name)", must have at least "Collaborator" permission.
125 | # When setting up the upstream NuGet feed, use following Azure Artifacts feed locator:
126 | # azure-feed://mscodehub/Rust/Rust@Release
127 | toolchainFeed: https://pkgs.dev.azure.com/microsoft/_packaging/RustTools/nuget/v3/index.json
128 | - task: CargoAuthenticate@0
129 | displayName: Authenticate with Azure Artifacts
130 | inputs:
131 | configFile: ".cargo/release-windows-ms.toml"
132 | # We recommend making a separate `cargo fetch` step, as some build systems perform
133 | # fetching entirely prior to the build, and perform the build with the network disabled.
134 | - script: cargo fetch --config .cargo/release-windows-ms.toml
135 | displayName: Fetch crates
136 | - ${{ each platform in parameters.buildPlatforms }}:
137 | - script: cargo build --config .cargo/release-windows-ms.toml --frozen --release --target ${{platform}}
138 | displayName: Build ${{platform}} Release
139 | - task: CopyFiles@2
140 | displayName: Copy files to vpack (${{platform}})
141 | inputs:
142 | sourceFolder: "$(CARGO_TARGET_DIR)/${{platform}}/release"
143 | ${{ if eq(platform, 'i686-pc-windows-msvc') }}:
144 | targetFolder: "$(ob_createvpack_vpackdirectory)/i386"
145 | ${{ elseif eq(platform, 'x86_64-pc-windows-msvc') }}:
146 | targetFolder: "$(ob_createvpack_vpackdirectory)/amd64"
147 | ${{ else }}: # aarch64-pc-windows-msvc
148 | targetFolder: "$(ob_createvpack_vpackdirectory)/arm64"
149 | contents: |
150 | *.exe
151 | *.pdb
152 | # Extract the version for `ob_createvpack_version`.
153 | - script: |-
154 | @echo off
155 | for /f "tokens=3 delims=- " %%x in ('findstr /c:"version = " Cargo.toml') do (
156 | echo ##vso[task.setvariable variable=EditVersion]%%~x
157 | goto :EOF
158 | )
159 | displayName: "Set EditVersion"
160 | - task: onebranch.pipeline.signing@1
161 | displayName: "Sign files"
162 | inputs:
163 | command: "sign"
164 | signing_profile: "external_distribution"
165 | files_to_sign: "**/edit.exe"
166 | search_root: "$(ob_createvpack_vpackdirectory)"
167 | use_testsign: false
168 | in_container: true
169 |
170 | - ${{ each platform in parameters.buildPlatforms }}:
171 | - pwsh: |-
172 | $Dest = New-Item -Type Directory "_staging/${env:RELEASE_NAME}"
173 | Write-Host "Staging files from ${env:VPACK_ROOT} at $Dest"
174 | Get-ChildItem "${env:VPACK_ROOT}\*" -Include *.exe, *.pdb | Copy-Item -Destination $Dest -Verbose
175 | tar.exe -c -v --format=zip -f "$(ob_outputDirectory)\${env:RELEASE_NAME}.zip" -C _staging $env:RELEASE_NAME
176 | env:
177 | RELEASE_NAME: edit-$(EditVersion)-${{ replace(platform, 'pc-windows-msvc', 'windows') }}
178 | ${{ if eq(platform, 'i686-pc-windows-msvc') }}:
179 | VPACK_ROOT: "$(ob_createvpack_vpackdirectory)/i386"
180 | ${{ elseif eq(platform, 'x86_64-pc-windows-msvc') }}:
181 | VPACK_ROOT: "$(ob_createvpack_vpackdirectory)/amd64"
182 | ${{ else }}: # aarch64-pc-windows-msvc
183 | VPACK_ROOT: "$(ob_createvpack_vpackdirectory)/arm64"
184 | displayName: Produce ${{platform}} release archive
185 |
--------------------------------------------------------------------------------
/.pipelines/tsa.json:
--------------------------------------------------------------------------------
1 | {
2 | "instanceUrl": "https://microsoft.visualstudio.com",
3 | "projectName": "OS",
4 | "areaPath": "OS\\Windows Client and Services\\WinPD\\DFX-Developer Fundamentals and Experiences\\DEFT\\SHINE\\Commandline Tooling",
5 | "notificationAliases": ["condev@microsoft.com", "duhowett@microsoft.com"],
6 | "template": "VSTS_Microsoft_OSGS"
7 | }
8 |
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "0.2.0",
3 | "configurations": [
4 | {
5 | "name": "Launch Debug (Windows)",
6 | "preLaunchTask": "rust: cargo build",
7 | "type": "cppvsdbg",
8 | "request": "launch",
9 | "console": "externalTerminal",
10 | "program": "${workspaceFolder}/target/debug/edit",
11 | "cwd": "${workspaceFolder}",
12 | "args": [
13 | "${workspaceFolder}/src/bin/edit/main.rs"
14 | ],
15 | },
16 | {
17 | "name": "Launch Debug (GDB/LLDB)",
18 | "preLaunchTask": "rust: cargo build",
19 | "type": "cppdbg",
20 | "request": "launch",
21 | "program": "${workspaceFolder}/target/debug/edit",
22 | "cwd": "${workspaceFolder}",
23 | "args": [
24 | "${workspaceFolder}/src/bin/edit/main.rs"
25 | ],
26 | }
27 | ]
28 | }
29 |
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "2.0.0",
3 | "tasks": [
4 | {
5 | "label": "rust: cargo build",
6 | "type": "process",
7 | "command": "cargo",
8 | "args": [
9 | "build",
10 | "--package",
11 | "edit",
12 | "--features",
13 | "debug-latency"
14 | ],
15 | "group": {
16 | "kind": "build",
17 | "isDefault": true
18 | },
19 | "problemMatcher": [
20 | "$rustc"
21 | ]
22 | }
23 | ]
24 | }
25 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Microsoft Open Source Code of Conduct
2 |
3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
4 |
5 | Resources:
6 |
7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 | - Employees can reach out at [aka.ms/opensource/moderation-support](https://aka.ms/opensource/moderation-support)
11 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | ## Translation improvements
4 |
5 | You can find our translations in [`src/bin/edit/localization.rs`](./src/bin/edit/localization.rs).
6 | Please feel free to open a pull request with your changes at any time.
7 | If you'd like to discuss your changes first, please feel free to open an issue.
8 |
9 | ## Bug reports
10 |
11 | If you find any bugs, we gladly accept pull requests without prior discussion.
12 | Otherwise, you can of course always open an issue for us to look into.
13 |
14 | ## Feature requests
15 |
16 | Please open a new issue for any feature requests you have in mind.
17 | Keeping the binary size of the editor small is a priority for us and so we may need to discuss any new features first until we have support for plugins.
18 |
19 | ## Code changes
20 |
21 | The project has a focus on a small binary size and sufficient (good) performance.
22 | As such, we generally do not accept pull requests that introduce dependencies (there are always exceptions of course).
23 | Otherwise, you can consider this project a playground for trying out any cool ideas you have.
24 |
25 | The overall architecture of the project can be summarized as follows:
26 | * The underlying text buffer in `src/buffer` doesn't keep track of line breaks in the document.
27 | This is a crucial design aspect that permeates throughout the entire codebase.
28 |
29 | To oversimplify, the *only* state that is kept is the current cursor position.
30 | When the user asks to move to another line, the editor will `O(n)` seek through the underlying document until it finds the corresponding number of line breaks.
31 | * As a result, `src/simd` contains crucial `memchr2` functions to quickly find the next or previous line break (runs at up to >100GB/s).
32 | * Furthermore, `src/unicode` implements a `Utf8Chars` iterator which transparently inserts U+FFFD replacements during iteration (runs at up to 4GB/s).
33 | * Furthermore, `src/unicode` also implements grapheme cluster segmentation and cluster width measurement via its `MeasurementConfig` (runs at up to 600MB/s).
34 | * If word wrap is disabled, `memchr2` is used for all navigation across lines, allowing us to breeze through 1GB large files as if they were 1MB.
35 | * Even if word wrap is enabled, it's still sufficiently smooth thanks to `MeasurementConfig`. This is only possible because these base functions are heavily optimized.
36 | * `src/framebuffer.rs` implements a "framebuffer" like in video games.
37 | It allows us to draw the UI output into an intermediate buffer first, accumulating all changes and handling things like color blending.
38 | Then, it can compare the accumulated output with the previous frame and only send the necessary changes to the terminal.
39 | * `src/tui.rs` implements an immediate mode UI. Its module implementation gives an overview of how it works, and I recommend reading it.
40 | * `src/vt.rs` implements our VT parser.
41 | * `src/sys` contains our platform abstractions.
42 | * Finally, `src/bin/edit` ties everything together.
43 | It's roughly 90% UI code and business logic.
44 | It contains a little bit of VT logic in `setup_terminal`.
45 |
46 | If you have an issue with your terminal, the places of interest are the aforementioned:
47 | * VT parser in `src/vt.rs`
48 | * Platform specific code in `src/sys`
49 | * And the `setup_terminal` function in `src/bin/edit/main.rs`
50 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "edit"
3 | version = "1.1.0"
4 | edition = "2024"
5 | rust-version = "1.87"
6 | readme = "README.md"
7 | repository = "https://github.com/microsoft/edit"
8 | homepage = "https://github.com/microsoft/edit"
9 | license = "MIT"
10 | categories = ["text-editors"]
11 |
12 | [[bench]]
13 | name = "lib"
14 | harness = false
15 |
16 | [features]
17 | debug-latency = []
18 |
19 | # We use `opt-level = "s"` as it significantly reduces binary size.
20 | # We could then use the `#[optimize(speed)]` attribute for spot optimizations.
21 | # Unfortunately, that attribute currently doesn't work on intrinsics such as memset.
22 | [profile.release]
23 | codegen-units = 1 # reduces binary size by ~2%
24 | debug = "full" # No one needs an undebuggable release binary
25 | lto = true # reduces binary size by ~14%
26 | opt-level = "s" # reduces binary size by ~25%
27 | panic = "abort" # reduces binary size by ~50% in combination with -Zbuild-std-features=panic_immediate_abort
28 | split-debuginfo = "packed" # generates a separate *.dwp/*.dSYM so the binary can get stripped
29 | strip = "symbols" # See split-debuginfo - allows us to drop the size by ~65%
30 | incremental = true # Improves re-compile times
31 |
32 | [profile.bench]
33 | codegen-units = 16 # Make compiling criterion faster (16 is the default, but profile.release sets it to 1)
34 | lto = "thin" # Similarly, speed up linking by a ton
35 |
36 | [dependencies]
37 |
38 | [target.'cfg(unix)'.dependencies]
39 | libc = "0.2"
40 |
41 | [target.'cfg(windows)'.build-dependencies]
42 | winresource = "0.1.22"
43 |
44 | [target.'cfg(windows)'.dependencies.windows-sys]
45 | version = "0.59"
46 | features = [
47 | "Win32_Globalization",
48 | "Win32_Security",
49 | "Win32_Storage_FileSystem",
50 | "Win32_System_Console",
51 | "Win32_System_Diagnostics_Debug",
52 | "Win32_System_IO",
53 | "Win32_System_LibraryLoader",
54 | "Win32_System_Memory",
55 | "Win32_System_Threading",
56 | ]
57 |
58 | [dev-dependencies]
59 | criterion = { version = "0.6", features = ["html_reports"] }
60 | serde = { version = "1.0", features = ["derive"] }
61 | serde_json = { version = "1.0" }
62 | zstd = { version = "0.13", default-features = false }
63 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Microsoft Corporation. All rights reserved.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | #  Edit
2 |
3 | A simple editor for simple needs.
4 |
5 | This editor pays homage to the classic [MS-DOS Editor](https://en.wikipedia.org/wiki/MS-DOS_Editor), but with a modern interface and input controls similar to VS Code. The goal is to provide an accessible editor that even users largely unfamiliar with terminals can easily use.
6 |
7 | 
8 |
9 | ## Installation
10 |
11 | [](https://repology.org/project/microsoft-edit/versions)
12 |
13 | You can also download binaries from [our Releases page](https://github.com/microsoft/edit/releases/latest).
14 |
15 | ### Windows
16 |
17 | You can install the latest version with WinGet:
18 | ```powershell
19 | winget install Microsoft.Edit
20 | ```
21 |
22 | ### Notes to Package Maintainers
23 |
24 | The canonical executable name is "edit" and the alternative name is "msedit".
25 |
26 | We're aware of the potential conflict of "edit" with existing commands and as such recommend naming packages and executables "msedit".
27 | Names such as "ms-edit" should be avoided.
28 | Assigning an "edit" alias is recommended if possible.
29 |
30 | ## Build Instructions
31 |
32 | * [Install Rust](https://www.rust-lang.org/tools/install)
33 | * Install the nightly toolchain: `rustup install nightly`
34 | * Alternatively, set the environment variable `RUSTC_BOOTSTRAP=1`
35 | * Clone the repository
36 | * For a release build, run: `cargo build --config .cargo/release.toml --release`
37 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## Security
4 |
5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).
6 |
7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.
8 |
9 | ## Reporting Security Issues
10 |
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 |
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).
14 |
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).
16 |
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
18 |
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 |
21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 | * Full paths of source file(s) related to the manifestation of the issue
23 | * The location of the affected source code (tag/branch/commit or direct URL)
24 | * Any special configuration required to reproduce the issue
25 | * Step-by-step instructions to reproduce the issue
26 | * Proof-of-concept or exploit code (if possible)
27 | * Impact of the issue, including how an attacker might exploit the issue
28 |
29 | This information will help us triage your report more quickly.
30 |
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.
32 |
33 | ## Preferred Languages
34 |
35 | We prefer all communications to be in English.
36 |
37 | ## Policy
38 |
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd).
40 |
41 |
42 |
--------------------------------------------------------------------------------
/assets/Microsoft_logo_(1980).svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
27 |
--------------------------------------------------------------------------------
/assets/com.microsoft.edit.desktop:
--------------------------------------------------------------------------------
1 | [Desktop Entry]
2 | Type=Application
3 | Name=Microsoft Edit
4 | GenericName=Text Editor
5 | Comment=A simple editor for simple needs
6 | Icon=edit
7 | Exec=edit %U
8 | Terminal=true
9 | MimeType=text/plain
10 | Keywords=text;editor
11 |
--------------------------------------------------------------------------------
/assets/edit.svg:
--------------------------------------------------------------------------------
1 |
76 |
--------------------------------------------------------------------------------
/assets/edit_hero_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/edit/5c5471e9443353b0f60c5380e515c4559e0316b1/assets/edit_hero_image.png
--------------------------------------------------------------------------------
/assets/editing-traces/README.md:
--------------------------------------------------------------------------------
1 | # editing-traces
2 |
3 | This directory contains Seph Gentle's ASCII-only `rustcode` editing traces from: https://github.com/josephg/editing-traces
4 |
5 | The trace was provided under the [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) license.
6 |
--------------------------------------------------------------------------------
/assets/editing-traces/rustcode.json.zst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/edit/5c5471e9443353b0f60c5380e515c4559e0316b1/assets/editing-traces/rustcode.json.zst
--------------------------------------------------------------------------------
/assets/manpage/edit.1:
--------------------------------------------------------------------------------
1 | .TH EDIT 1 "version 1.0" "May 2025"
2 | .SH NAME
3 | edit \- a simple text editor
4 | .SH SYNOPSIS
5 | \fBedit\fP [\fIOPTIONS\fP]... [\fIARGUMENTS\fP]...
6 | .SH DESCRIPTION
7 | edit is a simple text editor inspired by MS-DOS edit.
8 | .SH EDITING
9 | Edit is an interactive mode-less editor. Use Alt-F to access the menus.
10 | .SH ARGUMENTS
11 | .TP
12 | \fIFILE[:LINE[:COLUMN]]\fP
13 | The file to open, optionally with line and column (e.g., \fBfoo.txt:123:45\fP).
14 | .SH OPTIONS
15 | .TP
16 | \fB\-h\fP, \fB\-\-help\fP
17 | Print the help message.
18 | .TP
19 | \fB\-v\fP, \fB\-\-version\fP
20 | Print the version number.
21 | .SH COPYRIGHT
22 | Copyright (c) Microsoft Corporation.
23 | .br
24 | Licensed under the MIT License.
25 | .SH SEE ALSO
26 | https://github.com/microsoft/edit
27 |
--------------------------------------------------------------------------------
/assets/microsoft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/edit/5c5471e9443353b0f60c5380e515c4559e0316b1/assets/microsoft.png
--------------------------------------------------------------------------------
/assets/microsoft.sixel:
--------------------------------------------------------------------------------
1 | P;1q"1;1;300;60#0;2;100;100;100#0!42?_ow{}!12?_ow{}!6?_ow{}}!5?_ow{{}}}!17~^NFbpw{}!8~!4}{wwo_!12?_oow{{{!4}!6~!4}{{wwo__!4?_ow{{}}}!23~^Nfrxw{{}}}!9~!4}{{woo_!12?_ow{}!15~^NFbpw{}!17~^NFB@-!36?_ow{}!6~!6?_ow{}!6~??w{}!7~?o{}!10~^^!10NFBpw{}!6~!8N^!9~{_!4?_o{}!8~^^!9N^^!9~{w}!8~^!18NFbx{}!9~^^!8N^^!9~}{o???ow{}!6~!11NFB@GKM!5N!10~!4NFB@-!30?_ow{}!12~_ow{}!12~??!20~FB@!15?!10~!10?r!9~???{!8~NB@!15?@FN!16~!4{!4wooo__!5?_}!8~^FB!16?@F^!8~{o!10~!9o!13?!10~-!24?_ow{}!35~??!19~x!18?!10~?CK[!4{}!9~^B??N!8~x!21?!10~N^^!18~}{o!10~!22?!29~!13?!10~-!18?_ow{}!8~^NFB@?!11~^NFB@?!10~??!10~F!9~}{wo__!12?!10~!5?@BFN^!9~}{wof^!7~}wo__!11?__o{!9~N@!7?!6@Bb!10~N!9~{o__!12?__o{}!8~F@!10~!9B!13?!10~-!12?_ow{}!8~^NFB@!7?!5~^NFB@!7?!10~??!10~??@FN^!20~??!10~!11?@BFN^!23~!7}!10~^NFB~!12}!12~^NB??BFN^!9~!10}!9~^NF@???!10~!22?!5~^NFB@-!6?_ow{}!8~^NFB@!13?FFB@!13?!10F??!10F!7?@@BB!15F??!10F!17?@BFN^!10~|zrfFF!10NFFFBB@@!5?!21FBB@!11?@BBFFNNN!10^NNNFFBB@!8?!10~!22?NFB@-_ow{}!8~^NFB@!119?@BFN^!9~}{wo!88?!10~-!7~^NFB@!131?@BFN^!7~!88?!7~^NF-~^NFB@!143?@BFN^~!88?~^NFB@\
2 |
--------------------------------------------------------------------------------
/benches/lib.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | use std::hint::black_box;
5 | use std::io::Cursor;
6 | use std::{mem, vec};
7 |
8 | use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
9 | use edit::helpers::*;
10 | use edit::simd::MemsetSafe;
11 | use edit::{arena, buffer, hash, oklab, simd, unicode};
12 | use serde::Deserialize;
13 |
14 | #[derive(Deserialize)]
15 | pub struct EditingTracePatch(pub usize, pub usize, pub String);
16 |
17 | #[derive(Deserialize)]
18 | pub struct EditingTraceTransaction {
19 | pub patches: Vec,
20 | }
21 |
22 | #[derive(Deserialize)]
23 | pub struct EditingTraceData {
24 | #[serde(rename = "startContent")]
25 | pub start_content: String,
26 | #[serde(rename = "endContent")]
27 | pub end_content: String,
28 | pub txns: Vec,
29 | }
30 |
31 | fn bench_buffer(c: &mut Criterion) {
32 | let data = include_bytes!("../assets/editing-traces/rustcode.json.zst");
33 | let data = zstd::decode_all(Cursor::new(data)).unwrap();
34 | let data: EditingTraceData = serde_json::from_slice(&data).unwrap();
35 | let mut patches_with_coords = Vec::new();
36 |
37 | {
38 | let mut tb = buffer::TextBuffer::new(false).unwrap();
39 | tb.set_crlf(false);
40 | tb.write(data.start_content.as_bytes(), true);
41 |
42 | for t in &data.txns {
43 | for p in &t.patches {
44 | tb.cursor_move_to_offset(p.0);
45 | let beg = tb.cursor_logical_pos();
46 |
47 | tb.delete(buffer::CursorMovement::Grapheme, p.1 as CoordType);
48 |
49 | tb.write(p.2.as_bytes(), true);
50 | patches_with_coords.push((beg, p.1 as CoordType, p.2.clone()));
51 | }
52 | }
53 |
54 | let mut actual = String::new();
55 | tb.save_as_string(&mut actual);
56 | assert_eq!(actual, data.end_content);
57 | }
58 |
59 | let bench_gap_buffer = || {
60 | let mut buf = buffer::GapBuffer::new(false).unwrap();
61 | buf.replace(0..usize::MAX, data.start_content.as_bytes());
62 |
63 | for t in &data.txns {
64 | for p in &t.patches {
65 | buf.replace(p.0..p.0 + p.1, p.2.as_bytes());
66 | }
67 | }
68 |
69 | buf
70 | };
71 |
72 | let bench_text_buffer = || {
73 | let mut tb = buffer::TextBuffer::new(false).unwrap();
74 | tb.set_crlf(false);
75 | tb.write(data.start_content.as_bytes(), true);
76 |
77 | for p in &patches_with_coords {
78 | tb.cursor_move_to_logical(p.0);
79 | tb.delete(buffer::CursorMovement::Grapheme, p.1);
80 | tb.write(p.2.as_bytes(), true);
81 | }
82 |
83 | tb
84 | };
85 |
86 | // Sanity check: If this fails, the implementation is incorrect.
87 | {
88 | let buf = bench_gap_buffer();
89 | let mut actual = Vec::new();
90 | buf.extract_raw(0..usize::MAX, &mut actual, 0);
91 | assert_eq!(actual, data.end_content.as_bytes());
92 | }
93 | {
94 | let mut tb = bench_text_buffer();
95 | let mut actual = String::new();
96 | tb.save_as_string(&mut actual);
97 | assert_eq!(actual, data.end_content);
98 | }
99 |
100 | c.benchmark_group("buffer")
101 | .bench_function(BenchmarkId::new("GapBuffer", "rustcode"), |b| {
102 | b.iter(bench_gap_buffer);
103 | })
104 | .bench_function(BenchmarkId::new("TextBuffer", "rustcode"), |b| {
105 | b.iter(bench_text_buffer);
106 | });
107 | }
108 |
109 | fn bench_hash(c: &mut Criterion) {
110 | c.benchmark_group("hash")
111 | .throughput(Throughput::Bytes(8))
112 | .bench_function(BenchmarkId::new("hash", 8), |b| {
113 | let data = [0u8; 8];
114 | b.iter(|| hash::hash(0, black_box(&data)))
115 | })
116 | .throughput(Throughput::Bytes(16))
117 | .bench_function(BenchmarkId::new("hash", 16), |b| {
118 | let data = [0u8; 16];
119 | b.iter(|| hash::hash(0, black_box(&data)))
120 | })
121 | .throughput(Throughput::Bytes(1024))
122 | .bench_function(BenchmarkId::new("hash", 1024), |b| {
123 | let data = [0u8; 1024];
124 | b.iter(|| hash::hash(0, black_box(&data)))
125 | });
126 | }
127 |
128 | fn bench_oklab(c: &mut Criterion) {
129 | c.benchmark_group("oklab")
130 | .bench_function("srgb_to_oklab", |b| b.iter(|| oklab::srgb_to_oklab(black_box(0xff212cbe))))
131 | .bench_function("oklab_blend", |b| {
132 | b.iter(|| oklab::oklab_blend(black_box(0x7f212cbe), black_box(0x7f3aae3f)))
133 | });
134 | }
135 |
136 | fn bench_simd_lines_fwd(c: &mut Criterion) {
137 | let mut group = c.benchmark_group("simd");
138 | let buf = vec![b'\n'; 128 * MEBI];
139 |
140 | for &lines in &[1, 8, 128, KIBI, 128 * KIBI, 128 * MEBI] {
141 | group.throughput(Throughput::Bytes(lines as u64)).bench_with_input(
142 | BenchmarkId::new("lines_fwd", lines),
143 | &lines,
144 | |b, &lines| {
145 | b.iter(|| simd::lines_fwd(black_box(&buf), 0, 0, lines as CoordType));
146 | },
147 | );
148 | }
149 | }
150 |
151 | fn bench_simd_memchr2(c: &mut Criterion) {
152 | let mut group = c.benchmark_group("simd");
153 | let mut buf = vec![0u8; 128 * MEBI + KIBI];
154 |
155 | // For small sizes we add a small offset of +8,
156 | // to ensure we also benchmark the non-SIMD tail handling.
157 | // For large sizes, its relative impact is negligible.
158 | for &bytes in &[8usize, 128 + 8, KIBI, 128 * KIBI, 128 * MEBI] {
159 | group.throughput(Throughput::Bytes(bytes as u64 + 1)).bench_with_input(
160 | BenchmarkId::new("memchr2", bytes),
161 | &bytes,
162 | |b, &size| {
163 | buf.fill(b'a');
164 | buf[size] = b'\n';
165 | b.iter(|| simd::memchr2(b'\n', b'\r', black_box(&buf), 0));
166 | },
167 | );
168 | }
169 | }
170 |
171 | fn bench_simd_memset(c: &mut Criterion) {
172 | let mut group = c.benchmark_group("simd");
173 | let name = format!("memset<{}>", std::any::type_name::());
174 | let size = mem::size_of::();
175 | let mut buf: Vec = vec![Default::default(); 128 * MEBI / size];
176 |
177 | // For small sizes we add a small offset of +8,
178 | // to ensure we also benchmark the non-SIMD tail handling.
179 | // For large sizes, its relative impact is negligible.
180 | for &bytes in &[8usize, 128 + 8, KIBI, 128 * KIBI, 128 * MEBI] {
181 | group.throughput(Throughput::Bytes(bytes as u64)).bench_with_input(
182 | BenchmarkId::new(&name, bytes),
183 | &bytes,
184 | |b, &bytes| {
185 | let slice = unsafe { buf.get_unchecked_mut(..bytes / size) };
186 | b.iter(|| simd::memset(black_box(slice), Default::default()));
187 | },
188 | );
189 | }
190 | }
191 |
192 | fn bench_unicode(c: &mut Criterion) {
193 | let reference = concat!(
194 | "In the quiet twilight, dreams unfold, soft whispers of a story untold.\n",
195 | "月明かりが静かに照らし出し、夢を見る心の奥で詩が静かに囁かれる\n",
196 | "Stars collide in the early light of hope, echoing the silent call of the night.\n",
197 | "夜の静寂、希望と孤独が混ざり合うその中で詩が永遠に続く\n",
198 | );
199 | let buffer = reference.repeat(10);
200 | let bytes = buffer.as_bytes();
201 |
202 | c.benchmark_group("unicode::MeasurementConfig::goto_logical")
203 | .throughput(Throughput::Bytes(bytes.len() as u64))
204 | .bench_function("basic", |b| {
205 | b.iter(|| unicode::MeasurementConfig::new(&bytes).goto_logical(Point::MAX))
206 | })
207 | .bench_function("word_wrap", |b| {
208 | b.iter(|| {
209 | unicode::MeasurementConfig::new(black_box(&bytes))
210 | .with_word_wrap_column(50)
211 | .goto_logical(Point::MAX)
212 | })
213 | });
214 |
215 | c.benchmark_group("unicode::Utf8Chars")
216 | .throughput(Throughput::Bytes(bytes.len() as u64))
217 | .bench_function("next", |b| {
218 | b.iter(|| {
219 | unicode::Utf8Chars::new(bytes, 0).fold(0u32, |acc, ch| acc.wrapping_add(ch as u32))
220 | })
221 | });
222 | }
223 |
224 | fn bench(c: &mut Criterion) {
225 | arena::init(128 * MEBI).unwrap();
226 |
227 | bench_buffer(c);
228 | bench_hash(c);
229 | bench_oklab(c);
230 | bench_simd_lines_fwd(c);
231 | bench_simd_memchr2(c);
232 | bench_simd_memset::(c);
233 | bench_simd_memset::(c);
234 | bench_unicode(c);
235 | }
236 |
237 | criterion_group!(benches, bench);
238 | criterion_main!(benches);
239 |
--------------------------------------------------------------------------------
/build.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
/// Build script: when both the host and the compilation target are Windows, compiles a
/// Windows resource with the application manifest and the file description/copyright strings.
fn main() {
    #[cfg(windows)]
    {
        // `#[cfg(windows)]` describes the machine running this build script;
        // the target OS is only available through the environment.
        let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
        if target_os == "windows" {
            let mut resource = winresource::WindowsResource::new();
            resource
                .set_manifest_file("src/bin/edit/edit.exe.manifest")
                .set("FileDescription", "Microsoft Edit")
                .set("LegalCopyright", "© Microsoft Corporation. All rights reserved.");
            resource.compile().unwrap();
        }
    }
}
15 |
--------------------------------------------------------------------------------
/rust-toolchain.toml:
--------------------------------------------------------------------------------
1 | [toolchain]
2 | channel = "nightly"
3 |
--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | style_edition = "2024"
2 | use_small_heuristics = "Max"
3 | group_imports = "StdExternalCrate"
4 | imports_granularity = "Module"
5 | format_code_in_doc_comments = true
6 | newline_style = "Unix"
7 | use_field_init_shorthand = true
8 |
--------------------------------------------------------------------------------
/src/apperr.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! Provides a transparent error type for edit.
5 |
6 | use std::{io, result};
7 |
8 | use crate::sys;
9 |
10 | pub const APP_ICU_MISSING: Error = Error::new_app(0);
11 |
12 | /// Edit's transparent `Result` type.
13 | pub type Result = result::Result;
14 |
/// Edit's transparent `Error` type.
/// Abstracts over system and application errors.
///
/// Every variant merely carries a numeric code; the variant tells which
/// domain the code belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// An application-defined error code.
    App(u32),
    /// An error code in the ICU domain.
    Icu(u32),
    /// An error code in the system domain.
    Sys(u32),
}

impl Error {
    /// Wraps `code` as an application error.
    pub const fn new_app(code: u32) -> Self {
        Error::App(code)
    }

    /// Wraps `code` as an ICU error.
    pub const fn new_icu(code: u32) -> Self {
        Error::Icu(code)
    }

    /// Wraps `code` as a system error.
    pub const fn new_sys(code: u32) -> Self {
        Error::Sys(code)
    }
}
37 |
38 | impl From for Error {
39 | fn from(err: io::Error) -> Self {
40 | sys::io_error_to_apperr(err)
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/src/arena/debug.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | #![allow(clippy::missing_safety_doc, clippy::mut_from_ref)]
5 |
6 | use std::alloc::{AllocError, Allocator, Layout};
7 | use std::mem::{self, MaybeUninit};
8 | use std::ptr::NonNull;
9 |
10 | use super::release;
11 | use crate::apperr;
12 |
13 | /// A debug wrapper for [`release::Arena`].
14 | ///
15 | /// The problem with [`super::ScratchArena`] is that it only "borrows" an underlying
16 | /// [`release::Arena`]. Once the [`super::ScratchArena`] is dropped it resets the watermark
17 | /// of the underlying [`release::Arena`], freeing all allocations done since borrowing it.
18 | ///
19 | /// It is completely valid for the same [`release::Arena`] to be borrowed multiple times at once,
20 | /// *as long as* you only use the most recent borrow. Bad example:
21 | /// ```should_panic
22 | /// use edit::arena::scratch_arena;
23 | ///
24 | /// let mut scratch1 = scratch_arena(None);
25 | /// let mut scratch2 = scratch_arena(None);
26 | ///
27 | /// let foo = scratch1.alloc_uninit::();
28 | ///
29 | /// // This will also reset `scratch1`'s allocation.
30 | /// drop(scratch2);
31 | ///
32 | /// *foo; // BOOM! ...if it wasn't for our debug wrapper.
33 | /// ```
34 | ///
35 | /// To avoid this, this wraps the real [`release::Arena`] in a "debug" one, which pretends as if every
36 | /// instance of itself is a distinct [`release::Arena`] instance. Then we use this "debug" [`release::Arena`]
37 | /// for [`super::ScratchArena`] which allows us to track which borrow is the most recent one.
38 | pub enum Arena {
39 | // Delegate is 'static, because release::Arena requires no lifetime
40 | // annotations, and so this mere debug helper cannot use them either.
41 | Delegated { delegate: &'static release::Arena, borrow: usize },
42 | Owned { arena: release::Arena },
43 | }
44 |
45 | impl Drop for Arena {
46 | fn drop(&mut self) {
47 | if let Self::Delegated { delegate, borrow } = self {
48 | let borrows = delegate.borrows.get();
49 | assert_eq!(*borrow, borrows);
50 | delegate.borrows.set(borrows - 1);
51 | }
52 | }
53 | }
54 |
55 | impl Default for Arena {
56 | fn default() -> Self {
57 | Self::empty()
58 | }
59 | }
60 |
61 | impl Arena {
62 | pub const fn empty() -> Self {
63 | Self::Owned { arena: release::Arena::empty() }
64 | }
65 |
66 | pub fn new(capacity: usize) -> apperr::Result {
67 | Ok(Self::Owned { arena: release::Arena::new(capacity)? })
68 | }
69 |
70 | pub(super) fn delegated(delegate: &release::Arena) -> Self {
71 | let borrow = delegate.borrows.get() + 1;
72 | delegate.borrows.set(borrow);
73 | Self::Delegated { delegate: unsafe { mem::transmute(delegate) }, borrow }
74 | }
75 |
76 | #[inline]
77 | pub(super) fn delegate_target(&self) -> &release::Arena {
78 | match *self {
79 | Self::Delegated { delegate, borrow } => {
80 | assert!(
81 | borrow == delegate.borrows.get(),
82 | "Arena already borrowed by a newer ScratchArena"
83 | );
84 | delegate
85 | }
86 | Self::Owned { ref arena } => arena,
87 | }
88 | }
89 |
90 | #[inline]
91 | pub(super) fn delegate_target_unchecked(&self) -> &release::Arena {
92 | match self {
93 | Self::Delegated { delegate, .. } => delegate,
94 | Self::Owned { arena } => arena,
95 | }
96 | }
97 |
98 | pub fn offset(&self) -> usize {
99 | self.delegate_target().offset()
100 | }
101 |
102 | pub unsafe fn reset(&self, to: usize) {
103 | unsafe { self.delegate_target().reset(to) }
104 | }
105 |
106 | pub fn alloc_uninit(&self) -> &mut MaybeUninit {
107 | self.delegate_target().alloc_uninit()
108 | }
109 |
110 | pub fn alloc_uninit_slice(&self, count: usize) -> &mut [MaybeUninit] {
111 | self.delegate_target().alloc_uninit_slice(count)
112 | }
113 | }
114 |
115 | unsafe impl Allocator for Arena {
116 | fn allocate(&self, layout: Layout) -> Result, AllocError> {
117 | self.delegate_target().alloc_raw(layout.size(), layout.align())
118 | }
119 |
120 | fn allocate_zeroed(&self, layout: Layout) -> Result, AllocError> {
121 | self.delegate_target().allocate_zeroed(layout)
122 | }
123 |
124 | // While it is possible to shrink the tail end of the arena, it is
125 | // not very useful given the existence of scoped scratch arenas.
126 | unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) {
127 | unsafe { self.delegate_target().deallocate(ptr, layout) }
128 | }
129 |
130 | unsafe fn grow(
131 | &self,
132 | ptr: NonNull,
133 | old_layout: Layout,
134 | new_layout: Layout,
135 | ) -> Result, AllocError> {
136 | unsafe { self.delegate_target().grow(ptr, old_layout, new_layout) }
137 | }
138 |
139 | unsafe fn grow_zeroed(
140 | &self,
141 | ptr: NonNull,
142 | old_layout: Layout,
143 | new_layout: Layout,
144 | ) -> Result, AllocError> {
145 | unsafe { self.delegate_target().grow_zeroed(ptr, old_layout, new_layout) }
146 | }
147 |
148 | unsafe fn shrink(
149 | &self,
150 | ptr: NonNull,
151 | old_layout: Layout,
152 | new_layout: Layout,
153 | ) -> Result, AllocError> {
154 | unsafe { self.delegate_target().shrink(ptr, old_layout, new_layout) }
155 | }
156 | }
157 |
--------------------------------------------------------------------------------
/src/arena/mod.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! Arena allocators. Small and fast.
5 |
6 | #[cfg(debug_assertions)]
7 | mod debug;
8 | mod release;
9 | mod scratch;
10 | mod string;
11 |
12 | #[cfg(all(not(doc), debug_assertions))]
13 | pub use self::debug::Arena;
14 | #[cfg(any(doc, not(debug_assertions)))]
15 | pub use self::release::Arena;
16 | pub use self::scratch::{ScratchArena, init, scratch_arena};
17 | pub use self::string::ArenaString;
18 |
--------------------------------------------------------------------------------
/src/arena/release.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | #![allow(clippy::mut_from_ref)]
5 |
6 | use std::alloc::{AllocError, Allocator, Layout};
7 | use std::cell::Cell;
8 | use std::hint::cold_path;
9 | use std::mem::MaybeUninit;
10 | use std::ptr::{self, NonNull};
11 | use std::{mem, slice};
12 |
13 | use crate::helpers::*;
14 | use crate::{apperr, sys};
15 |
16 | const ALLOC_CHUNK_SIZE: usize = 64 * KIBI;
17 |
18 | /// An arena allocator.
19 | ///
20 | /// If you have never used an arena allocator before, think of it as
21 | /// allocating objects on the stack, but the stack is *really* big.
22 | /// Each time you allocate, memory gets pushed at the end of the stack,
23 | /// each time you deallocate, memory gets popped from the end of the stack.
24 | ///
25 | /// One reason you'd want to use this is obviously performance: It's very simple
26 | /// and so it's also very fast, >10x faster than your system allocator.
27 | ///
28 | /// However, modern allocators such as `mimalloc` are just as fast, so why not use them?
29 | /// Because their performance comes at the cost of binary size and we can't have that.
30 | ///
31 | /// The biggest benefit though is that it sometimes massively simplifies lifetime
32 | /// and memory management. This can best be seen by this project's UI code, which
33 | /// uses an arena to allocate a tree of UI nodes. This is infamously difficult
34 | /// to do in Rust, but not so when you got an arena allocator:
35 | /// All nodes have the same lifetime, so you can just use references.
36 | ///
37 | /// # Safety
38 | ///
39 | /// **Do not** push objects into the arena that require destructors.
40 | /// Destructors are not executed. Use a pool allocator for that.
41 | pub struct Arena {
42 | base: NonNull,
43 | capacity: usize,
44 | commit: Cell,
45 | offset: Cell,
46 |
47 | /// See [`super::debug`], which uses this for borrow tracking.
48 | #[cfg(debug_assertions)]
49 | pub(super) borrows: Cell,
50 | }
51 |
52 | impl Arena {
53 | pub const fn empty() -> Self {
54 | Self {
55 | base: NonNull::dangling(),
56 | capacity: 0,
57 | commit: Cell::new(0),
58 | offset: Cell::new(0),
59 |
60 | #[cfg(debug_assertions)]
61 | borrows: Cell::new(0),
62 | }
63 | }
64 |
65 | pub fn new(capacity: usize) -> apperr::Result {
66 | let capacity = (capacity.max(1) + ALLOC_CHUNK_SIZE - 1) & !(ALLOC_CHUNK_SIZE - 1);
67 | let base = unsafe { sys::virtual_reserve(capacity)? };
68 |
69 | Ok(Self {
70 | base,
71 | capacity,
72 | commit: Cell::new(0),
73 | offset: Cell::new(0),
74 |
75 | #[cfg(debug_assertions)]
76 | borrows: Cell::new(0),
77 | })
78 | }
79 |
80 | pub fn offset(&self) -> usize {
81 | self.offset.get()
82 | }
83 |
84 | /// "Deallocates" the memory in the arena down to the given offset.
85 | ///
86 | /// # Safety
87 | ///
88 | /// Obviously, this is GIGA UNSAFE. It runs no destructors and does not check
89 | /// whether the offset is valid. You better take care when using this function.
90 | pub unsafe fn reset(&self, to: usize) {
91 | // Fill the deallocated memory with 0xDD to aid debugging.
92 | if cfg!(debug_assertions) && self.offset.get() > to {
93 | let commit = self.commit.get();
94 | let len = (self.offset.get() + 128).min(commit) - to;
95 | unsafe { slice::from_raw_parts_mut(self.base.add(to).as_ptr(), len).fill(0xDD) };
96 | }
97 |
98 | self.offset.replace(to);
99 | }
100 |
101 | #[inline]
102 | pub(super) fn alloc_raw(
103 | &self,
104 | bytes: usize,
105 | alignment: usize,
106 | ) -> Result, AllocError> {
107 | let commit = self.commit.get();
108 | let offset = self.offset.get();
109 |
110 | let beg = (offset + alignment - 1) & !(alignment - 1);
111 | let end = beg + bytes;
112 |
113 | if end > commit {
114 | return self.alloc_raw_bump(beg, end);
115 | }
116 |
117 | if cfg!(debug_assertions) {
118 | let ptr = unsafe { self.base.add(offset) };
119 | let len = (end + 128).min(self.commit.get()) - offset;
120 | unsafe { slice::from_raw_parts_mut(ptr.as_ptr(), len).fill(0xCD) };
121 | }
122 |
123 | self.offset.replace(end);
124 | Ok(unsafe { NonNull::slice_from_raw_parts(self.base.add(beg), bytes) })
125 | }
126 |
127 | // With the code in `alloc_raw_bump()` out of the way, `alloc_raw()` compiles down to some super tight assembly.
128 | #[cold]
129 | fn alloc_raw_bump(&self, beg: usize, end: usize) -> Result, AllocError> {
130 | let offset = self.offset.get();
131 | let commit_old = self.commit.get();
132 | let commit_new = (end + ALLOC_CHUNK_SIZE - 1) & !(ALLOC_CHUNK_SIZE - 1);
133 |
134 | if commit_new > self.capacity
135 | || unsafe {
136 | sys::virtual_commit(self.base.add(commit_old), commit_new - commit_old).is_err()
137 | }
138 | {
139 | return Err(AllocError);
140 | }
141 |
142 | if cfg!(debug_assertions) {
143 | let ptr = unsafe { self.base.add(offset) };
144 | let len = (end + 128).min(self.commit.get()) - offset;
145 | unsafe { slice::from_raw_parts_mut(ptr.as_ptr(), len).fill(0xCD) };
146 | }
147 |
148 | self.commit.replace(commit_new);
149 | self.offset.replace(end);
150 | Ok(unsafe { NonNull::slice_from_raw_parts(self.base.add(beg), end - beg) })
151 | }
152 |
153 | #[allow(clippy::mut_from_ref)]
154 | pub fn alloc_uninit(&self) -> &mut MaybeUninit {
155 | let bytes = mem::size_of::();
156 | let alignment = mem::align_of::();
157 | let ptr = self.alloc_raw(bytes, alignment).unwrap();
158 | unsafe { ptr.cast().as_mut() }
159 | }
160 |
161 | #[allow(clippy::mut_from_ref)]
162 | pub fn alloc_uninit_slice(&self, count: usize) -> &mut [MaybeUninit] {
163 | let bytes = mem::size_of::() * count;
164 | let alignment = mem::align_of::();
165 | let ptr = self.alloc_raw(bytes, alignment).unwrap();
166 | unsafe { slice::from_raw_parts_mut(ptr.cast().as_ptr(), count) }
167 | }
168 | }
169 |
170 | impl Drop for Arena {
171 | fn drop(&mut self) {
172 | if self.base != NonNull::dangling() {
173 | unsafe { sys::virtual_release(self.base, self.capacity) };
174 | }
175 | }
176 | }
177 |
178 | impl Default for Arena {
179 | fn default() -> Self {
180 | Self::empty()
181 | }
182 | }
183 |
184 | unsafe impl Allocator for Arena {
185 | fn allocate(&self, layout: Layout) -> Result, AllocError> {
186 | self.alloc_raw(layout.size(), layout.align())
187 | }
188 |
189 | fn allocate_zeroed(&self, layout: Layout) -> Result, AllocError> {
190 | let p = self.alloc_raw(layout.size(), layout.align())?;
191 | unsafe { p.cast::().as_ptr().write_bytes(0, p.len()) }
192 | Ok(p)
193 | }
194 |
195 | // While it is possible to shrink the tail end of the arena, it is
196 | // not very useful given the existence of scoped scratch arenas.
197 | unsafe fn deallocate(&self, _: NonNull, _: Layout) {}
198 |
199 | unsafe fn grow(
200 | &self,
201 | ptr: NonNull,
202 | old_layout: Layout,
203 | new_layout: Layout,
204 | ) -> Result, AllocError> {
205 | debug_assert!(new_layout.size() >= old_layout.size());
206 | debug_assert!(new_layout.align() <= old_layout.align());
207 |
208 | let new_ptr;
209 |
210 | // Growing the given area is possible if it is at the end of the arena.
211 | if unsafe { ptr.add(old_layout.size()) == self.base.add(self.offset.get()) } {
212 | new_ptr = ptr;
213 | let delta = new_layout.size() - old_layout.size();
214 | // Assuming that the given ptr/length area is at the end of the arena,
215 | // we can just push more memory to the end of the arena to grow it.
216 | self.alloc_raw(delta, 1)?;
217 | } else {
218 | cold_path();
219 |
220 | new_ptr = self.allocate(new_layout)?.cast();
221 |
222 | // SAFETY: It's weird to me that this doesn't assert new_layout.size() >= old_layout.size(),
223 | // but neither does the stdlib code at the time of writing.
224 | // So, assuming that is not needed, this code is safe since it just copies the old data over.
225 | unsafe {
226 | ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_layout.size());
227 | self.deallocate(ptr, old_layout);
228 | }
229 | }
230 |
231 | Ok(NonNull::slice_from_raw_parts(new_ptr, new_layout.size()))
232 | }
233 |
234 | unsafe fn grow_zeroed(
235 | &self,
236 | ptr: NonNull,
237 | old_layout: Layout,
238 | new_layout: Layout,
239 | ) -> Result, AllocError> {
240 | unsafe {
241 | // SAFETY: Same as grow().
242 | let ptr = self.grow(ptr, old_layout, new_layout)?;
243 |
244 | // SAFETY: At this point, `ptr` must be valid for `new_layout.size()` bytes,
245 | // allowing us to safely zero out the delta since `old_layout.size()`.
246 | ptr.cast::()
247 | .add(old_layout.size())
248 | .write_bytes(0, new_layout.size() - old_layout.size());
249 |
250 | Ok(ptr)
251 | }
252 | }
253 |
254 | unsafe fn shrink(
255 | &self,
256 | ptr: NonNull,
257 | old_layout: Layout,
258 | new_layout: Layout,
259 | ) -> Result, AllocError> {
260 | debug_assert!(new_layout.size() <= old_layout.size());
261 | debug_assert!(new_layout.align() <= old_layout.align());
262 |
263 | let mut len = old_layout.size();
264 |
265 | // Shrinking the given area is possible if it is at the end of the arena.
266 | if unsafe { ptr.add(len) == self.base.add(self.offset.get()) } {
267 | self.offset.set(self.offset.get() - len + new_layout.size());
268 | len = new_layout.size();
269 | } else {
270 | debug_assert!(
271 | false,
272 | "Did you call shrink_to_fit()? Only the last allocation can be shrunk!"
273 | );
274 | }
275 |
276 | Ok(NonNull::slice_from_raw_parts(ptr, len))
277 | }
278 | }
279 |
--------------------------------------------------------------------------------
/src/arena/scratch.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | use std::ops::Deref;
5 |
6 | #[cfg(debug_assertions)]
7 | use super::debug;
8 | use super::{Arena, release};
9 | use crate::apperr;
10 | use crate::helpers::*;
11 |
12 | static mut S_SCRATCH: [release::Arena; 2] =
13 | const { [release::Arena::empty(), release::Arena::empty()] };
14 |
15 | /// Initialize the scratch arenas with a given capacity.
16 | /// Call this before using [`scratch_arena`].
17 | pub fn init(capacity: usize) -> apperr::Result<()> {
18 | unsafe {
19 | for s in &mut S_SCRATCH[..] {
20 | *s = release::Arena::new(capacity)?;
21 | }
22 | }
23 | Ok(())
24 | }
25 |
26 | /// Need an arena for temporary allocations? [`scratch_arena`] got you covered.
27 | /// Call [`scratch_arena`] and it'll return an [`Arena`] that resets when it goes out of scope.
28 | ///
29 | /// ---
30 | ///
31 | /// Most methods make just two kinds of allocations:
32 | /// * Interior: Temporary data that can be deallocated when the function returns.
33 | /// * Exterior: Data that is returned to the caller and must remain alive until the caller stops using it.
34 | ///
35 | /// Such methods only have two lifetimes, for which you consequently also only need two arenas.
36 | /// ...even if your method calls other methods recursively! This is because the exterior allocations
37 | /// of a callee are simply interior allocations to the caller, and so on, recursively.
38 | ///
39 | /// This works as long as the two arenas flip/flop between being used as interior/exterior allocator
40 | /// along the callstack. To ensure that is the case, we use a recursion counter in debug builds.
41 | ///
42 | /// This approach was described among others at:
43 | ///
44 | /// # Safety
45 | ///
46 | /// If your function takes an [`Arena`] argument, you **MUST** pass it to `scratch_arena` as `Some(&arena)`.
47 | pub fn scratch_arena(conflict: Option<&Arena>) -> ScratchArena<'static> {
48 | unsafe {
49 | #[cfg(debug_assertions)]
50 | let conflict = conflict.map(|a| a.delegate_target_unchecked());
51 |
52 | let index = opt_ptr_eq(conflict, Some(&S_SCRATCH[0])) as usize;
53 | let arena = &mut S_SCRATCH[index];
54 | ScratchArena::new(arena)
55 | }
56 | }
57 |
58 | /// Borrows an [`Arena`] for temporary allocations.
59 | ///
60 | /// See [`scratch_arena`].
61 | #[cfg(debug_assertions)]
62 | pub struct ScratchArena<'a> {
63 | arena: debug::Arena,
64 | offset: usize,
65 | _phantom: std::marker::PhantomData<&'a ()>,
66 | }
67 |
68 | #[cfg(not(debug_assertions))]
69 | pub struct ScratchArena<'a> {
70 | arena: &'a Arena,
71 | offset: usize,
72 | }
73 |
74 | #[cfg(debug_assertions)]
75 | impl<'a> ScratchArena<'a> {
76 | fn new(arena: &'a release::Arena) -> Self {
77 | let offset = arena.offset();
78 | ScratchArena { arena: Arena::delegated(arena), _phantom: std::marker::PhantomData, offset }
79 | }
80 | }
81 |
82 | #[cfg(not(debug_assertions))]
83 | impl<'a> ScratchArena<'a> {
84 | fn new(arena: &'a release::Arena) -> Self {
85 | let offset = arena.offset();
86 | ScratchArena { arena, offset }
87 | }
88 | }
89 |
90 | impl Drop for ScratchArena<'_> {
91 | fn drop(&mut self) {
92 | unsafe { self.arena.reset(self.offset) };
93 | }
94 | }
95 |
/// Debug builds: a [`ScratchArena`] dereferences to its [`debug::Arena`] wrapper,
/// so it can be used wherever an arena reference is expected.
#[cfg(debug_assertions)]
impl Deref for ScratchArena<'_> {
    type Target = debug::Arena;

    fn deref(&self) -> &Self::Target {
        &self.arena
    }
}
104 |
/// Release builds: a [`ScratchArena`] dereferences directly to the borrowed [`Arena`].
#[cfg(not(debug_assertions))]
impl Deref for ScratchArena<'_> {
    type Target = Arena;

    fn deref(&self) -> &Self::Target {
        self.arena
    }
}
113 |
--------------------------------------------------------------------------------
/src/arena/string.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | use std::fmt;
5 | use std::ops::{Bound, Deref, DerefMut, RangeBounds};
6 |
7 | use super::Arena;
8 | use crate::helpers::*;
9 |
10 | /// A custom string type, because `std` lacks allocator support for [`String`].
11 | ///
12 | /// To keep things simple, this one is hardcoded to [`Arena`].
13 | #[derive(Clone)]
14 | pub struct ArenaString<'a> {
15 | vec: Vec,
16 | }
17 |
18 | impl<'a> ArenaString<'a> {
19 | /// Creates a new [`ArenaString`] in the given arena.
20 | #[must_use]
21 | pub const fn new_in(arena: &'a Arena) -> Self {
22 | Self { vec: Vec::new_in(arena) }
23 | }
24 |
25 | #[must_use]
26 | pub fn with_capacity_in(capacity: usize, arena: &'a Arena) -> Self {
27 | Self { vec: Vec::with_capacity_in(capacity, arena) }
28 | }
29 |
30 | /// Turns a [`str`] into an [`ArenaString`].
31 | #[must_use]
32 | pub fn from_str(arena: &'a Arena, s: &str) -> Self {
33 | let mut res = Self::new_in(arena);
34 | res.push_str(s);
35 | res
36 | }
37 |
38 | /// It says right here that you checked if `bytes` is valid UTF-8
39 | /// and you are sure it is. Presto! Here's an `ArenaString`!
40 | ///
41 | /// # Safety
42 | ///
43 | /// You fool! It says "unchecked" right there. Now the house is burning.
44 | #[inline]
45 | #[must_use]
46 | pub unsafe fn from_utf8_unchecked(bytes: Vec) -> Self {
47 | Self { vec: bytes }
48 | }
49 |
50 | /// Checks whether `text` contains only valid UTF-8.
51 | /// If the entire string is valid, it returns `Ok(text)`.
52 | /// Otherwise, it returns `Err(ArenaString)` with all invalid sequences replaced with U+FFFD.
53 | pub fn from_utf8_lossy<'s>(arena: &'a Arena, text: &'s [u8]) -> Result<&'s str, Self> {
54 | let mut iter = text.utf8_chunks();
55 | let Some(mut chunk) = iter.next() else {
56 | return Ok("");
57 | };
58 |
59 | let valid = chunk.valid();
60 | if chunk.invalid().is_empty() {
61 | debug_assert_eq!(valid.len(), text.len());
62 | return Ok(unsafe { str::from_utf8_unchecked(text) });
63 | }
64 |
65 | const REPLACEMENT: &str = "\u{FFFD}";
66 |
67 | let mut res = Self::new_in(arena);
68 | res.reserve(text.len());
69 |
70 | loop {
71 | res.push_str(chunk.valid());
72 | if !chunk.invalid().is_empty() {
73 | res.push_str(REPLACEMENT);
74 | }
75 | chunk = match iter.next() {
76 | Some(chunk) => chunk,
77 | None => break,
78 | };
79 | }
80 |
81 | Err(res)
82 | }
83 |
84 | /// Turns a [`Vec`] into an [`ArenaString`], replacing invalid UTF-8 sequences with U+FFFD.
85 | #[must_use]
86 | pub fn from_utf8_lossy_owned(v: Vec) -> Self {
87 | match Self::from_utf8_lossy(v.allocator(), &v) {
88 | Ok(..) => unsafe { Self::from_utf8_unchecked(v) },
89 | Err(s) => s,
90 | }
91 | }
92 |
93 | /// It's empty.
94 | pub fn is_empty(&self) -> bool {
95 | self.vec.is_empty()
96 | }
97 |
98 | /// It's lengthy.
99 | pub fn len(&self) -> usize {
100 | self.vec.len()
101 | }
102 |
103 | /// It's capacatity.
104 | pub fn capacity(&self) -> usize {
105 | self.vec.capacity()
106 | }
107 |
108 | /// It's a [`String`], now it's a [`str`]. Wow!
109 | pub fn as_str(&self) -> &str {
110 | unsafe { str::from_utf8_unchecked(self.vec.as_slice()) }
111 | }
112 |
113 | /// It's a [`String`], now it's a [`str`]. And it's mutable! WOW!
114 | pub fn as_mut_str(&mut self) -> &mut str {
115 | unsafe { str::from_utf8_unchecked_mut(self.vec.as_mut_slice()) }
116 | }
117 |
118 | /// Now it's bytes!
119 | pub fn as_bytes(&self) -> &[u8] {
120 | self.vec.as_slice()
121 | }
122 |
123 | /// Returns a mutable reference to the contents of this `String`.
124 | ///
125 | /// # Safety
126 | ///
127 | /// The underlying `&mut Vec` allows writing bytes which are not valid UTF-8.
128 | pub unsafe fn as_mut_vec(&mut self) -> &mut Vec {
129 | &mut self.vec
130 | }
131 |
132 | /// Reserves *additional* memory. For you old folks out there (totally not me),
133 | /// this is different from C++'s `reserve` which reserves a total size.
134 | pub fn reserve(&mut self, additional: usize) {
135 | self.vec.reserve(additional)
136 | }
137 |
138 | /// Just like [`ArenaString::reserve`], but it doesn't overallocate.
139 | pub fn reserve_exact(&mut self, additional: usize) {
140 | self.vec.reserve_exact(additional)
141 | }
142 |
143 | /// Now it's small! Alarming!
144 | ///
145 | /// *Do not* call this unless this string is the last thing on the arena.
146 | /// Arenas are stacks, they can't deallocate what's in the middle.
147 | pub fn shrink_to_fit(&mut self) {
148 | self.vec.shrink_to_fit()
149 | }
150 |
151 | /// To no surprise, this clears the string.
152 | pub fn clear(&mut self) {
153 | self.vec.clear()
154 | }
155 |
156 | /// Append some text.
157 | pub fn push_str(&mut self, string: &str) {
158 | self.vec.extend_from_slice(string.as_bytes())
159 | }
160 |
161 | /// Append a single character.
162 | #[inline]
163 | pub fn push(&mut self, ch: char) {
164 | match ch.len_utf8() {
165 | 1 => self.vec.push(ch as u8),
166 | _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
167 | }
168 | }
169 |
170 | /// Same as `push(char)` but with a specified number of character copies.
171 | /// Shockingly absent from the standard library.
172 | pub fn push_repeat(&mut self, ch: char, total_copies: usize) {
173 | if total_copies == 0 {
174 | return;
175 | }
176 |
177 | let buf = unsafe { self.as_mut_vec() };
178 |
179 | if ch.is_ascii() {
180 | // Compiles down to `memset()`.
181 | buf.extend(std::iter::repeat_n(ch as u8, total_copies));
182 | } else {
183 | // Implements efficient string padding using quadratic duplication.
184 | let mut utf8_buf = [0; 4];
185 | let utf8 = ch.encode_utf8(&mut utf8_buf).as_bytes();
186 | let initial_len = buf.len();
187 | let added_len = utf8.len() * total_copies;
188 | let final_len = initial_len + added_len;
189 |
190 | buf.reserve(added_len);
191 | buf.extend_from_slice(utf8);
192 |
193 | while buf.len() != final_len {
194 | let end = (final_len - buf.len() + initial_len).min(buf.len());
195 | buf.extend_from_within(initial_len..end);
196 | }
197 | }
198 | }
199 |
200 | /// Replaces a range of characters with a new string.
201 | pub fn replace_range>(&mut self, range: R, replace_with: &str) {
202 | match range.start_bound() {
203 | Bound::Included(&n) => assert!(self.is_char_boundary(n)),
204 | Bound::Excluded(&n) => assert!(self.is_char_boundary(n + 1)),
205 | Bound::Unbounded => {}
206 | };
207 | match range.end_bound() {
208 | Bound::Included(&n) => assert!(self.is_char_boundary(n + 1)),
209 | Bound::Excluded(&n) => assert!(self.is_char_boundary(n)),
210 | Bound::Unbounded => {}
211 | };
212 | unsafe { self.as_mut_vec() }.replace_range(range, replace_with.as_bytes());
213 | }
214 |
215 | /// Finds `old` in the string and replaces it with `new`.
216 | /// Only performs one replacement.
217 | pub fn replace_once_in_place(&mut self, old: &str, new: &str) {
218 | if let Some(beg) = self.find(old) {
219 | unsafe { self.as_mut_vec() }.replace_range(beg..beg + old.len(), new.as_bytes());
220 | }
221 | }
222 | }
223 |
224 | impl fmt::Debug for ArenaString<'_> {
225 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
226 | fmt::Debug::fmt(&**self, f)
227 | }
228 | }
229 |
230 | impl PartialEq<&str> for ArenaString<'_> {
231 | fn eq(&self, other: &&str) -> bool {
232 | self.as_str() == *other
233 | }
234 | }
235 |
/// Lets an [`ArenaString`] be used wherever a `&str` is expected.
impl Deref for ArenaString<'_> {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        self.as_str()
    }
}
243 |
/// Lets an [`ArenaString`] be used wherever a `&mut str` is expected.
impl DerefMut for ArenaString<'_> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.as_mut_str()
    }
}
249 |
250 | impl fmt::Display for ArenaString<'_> {
251 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
252 | f.write_str(self.as_str())
253 | }
254 | }
255 |
256 | impl fmt::Write for ArenaString<'_> {
257 | #[inline]
258 | fn write_str(&mut self, s: &str) -> fmt::Result {
259 | self.push_str(s);
260 | Ok(())
261 | }
262 |
263 | #[inline]
264 | fn write_char(&mut self, c: char) -> fmt::Result {
265 | self.push(c);
266 | Ok(())
267 | }
268 | }
269 |
/// Like [`format!`], but the result is an `ArenaString` created in the arena
/// given as the first argument. The remaining arguments are passed to
/// [`format_args!`] unchanged.
///
/// Panics if formatting fails (the result of `write_fmt` is unwrapped).
#[macro_export]
macro_rules! arena_format {
    ($arena:expr, $($arg:tt)*) => {{
        // Brings `write_fmt` into scope without introducing a name at the call site.
        use std::fmt::Write as _;
        let mut output = $crate::arena::ArenaString::new_in($arena);
        output.write_fmt(format_args!($($arg)*)).unwrap();
        output
    }}
}
279 |
--------------------------------------------------------------------------------
/src/base64.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! Base64 facilities.
5 |
6 | use crate::arena::ArenaString;
7 |
/// The 64-character base64 alphabet (`A-Z`, `a-z`, `0-9`, `+`, `/`), indexed by 6-bit value.
const CHARSET: [u8; 64] = *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
9 |
/// Returns the exact number of bytes produced by base64-encoding `src_len`
/// input bytes, including `=` padding.
///
/// Every started group of 3 input bytes turns into 4 output bytes.
#[inline]
pub fn encode_len(src_len: usize) -> usize {
    let groups = src_len.div_ceil(3);
    groups * 4
}
16 |
/// Encodes the given bytes as base64 and appends them to the destination string.
///
/// Exactly `encode_len(src.len())` bytes are appended: characters from
/// `CHARSET`, followed by `=` padding if `src.len()` is not a multiple of 3.
/// Existing contents of `dst` are left untouched.
pub fn encode(dst: &mut ArenaString, src: &[u8]) {
    unsafe {
        let mut inp = src.as_ptr();
        let mut remaining = src.len();
        let dst = dst.as_mut_vec();

        let out_len = encode_len(src.len());
        // The encoded length is known in advance, so space is reserved all at once.
        dst.reserve(out_len);

        // SAFETY: Getting a pointer to the reserved space is only safe
        // *after* calling `reserve()` as it may change the pointer.
        let mut out = dst.as_mut_ptr().add(dst.len());

        if remaining != 0 {
            // Translate chunks of 3 source bytes into 4 base64-encoded bytes.
            // The loop deliberately stops while 1-3 bytes are still left, so
            // that the 4-byte read below never goes past the end of `src`.
            while remaining > 3 {
                // SAFETY: Thanks to `remaining > 3`, reading 4 bytes at once is safe.
                // This improves performance massively over a byte-by-byte approach,
                // because it allows us to byte-swap the read and use simple bit-shifts below.
                let val = u32::from_be((inp as *const u32).read_unaligned());
                inp = inp.add(3);
                remaining -= 3;

                // Only the top 24 bits (= 3 source bytes) of `val` are consumed,
                // 6 bits at a time; the lowest 8 bits belong to the next chunk.
                *out = CHARSET[(val >> 26) as usize];
                out = out.add(1);
                *out = CHARSET[(val >> 20) as usize & 0x3f];
                out = out.add(1);
                *out = CHARSET[(val >> 14) as usize & 0x3f];
                out = out.add(1);
                *out = CHARSET[(val >> 8) as usize & 0x3f];
                out = out.add(1);
            }

            // Convert the remaining 1-3 bytes.
            let mut in1 = 0;
            let mut in2 = 0;

            // We can simplify the following logic by assuming that there's only 1
            // byte left. If there's >1 byte left, these two '=' will be overwritten.
            *out.add(3) = b'=';
            *out.add(2) = b'=';

            if remaining >= 3 {
                in2 = inp.add(2).read() as usize;
                *out.add(3) = CHARSET[in2 & 0x3f];
            }

            if remaining >= 2 {
                in1 = inp.add(1).read() as usize;
                *out.add(2) = CHARSET[(in1 << 2 | in2 >> 6) & 0x3f];
            }

            let in0 = inp.add(0).read() as usize;
            *out.add(1) = CHARSET[(in0 << 4 | in1 >> 4) & 0x3f];
            *out.add(0) = CHARSET[in0 >> 2];
        }

        // All `out_len` reserved bytes have been written above (or `out_len`
        // is 0 for empty input), so they can now be made part of the vector.
        dst.set_len(dst.len() + out_len);
    }
}
79 |
#[cfg(test)]
mod tests {
    use super::encode;
    use crate::arena::{Arena, ArenaString};

    /// Encodes every prefix length from 0 to 26 bytes, which covers all three
    /// padding variants several times over.
    #[test]
    fn test_basic() {
        let arena = Arena::new(4 * 1024).unwrap();
        let enc = |s: &[u8]| {
            let mut dst = ArenaString::new_in(&arena);
            encode(&mut dst, s);
            dst
        };

        let cases: &[(&[u8], &str)] = &[
            (b"", ""),
            (b"a", "YQ=="),
            (b"ab", "YWI="),
            (b"abc", "YWJj"),
            (b"abcd", "YWJjZA=="),
            (b"abcde", "YWJjZGU="),
            (b"abcdef", "YWJjZGVm"),
            (b"abcdefg", "YWJjZGVmZw=="),
            (b"abcdefgh", "YWJjZGVmZ2g="),
            (b"abcdefghi", "YWJjZGVmZ2hp"),
            (b"abcdefghij", "YWJjZGVmZ2hpag=="),
            (b"abcdefghijk", "YWJjZGVmZ2hpams="),
            (b"abcdefghijkl", "YWJjZGVmZ2hpamts"),
            (b"abcdefghijklm", "YWJjZGVmZ2hpamtsbQ=="),
            (b"abcdefghijklmN", "YWJjZGVmZ2hpamtsbU4="),
            (b"abcdefghijklmNO", "YWJjZGVmZ2hpamtsbU5P"),
            (b"abcdefghijklmNOP", "YWJjZGVmZ2hpamtsbU5PUA=="),
            (b"abcdefghijklmNOPQ", "YWJjZGVmZ2hpamtsbU5PUFE="),
            (b"abcdefghijklmNOPQR", "YWJjZGVmZ2hpamtsbU5PUFFS"),
            (b"abcdefghijklmNOPQRS", "YWJjZGVmZ2hpamtsbU5PUFFSUw=="),
            (b"abcdefghijklmNOPQRST", "YWJjZGVmZ2hpamtsbU5PUFFSU1Q="),
            (b"abcdefghijklmNOPQRSTU", "YWJjZGVmZ2hpamtsbU5PUFFSU1RV"),
            (b"abcdefghijklmNOPQRSTUV", "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVg=="),
            (b"abcdefghijklmNOPQRSTUVW", "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVlc="),
            (b"abcdefghijklmNOPQRSTUVWX", "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVldY"),
            (b"abcdefghijklmNOPQRSTUVWXY", "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVldYWQ=="),
            (b"abcdefghijklmNOPQRSTUVWXYZ", "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVldYWVo="),
        ];

        for &(input, expected) in cases {
            assert_eq!(enc(input), expected);
        }
    }
}
122 |
--------------------------------------------------------------------------------
/src/bin/edit/documents.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | use std::collections::LinkedList;
5 | use std::ffi::OsStr;
6 | use std::fs::File;
7 | use std::path::{Path, PathBuf};
8 |
9 | use edit::buffer::{RcTextBuffer, TextBuffer};
10 | use edit::helpers::{CoordType, Point};
11 | use edit::{apperr, path, sys};
12 |
13 | use crate::state::DisplayablePathBuf;
14 |
/// An open document: its text buffer plus the file-system identity it is associated with.
///
/// NOTE(review): the bare `Option` field types below appear to have lost their
/// type arguments in this copy of the file. From their use in this file,
/// `path` holds a `PathBuf` and `dir` a `DisplayablePathBuf`; `file_id` holds
/// whatever `sys::file_id` returns on success — confirm against the upstream source.
pub struct Document {
    // The text buffer holding the document's contents.
    pub buffer: RcTextBuffer,
    // Full path of the backing file; `None` for documents created via `add_untitled`.
    pub path: Option,
    // Parent directory of `path`, in displayable form. Set by `set_path`.
    pub dir: Option,
    // File name component of `path`, or the generated "Untitled-N.txt" name.
    pub filename: String,
    // Result of `sys::file_id` for the backing file; compared in `add_file_path`
    // to detect that a file is already open.
    pub file_id: Option,
    // The N of "Untitled-N.txt" for untitled documents; 0 for documents opened from a path.
    pub new_file_counter: usize,
}
23 |
24 | impl Document {
25 | pub fn save(&mut self, new_path: Option) -> apperr::Result<()> {
26 | let path = new_path.as_deref().unwrap_or_else(|| self.path.as_ref().unwrap().as_path());
27 | let mut file = DocumentManager::open_for_writing(path)?;
28 |
29 | {
30 | let mut tb = self.buffer.borrow_mut();
31 | tb.write_file(&mut file)?;
32 | }
33 |
34 | if let Ok(id) = sys::file_id(None, path) {
35 | self.file_id = Some(id);
36 | }
37 |
38 | if let Some(path) = new_path {
39 | self.set_path(path);
40 | }
41 |
42 | Ok(())
43 | }
44 |
45 | pub fn reread(&mut self, encoding: Option<&'static str>) -> apperr::Result<()> {
46 | let path = self.path.as_ref().unwrap().as_path();
47 | let mut file = DocumentManager::open_for_reading(path)?;
48 |
49 | {
50 | let mut tb = self.buffer.borrow_mut();
51 | tb.read_file(&mut file, encoding)?;
52 | }
53 |
54 | if let Ok(id) = sys::file_id(None, path) {
55 | self.file_id = Some(id);
56 | }
57 |
58 | Ok(())
59 | }
60 |
61 | fn set_path(&mut self, path: PathBuf) {
62 | let filename = path.file_name().unwrap_or_default().to_string_lossy().into_owned();
63 | let dir = path.parent().map(ToOwned::to_owned).unwrap_or_default();
64 | self.filename = filename;
65 | self.dir = Some(DisplayablePathBuf::from_path(dir));
66 | self.path = Some(path);
67 | self.update_file_mode();
68 | }
69 |
70 | fn update_file_mode(&mut self) {
71 | let mut tb = self.buffer.borrow_mut();
72 | tb.set_ruler(if self.filename == "COMMIT_EDITMSG" { 72 } else { 0 });
73 | }
74 | }
75 |
76 | #[derive(Default)]
77 | pub struct DocumentManager {
78 | list: LinkedList,
79 | }
80 |
81 | impl DocumentManager {
82 | #[inline]
83 | pub fn len(&self) -> usize {
84 | self.list.len()
85 | }
86 |
87 | #[inline]
88 | pub fn active(&self) -> Option<&Document> {
89 | self.list.front()
90 | }
91 |
92 | #[inline]
93 | pub fn active_mut(&mut self) -> Option<&mut Document> {
94 | self.list.front_mut()
95 | }
96 |
97 | #[inline]
98 | pub fn update_active bool>(&mut self, mut func: F) -> bool {
99 | let mut cursor = self.list.cursor_front_mut();
100 | while let Some(doc) = cursor.current() {
101 | if func(doc) {
102 | let list = cursor.remove_current_as_list().unwrap();
103 | self.list.cursor_front_mut().splice_before(list);
104 | return true;
105 | }
106 | cursor.move_next();
107 | }
108 | false
109 | }
110 |
111 | pub fn remove_active(&mut self) {
112 | self.list.pop_front();
113 | }
114 |
115 | pub fn add_untitled(&mut self) -> apperr::Result<&mut Document> {
116 | let buffer = Self::create_buffer()?;
117 | let mut doc = Document {
118 | buffer,
119 | path: None,
120 | dir: Default::default(),
121 | filename: Default::default(),
122 | file_id: None,
123 | new_file_counter: 0,
124 | };
125 | self.gen_untitled_name(&mut doc);
126 |
127 | self.list.push_front(doc);
128 | Ok(self.list.front_mut().unwrap())
129 | }
130 |
131 | pub fn gen_untitled_name(&self, doc: &mut Document) {
132 | let mut new_file_counter = 0;
133 | for doc in &self.list {
134 | new_file_counter = new_file_counter.max(doc.new_file_counter);
135 | }
136 | new_file_counter += 1;
137 |
138 | doc.filename = format!("Untitled-{new_file_counter}.txt");
139 | doc.new_file_counter = new_file_counter;
140 | }
141 |
142 | pub fn add_file_path(&mut self, path: &Path) -> apperr::Result<&mut Document> {
143 | let (path, goto) = Self::parse_filename_goto(path);
144 | let path = path::normalize(path);
145 |
146 | let mut file = match Self::open_for_reading(&path) {
147 | Ok(file) => Some(file),
148 | Err(err) if sys::apperr_is_not_found(err) => None,
149 | Err(err) => return Err(err),
150 | };
151 |
152 | let file_id = if file.is_some() { Some(sys::file_id(file.as_ref(), &path)?) } else { None };
153 |
154 | // Check if the file is already open.
155 | if file_id.is_some() && self.update_active(|doc| doc.file_id == file_id) {
156 | let doc = self.active_mut().unwrap();
157 | if let Some(goto) = goto {
158 | doc.buffer.borrow_mut().cursor_move_to_logical(goto);
159 | }
160 | return Ok(doc);
161 | }
162 |
163 | let buffer = Self::create_buffer()?;
164 | {
165 | if let Some(file) = &mut file {
166 | let mut tb = buffer.borrow_mut();
167 | tb.read_file(file, None)?;
168 |
169 | if let Some(goto) = goto
170 | && goto != Default::default()
171 | {
172 | tb.cursor_move_to_logical(goto);
173 | }
174 | }
175 | }
176 |
177 | let mut doc = Document {
178 | buffer,
179 | path: None,
180 | dir: None,
181 | filename: Default::default(),
182 | file_id,
183 | new_file_counter: 0,
184 | };
185 | doc.set_path(path);
186 |
187 | if let Some(active) = self.active()
188 | && active.path.is_none()
189 | && active.file_id.is_none()
190 | && !active.buffer.borrow().is_dirty()
191 | {
192 | // If the current document is a pristine Untitled document with no
193 | // name and no ID, replace it with the new document.
194 | self.remove_active();
195 | }
196 |
197 | self.list.push_front(doc);
198 | Ok(self.list.front_mut().unwrap())
199 | }
200 |
201 | pub fn reflow_all(&self) {
202 | for doc in &self.list {
203 | let mut tb = doc.buffer.borrow_mut();
204 | tb.reflow();
205 | }
206 | }
207 |
208 | pub fn open_for_reading(path: &Path) -> apperr::Result {
209 | File::open(path).map_err(apperr::Error::from)
210 | }
211 |
212 | pub fn open_for_writing(path: &Path) -> apperr::Result {
213 | File::create(path).map_err(apperr::Error::from)
214 | }
215 |
216 | fn create_buffer() -> apperr::Result {
217 | let buffer = TextBuffer::new_rc(false)?;
218 | {
219 | let mut tb = buffer.borrow_mut();
220 | tb.set_insert_final_newline(!cfg!(windows)); // As mandated by POSIX.
221 | tb.set_margin_enabled(true);
222 | tb.set_line_highlight_enabled(true);
223 | }
224 | Ok(buffer)
225 | }
226 |
227 | // Parse a filename in the form of "filename:line:char".
228 | // Returns the position of the first colon and the line/char coordinates.
229 | fn parse_filename_goto(path: &Path) -> (&Path, Option) {
230 | fn parse(s: &[u8]) -> Option {
231 | if s.is_empty() {
232 | return None;
233 | }
234 |
235 | let mut num: CoordType = 0;
236 | for &b in s {
237 | if !b.is_ascii_digit() {
238 | return None;
239 | }
240 | let digit = (b - b'0') as CoordType;
241 | num = num.checked_mul(10)?.checked_add(digit)?;
242 | }
243 | Some(num)
244 | }
245 |
246 | fn find_colon_rev(bytes: &[u8], offset: usize) -> Option {
247 | (0..offset.min(bytes.len())).rev().find(|&i| bytes[i] == b':')
248 | }
249 |
250 | let bytes = path.as_os_str().as_encoded_bytes();
251 | let colend = match find_colon_rev(bytes, bytes.len()) {
252 | // Reject filenames that would result in an empty filename after stripping off the :line:char suffix.
253 | // For instance, a filename like ":123:456" will not be processed by this function.
254 | Some(colend) if colend > 0 => colend,
255 | _ => return (path, None),
256 | };
257 |
258 | let last = match parse(&bytes[colend + 1..]) {
259 | Some(last) => last,
260 | None => return (path, None),
261 | };
262 | let last = (last - 1).max(0);
263 | let mut len = colend;
264 | let mut goto = Point { x: 0, y: last };
265 |
266 | if let Some(colbeg) = find_colon_rev(bytes, colend) {
267 | // Same here: Don't allow empty filenames.
268 | if colbeg != 0
269 | && let Some(first) = parse(&bytes[colbeg + 1..colend])
270 | {
271 | let first = (first - 1).max(0);
272 | len = colbeg;
273 | goto = Point { x: last, y: first };
274 | }
275 | }
276 |
277 | // Strip off the :line:char suffix.
278 | let path = &bytes[..len];
279 | let path = unsafe { OsStr::from_encoded_bytes_unchecked(path) };
280 | let path = Path::new(path);
281 | (path, Some(goto))
282 | }
283 | }
284 |
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `DocumentManager::parse_filename_goto` with and without
    /// `:line` / `:line:char` suffixes, including degenerate inputs.
    #[test]
    fn test_parse_last_numbers() {
        // Adapter so that the cases below can be written with plain string slices.
        fn parse(s: &str) -> (&str, Option<Point>) {
            let (p, g) = DocumentManager::parse_filename_goto(Path::new(s));
            (p.to_str().unwrap(), g)
        }

        assert_eq!(parse("123"), ("123", None));
        assert_eq!(parse("abc"), ("abc", None));
        assert_eq!(parse(":123"), (":123", None));
        assert_eq!(parse("abc:123"), ("abc", Some(Point { x: 0, y: 122 })));
        assert_eq!(parse("45:123"), ("45", Some(Point { x: 0, y: 122 })));
        assert_eq!(parse(":45:123"), (":45", Some(Point { x: 0, y: 122 })));
        assert_eq!(parse("abc:45:123"), ("abc", Some(Point { x: 122, y: 44 })));
        assert_eq!(parse("abc:def:123"), ("abc:def", Some(Point { x: 0, y: 122 })));
        assert_eq!(parse("1:2:3"), ("1", Some(Point { x: 2, y: 1 })));
        assert_eq!(parse("::3"), (":", Some(Point { x: 0, y: 2 })));
        assert_eq!(parse("1::3"), ("1:", Some(Point { x: 0, y: 2 })));
        assert_eq!(parse(""), ("", None));
        assert_eq!(parse(":"), (":", None));
        assert_eq!(parse("::"), ("::", None));
        assert_eq!(parse("a:1"), ("a", Some(Point { x: 0, y: 0 })));
        assert_eq!(parse("1:a"), ("1:a", None));
        assert_eq!(parse("file.txt:10"), ("file.txt", Some(Point { x: 0, y: 9 })));
        assert_eq!(parse("file.txt:10:5"), ("file.txt", Some(Point { x: 4, y: 9 })));
    }
}
316 |
--------------------------------------------------------------------------------
/src/bin/edit/draw_filepicker.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | use std::cmp::Ordering;
5 | use std::fs;
6 | use std::path::{Path, PathBuf};
7 |
8 | use edit::framebuffer::IndexedColor;
9 | use edit::helpers::*;
10 | use edit::input::vk;
11 | use edit::tui::*;
12 | use edit::{icu, path};
13 |
14 | use crate::localization::*;
15 | use crate::state::*;
16 |
/// Draws the file picker modal used for both "Open" and "Save As", plus the
/// overwrite confirmation modal, and performs the open/save once a file was chosen.
///
/// All picker state lives in `state.file_picker_*`. When the dialog is
/// finished (closed, or the action succeeded), `state.wants_file_picker` is
/// set back to `StateFilePicker::None` and the pending state is cleared.
pub fn draw_file_picker(ctx: &mut Context, state: &mut State) {
    // The save dialog is pre-filled with the current document filename.
    // The `SaveAs` -> `SaveAsShown` transition makes this happen only once per dialog.
    if state.wants_file_picker == StateFilePicker::SaveAs {
        state.wants_file_picker = StateFilePicker::SaveAsShown;

        if state.file_picker_pending_name.as_os_str().is_empty() {
            state.file_picker_pending_name =
                state.documents.active().map_or("Untitled.txt", |doc| doc.filename.as_str()).into();
        }
    }

    let width = (ctx.size().width - 20).max(10);
    let height = (ctx.size().height - 10).max(10);
    // `doit`: the path to open/save once the user has committed to one.
    let mut doit = None;
    // `done`: whether the dialog should be closed and its state reset.
    let mut done = false;

    ctx.modal_begin(
        "file-picker",
        if state.wants_file_picker == StateFilePicker::Open {
            loc(LocId::FileOpen)
        } else {
            loc(LocId::FileSaveAs)
        },
    );
    ctx.attr_intrinsic_size(Size { width, height });
    {
        // Set when the user confirmed the current name (Enter, list activation, Backspace).
        let mut activated = false;

        // Header: current directory (read-only) and the editable file name.
        ctx.table_begin("path");
        ctx.table_set_columns(&[0, COORD_TYPE_SAFE_MAX]);
        ctx.table_set_cell_gap(Size { width: 1, height: 0 });
        ctx.attr_padding(Rect::two(1, 1));
        ctx.inherit_focus();
        {
            ctx.table_next_row();

            ctx.label("dir-label", loc(LocId::SaveAsDialogPathLabel));
            ctx.label("dir", state.file_picker_pending_dir.as_str());
            ctx.attr_overflow(Overflow::TruncateMiddle);

            ctx.table_next_row();
            ctx.inherit_focus();

            ctx.label("name-label", loc(LocId::SaveAsDialogNameLabel));
            ctx.editline("name", &mut state.file_picker_pending_name);
            ctx.inherit_focus();
            if ctx.is_focused() && ctx.consume_shortcut(vk::RETURN) {
                activated = true;
            }
        }
        ctx.table_end();

        // The directory listing is cached; it is `None` after a directory change.
        if state.file_picker_entries.is_none() {
            draw_dialog_saveas_refresh_files(state);
        }

        let files = state.file_picker_entries.as_ref().unwrap();

        ctx.scrollarea_begin(
            "directory",
            Size {
                width: 0,
                // -1 for the label (top)
                // -1 for the label (bottom)
                // -1 for the editline (bottom)
                height: height - 3,
            },
        );
        ctx.attr_background_rgba(ctx.indexed_alpha(IndexedColor::Black, 1, 4));
        // Mixes the directory revision into the next block's ID.
        // NOTE(review): presumably so that the list does not carry over UI
        // state from the previous directory — confirm against `next_block_id_mixin`.
        ctx.next_block_id_mixin(state.file_picker_pending_dir_revision);
        {
            ctx.list_begin("files");
            ctx.inherit_focus();
            for entry in files {
                match ctx.list_item(false, entry.as_str()) {
                    ListSelection::Unchanged => {}
                    // Selecting an entry copies its name into the name field.
                    ListSelection::Selected => {
                        state.file_picker_pending_name = entry.as_path().into()
                    }
                    ListSelection::Activated => activated = true,
                }
                ctx.attr_overflow(Overflow::TruncateMiddle);
            }
            ctx.list_end();

            // Backspace inside the list navigates to the parent directory.
            if ctx.contains_focus() && ctx.consume_shortcut(vk::BACK) {
                state.file_picker_pending_name = "..".into();
                activated = true;
            }
        }
        ctx.scrollarea_end();

        if activated {
            // Yields `Some(path)` only if the name resolved to a file rather than a directory.
            doit = draw_file_picker_update_path(state);

            // Check if the file already exists and show an overwrite warning in that case.
            if state.wants_file_picker != StateFilePicker::Open
                && let Some(path) = doit.as_deref()
                && path.exists()
            {
                // Park the path in the warning state; `doit` becomes `None` until confirmed.
                state.file_picker_overwrite_warning = doit.take();
            }
        }
    }
    if ctx.modal_end() {
        done = true;
    }

    if state.file_picker_overwrite_warning.is_some() {
        let mut save;

        ctx.modal_begin("overwrite", loc(LocId::FileOverwriteWarning));
        ctx.attr_background_rgba(ctx.indexed(IndexedColor::Red));
        ctx.attr_foreground_rgba(ctx.indexed(IndexedColor::BrightWhite));
        {
            let contains_focus = ctx.contains_focus();

            ctx.label("description", loc(LocId::FileOverwriteWarningDescription));
            ctx.attr_overflow(Overflow::TruncateTail);
            ctx.attr_padding(Rect::three(1, 2, 1));

            ctx.table_begin("choices");
            ctx.inherit_focus();
            ctx.attr_padding(Rect::three(0, 2, 1));
            ctx.attr_position(Position::Center);
            ctx.table_set_cell_gap(Size { width: 2, height: 0 });
            {
                ctx.table_next_row();
                ctx.inherit_focus();

                save = ctx.button("yes", loc(LocId::Yes), ButtonStyle::default());
                ctx.inherit_focus();

                if ctx.button("no", loc(LocId::No), ButtonStyle::default()) {
                    state.file_picker_overwrite_warning = None;
                }
            }
            ctx.table_end();

            // Keyboard shortcuts: Y confirms, N dismisses.
            if contains_focus {
                save |= ctx.consume_shortcut(vk::Y);
                if ctx.consume_shortcut(vk::N) {
                    state.file_picker_overwrite_warning = None;
                }
            }
        }
        if ctx.modal_end() {
            state.file_picker_overwrite_warning = None;
        }

        if save {
            // If the warning was dismissed in the same frame, this yields `None` and nothing is saved.
            doit = state.file_picker_overwrite_warning.take();
        }
    }

    if let Some(path) = doit {
        let res = if state.wants_file_picker == StateFilePicker::Open {
            state.documents.add_file_path(&path).map(|_| ())
        } else if let Some(doc) = state.documents.active_mut() {
            doc.save(Some(path))
        } else {
            Ok(())
        };
        match res {
            Ok(..) => {
                ctx.needs_rerender();
                done = true;
            }
            // On failure the dialog stays open so that the user can try again.
            Err(err) => error_log_add(ctx, state, err),
        }
    }

    if done {
        state.wants_file_picker = StateFilePicker::None;
        state.file_picker_pending_name = Default::default();
        state.file_picker_entries = Default::default();
        state.file_picker_overwrite_warning = Default::default();
    }
}
196 |
197 | // Returns Some(path) if the path refers to a file.
198 | fn draw_file_picker_update_path(state: &mut State) -> Option {
199 | let old_path = state.file_picker_pending_dir.as_path();
200 | let path = old_path.join(&state.file_picker_pending_name);
201 | let path = path::normalize(&path);
202 |
203 | let (dir, name) = if path.is_dir() {
204 | // If the current path is C:\ and the user selects "..", we want to
205 | // navigate to the drive picker. Since `path::normalize` will turn C:\.. into C:\,
206 | // we can detect this by checking if the length of the path didn't change.
207 | let dir = if cfg!(windows)
208 | && state.file_picker_pending_name == Path::new("..")
209 | // It's unnecessary to check the contents of the paths.
210 | && old_path.as_os_str().len() == path.as_os_str().len()
211 | {
212 | Path::new("")
213 | } else {
214 | path.as_path()
215 | };
216 | (dir, PathBuf::new())
217 | } else {
218 | let dir = path.parent().unwrap_or(&path);
219 | let name = path.file_name().map_or(Default::default(), |s| s.into());
220 | (dir, name)
221 | };
222 | if dir != state.file_picker_pending_dir.as_path() {
223 | state.file_picker_pending_dir = DisplayablePathBuf::from_path(dir.to_path_buf());
224 | state.file_picker_entries = None;
225 | }
226 |
227 | state.file_picker_pending_name = name;
228 | if state.file_picker_pending_name.as_os_str().is_empty() { None } else { Some(path) }
229 | }
230 |
231 | fn draw_dialog_saveas_refresh_files(state: &mut State) {
232 | let dir = state.file_picker_pending_dir.as_path();
233 | let mut files = Vec::new();
234 | let mut off = 0;
235 |
236 | #[cfg(windows)]
237 | if dir.as_os_str().is_empty() {
238 | // If the path is empty, we are at the drive picker.
239 | // Add all drives as entries.
240 | for drive in edit::sys::drives() {
241 | files.push(DisplayablePathBuf::from_string(format!("{drive}:\\")));
242 | }
243 |
244 | state.file_picker_entries = Some(files);
245 | return;
246 | }
247 |
248 | if cfg!(windows) || dir.parent().is_some() {
249 | files.push(DisplayablePathBuf::from(".."));
250 | off = 1;
251 | }
252 |
253 | if let Ok(iter) = fs::read_dir(dir) {
254 | for entry in iter.flatten() {
255 | if let Ok(metadata) = entry.metadata() {
256 | let mut name = entry.file_name();
257 | if metadata.is_dir()
258 | || (metadata.is_symlink()
259 | && fs::metadata(entry.path()).is_ok_and(|m| m.is_dir()))
260 | {
261 | name.push("/");
262 | }
263 | files.push(DisplayablePathBuf::from(name));
264 | }
265 | }
266 | }
267 |
268 | // Sort directories first, then by name, case-insensitive.
269 | files[off..].sort_by(|a, b| {
270 | let a = a.as_bytes();
271 | let b = b.as_bytes();
272 |
273 | let a_is_dir = a.last() == Some(&b'/');
274 | let b_is_dir = b.last() == Some(&b'/');
275 |
276 | match b_is_dir.cmp(&a_is_dir) {
277 | Ordering::Equal => icu::compare_strings(a, b),
278 | other => other,
279 | }
280 | });
281 |
282 | state.file_picker_entries = Some(files);
283 | }
284 |
--------------------------------------------------------------------------------
/src/bin/edit/draw_menubar.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | use edit::arena_format;
5 | use edit::helpers::*;
6 | use edit::input::{kbmod, vk};
7 | use edit::tui::*;
8 |
9 | use crate::localization::*;
10 | use crate::state::*;
11 |
12 | pub fn draw_menubar(ctx: &mut Context, state: &mut State) {
13 | ctx.menubar_begin();
14 | ctx.attr_background_rgba(state.menubar_color_bg);
15 | ctx.attr_foreground_rgba(state.menubar_color_fg);
16 | {
17 | let contains_focus = ctx.contains_focus();
18 |
19 | if ctx.menubar_menu_begin(loc(LocId::File), 'F') {
20 | draw_menu_file(ctx, state);
21 | }
22 | if !contains_focus && ctx.consume_shortcut(vk::F10) {
23 | ctx.steal_focus();
24 | }
25 | if state.documents.active().is_some() && ctx.menubar_menu_begin(loc(LocId::Edit), 'E') {
26 | draw_menu_edit(ctx, state);
27 | }
28 | if ctx.menubar_menu_begin(loc(LocId::View), 'V') {
29 | draw_menu_view(ctx, state);
30 | }
31 | if ctx.menubar_menu_begin(loc(LocId::Help), 'H') {
32 | draw_menu_help(ctx, state);
33 | }
34 | }
35 | ctx.menubar_end();
36 | }
37 |
38 | fn draw_menu_file(ctx: &mut Context, state: &mut State) {
39 | if ctx.menubar_menu_button(loc(LocId::FileNew), 'N', kbmod::CTRL | vk::N) {
40 | draw_add_untitled_document(ctx, state);
41 | }
42 | if ctx.menubar_menu_button(loc(LocId::FileOpen), 'O', kbmod::CTRL | vk::O) {
43 | state.wants_file_picker = StateFilePicker::Open;
44 | }
45 | if state.documents.active().is_some() {
46 | if ctx.menubar_menu_button(loc(LocId::FileSave), 'S', kbmod::CTRL | vk::S) {
47 | state.wants_save = true;
48 | }
49 | if ctx.menubar_menu_button(loc(LocId::FileSaveAs), 'A', vk::NULL) {
50 | state.wants_file_picker = StateFilePicker::SaveAs;
51 | }
52 | if ctx.menubar_menu_button(loc(LocId::FileClose), 'C', kbmod::CTRL | vk::W) {
53 | state.wants_close = true;
54 | }
55 | }
56 | if ctx.menubar_menu_button(loc(LocId::FileExit), 'X', kbmod::CTRL | vk::Q) {
57 | state.wants_exit = true;
58 | }
59 | ctx.menubar_menu_end();
60 | }
61 |
62 | fn draw_menu_edit(ctx: &mut Context, state: &mut State) {
63 | let doc = state.documents.active().unwrap();
64 | let mut tb = doc.buffer.borrow_mut();
65 |
66 | if ctx.menubar_menu_button(loc(LocId::EditUndo), 'U', kbmod::CTRL | vk::Z) {
67 | tb.undo();
68 | ctx.needs_rerender();
69 | }
70 | if ctx.menubar_menu_button(loc(LocId::EditRedo), 'R', kbmod::CTRL | vk::Y) {
71 | tb.redo();
72 | ctx.needs_rerender();
73 | }
74 | if ctx.menubar_menu_button(loc(LocId::EditCut), 'T', kbmod::CTRL | vk::X) {
75 | ctx.set_clipboard(tb.extract_selection(true));
76 | }
77 | if ctx.menubar_menu_button(loc(LocId::EditCopy), 'C', kbmod::CTRL | vk::C) {
78 | ctx.set_clipboard(tb.extract_selection(false));
79 | }
80 | if ctx.menubar_menu_button(loc(LocId::EditPaste), 'P', kbmod::CTRL | vk::V) {
81 | tb.write(ctx.clipboard(), true);
82 | ctx.needs_rerender();
83 | }
84 | if state.wants_search.kind != StateSearchKind::Disabled {
85 | if ctx.menubar_menu_button(loc(LocId::EditFind), 'F', kbmod::CTRL | vk::F) {
86 | state.wants_search.kind = StateSearchKind::Search;
87 | state.wants_search.focus = true;
88 | }
89 | if ctx.menubar_menu_button(loc(LocId::EditReplace), 'L', kbmod::CTRL | vk::R) {
90 | state.wants_search.kind = StateSearchKind::Replace;
91 | state.wants_search.focus = true;
92 | }
93 | }
94 | if ctx.menubar_menu_button(loc(LocId::EditSelectAll), 'A', kbmod::CTRL | vk::A) {
95 | tb.select_all();
96 | ctx.needs_rerender();
97 | }
98 | ctx.menubar_menu_end();
99 | }
100 |
101 | fn draw_menu_view(ctx: &mut Context, state: &mut State) {
102 | if ctx.menubar_menu_button(loc(LocId::ViewFocusStatusbar), 'S', vk::NULL) {
103 | state.wants_statusbar_focus = true;
104 | }
105 |
106 | if let Some(doc) = state.documents.active() {
107 | let mut tb = doc.buffer.borrow_mut();
108 | let word_wrap = tb.is_word_wrap_enabled();
109 |
110 | if ctx.menubar_menu_button(loc(LocId::ViewDocumentPicker), 'P', kbmod::CTRL | vk::P) {
111 | state.wants_document_picker = true;
112 | }
113 | if ctx.menubar_menu_button(loc(LocId::FileGoto), 'G', kbmod::CTRL | vk::G) {
114 | state.wants_goto = true;
115 | }
116 | if ctx.menubar_menu_checkbox(loc(LocId::ViewWordWrap), 'W', kbmod::ALT | vk::Z, word_wrap) {
117 | tb.set_word_wrap(!word_wrap);
118 | ctx.needs_rerender();
119 | }
120 | }
121 |
122 | ctx.menubar_menu_end();
123 | }
124 |
125 | fn draw_menu_help(ctx: &mut Context, state: &mut State) {
126 | if ctx.menubar_menu_button(loc(LocId::HelpAbout), 'A', vk::NULL) {
127 | state.wants_about = true;
128 | }
129 | ctx.menubar_menu_end();
130 | }
131 |
132 | pub fn draw_dialog_about(ctx: &mut Context, state: &mut State) {
133 | ctx.modal_begin("about", loc(LocId::AboutDialogTitle));
134 | {
135 | ctx.block_begin("content");
136 | ctx.inherit_focus();
137 | ctx.attr_padding(Rect::three(1, 2, 1));
138 | {
139 | ctx.label("description", "Microsoft Edit");
140 | ctx.attr_overflow(Overflow::TruncateTail);
141 | ctx.attr_position(Position::Center);
142 |
143 | ctx.label(
144 | "version",
145 | &arena_format!(
146 | ctx.arena(),
147 | "{}{}",
148 | loc(LocId::AboutDialogVersion),
149 | env!("CARGO_PKG_VERSION")
150 | ),
151 | );
152 | ctx.attr_overflow(Overflow::TruncateHead);
153 | ctx.attr_position(Position::Center);
154 |
155 | ctx.label("copyright", "Copyright (c) Microsoft Corp 2025");
156 | ctx.attr_overflow(Overflow::TruncateTail);
157 | ctx.attr_position(Position::Center);
158 |
159 | ctx.block_begin("choices");
160 | ctx.inherit_focus();
161 | ctx.attr_padding(Rect::three(1, 2, 0));
162 | ctx.attr_position(Position::Center);
163 | {
164 | if ctx.button("ok", loc(LocId::Ok), ButtonStyle::default()) {
165 | state.wants_about = false;
166 | }
167 | ctx.inherit_focus();
168 | }
169 | ctx.block_end();
170 | }
171 | ctx.block_end();
172 | }
173 | if ctx.modal_end() {
174 | state.wants_about = false;
175 | }
176 | }
177 |
--------------------------------------------------------------------------------
/src/bin/edit/edit.exe.manifest:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 | true
13 | UTF-8
14 | SegmentHeap
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/src/bin/edit/state.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | use std::borrow::Cow;
5 | use std::ffi::{OsStr, OsString};
6 | use std::mem;
7 | use std::path::{Path, PathBuf};
8 |
9 | use edit::framebuffer::IndexedColor;
10 | use edit::helpers::*;
11 | use edit::tui::*;
12 | use edit::{apperr, buffer, icu, sys};
13 |
14 | use crate::documents::DocumentManager;
15 | use crate::localization::*;
16 |
17 | #[repr(transparent)]
18 | pub struct FormatApperr(apperr::Error);
19 |
20 | impl From for FormatApperr {
21 | fn from(err: apperr::Error) -> Self {
22 | Self(err)
23 | }
24 | }
25 |
26 | impl std::fmt::Display for FormatApperr {
27 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28 | match self.0 {
29 | apperr::APP_ICU_MISSING => f.write_str(loc(LocId::ErrorIcuMissing)),
30 | apperr::Error::App(code) => write!(f, "Unknown app error code: {code}"),
31 | apperr::Error::Icu(code) => icu::apperr_format(f, code),
32 | apperr::Error::Sys(code) => sys::apperr_format(f, code),
33 | }
34 | }
35 | }
36 |
/// A path bundled with a string rendition of it that is suitable for display.
pub struct DisplayablePathBuf {
    /// The actual path.
    value: PathBuf,
    /// The displayable form of `value`. Despite the `'static` lifetime in the type,
    /// a borrowed value may point into the heap buffer owned by `value`, so it must
    /// never be handed out for longer than `self` lives.
    str: Cow<'static, str>,
}
41 |
42 | impl DisplayablePathBuf {
43 | #[allow(dead_code, reason = "only used on Windows")]
44 | pub fn from_string(string: String) -> Self {
45 | let str = Cow::Borrowed(string.as_str());
46 | let str = unsafe { mem::transmute::, Cow<'_, str>>(str) };
47 | let value = PathBuf::from(string);
48 | Self { value, str }
49 | }
50 |
51 | pub fn from_path(value: PathBuf) -> Self {
52 | let str = value.to_string_lossy();
53 | let str = unsafe { mem::transmute::, Cow<'_, str>>(str) };
54 | Self { value, str }
55 | }
56 |
57 | pub fn as_path(&self) -> &Path {
58 | &self.value
59 | }
60 |
61 | pub fn as_str(&self) -> &str {
62 | &self.str
63 | }
64 |
65 | pub fn as_bytes(&self) -> &[u8] {
66 | self.value.as_os_str().as_encoded_bytes()
67 | }
68 | }
69 |
70 | impl Default for DisplayablePathBuf {
71 | fn default() -> Self {
72 | Self { value: Default::default(), str: Cow::Borrowed("") }
73 | }
74 | }
75 |
76 | impl Clone for DisplayablePathBuf {
77 | fn clone(&self) -> Self {
78 | Self::from_path(self.value.clone())
79 | }
80 | }
81 |
82 | impl From for DisplayablePathBuf {
83 | fn from(s: OsString) -> Self {
84 | Self::from_path(PathBuf::from(s))
85 | }
86 | }
87 |
88 | impl> From<&T> for DisplayablePathBuf {
89 | fn from(s: &T) -> Self {
90 | Self::from_path(PathBuf::from(s))
91 | }
92 | }
93 |
94 | pub struct StateSearch {
95 | pub kind: StateSearchKind,
96 | pub focus: bool,
97 | }
98 |
/// The mode of the search feature.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum StateSearchKind {
    /// The initial mode set up by `State::new`.
    Hidden,
    /// While in this mode the Edit menu lists neither Find nor Replace.
    Disabled,
    /// Selected via the Find entry of the Edit menu.
    Search,
    /// Selected via the Replace entry of the Edit menu.
    Replace,
}
106 |
/// Which file picker, if any, is requested or shown.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum StateFilePicker {
    /// The initial value set up by `State::new`.
    None,
    /// Requested by the Open entry of the File menu.
    Open,
    /// Requested by the Save As entry of the File menu.
    SaveAs,

    /// Transitioned from `SaveAs`.
    SaveAsShown,
}
115 |
/// Which kind of encoding change, if any, has been requested.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum StateEncodingChange {
    /// The initial value set up by `State::new`.
    None,
    // NOTE(review): the variants below are handled outside of this file;
    // their exact semantics should be confirmed against the encoding picker.
    Convert,
    Reopen,
}
122 |
123 | pub struct State {
124 | pub menubar_color_bg: u32,
125 | pub menubar_color_fg: u32,
126 |
127 | pub documents: DocumentManager,
128 |
129 | // A ring buffer of the last 10 errors.
130 | pub error_log: [String; 10],
131 | pub error_log_index: usize,
132 | pub error_log_count: usize,
133 |
134 | pub wants_file_picker: StateFilePicker,
135 | pub file_picker_pending_dir: DisplayablePathBuf,
136 | pub file_picker_pending_dir_revision: u64, // Bumped every time `file_picker_pending_dir` changes.
137 | pub file_picker_pending_name: PathBuf,
138 | pub file_picker_entries: Option>,
139 | pub file_picker_overwrite_warning: Option, // The path the warning is about.
140 |
141 | pub wants_search: StateSearch,
142 | pub search_needle: String,
143 | pub search_replacement: String,
144 | pub search_options: buffer::SearchOptions,
145 | pub search_success: bool,
146 |
147 | pub wants_encoding_picker: bool,
148 | pub encoding_picker_needle: String,
149 | pub encoding_picker_results: Option>,
150 |
151 | pub wants_save: bool,
152 | pub wants_statusbar_focus: bool,
153 | pub wants_encoding_change: StateEncodingChange,
154 | pub wants_indentation_picker: bool,
155 | pub wants_document_picker: bool,
156 | pub wants_about: bool,
157 | pub wants_close: bool,
158 | pub wants_exit: bool,
159 | pub wants_goto: bool,
160 | pub goto_target: String,
161 | pub goto_invalid: bool,
162 |
163 | pub osc_title_filename: String,
164 | pub osc_clipboard_seen_generation: u32,
165 | pub osc_clipboard_send_generation: u32,
166 | pub osc_clipboard_always_send: bool,
167 | pub exit: bool,
168 | }
169 |
170 | impl State {
171 | pub fn new() -> apperr::Result {
172 | Ok(Self {
173 | menubar_color_bg: 0,
174 | menubar_color_fg: 0,
175 |
176 | documents: Default::default(),
177 |
178 | error_log: [const { String::new() }; 10],
179 | error_log_index: 0,
180 | error_log_count: 0,
181 |
182 | wants_file_picker: StateFilePicker::None,
183 | file_picker_pending_dir: Default::default(),
184 | file_picker_pending_dir_revision: 0,
185 | file_picker_pending_name: Default::default(),
186 | file_picker_entries: None,
187 | file_picker_overwrite_warning: None,
188 |
189 | wants_search: StateSearch { kind: StateSearchKind::Hidden, focus: false },
190 | search_needle: Default::default(),
191 | search_replacement: Default::default(),
192 | search_options: Default::default(),
193 | search_success: true,
194 |
195 | wants_encoding_picker: false,
196 | encoding_picker_needle: Default::default(),
197 | encoding_picker_results: Default::default(),
198 |
199 | wants_save: false,
200 | wants_statusbar_focus: false,
201 | wants_encoding_change: StateEncodingChange::None,
202 | wants_indentation_picker: false,
203 | wants_document_picker: false,
204 | wants_about: false,
205 | wants_close: false,
206 | wants_exit: false,
207 | wants_goto: false,
208 | goto_target: Default::default(),
209 | goto_invalid: false,
210 |
211 | osc_title_filename: Default::default(),
212 | osc_clipboard_seen_generation: 0,
213 | osc_clipboard_send_generation: 0,
214 | osc_clipboard_always_send: false,
215 | exit: false,
216 | })
217 | }
218 | }
219 |
220 | pub fn draw_add_untitled_document(ctx: &mut Context, state: &mut State) {
221 | if let Err(err) = state.documents.add_untitled() {
222 | error_log_add(ctx, state, err);
223 | }
224 | }
225 |
226 | pub fn error_log_add(ctx: &mut Context, state: &mut State, err: apperr::Error) {
227 | let msg = format!("{}", FormatApperr::from(err));
228 | if !msg.is_empty() {
229 | state.error_log[state.error_log_index] = msg;
230 | state.error_log_index = (state.error_log_index + 1) % state.error_log.len();
231 | state.error_log_count = state.error_log.len().min(state.error_log_count + 1);
232 | ctx.needs_rerender();
233 | }
234 | }
235 |
236 | pub fn draw_error_log(ctx: &mut Context, state: &mut State) {
237 | ctx.modal_begin("error", loc(LocId::ErrorDialogTitle));
238 | ctx.attr_background_rgba(ctx.indexed(IndexedColor::Red));
239 | ctx.attr_foreground_rgba(ctx.indexed(IndexedColor::BrightWhite));
240 | {
241 | ctx.block_begin("content");
242 | ctx.attr_padding(Rect::three(0, 2, 1));
243 | {
244 | let off = state.error_log_index + state.error_log.len() - state.error_log_count;
245 |
246 | for i in 0..state.error_log_count {
247 | let idx = (off + i) % state.error_log.len();
248 | let msg = &state.error_log[idx][..];
249 |
250 | if !msg.is_empty() {
251 | ctx.next_block_id_mixin(i as u64);
252 | ctx.label("error", msg);
253 | ctx.attr_overflow(Overflow::TruncateTail);
254 | }
255 | }
256 | }
257 | ctx.block_end();
258 |
259 | if ctx.button("ok", loc(LocId::Ok), ButtonStyle::default()) {
260 | state.error_log_count = 0;
261 | }
262 | ctx.attr_position(Position::Center);
263 | ctx.inherit_focus();
264 | }
265 | if ctx.modal_end() {
266 | state.error_log_count = 0;
267 | }
268 | }
269 |
--------------------------------------------------------------------------------
/src/buffer/line_cache.rs:
--------------------------------------------------------------------------------
1 | use std::ops::Range;
2 |
3 | use crate::{document::ReadableDocument, simd::memchr2};
4 |
/// The distance, in lines, between two cached line/offset pairs.
/// A denser cache speeds up line/offset calculations at the cost of more memory.
const CACHE_EVERY: usize = 64 * 1024;
7 |
/// A single cached line/offset pair.
#[derive(Clone)]
pub struct CachePoint {
    /// The byte offset of a newline character in the document.
    pub index: usize,
    /// The number of newline characters that precede `index`.
    pub line: usize,
    // pub snapshot: ParserSnapshot
}
14 |
15 | pub struct LineCache {
16 | cache: Vec,
17 | }
18 |
19 | impl LineCache {
20 | pub fn new() -> Self {
21 | Self { cache: vec![] }
22 | }
23 |
24 | pub fn from_document(&mut self, document: &T) {
25 | self.cache.clear();
26 |
27 | let mut offset = 0;
28 | let mut line = 0;
29 | loop {
30 | let text = document.read_forward(offset);
31 | if text.is_empty() { return; }
32 |
33 | let mut off = 0;
34 | loop {
35 | off = memchr2(b'\n', b'\n', text, off);
36 | if off == text.len() { break; }
37 |
38 | if line % CACHE_EVERY == 0 {
39 | self.cache.push(CachePoint { index: offset+off, line });
40 | }
41 | line += 1;
42 | off += 1;
43 | }
44 |
45 | offset += text.len();
46 | }
47 | }
48 |
49 | /// Updates the cache after a deletion.
50 | /// `range` is the deleted byte range, and `text` is the content that was deleted.
51 | pub fn delete(&mut self, range: Range, text: &Vec) {
52 | let mut newlines = 0;
53 | for c in text {
54 | if *c == b'\n' {
55 | newlines += 1;
56 | }
57 | }
58 |
59 | let mut beg_del = None;
60 | let mut end_del = None;
61 | for (i, point) in self.cache.iter_mut().enumerate() {
62 | if point.index >= range.start {
63 | if point.index < range.end {
64 | // cache point is within the deleted range
65 | if beg_del.is_none() { beg_del = Some(i); }
66 | end_del = Some(i + 1);
67 | }
68 | else {
69 | point.index -= text.len();
70 | point.line -= newlines;
71 | }
72 | }
73 | }
74 |
75 | if let (Some(beg), Some(end)) = (beg_del, end_del) {
76 | self.cache.drain(beg..end);
77 | }
78 | }
79 |
80 | /// Updates the cache after an insertion.
81 | /// `offset` is where the insertion occurs, and `text` is the inserted content.
82 | pub fn insert(&mut self, offset: usize, text: &[u8]) {
83 | // Count how many newlines were inserted
84 | let mut newlines = 0;
85 | for c in text {
86 | if *c == b'\n' {
87 | newlines += 1;
88 | }
89 | }
90 |
91 | let len = text.len();
92 | for point in &mut self.cache {
93 | if point.index > offset {
94 | point.index += len;
95 | point.line += newlines;
96 | }
97 | }
98 |
99 | // TODO: This also needs to insert new cache points
100 | }
101 |
102 | /// Finds the nearest cached line-offset pair relative to a target line.
103 | /// If `reverse` is false, it returns the closest *before* the target.
104 | /// If `reverse` is true, it returns the closest *after or at* the target.
105 | pub fn nearest_offset(&self, target_count: usize, reverse: bool) -> Option {
106 | match self.cache.binary_search_by_key(&target_count, |p| p.line) {
107 | Ok(i) => Some(self.cache[i].clone()),
108 | Err(i) => {
109 | if i == 0 || i == self.cache.len() { None } // target < lowest cache point || target > highest cache point
110 | else {
111 | Some(self.cache[ if reverse {i} else {i-1} ].clone())
112 | }
113 | }
114 | }
115 | }
116 | }
117 |
--------------------------------------------------------------------------------
/src/buffer/navigation.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | use std::ops::Range;
5 |
6 | use crate::document::ReadableDocument;
7 |
/// The classes of bytes that word navigation and selection distinguish.
#[derive(Clone, Copy, PartialEq, Eq)]
enum CharClass {
    /// Spaces and tabs.
    Whitespace,
    /// Carriage returns and line feeds.
    Newline,
    /// ASCII punctuation that delimits words.
    Separator,
    /// Everything else, including all non-ASCII bytes.
    Word,
}
15 |
16 | const fn construct_classifier(separators: &[u8]) -> [CharClass; 256] {
17 | let mut classifier = [CharClass::Word; 256];
18 |
19 | classifier[b' ' as usize] = CharClass::Whitespace;
20 | classifier[b'\t' as usize] = CharClass::Whitespace;
21 | classifier[b'\n' as usize] = CharClass::Newline;
22 | classifier[b'\r' as usize] = CharClass::Newline;
23 |
24 | let mut i = 0;
25 | let len = separators.len();
26 | while i < len {
27 | let ch = separators[i];
28 | assert!(ch < 128, "Only ASCII separators are supported.");
29 | classifier[ch as usize] = CharClass::Separator;
30 | i += 1;
31 | }
32 |
33 | classifier
34 | }
35 |
36 | const WORD_CLASSIFIER: [CharClass; 256] =
37 | construct_classifier(br#"`~!@#$%^&*()-=+[{]}\|;:'",.<>/?"#);
38 |
39 | /// Finds the next word boundary given a document cursor offset.
40 | /// Returns the offset of the next word boundary.
41 | pub fn word_forward(doc: &dyn ReadableDocument, offset: usize) -> usize {
42 | word_navigation(WordForward { doc, offset, chunk: &[], chunk_off: 0 })
43 | }
44 |
45 | /// The backward version of `word_forward`.
46 | pub fn word_backward(doc: &dyn ReadableDocument, offset: usize) -> usize {
47 | word_navigation(WordBackward { doc, offset, chunk: &[], chunk_off: 0 })
48 | }
49 |
50 | /// Word navigation implementation. Matches the behavior of VS Code.
51 | fn word_navigation(mut nav: T) -> usize {
52 | // First, fill `self.chunk` with at least 1 grapheme.
53 | nav.read();
54 |
55 | // Skip one newline, if any.
56 | nav.skip_newline();
57 |
58 | // Skip any whitespace.
59 | nav.skip_class(CharClass::Whitespace);
60 |
61 | // Skip one word or separator and take note of the class.
62 | let class = nav.peek(CharClass::Whitespace);
63 | if matches!(class, CharClass::Separator | CharClass::Word) {
64 | nav.next();
65 |
66 | let off = nav.offset();
67 |
68 | // Continue skipping the same class.
69 | nav.skip_class(class);
70 |
71 | // If the class was a separator and we only moved one character,
72 | // continue skipping characters of the word class.
73 | if off == nav.offset() && class == CharClass::Separator {
74 | nav.skip_class(CharClass::Word);
75 | }
76 | }
77 |
78 | nav.offset()
79 | }
80 |
81 | trait WordNavigation {
82 | fn read(&mut self);
83 | fn skip_newline(&mut self);
84 | fn skip_class(&mut self, class: CharClass);
85 | fn peek(&self, default: CharClass) -> CharClass;
86 | fn next(&mut self);
87 | fn offset(&self) -> usize;
88 | }
89 |
90 | struct WordForward<'a> {
91 | doc: &'a dyn ReadableDocument,
92 | offset: usize,
93 | chunk: &'a [u8],
94 | chunk_off: usize,
95 | }
96 |
97 | impl WordNavigation for WordForward<'_> {
98 | fn read(&mut self) {
99 | self.chunk = self.doc.read_forward(self.offset);
100 | self.chunk_off = 0;
101 | }
102 |
103 | fn skip_newline(&mut self) {
104 | // We can rely on the fact that the document does not split graphemes across chunks.
105 | // = If there's a newline it's wholly contained in this chunk.
106 | // Unlike with `WordBackward`, we can't check for CR and LF separately as only a CR followed
107 | // by a LF is a newline. A lone CR in the document is just a regular control character.
108 | self.chunk_off += match self.chunk.get(self.chunk_off) {
109 | Some(&b'\n') => 1,
110 | Some(&b'\r') if self.chunk.get(self.chunk_off + 1) == Some(&b'\n') => 2,
111 | _ => 0,
112 | }
113 | }
114 |
115 | fn skip_class(&mut self, class: CharClass) {
116 | while !self.chunk.is_empty() {
117 | while self.chunk_off < self.chunk.len() {
118 | if WORD_CLASSIFIER[self.chunk[self.chunk_off] as usize] != class {
119 | return;
120 | }
121 | self.chunk_off += 1;
122 | }
123 |
124 | self.offset += self.chunk.len();
125 | self.chunk = self.doc.read_forward(self.offset);
126 | self.chunk_off = 0;
127 | }
128 | }
129 |
130 | fn peek(&self, default: CharClass) -> CharClass {
131 | if self.chunk_off < self.chunk.len() {
132 | WORD_CLASSIFIER[self.chunk[self.chunk_off] as usize]
133 | } else {
134 | default
135 | }
136 | }
137 |
138 | fn next(&mut self) {
139 | self.chunk_off += 1;
140 | }
141 |
142 | fn offset(&self) -> usize {
143 | self.offset + self.chunk_off
144 | }
145 | }
146 |
147 | struct WordBackward<'a> {
148 | doc: &'a dyn ReadableDocument,
149 | offset: usize,
150 | chunk: &'a [u8],
151 | chunk_off: usize,
152 | }
153 |
154 | impl WordNavigation for WordBackward<'_> {
155 | fn read(&mut self) {
156 | self.chunk = self.doc.read_backward(self.offset);
157 | self.chunk_off = self.chunk.len();
158 | }
159 |
160 | fn skip_newline(&mut self) {
161 | // We can rely on the fact that the document does not split graphemes across chunks.
162 | // = If there's a newline it's wholly contained in this chunk.
163 | if self.chunk_off > 0 && self.chunk[self.chunk_off - 1] == b'\n' {
164 | self.chunk_off -= 1;
165 | }
166 | if self.chunk_off > 0 && self.chunk[self.chunk_off - 1] == b'\r' {
167 | self.chunk_off -= 1;
168 | }
169 | }
170 |
171 | fn skip_class(&mut self, class: CharClass) {
172 | while !self.chunk.is_empty() {
173 | while self.chunk_off > 0 {
174 | if WORD_CLASSIFIER[self.chunk[self.chunk_off - 1] as usize] != class {
175 | return;
176 | }
177 | self.chunk_off -= 1;
178 | }
179 |
180 | self.offset -= self.chunk.len();
181 | self.chunk = self.doc.read_backward(self.offset);
182 | self.chunk_off = self.chunk.len();
183 | }
184 | }
185 |
186 | fn peek(&self, default: CharClass) -> CharClass {
187 | if self.chunk_off > 0 {
188 | WORD_CLASSIFIER[self.chunk[self.chunk_off - 1] as usize]
189 | } else {
190 | default
191 | }
192 | }
193 |
194 | fn next(&mut self) {
195 | self.chunk_off -= 1;
196 | }
197 |
198 | fn offset(&self) -> usize {
199 | self.offset - self.chunk.len() + self.chunk_off
200 | }
201 | }
202 |
203 | /// Returns the offset range of the "word" at the given offset.
204 | /// Does not cross newlines. Works similar to VS Code.
205 | pub fn word_select(doc: &dyn ReadableDocument, offset: usize) -> Range {
206 | let mut beg = offset;
207 | let mut end = offset;
208 | let mut class = CharClass::Newline;
209 |
210 | let mut chunk = doc.read_forward(end);
211 | if !chunk.is_empty() {
212 | // Not at the end of the document? Great!
213 | // We default to using the next char as the class, because in terminals
214 | // the cursor is usually always to the left of the cell you clicked on.
215 | class = WORD_CLASSIFIER[chunk[0] as usize];
216 |
217 | let mut chunk_off = 0;
218 |
219 | // Select the word, unless we hit a newline.
220 | if class != CharClass::Newline {
221 | loop {
222 | chunk_off += 1;
223 | end += 1;
224 |
225 | if chunk_off >= chunk.len() {
226 | chunk = doc.read_forward(end);
227 | chunk_off = 0;
228 | if chunk.is_empty() {
229 | break;
230 | }
231 | }
232 |
233 | if WORD_CLASSIFIER[chunk[chunk_off] as usize] != class {
234 | break;
235 | }
236 | }
237 | }
238 | }
239 |
240 | let mut chunk = doc.read_backward(beg);
241 | if !chunk.is_empty() {
242 | let mut chunk_off = chunk.len();
243 |
244 | // If we failed to determine the class, because we hit the end of the document
245 | // or a newline, we fall back to using the previous character, of course.
246 | if class == CharClass::Newline {
247 | class = WORD_CLASSIFIER[chunk[chunk_off - 1] as usize];
248 | }
249 |
250 | // Select the word, unless we hit a newline.
251 | if class != CharClass::Newline {
252 | loop {
253 | if WORD_CLASSIFIER[chunk[chunk_off - 1] as usize] != class {
254 | break;
255 | }
256 |
257 | chunk_off -= 1;
258 | beg -= 1;
259 |
260 | if chunk_off == 0 {
261 | chunk = doc.read_backward(beg);
262 | chunk_off = chunk.len();
263 | if chunk.is_empty() {
264 | break;
265 | }
266 | }
267 | }
268 | }
269 | }
270 |
271 | beg..end
272 | }
273 |
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_word_navigation() {
        // Forward: stops at the end of a word, after leading whitespace plus a word,
        // and after just a single newline.
        assert_eq!(word_forward(&"Hello World".as_bytes(), 0), 5);
        assert_eq!(word_forward(&"Hello,World".as_bytes(), 0), 5);
        assert_eq!(word_forward(&" Hello".as_bytes(), 0), 8);
        assert_eq!(word_forward(&"\n\nHello".as_bytes(), 0), 1);

        // Backward: the mirror image of the cases above.
        assert_eq!(word_backward(&"Hello World".as_bytes(), 11), 6);
        assert_eq!(word_backward(&"Hello,World".as_bytes(), 10), 6);
        assert_eq!(word_backward(&"Hello ".as_bytes(), 7), 0);
        assert_eq!(word_backward(&"Hello\n\n".as_bytes(), 7), 6);
    }
}
291 |
--------------------------------------------------------------------------------
/src/cell.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! [`std::cell::RefCell`], but without runtime checks in release builds.
5 |
6 | #[cfg(debug_assertions)]
7 | pub use debug::*;
8 | #[cfg(not(debug_assertions))]
9 | pub use release::*;
10 |
/// Debug builds use the real [`std::cell::RefCell`], so that violations
/// of the borrowing rules are detected at runtime.
#[allow(unused)]
#[cfg(debug_assertions)]
mod debug {
    pub type SemiRefCell<T> = std::cell::RefCell<T>;
    pub type Ref<'b, T> = std::cell::Ref<'b, T>;
    pub type RefMut<'b, T> = std::cell::RefMut<'b, T>;
}
18 |
/// Release builds use a drop-in replacement for [`std::cell::RefCell`] that performs
/// no runtime borrow checking whatsoever.
///
/// This makes every `borrow`/`borrow_mut` call site responsible for upholding Rust's
/// aliasing rules. The debug build substitutes the real `RefCell` so that violations
/// are detected there.
#[cfg(not(debug_assertions))]
mod release {
    /// An `UnsafeCell` with the interface of a `RefCell`.
    #[derive(Default)]
    #[repr(transparent)]
    pub struct SemiRefCell<T>(std::cell::UnsafeCell<T>);

    impl<T> SemiRefCell<T> {
        /// Creates a new cell containing `value`.
        #[inline(always)]
        pub const fn new(value: T) -> Self {
            Self(std::cell::UnsafeCell::new(value))
        }

        /// Returns a raw pointer to the contained value.
        #[inline(always)]
        pub const fn as_ptr(&self) -> *mut T {
            self.0.get()
        }

        /// Returns a shared reference to the contained value, without any checks.
        #[inline(always)]
        pub const fn borrow(&self) -> Ref<'_, T> {
            // SAFETY: The pointer is valid for as long as `self` is. That no mutable
            // borrow is live at the same time is NOT checked here; the call sites
            // must guarantee it.
            Ref(unsafe { &*self.0.get() })
        }

        /// Returns a mutable reference to the contained value, without any checks.
        #[inline(always)]
        pub const fn borrow_mut(&self) -> RefMut<'_, T> {
            // SAFETY: The pointer is valid for as long as `self` is. That no other
            // borrow is live at the same time is NOT checked here; the call sites
            // must guarantee it.
            RefMut(unsafe { &mut *self.0.get() })
        }
    }

    /// Counterpart of [`std::cell::Ref`]: a plain shared reference.
    #[repr(transparent)]
    pub struct Ref<'b, T>(&'b T);

    impl<'b, T> Ref<'b, T> {
        /// Copies the reference. Like `std::cell::Ref::clone`, this is an associated
        /// function so that it doesn't shadow a `clone` method on `T`.
        #[inline(always)]
        pub fn clone(orig: &Self) -> Self {
            Ref(orig.0)
        }
    }

    impl<'b, T> std::ops::Deref for Ref<'b, T> {
        type Target = T;

        #[inline(always)]
        fn deref(&self) -> &Self::Target {
            self.0
        }
    }

    /// Counterpart of [`std::cell::RefMut`]: a plain mutable reference.
    #[repr(transparent)]
    pub struct RefMut<'b, T>(&'b mut T);

    impl<'b, T> std::ops::Deref for RefMut<'b, T> {
        type Target = T;

        #[inline(always)]
        fn deref(&self) -> &Self::Target {
            self.0
        }
    }

    impl<'b, T> std::ops::DerefMut for RefMut<'b, T> {
        #[inline(always)]
        fn deref_mut(&mut self) -> &mut Self::Target {
            self.0
        }
    }
}
85 |
--------------------------------------------------------------------------------
/src/document.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! Abstractions over reading/writing arbitrary text containers.
5 |
6 | use std::ffi::OsString;
7 | use std::mem;
8 | use std::ops::Range;
9 | use std::path::PathBuf;
10 |
11 | use crate::arena::{ArenaString, scratch_arena};
12 | use crate::helpers::ReplaceRange as _;
13 |
/// A text container that can be read in chunks, in either direction.
pub trait ReadableDocument {
    /// Returns a chunk of bytes that begins at the absolute offset `off` (inclusive).
    ///
    /// # Warning
    ///
    /// Implementations have to be lenient on inputs:
    /// * `off` may be out of bounds and MUST be clamped.
    /// * `off` is not necessarily located at a grapheme cluster boundary.
    ///
    /// Implementations have to be strict on outputs:
    /// * Grapheme clusters MUST NOT be broken across chunks.
    /// * An empty slice MUST NOT be returned unless `off` is at or beyond the end.
    fn read_forward(&self, off: usize) -> &[u8];

    /// Returns a chunk of bytes that ends right before the absolute offset `off` (exclusive).
    ///
    /// # Warning
    ///
    /// Implementations have to be lenient on inputs:
    /// * `off` may be out of bounds and MUST be clamped.
    /// * `off` is not necessarily located at a grapheme cluster boundary.
    ///
    /// Implementations have to be strict on outputs:
    /// * Grapheme clusters MUST NOT be broken across chunks.
    /// * An empty slice MUST NOT be returned unless `off` is zero.
    fn read_backward(&self, off: usize) -> &[u8];
}
40 |
41 | /// An abstraction over writing to text containers.
42 | pub trait WriteableDocument: ReadableDocument {
43 | /// Replace the given range with the given bytes.
44 | ///
45 | /// # Warning
46 | ///
47 | /// * The given range may be out of bounds and you MUST clamp it.
48 | /// * The replacement may not be valid UTF8.
49 | fn replace(&mut self, range: Range, replacement: &[u8]);
50 | }
51 |
52 | impl ReadableDocument for &[u8] {
53 | fn read_forward(&self, off: usize) -> &[u8] {
54 | let s = *self;
55 | &s[off.min(s.len())..]
56 | }
57 |
58 | fn read_backward(&self, off: usize) -> &[u8] {
59 | let s = *self;
60 | &s[..off.min(s.len())]
61 | }
62 | }
63 |
64 | impl ReadableDocument for String {
65 | fn read_forward(&self, off: usize) -> &[u8] {
66 | let s = self.as_bytes();
67 | &s[off.min(s.len())..]
68 | }
69 |
70 | fn read_backward(&self, off: usize) -> &[u8] {
71 | let s = self.as_bytes();
72 | &s[..off.min(s.len())]
73 | }
74 | }
75 |
76 | impl WriteableDocument for String {
77 | fn replace(&mut self, range: Range, replacement: &[u8]) {
78 | // `replacement` is not guaranteed to be valid UTF-8, so we need to sanitize it.
79 | let scratch = scratch_arena(None);
80 | let utf8 = ArenaString::from_utf8_lossy(&scratch, replacement);
81 | let src = match &utf8 {
82 | Ok(s) => s,
83 | Err(s) => s.as_str(),
84 | };
85 |
86 | // SAFETY: `range` is guaranteed to be on codepoint boundaries.
87 | unsafe { self.as_mut_vec() }.replace_range(range, src.as_bytes());
88 | }
89 | }
90 |
91 | impl ReadableDocument for PathBuf {
92 | fn read_forward(&self, off: usize) -> &[u8] {
93 | let s = self.as_os_str().as_encoded_bytes();
94 | &s[off.min(s.len())..]
95 | }
96 |
97 | fn read_backward(&self, off: usize) -> &[u8] {
98 | let s = self.as_os_str().as_encoded_bytes();
99 | &s[..off.min(s.len())]
100 | }
101 | }
102 |
103 | impl WriteableDocument for PathBuf {
104 | fn replace(&mut self, range: Range, replacement: &[u8]) {
105 | let mut vec = mem::take(self).into_os_string().into_encoded_bytes();
106 | vec.replace_range(range, replacement);
107 | *self = unsafe { Self::from(OsString::from_encoded_bytes_unchecked(vec)) };
108 | }
109 | }
110 |
--------------------------------------------------------------------------------
/src/fuzzy.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! Fuzzy search algorithm based on the one used in VS Code (`/src/vs/base/common/fuzzyScorer.ts`).
5 | //! Other algorithms exist, such as Sublime Text's, or the one used in `fzf`,
6 | //! but I figured that this one is what lots of people may be familiar with.
7 |
8 | use std::vec;
9 |
10 | use crate::arena::{Arena, scratch_arena};
11 | use crate::icu;
12 |
/// Score (and match-length) value that denotes "no match".
const NO_MATCH: i32 = 0;

/// Fuzzy-matches `needle` against `haystack` and scores the result.
///
/// Returns a tuple of
/// * the score found in the bottom-right cell of the scoring matrix
///   ([`NO_MATCH`] if either input is empty or the needle has more
///   characters than the haystack), and
/// * the matched positions in ascending order, allocated in `arena`.
///   Positions are indices into the haystack's sequence of `char`s,
///   not byte offsets.
///
/// If `allow_non_contiguous_matches` is `false`, a match for the first needle
/// character is only accepted where the whole (case-folded) needle follows
/// contiguously in the (case-folded) haystack.
pub fn score_fuzzy<'a>(
    arena: &'a Arena,
    haystack: &str,
    needle: &str,
    allow_non_contiguous_matches: bool,
) -> (i32, Vec) {
    if haystack.is_empty() || needle.is_empty() {
        // return early if target or query are empty
        return (NO_MATCH, Vec::new_in(arena));
    }

    // All temporaries live in a scratch arena; only `positions` uses `arena`.
    let scratch = scratch_arena(Some(arena));
    let target = map_chars(&scratch, haystack);
    let query = map_chars(&scratch, needle);

    if target.len() < query.len() {
        // impossible for query to be contained in target
        return (NO_MATCH, Vec::new_in(arena));
    }

    // Case-folded copies are used for comparing characters, while the
    // originals are used for the "same case" and "camel case" bonuses.
    // NOTE(review): the code below indexes the folded and the original vectors
    // with the same indices, which presumably assumes that case folding keeps
    // the number of characters unchanged. TODO confirm.
    let target_lower = icu::fold_case(&scratch, haystack);
    let query_lower = icu::fold_case(&scratch, needle);
    let target_lower = map_chars(&scratch, &target_lower);
    let query_lower = map_chars(&scratch, &query_lower);

    // Both matrices are stored row-major: one row per query character,
    // one column per target character.
    let area = query.len() * target.len();
    let mut scores = vec::from_elem_in(0, area, &*scratch);
    let mut matches = vec::from_elem_in(0, area, &*scratch);

    //
    // Build Scorer Matrix:
    //
    // The matrix is composed of query q and target t. For each index we score
    // q[i] with t[i] and compare that with the previous score. If the score is
    // equal or larger, we keep the match. In addition to the score, we also keep
    // the length of the consecutive matches to use as boost for the score.
    //
    //      t   a   r   g   e   t
    // q
    // u
    // e
    // r
    // y
    //
    for query_index in 0..query.len() {
        let query_index_offset = query_index * target.len();
        let query_index_previous_offset =
            if query_index > 0 { (query_index - 1) * target.len() } else { 0 };

        for target_index in 0..target.len() {
            let current_index = query_index_offset + target_index;
            // The diagonal cell (previous query char, previous target char).
            // Only meaningful if neither index is 0.
            let diag_index = if query_index > 0 && target_index > 0 {
                query_index_previous_offset + target_index - 1
            } else {
                0
            };
            let left_score = if target_index > 0 { scores[current_index - 1] } else { 0 };
            let diag_score =
                if query_index > 0 && target_index > 0 { scores[diag_index] } else { 0 };
            let matches_sequence_len =
                if query_index > 0 && target_index > 0 { matches[diag_index] } else { 0 };

            // If we are not matching on the first query character anymore, we only produce a
            // score if we had a score previously for the last query index (by looking at the diagScore).
            // This makes sure that the query always matches in sequence on the target. For example
            // given a target of "ede" and a query of "de", we would otherwise produce a wrong high score
            // for query[1] ("e") matching on target[0] ("e") because of the "beginning of word" boost.
            let score = if diag_score == 0 && query_index != 0 {
                0
            } else {
                compute_char_score(
                    query[query_index],
                    query_lower[query_index],
                    if target_index != 0 { Some(target[target_index - 1]) } else { None },
                    target[target_index],
                    target_lower[target_index],
                    matches_sequence_len,
                )
            };

            // We have a score and it's equal or larger than the left score
            // Match: sequence continues growing from previous diag value
            // Score: increases by diag score value
            let is_valid_score = score != 0 && diag_score + score >= left_score;
            if is_valid_score
                && (
                    // We don't need to check if it's contiguous if we allow non-contiguous matches
                    allow_non_contiguous_matches ||
                    // We must be looking for a contiguous match.
                    // Looking at an index above 0 in the query means we must have already
                    // found out this is contiguous otherwise there wouldn't have been a score
                    query_index > 0 ||
                    // lastly check if the query is completely contiguous at this index in the target
                    target_lower[target_index..].starts_with(&query_lower)
                )
            {
                matches[current_index] = matches_sequence_len + 1;
                scores[current_index] = diag_score + score;
            } else {
                // We either have no score or the score is lower than the left score
                // Match: reset to 0
                // Score: pick up from left hand side
                matches[current_index] = NO_MATCH;
                scores[current_index] = left_score;
            }
        }
    }

    // Restore Positions (starting from bottom right of matrix)
    let mut positions = Vec::new_in(arena);

    if !query.is_empty() && !target.is_empty() {
        let mut query_index = query.len() - 1;
        let mut target_index = target.len() - 1;

        loop {
            let current_index = query_index * target.len() + target_index;
            if matches[current_index] == NO_MATCH {
                // No match in this cell: keep scanning the same query row.
                if target_index == 0 {
                    break;
                }
                target_index -= 1; // go left
            } else {
                positions.push(target_index);

                // go up and left
                if query_index == 0 || target_index == 0 {
                    break;
                }
                query_index -= 1;
                target_index -= 1;
            }
        }

        // Positions were collected back to front.
        positions.reverse();
    }

    // The bottom-right cell holds the final score.
    (scores[area - 1], positions)
}
154 |
155 | fn compute_char_score(
156 | query: char,
157 | query_lower: char,
158 | target_prev: Option,
159 | target_curr: char,
160 | target_curr_lower: char,
161 | matches_sequence_len: i32,
162 | ) -> i32 {
163 | let mut score = 0;
164 |
165 | if !consider_as_equal(query_lower, target_curr_lower) {
166 | return score; // no match of characters
167 | }
168 |
169 | // Character match bonus
170 | score += 1;
171 |
172 | // Consecutive match bonus
173 | if matches_sequence_len > 0 {
174 | score += matches_sequence_len * 5;
175 | }
176 |
177 | // Same case bonus
178 | if query == target_curr {
179 | score += 1;
180 | }
181 |
182 | if let Some(target_prev) = target_prev {
183 | // After separator bonus
184 | let separator_bonus = score_separator_at_pos(target_prev);
185 | if separator_bonus > 0 {
186 | score += separator_bonus;
187 | }
188 | // Inside word upper case bonus (camel case). We only give this bonus if we're not in a contiguous sequence.
189 | // For example:
190 | // NPE => NullPointerException = boost
191 | // HTTP => HTTP = not boost
192 | else if target_curr != target_curr_lower && matches_sequence_len == 0 {
193 | score += 2;
194 | }
195 | } else {
196 | // Start of word bonus
197 | score += 8;
198 | }
199 |
200 | score
201 | }
202 |
/// Compares two characters, treating `/` and `\` as interchangeable
/// so that platform differences in path separators are ignored.
fn consider_as_equal(a: char, b: char) -> bool {
    matches!((a, b), ('/', '\\') | ('\\', '/')) || a == b
}
207 |
/// Returns the bonus granted to a match that directly follows `ch`:
/// 5 for path separators, 4 for other separators, 0 for everything else.
fn score_separator_at_pos(ch: char) -> i32 {
    if matches!(ch, '/' | '\\') {
        // prefer path separators...
        5
    } else if matches!(ch, '_' | '-' | '.' | ' ' | '\'' | '"' | ':') {
        // ...over other separators
        4
    } else {
        0
    }
}
215 |
216 | fn map_chars<'a>(arena: &'a Arena, s: &str) -> Vec {
217 | let mut chars = Vec::with_capacity_in(s.len(), arena);
218 | chars.extend(s.chars());
219 | chars.shrink_to_fit();
220 | chars
221 | }
222 |
--------------------------------------------------------------------------------
/src/hash.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! Provides fast, non-cryptographic hash functions.
5 |
/// The venerable wyhash hash function.
///
/// It's fast, has good statistical properties, and is in the public domain.
/// See: <https://github.com/wangyi-fudan/wyhash>
/// If you visit the link, you'll find that it was superseded by "rapidhash",
/// but that's not particularly interesting for this project. rapidhash results
/// in way larger assembly and isn't faster when hashing small amounts of data.
///
/// Hashes `data` starting from the given `seed` and returns the 64-bit result.
/// Multi-byte words are read in native byte order (see `wyr4`/`wyr8`).
pub fn hash(mut seed: u64, data: &[u8]) -> u64 {
    unsafe {
        const S0: u64 = 0xa0761d6478bd642f;
        const S1: u64 = 0xe7037ed1a0b428db;
        const S2: u64 = 0x8ebc6af09c88c6e3;
        const S3: u64 = 0x589965cc75374cc3;

        let len = data.len();
        let mut p = data.as_ptr();
        // The final two words that get mixed together with the seed.
        let a;
        let b;

        seed ^= S0;

        if len <= 16 {
            if len >= 4 {
                // 4..=16 bytes: four 4-byte reads, two anchored at the start and
                // two anchored at the end. For short inputs the reads overlap,
                // but they never leave `data`.
                a = (wyr4(p) << 32) | wyr4(p.add((len >> 3) << 2));
                b = (wyr4(p.add(len - 4)) << 32) | wyr4(p.add(len - 4 - ((len >> 3) << 2)));
            } else if len > 0 {
                // 1..=3 bytes: first, middle and last byte.
                a = wyr3(p, len);
                b = 0;
            } else {
                a = 0;
                b = 0;
            }
        } else {
            // `i` is the number of bytes remaining at `p`.
            let mut i = len;
            if i > 48 {
                // Consume 48 bytes per iteration using three accumulators.
                let mut seed1 = seed;
                let mut seed2 = seed;
                // A do-while loop: the block is the condition, the body is empty.
                while {
                    seed = wymix(wyr8(p) ^ S1, wyr8(p.add(8)) ^ seed);
                    seed1 = wymix(wyr8(p.add(16)) ^ S2, wyr8(p.add(24)) ^ seed1);
                    seed2 = wymix(wyr8(p.add(32)) ^ S3, wyr8(p.add(40)) ^ seed2);
                    p = p.add(48);
                    i -= 48;
                    i > 48
                } {}
                seed ^= seed1 ^ seed2;
            }
            // Consume 16 bytes per iteration until at most 16 bytes remain.
            while i > 16 {
                seed = wymix(wyr8(p) ^ S1, wyr8(p.add(8)) ^ seed);
                i -= 16;
                p = p.add(16);
            }
            // Read the last 16 bytes of the input. Since `i <= 16` this may reach
            // back into bytes that were already consumed above, which is fine,
            // because `len > 16` guarantees that those bytes exist.
            a = wyr8(p.offset(i as isize - 16));
            b = wyr8(p.offset(i as isize - 8));
        }

        wymix(S1 ^ (len as u64), wymix(a ^ S1, b ^ seed))
    }
}
65 |
/// Packs the first, the middle and the last of `k` bytes into a 24-bit value.
///
/// # Safety
///
/// `p` must be valid for reads of `k` bytes and `k` must not be zero.
unsafe fn wyr3(p: *const u8, k: usize) -> u64 {
    let byte_at = |idx: usize| unsafe { u64::from(p.add(idx).read()) };
    (byte_at(0) << 16) | (byte_at(k >> 1) << 8) | byte_at(k - 1)
}
72 |
/// Reads an unaligned `u32` in native byte order and widens it to `u64`.
///
/// # Safety
///
/// `p` must be valid for reads of 4 bytes.
unsafe fn wyr4(p: *const u8) -> u64 {
    let word = unsafe { p.cast::<u32>().read_unaligned() };
    u64::from(word)
}
76 |
/// Reads an unaligned `u64` in native byte order.
///
/// # Safety
///
/// `p` must be valid for reads of 8 bytes.
unsafe fn wyr8(p: *const u8) -> u64 {
    let word_ptr = p.cast::<u64>();
    unsafe { word_ptr.read_unaligned() }
}
80 |
/// Multiplies `lhs` and `rhs` to a 128-bit product and
/// XORs the upper and lower 64-bit halves of that product.
///
/// This is a weak mix function on its own. It may be worth considering
/// replacing external uses of this function with a stronger one.
/// On the other hand, it's very fast.
pub fn wymix(lhs: u64, rhs: u64) -> u64 {
    let product = u128::from(lhs) * u128::from(rhs);
    let hi = (product >> 64) as u64;
    let lo = product as u64;
    hi ^ lo
}
90 |
91 | pub fn hash_str(seed: u64, s: &str) -> u64 {
92 | hash(seed, s.as_bytes())
93 | }
94 |
--------------------------------------------------------------------------------
/src/helpers.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! Random assortment of helpers I didn't know where to put.
5 |
6 | use std::alloc::Allocator;
7 | use std::cmp::Ordering;
8 | use std::io::Read;
9 | use std::mem::{self, MaybeUninit};
10 | use std::ops::{Bound, Range, RangeBounds};
11 | use std::{fmt, ptr, slice, str};
12 |
13 | use crate::apperr;
14 |
/// Decimal (SI) size multipliers.
pub const KILO: usize = 1000;
pub const MEGA: usize = KILO * KILO;
pub const GIGA: usize = KILO * KILO * KILO;

/// Binary (IEC) size multipliers.
pub const KIBI: usize = 1024;
pub const MEBI: usize = KIBI * KIBI;
pub const GIBI: usize = KIBI * KIBI * KIBI;

/// Wrapper that formats a byte count with a decimal unit suffix
/// (`B`, `kB`, `MB`, `GB`) when displayed.
pub struct MetricFormatter<T>(pub T);

impl fmt::Display for MetricFormatter<usize> {
    /// Writes the value scaled down to the largest fitting unit.
    /// The scaling is an integer division, so the fraction is dropped.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (value, suffix) = match self.0 {
            bytes if bytes >= GIGA => (bytes / GIGA, "GB"),
            bytes if bytes >= MEGA => (bytes / MEGA, "MB"),
            bytes if bytes >= KILO => (bytes / KILO, "kB"),
            bytes => (bytes, "B"),
        };
        write!(f, "{value}{suffix}")
    }
}
42 |
/// A viewport coordinate type used throughout the application.
pub type CoordType = isize;

/// To avoid overflow issues because you're adding two [`CoordType::MAX`]
/// values together, you can use [`COORD_TYPE_SAFE_MAX`] instead.
///
/// It equates to half the bits contained in [`CoordType`], which
/// for instance is 32767 (0x7FFF) when [`CoordType`] is an [`i32`].
pub const COORD_TYPE_SAFE_MAX: CoordType = (1 << (CoordType::BITS / 2 - 1)) - 1;

/// A 2D point. Uses [`CoordType`].
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct Point {
    pub x: CoordType,
    pub y: CoordType,
}

impl Point {
    /// The point with the smallest possible coordinates.
    pub const MIN: Self = Self { x: CoordType::MIN, y: CoordType::MIN };
    /// The point with the largest possible coordinates.
    pub const MAX: Self = Self { x: CoordType::MAX, y: CoordType::MAX };
}

impl PartialOrd for Point {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl Ord for Point {
    /// Points are ordered by row first (`y`) and by column second (`x`).
    fn cmp(&self, other: &Self) -> Ordering {
        (self.y, self.x).cmp(&(other.y, other.x))
    }
}

/// A 2D size. Uses [`CoordType`].
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct Size {
    pub width: CoordType,
    pub height: CoordType,
}

impl Size {
    /// Returns a rectangle of this size with its top-left corner at the origin.
    pub fn as_rect(&self) -> Rect {
        let Size { width, height } = *self;
        Rect { left: 0, top: 0, right: width, bottom: height }
    }
}

/// A 2D rectangle. Uses [`CoordType`].
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct Rect {
    pub left: CoordType,
    pub top: CoordType,
    pub right: CoordType,
    pub bottom: CoordType,
}

impl Rect {
    /// Mimics CSS's `padding` property where `padding: a` is `a a a a`.
    pub fn one(value: CoordType) -> Self {
        Self { top: value, right: value, bottom: value, left: value }
    }

    /// Mimics CSS's `padding` property where `padding: a b` is `a b a b`,
    /// and `a` is top/bottom and `b` is left/right.
    pub fn two(top_bottom: CoordType, left_right: CoordType) -> Self {
        Self { top: top_bottom, right: left_right, bottom: top_bottom, left: left_right }
    }

    /// Mimics CSS's `padding` property where `padding: a b c` is `a b c b`,
    /// and `a` is top, `b` is left/right, and `c` is bottom.
    pub fn three(top: CoordType, left_right: CoordType, bottom: CoordType) -> Self {
        Self { top, right: left_right, bottom, left: left_right }
    }

    /// Is the rectangle empty?
    /// It is, unless both its width and its height are positive.
    pub fn is_empty(&self) -> bool {
        !(self.left < self.right && self.top < self.bottom)
    }

    /// Width of the rectangle.
    pub fn width(&self) -> CoordType {
        self.right - self.left
    }

    /// Height of the rectangle.
    pub fn height(&self) -> CoordType {
        self.bottom - self.top
    }

    /// Check if it contains a point.
    /// The left/top edges are inclusive, the right/bottom edges are exclusive.
    pub fn contains(&self, point: Point) -> bool {
        let columns = self.left..self.right;
        let rows = self.top..self.bottom;
        columns.contains(&point.x) && rows.contains(&point.y)
    }

    /// Intersect two rectangles.
    ///
    /// If they don't overlap, the result is an empty rectangle located at the
    /// larger of the two left/top corners, with a width and/or height of 0.
    pub fn intersect(&self, rhs: Self) -> Self {
        let left = self.left.max(rhs.left);
        let top = self.top.max(rhs.top);

        // Ensure that the size is non-negative. This avoids bugs,
        // because some height/width is negative all of a sudden.
        let right = self.right.min(rhs.right).max(left);
        let bottom = self.bottom.min(rhs.bottom).max(top);

        Self { left, top, right, bottom }
    }
}
152 |
/// Returns the two values as `[smaller, larger]`.
/// If neither is smaller than the other, the original order is kept.
///
/// [`std::cmp::minmax`] is unstable, as per usual.
pub fn minmax<T>(v1: T, v2: T) -> [T; 2]
where
    T: Ord,
{
    if v2 < v1 {
        return [v2, v1];
    }
    [v1, v2]
}
160 |
/// Turns an optional reference into a raw pointer; `None` becomes a null pointer.
#[inline(always)]
#[allow(clippy::ptr_eq)]
fn opt_ptr<T>(a: Option<&T>) -> *const T {
    // SAFETY: `Option<&T>` has the same size and layout as a nullable
    // pointer to `T`, with `None` being represented as null.
    unsafe { mem::transmute::<Option<&T>, *const T>(a) }
}

/// Surprisingly, there's no way in Rust to do a `ptr::eq` on `Option<&T>`.
/// Uses `unsafe` so that the debug performance isn't too bad.
///
/// Returns `true` if both are `None` or if both refer to the same address.
#[inline(always)]
#[allow(clippy::ptr_eq)]
pub fn opt_ptr_eq<T>(a: Option<&T>, b: Option<&T>) -> bool {
    let lhs = opt_ptr(a);
    let rhs = opt_ptr(b);
    ptr::eq(lhs, rhs)
}
174 |
/// Builds a `&str` out of a pointer and a length in bytes.
/// Exists, because `std::str::from_raw_parts` is unstable, par for the course.
///
/// # Safety
///
/// The given data must be valid UTF-8.
/// The given data must outlive the returned reference.
#[inline]
#[must_use]
pub const unsafe fn str_from_raw_parts<'a>(ptr: *const u8, len: usize) -> &'a str {
    // SAFETY: Validity and lifetime of the data are the caller's responsibility.
    unsafe {
        let bytes: &'a [u8] = slice::from_raw_parts(ptr, len);
        str::from_utf8_unchecked(bytes)
    }
}
187 |
/// [`<[T]>::copy_from_slice`] panics if the two slices have different lengths.
/// This one copies as many elements as fit into both slices,
/// and returns the copied amount.
pub fn slice_copy_safe<T: Copy>(dst: &mut [T], src: &[T]) -> usize {
    let count = usize::min(dst.len(), src.len());
    dst[..count].copy_from_slice(&src[..count]);
    count
}
195 |
196 | /// [`Vec::splice`] results in really bad assembly.
197 | /// This doesn't. Don't use [`Vec::splice`].
198 | pub trait ReplaceRange {
199 | fn replace_range>(&mut self, range: R, src: &[T]);
200 | }
201 |
202 | impl ReplaceRange for Vec {
203 | fn replace_range>(&mut self, range: R, src: &[T]) {
204 | let start = match range.start_bound() {
205 | Bound::Included(&start) => start,
206 | Bound::Excluded(start) => start + 1,
207 | Bound::Unbounded => 0,
208 | };
209 | let end = match range.end_bound() {
210 | Bound::Included(end) => end + 1,
211 | Bound::Excluded(&end) => end,
212 | Bound::Unbounded => usize::MAX,
213 | };
214 | vec_replace_impl(self, start..end, src);
215 | }
216 | }
217 |
218 | fn vec_replace_impl(dst: &mut Vec, range: Range, src: &[T]) {
219 | unsafe {
220 | let dst_len = dst.len();
221 | let src_len = src.len();
222 | let off = range.start.min(dst_len);
223 | let del_len = range.end.saturating_sub(off).min(dst_len - off);
224 |
225 | if del_len == 0 && src_len == 0 {
226 | return; // nothing to do
227 | }
228 |
229 | let tail_len = dst_len - off - del_len;
230 | let new_len = dst_len - del_len + src_len;
231 |
232 | if src_len > del_len {
233 | dst.reserve(src_len - del_len);
234 | }
235 |
236 | // NOTE: drop_in_place() is not needed here, because T is constrained to Copy.
237 |
238 | // SAFETY: as_mut_ptr() must called after reserve() to ensure that the pointer is valid.
239 | let ptr = dst.as_mut_ptr().add(off);
240 |
241 | // Shift the tail.
242 | if tail_len > 0 && src_len != del_len {
243 | ptr::copy(ptr.add(del_len), ptr.add(src_len), tail_len);
244 | }
245 |
246 | // Copy in the replacement.
247 | ptr::copy_nonoverlapping(src.as_ptr(), ptr, src_len);
248 | dst.set_len(new_len);
249 | }
250 | }
251 |
252 | /// [`Read`] but with [`MaybeUninit`] buffers.
253 | pub fn file_read_uninit(
254 | file: &mut T,
255 | buf: &mut [MaybeUninit],
256 | ) -> apperr::Result {
257 | unsafe {
258 | let buf_slice = slice::from_raw_parts_mut(buf.as_mut_ptr() as *mut u8, buf.len());
259 | let n = file.read(buf_slice)?;
260 | Ok(n)
261 | }
262 | }
263 |
/// Turns a [`&[T]`] into a [`&[MaybeUninit<T>]`] of the same length.
#[inline(always)]
pub const fn slice_as_uninit_ref<T>(slice: &[T]) -> &[MaybeUninit<T>] {
    let len = slice.len();
    let data = slice.as_ptr() as *const MaybeUninit<T>;
    // SAFETY: `MaybeUninit<T>` has the same layout as `T`,
    // and an initialized `T` is a valid `MaybeUninit<T>`.
    unsafe { slice::from_raw_parts(data, len) }
}
269 |
/// Turns a [`&mut [T]`] into a [`&mut [MaybeUninit<T>]`] of the same length.
#[inline(always)]
pub const fn slice_as_uninit_mut<T>(slice: &mut [T]) -> &mut [MaybeUninit<T>] {
    let len = slice.len();
    let data = slice.as_mut_ptr() as *mut MaybeUninit<T>;
    // SAFETY: `MaybeUninit<T>` has the same layout as `T`,
    // and an initialized `T` is a valid `MaybeUninit<T>`.
    unsafe { slice::from_raw_parts_mut(data, len) }
}
275 |
/// Helpers for ASCII string comparisons.
pub trait AsciiStringHelpers {
    /// Tests if a string starts with a given ASCII prefix,
    /// while ignoring differences in ASCII case.
    ///
    /// This function name really is a mouthful, but it's a combination
    /// of [`str::starts_with`] and [`str::eq_ignore_ascii_case`].
    fn starts_with_ignore_ascii_case(&self, prefix: &str) -> bool;
}

impl AsciiStringHelpers for str {
    fn starts_with_ignore_ascii_case(&self, prefix: &str) -> bool {
        // Working on the raw bytes ensures we skip any UTF8 boundary checks.
        // Since the comparison is ASCII, we don't need to worry about that.
        let needle = prefix.as_bytes();
        self.as_bytes()
            .get(..needle.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(needle))
    }
}
294 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | #![feature(
5 | allocator_api,
6 | breakpoint,
7 | cold_path,
8 | let_chains,
9 | linked_list_cursors,
10 | maybe_uninit_fill,
11 | maybe_uninit_slice,
12 | maybe_uninit_uninit_array_transpose
13 | )]
14 | #![allow(clippy::missing_transmute_annotations, clippy::new_without_default, stable_features)]
15 |
16 | #[macro_use]
17 | pub mod arena;
18 |
19 | pub mod apperr;
20 | pub mod base64;
21 | pub mod buffer;
22 | pub mod cell;
23 | pub mod document;
24 | pub mod framebuffer;
25 | pub mod fuzzy;
26 | pub mod hash;
27 | pub mod helpers;
28 | pub mod icu;
29 | pub mod input;
30 | pub mod oklab;
31 | pub mod path;
32 | pub mod simd;
33 | pub mod sys;
34 | pub mod tui;
35 | pub mod unicode;
36 | pub mod vt;
37 |
--------------------------------------------------------------------------------
/src/oklab.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! Oklab colorspace conversions.
5 | //!
6 | //! Implements Oklab as defined at:
7 |
8 | #![allow(clippy::excessive_precision)]
9 |
/// An Oklab color with alpha.
///
/// Produced by `srgb_to_oklab` and consumed by `oklab_to_srgb`.
pub struct Lab {
    /// The `L` component (lightness in Oklab terminology).
    pub l: f32,
    /// The `a` component.
    pub a: f32,
    /// The `b` component.
    pub b: f32,
    /// The alpha value. `oklab_to_srgb` clamps it to `0.0..=1.0`.
    pub alpha: f32,
}
17 |
18 | /// Converts a 32-bit sRGB color to Oklab.
19 | pub fn srgb_to_oklab(color: u32) -> Lab {
20 | let r = SRGB_TO_RGB_LUT[(color & 0xff) as usize];
21 | let g = SRGB_TO_RGB_LUT[((color >> 8) & 0xff) as usize];
22 | let b = SRGB_TO_RGB_LUT[((color >> 16) & 0xff) as usize];
23 | let alpha = (color >> 24) as f32 * (1.0 / 255.0);
24 |
25 | let l = 0.4122214708 * r + 0.5363325363 * g + 0.0514459929 * b;
26 | let m = 0.2119034982 * r + 0.6806995451 * g + 0.1073969566 * b;
27 | let s = 0.0883024619 * r + 0.2817188376 * g + 0.6299787005 * b;
28 |
29 | let l_ = cbrtf_est(l);
30 | let m_ = cbrtf_est(m);
31 | let s_ = cbrtf_est(s);
32 |
33 | Lab {
34 | l: 0.2104542553 * l_ + 0.7936177850 * m_ - 0.0040720468 * s_,
35 | a: 1.9779984951 * l_ - 2.4285922050 * m_ + 0.4505937099 * s_,
36 | b: 0.0259040371 * l_ + 0.7827717662 * m_ - 0.8086757660 * s_,
37 | alpha,
38 | }
39 | }
40 |
41 | /// Converts an Oklab color to a 32-bit sRGB color.
42 | pub fn oklab_to_srgb(c: Lab) -> u32 {
43 | let l_ = c.l + 0.3963377774 * c.a + 0.2158037573 * c.b;
44 | let m_ = c.l - 0.1055613458 * c.a - 0.0638541728 * c.b;
45 | let s_ = c.l - 0.0894841775 * c.a - 1.2914855480 * c.b;
46 |
47 | let l = l_ * l_ * l_;
48 | let m = m_ * m_ * m_;
49 | let s = s_ * s_ * s_;
50 |
51 | let r = 4.0767416621 * l - 3.3077115913 * m + 0.2309699292 * s;
52 | let g = -1.2684380046 * l + 2.6097574011 * m - 0.3413193965 * s;
53 | let b = -0.0041960863 * l - 0.7034186147 * m + 1.7076147010 * s;
54 |
55 | let r = r.clamp(0.0, 1.0);
56 | let g = g.clamp(0.0, 1.0);
57 | let b = b.clamp(0.0, 1.0);
58 | let alpha = c.alpha.clamp(0.0, 1.0);
59 |
60 | let r = linear_to_srgb(r);
61 | let g = linear_to_srgb(g);
62 | let b = linear_to_srgb(b);
63 | let a = (alpha * 255.0) as u32;
64 |
65 | r | (g << 8) | (b << 16) | (a << 24)
66 | }
67 |
68 | /// Blends two 32-bit sRGB colors in the Oklab color space.
69 | pub fn oklab_blend(dst: u32, src: u32) -> u32 {
70 | let dst = srgb_to_oklab(dst);
71 | let src = srgb_to_oklab(src);
72 |
73 | let inv_a = 1.0 - src.alpha;
74 | let l = src.l + dst.l * inv_a;
75 | let a = src.a + dst.a * inv_a;
76 | let b = src.b + dst.b * inv_a;
77 | let alpha = src.alpha + dst.alpha * inv_a;
78 |
79 | oklab_to_srgb(Lab { l, a, b, alpha })
80 | }
81 |
/// Converts a linear color channel to a gamma-encoded sRGB value scaled by 255.
/// The input is expected to be in the range `0.0..=1.0`.
/// The fractional part of the result is truncated.
fn linear_to_srgb(c: f32) -> u32 {
    let scaled = if c > 0.0031308 {
        // Gamma segment of the sRGB transfer function
        255.0 * 1.055 * c.powf(1.0 / 2.4) - 255.0 * 0.055
    } else {
        // Linear segment near black
        255.0 * 12.92 * c
    };
    scaled as u32
}
89 |
/// Estimates the cube root of `a`. Fast, but not particularly precise.
#[inline]
fn cbrtf_est(a: f32) -> f32 {
    // http://metamerist.com/cbrt/cbrt.htm showed a great estimator for the cube root:
    //   f32_as_uint32_t / 3 + 709921077
    // It's similar to the well known "fast inverse square root" trick:
    // the float's bit pattern gets treated as an integer.
    // Lots of numbers around 709921077 perform at least equally well to 709921077,
    // and it is unknown how and why 709921077 was chosen specifically.
    let bits = a.to_bits() / 3 + 709921077;
    let x = f32::from_bits(bits);

    // One round of Newton's method. It follows the Wikipedia article at
    //   https://en.wikipedia.org/wiki/Cube_root#Numerical_methods
    // For `a`s in the range between 0 and 1, this results in a maximum error of
    // less than 6.7e-4f, which is not good, but good enough for us, because
    // we're not an image editor. The benefit is that it's really fast.
    let x_squared = x * x;
    let x_doubled = x + x;
    (1.0 / 3.0) * (a / x_squared + x_doubled)
}
108 |
/// Lookup table with one entry per 8-bit sRGB channel value (0 through 255).
/// `srgb_to_oklab` uses it to turn each color channel into the linear value
/// in the range `0.0..=1.0` that the conversion matrices operate on.
#[rustfmt::skip]
#[allow(clippy::excessive_precision)]
const SRGB_TO_RGB_LUT: [f32; 256] = [
    0.0000000000, 0.0003035270, 0.0006070540, 0.0009105810, 0.0012141080, 0.0015176350, 0.0018211619, 0.0021246888, 0.0024282159, 0.0027317430, 0.0030352699, 0.0033465356, 0.0036765069, 0.0040247170, 0.0043914421, 0.0047769533,
    0.0051815170, 0.0056053917, 0.0060488326, 0.0065120910, 0.0069954102, 0.0074990317, 0.0080231922, 0.0085681248, 0.0091340570, 0.0097212177, 0.0103298230, 0.0109600937, 0.0116122449, 0.0122864870, 0.0129830306, 0.0137020806,
    0.0144438436, 0.0152085144, 0.0159962922, 0.0168073755, 0.0176419523, 0.0185002182, 0.0193823613, 0.0202885624, 0.0212190095, 0.0221738834, 0.0231533647, 0.0241576303, 0.0251868572, 0.0262412224, 0.0273208916, 0.0284260381,
    0.0295568332, 0.0307134409, 0.0318960287, 0.0331047624, 0.0343398079, 0.0356013142, 0.0368894450, 0.0382043645, 0.0395462364, 0.0409151986, 0.0423114114, 0.0437350273, 0.0451862030, 0.0466650836, 0.0481718220, 0.0497065634,
    0.0512694679, 0.0528606549, 0.0544802807, 0.0561284944, 0.0578054339, 0.0595112406, 0.0612460710, 0.0630100295, 0.0648032799, 0.0666259527, 0.0684781820, 0.0703601092, 0.0722718611, 0.0742135793, 0.0761853904, 0.0781874284,
    0.0802198276, 0.0822827145, 0.0843762159, 0.0865004659, 0.0886556059, 0.0908417329, 0.0930589810, 0.0953074843, 0.0975873619, 0.0998987406, 0.1022417471, 0.1046164930, 0.1070231125, 0.1094617173, 0.1119324341, 0.1144353822,
    0.1169706732, 0.1195384338, 0.1221387982, 0.1247718409, 0.1274376959, 0.1301364899, 0.1328683347, 0.1356333494, 0.1384316236, 0.1412633061, 0.1441284865, 0.1470272839, 0.1499598026, 0.1529261619, 0.1559264660, 0.1589608639,
    0.1620294005, 0.1651322246, 0.1682693958, 0.1714410931, 0.1746473908, 0.1778884083, 0.1811642349, 0.1844749898, 0.1878207624, 0.1912016720, 0.1946178079, 0.1980693042, 0.2015562356, 0.2050787061, 0.2086368501, 0.2122307271,
    0.2158605307, 0.2195262313, 0.2232279778, 0.2269658893, 0.2307400703, 0.2345506549, 0.2383976579, 0.2422811985, 0.2462013960, 0.2501583695, 0.2541521788, 0.2581829131, 0.2622507215, 0.2663556635, 0.2704978585, 0.2746773660,
    0.2788943350, 0.2831487954, 0.2874408960, 0.2917706966, 0.2961383164, 0.3005438447, 0.3049873710, 0.3094689548, 0.3139887452, 0.3185468316, 0.3231432438, 0.3277781308, 0.3324515820, 0.3371636569, 0.3419144452, 0.3467040956,
    0.3515326977, 0.3564002514, 0.3613068759, 0.3662526906, 0.3712377846, 0.3762622178, 0.3813261092, 0.3864295185, 0.3915725648, 0.3967553079, 0.4019778669, 0.4072403014, 0.4125427008, 0.4178851545, 0.4232677519, 0.4286905527,
    0.4341537058, 0.4396572411, 0.4452012479, 0.4507858455, 0.4564110637, 0.4620770514, 0.4677838385, 0.4735315442, 0.4793202281, 0.4851499796, 0.4910208881, 0.4969330430, 0.5028865933, 0.5088814497, 0.5149177909, 0.5209956765,
    0.5271152258, 0.5332764983, 0.5394796133, 0.5457245708, 0.5520114899, 0.5583404899, 0.5647116303, 0.5711249113, 0.5775805116, 0.5840784907, 0.5906189084, 0.5972018838, 0.6038274169, 0.6104956269, 0.6172066331, 0.6239604354,
    0.6307572126, 0.6375969648, 0.6444797516, 0.6514056921, 0.6583748460, 0.6653873324, 0.6724432111, 0.6795425415, 0.6866854429, 0.6938719153, 0.7011020184, 0.7083759308, 0.7156936526, 0.7230552435, 0.7304608822, 0.7379105687,
    0.7454043627, 0.7529423237, 0.7605246305, 0.7681512833, 0.7758223414, 0.7835379243, 0.7912980318, 0.7991028428, 0.8069523573, 0.8148466945, 0.8227858543, 0.8307699561, 0.8387991190, 0.8468732834, 0.8549926877, 0.8631572723,
    0.8713672161, 0.8796223402, 0.8879231811, 0.8962693810, 0.9046613574, 0.9130986929, 0.9215820432, 0.9301108718, 0.9386858940, 0.9473065734, 0.9559735060, 0.9646862745, 0.9734454751, 0.9822505713, 0.9911022186, 1.0000000000,
];
--------------------------------------------------------------------------------
/src/path.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! Path related helpers.
5 |
6 | use std::ffi::{OsStr, OsString};
7 | use std::path::{Component, MAIN_SEPARATOR_STR, Path, PathBuf};
8 |
/// Normalizes a given path by removing redundant components:
/// `.` is dropped, `..` removes the preceding component (but never the root),
/// and repeated or trailing separators disappear.
///
/// The given path must be absolute (e.g. by joining it with the current working directory).
/// This is only checked in debug builds. The file system is not consulted.
pub fn normalize(path: &Path) -> PathBuf {
    /// Length of the path in its encoded byte form.
    fn encoded_len(p: &Path) -> usize {
        p.as_os_str().as_encoded_bytes().len()
    }

    debug_assert!(path.is_absolute());

    let mut res = PathBuf::with_capacity(encoded_len(path));
    // Length of `res` right after the root was pushed. Nothing shorter may remain.
    let mut root_len = 0;

    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::Prefix(prefix) => res.push(prefix.as_os_str()),
            Component::Normal(name) => res.push(name),
            Component::RootDir => {
                res.push(OsStr::new(MAIN_SEPARATOR_STR));
                root_len = encoded_len(&res);
            }
            Component::ParentDir => {
                // Get the length up to the parent directory
                let parent_len = res.parent().map(encoded_len);

                match parent_len {
                    // Ensure we don't pop the root directory
                    Some(len) if len >= root_len => {
                        // Pop the last component from `res`.
                        //
                        // This can be replaced with a plain `res.as_mut_os_string().truncate(len)`
                        // once `os_string_truncate` is stabilized (#133262).
                        let mut bytes = res.into_os_string().into_encoded_bytes();
                        bytes.truncate(len);
                        // SAFETY: `len` is the length of `res.parent()`, which is a prefix
                        // of `res` that ends on a component boundary.
                        res = PathBuf::from(unsafe { OsString::from_encoded_bytes_unchecked(bytes) });
                    }
                    _ => {}
                }
            }
        }
    }

    res
}
48 |
#[cfg(test)]
mod tests {
    use std::ffi::OsString;
    use std::path::Path;

    use super::*;

    /// Normalizes `input` and returns the result as an `OsString`,
    /// which can be compared directly against string literals.
    fn norm(input: &str) -> OsString {
        let normalized = normalize(Path::new(input));
        normalized.into_os_string()
    }

    /// Checks every `(input, expected)` pair in order.
    fn check_all(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(norm(input), expected);
        }
    }

    #[cfg(unix)]
    #[test]
    fn test_unix() {
        check_all(&[
            ("/a/b/c", "/a/b/c"),
            ("/a/b/c/", "/a/b/c"),
            ("/a/./b", "/a/b"),
            ("/a/b/../c", "/a/c"),
            ("/../../a", "/a"),
            ("/../", "/"),
            ("/a//b/c", "/a/b/c"),
            ("/a/b/c/../../../../d", "/d"),
            ("//", "/"),
        ]);
    }

    #[cfg(windows)]
    #[test]
    fn test_windows() {
        check_all(&[
            (r"C:\a\b\c", r"C:\a\b\c"),
            (r"C:\a\b\c\", r"C:\a\b\c"),
            (r"C:\a\.\b", r"C:\a\b"),
            (r"C:\a\b\..\c", r"C:\a\c"),
            (r"C:\..\..\a", r"C:\a"),
            (r"C:\..\", r"C:\"),
            (r"C:\a\\b\c", r"C:\a\b\c"),
            (r"C:/a\b/c", r"C:\a\b\c"),
            (r"C:\a\b\c\..\..\..\..\d", r"C:\d"),
            (r"\\server\share\path", r"\\server\share\path"),
        ]);
    }
}
89 |
--------------------------------------------------------------------------------
/src/simd/lines_bwd.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | use std::ptr;
5 |
6 | use crate::helpers::CoordType;
7 |
8 | /// Starting from the `offset` in `haystack` with a current line index of
9 | /// `line`, this seeks backwards to the `line_stop`-nth line and returns the
10 | /// new offset and the line index at that point.
11 | ///
12 | /// Note that this function differs from `lines_fwd` in that it
13 | /// seeks backwards even if the `line` is already at `line_stop`.
14 | /// This allows you to ensure (or test) whether `offset` is at a line start.
15 | ///
16 | /// It returns an offset *past* a newline and thus at the start of a line.
17 | pub fn lines_bwd(
18 | haystack: &[u8],
19 | offset: usize,
20 | line: CoordType,
21 | line_stop: CoordType,
22 | ) -> (usize, CoordType) {
23 | unsafe {
24 | let beg = haystack.as_ptr();
25 | let it = beg.add(offset.min(haystack.len()));
26 | let (it, line) = lines_bwd_raw(beg, it, line, line_stop);
27 | (it.offset_from_unsigned(beg), line)
28 | }
29 | }
30 |
/// Pointer-based core of `lines_bwd` that selects the implementation for the
/// architecture this crate is compiled for.
///
/// The scan starts at `end` and moves towards `beg`. The result is the pointer
/// the scan stopped at plus the line index at that position.
unsafe fn lines_bwd_raw(
    beg: *const u8,
    end: *const u8,
    line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    // x86_64: call through the `LINES_BWD_DISPATCH` function pointer.
    #[cfg(target_arch = "x86_64")]
    return unsafe { LINES_BWD_DISPATCH(beg, end, line, line_stop) };

    // aarch64: call the NEON implementation directly.
    #[cfg(target_arch = "aarch64")]
    return unsafe { lines_bwd_neon(beg, end, line, line_stop) };

    // Every other architecture uses the scalar loop. On the two architectures
    // above this statement follows a `return`, hence the `allow`.
    #[allow(unreachable_code)]
    return unsafe { lines_bwd_fallback(beg, end, line, line_stop) };
}
46 |
47 | unsafe fn lines_bwd_fallback(
48 | beg: *const u8,
49 | mut end: *const u8,
50 | mut line: CoordType,
51 | line_stop: CoordType,
52 | ) -> (*const u8, CoordType) {
53 | unsafe {
54 | while !ptr::eq(end, beg) {
55 | let n = end.sub(1);
56 | if *n == b'\n' {
57 | if line <= line_stop {
58 | break;
59 | }
60 | line -= 1;
61 | }
62 | end = n;
63 | }
64 | (end, line)
65 | }
66 | }
67 |
// Function pointer through which `lines_bwd_raw` calls the x86_64 implementation.
// It initially points at `lines_bwd_dispatch`, which overwrites it with the
// implementation it selected, so the CPU feature check is not repeated on later calls.
// NOTE(review): this is a `static mut` written without synchronization; presumably
// that's fine because every writer stores the same value - confirm.
#[cfg(target_arch = "x86_64")]
static mut LINES_BWD_DISPATCH: unsafe fn(
    beg: *const u8,
    end: *const u8,
    line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) = lines_bwd_dispatch;
75 |
76 | #[cfg(target_arch = "x86_64")]
77 | unsafe fn lines_bwd_dispatch(
78 | beg: *const u8,
79 | end: *const u8,
80 | line: CoordType,
81 | line_stop: CoordType,
82 | ) -> (*const u8, CoordType) {
83 | let func = if is_x86_feature_detected!("avx2") { lines_bwd_avx2 } else { lines_bwd_fallback };
84 | unsafe { LINES_BWD_DISPATCH = func };
85 | unsafe { func(beg, end, line, line_stop) }
86 | }
87 |
/// AVX2 implementation of the backwards line seek.
///
/// Walking from `end` towards `beg`, this counts the newlines in whole 128-byte
/// and then 32-byte chunks. A chunk is only skipped if `line` stays above
/// `line_stop` afterwards. The chunk that would reach `line_stop`, as well as any
/// remainder shorter than 32 bytes, is handed to `lines_bwd_fallback`, which
/// finds the exact byte position.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn lines_bwd_avx2(
    beg: *const u8,
    mut end: *const u8,
    mut line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    unsafe {
        use std::arch::x86_64::*;

        // Adds up the four 64-bit lanes of `v`.
        #[inline(always)]
        unsafe fn horizontal_sum_i64(v: __m256i) -> i64 {
            unsafe {
                // Fold the upper 128 bits onto the lower 128 bits...
                let hi = _mm256_extracti128_si256::<1>(v);
                let lo = _mm256_castsi256_si128(v);
                let sum = _mm_add_epi64(lo, hi);
                // ...then the upper 64 bits onto the lower 64 bits.
                let shuf = _mm_shuffle_epi32::<0b11_10_11_10>(sum);
                let sum = _mm_add_epi64(sum, shuf);
                _mm_cvtsi128_si64(sum)
            }
        }

        let lf = _mm256_set1_epi8(b'\n' as i8);
        // If `line` is already at or below `line_stop`, this makes both loops below
        // break on their first chunk, leaving the entire search to the fallback.
        let line_stop = line_stop.min(line);
        let mut remaining = end.offset_from_unsigned(beg);

        // 4x unrolled: 128 bytes per iteration.
        while remaining >= 128 {
            let chunk_start = end.sub(128);

            let v1 = _mm256_loadu_si256(chunk_start.add(0) as *const _);
            let v2 = _mm256_loadu_si256(chunk_start.add(32) as *const _);
            let v3 = _mm256_loadu_si256(chunk_start.add(64) as *const _);
            let v4 = _mm256_loadu_si256(chunk_start.add(96) as *const _);

            // Each comparison result byte is 0 or -1 (0xff), so subtracting the
            // results accumulates a per-byte match count (at most 4 here).
            let mut sum = _mm256_setzero_si256();
            sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v1, lf));
            sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v2, lf));
            sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v3, lf));
            sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v4, lf));

            // Calculate the total number of matches in this chunk.
            let sum = _mm256_sad_epu8(sum, _mm256_setzero_si256());
            let sum = horizontal_sum_i64(sum);

            // Don't skip the chunk that reaches `line_stop`; it needs a finer search.
            let line_next = line - sum as CoordType;
            if line_next <= line_stop {
                break;
            }

            end = chunk_start;
            remaining -= 128;
            line = line_next;
        }

        // Same idea, one 32-byte vector at a time.
        while remaining >= 32 {
            let chunk_start = end.sub(32);
            let v = _mm256_loadu_si256(chunk_start as *const _);
            let c = _mm256_cmpeq_epi8(v, lf);

            // Turn each 0xff match byte into a 1 so that the byte sum is the match count.
            let ones = _mm256_and_si256(c, _mm256_set1_epi8(0x01));
            let sum = _mm256_sad_epu8(ones, _mm256_setzero_si256());
            let sum = horizontal_sum_i64(sum);

            let line_next = line - sum as CoordType;
            if line_next <= line_stop {
                break;
            }

            end = chunk_start;
            remaining -= 32;
            line = line_next;
        }

        // Let the scalar loop handle whatever is left.
        lines_bwd_fallback(beg, end, line, line_stop)
    }
}
164 |
/// NEON implementation of the backwards line seek.
///
/// Walking from `end` towards `beg`, this counts the newlines in whole 64-byte
/// and then 16-byte chunks. A chunk is only skipped if `line` stays above
/// `line_stop` afterwards. The chunk that would reach `line_stop`, as well as any
/// remainder shorter than 16 bytes, is handed to `lines_bwd_fallback`, which
/// finds the exact byte position.
#[cfg(target_arch = "aarch64")]
unsafe fn lines_bwd_neon(
    beg: *const u8,
    mut end: *const u8,
    mut line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    unsafe {
        use std::arch::aarch64::*;

        let lf = vdupq_n_u8(b'\n');
        // If `line` is already at or below `line_stop`, this makes both loops below
        // break on their first chunk, leaving the entire search to the fallback.
        let line_stop = line_stop.min(line);
        let mut remaining = end.offset_from_unsigned(beg);

        // 4x unrolled: 64 bytes per iteration.
        while remaining >= 64 {
            let chunk_start = end.sub(64);

            let v1 = vld1q_u8(chunk_start.add(0));
            let v2 = vld1q_u8(chunk_start.add(16));
            let v3 = vld1q_u8(chunk_start.add(32));
            let v4 = vld1q_u8(chunk_start.add(48));

            // Each comparison result byte is 0 or 0xff (-1), so subtracting the
            // results accumulates a per-byte match count (at most 4 here).
            let mut sum = vdupq_n_u8(0);
            sum = vsubq_u8(sum, vceqq_u8(v1, lf));
            sum = vsubq_u8(sum, vceqq_u8(v2, lf));
            sum = vsubq_u8(sum, vceqq_u8(v3, lf));
            sum = vsubq_u8(sum, vceqq_u8(v4, lf));

            // Total number of matches in this chunk (16 lanes * 4 = 64 at most).
            let sum = vaddvq_u8(sum);

            // Don't skip the chunk that reaches `line_stop`; it needs a finer search.
            let line_next = line - sum as CoordType;
            if line_next <= line_stop {
                break;
            }

            end = chunk_start;
            remaining -= 64;
            line = line_next;
        }

        // Same idea, one 16-byte vector at a time.
        while remaining >= 16 {
            let chunk_start = end.sub(16);
            let v = vld1q_u8(chunk_start);
            let c = vceqq_u8(v, lf);
            // Turn each 0xff match byte into a 1 so that the byte sum is the match count.
            let c = vandq_u8(c, vdupq_n_u8(0x01));
            let sum = vaddvq_u8(c);

            let line_next = line - sum as CoordType;
            if line_next <= line_stop {
                break;
            }

            end = chunk_start;
            remaining -= 16;
            line = line_next;
        }

        // Let the scalar loop handle whatever is left.
        lines_bwd_fallback(beg, end, line, line_stop)
    }
}
225 |
#[cfg(test)]
mod test {
    use super::*;
    use crate::helpers::CoordType;
    use crate::simd::test::*;

    /// Plain, index-based model that `lines_bwd` is compared against.
    ///
    /// Note that, unlike `lines_bwd`, it doesn't move at all if `line < line_stop`.
    /// `pseudo_fuzz` never generates such inputs.
    fn reference_lines_bwd(
        haystack: &[u8],
        mut offset: usize,
        mut line: CoordType,
        line_stop: CoordType,
    ) -> (usize, CoordType) {
        if line < line_stop {
            return (offset, line);
        }

        while offset > 0 {
            if haystack[offset - 1] == b'\n' {
                if line == line_stop {
                    break;
                }
                line -= 1;
            }
            offset -= 1;
        }

        (offset, line)
    }

    /// Compares `lines_bwd` with the reference model on 1000 pseudo-random inputs.
    #[test]
    fn pseudo_fuzz() {
        let text = generate_random_text(1024);
        let bytes = text.as_bytes();
        let line_count = count_lines(&text);
        let mut offset_rng = make_rng();
        let mut line_rng = make_rng();
        let mut line_distance_rng = make_rng();

        for _ in 0..1000 {
            let offset = offset_rng() % (text.len() + 1);
            let stop = line_distance_rng() % (line_count + 1);
            let start = stop + line_rng() % 100;

            let line = start as CoordType;
            let line_stop = stop as CoordType;

            let expected = reference_lines_bwd(bytes, offset, line, line_stop);
            let actual = lines_bwd(bytes, offset, line, line_stop);

            assert_eq!(expected, actual);
        }
    }

    /// Even if `line` is below `line_stop`, the offset must move back to the line start.
    #[test]
    fn seeks_to_start() {
        let haystack = b"Hello\nWorld\n";

        for offset in 6..=11 {
            let (off, line) = lines_bwd(haystack, offset, 123, 456);
            assert_eq!(off, 6); // After "Hello\n"
            assert_eq!(line, 123); // Still on the same line
        }
    }
}
284 |
--------------------------------------------------------------------------------
/src/simd/lines_fwd.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | use std::ptr;
5 |
6 | use crate::helpers::CoordType;
7 |
8 | /// Starting from the `offset` in `haystack` with a current line index of
9 | /// `line`, this seeks to the `line_stop`-nth line and returns the
10 | /// new offset and the line index at that point.
11 | ///
12 | /// It returns an offset *past* the newline.
13 | /// If `line` is already at or past `line_stop`, it returns immediately.
14 | pub fn lines_fwd(
15 | haystack: &[u8],
16 | offset: usize,
17 | line: CoordType,
18 | line_stop: CoordType,
19 | ) -> (usize, CoordType) {
20 | unsafe {
21 | let beg = haystack.as_ptr();
22 | let end = beg.add(haystack.len());
23 | let it = beg.add(offset.min(haystack.len()));
24 | let (it, line) = lines_fwd_raw(it, end, line, line_stop);
25 | (it.offset_from_unsigned(beg), line)
26 | }
27 | }
28 |
/// Pointer-based core of `lines_fwd` that selects the implementation for the
/// architecture this crate is compiled for.
///
/// The scan starts at `beg` and moves towards `end`. The result is the pointer
/// the scan stopped at plus the line index at that position.
unsafe fn lines_fwd_raw(
    beg: *const u8,
    end: *const u8,
    line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    // x86_64: call through the `LINES_FWD_DISPATCH` function pointer.
    #[cfg(target_arch = "x86_64")]
    return unsafe { LINES_FWD_DISPATCH(beg, end, line, line_stop) };

    // aarch64: call the NEON implementation directly.
    #[cfg(target_arch = "aarch64")]
    return unsafe { lines_fwd_neon(beg, end, line, line_stop) };

    // Every other architecture uses the scalar loop. On the two architectures
    // above this statement follows a `return`, hence the `allow`.
    #[allow(unreachable_code)]
    return unsafe { lines_fwd_fallback(beg, end, line, line_stop) };
}
44 |
45 | unsafe fn lines_fwd_fallback(
46 | mut beg: *const u8,
47 | end: *const u8,
48 | mut line: CoordType,
49 | line_stop: CoordType,
50 | ) -> (*const u8, CoordType) {
51 | unsafe {
52 | if line < line_stop {
53 | while !ptr::eq(beg, end) {
54 | let c = *beg;
55 | beg = beg.add(1);
56 | if c == b'\n' {
57 | line += 1;
58 | if line == line_stop {
59 | break;
60 | }
61 | }
62 | }
63 | }
64 | (beg, line)
65 | }
66 | }
67 |
// Function pointer through which `lines_fwd_raw` calls the x86_64 implementation.
// It initially points at `lines_fwd_dispatch`, which overwrites it with the
// implementation it selected, so the CPU feature check is not repeated on later calls.
// NOTE(review): this is a `static mut` written without synchronization; presumably
// that's fine because every writer stores the same value - confirm.
#[cfg(target_arch = "x86_64")]
static mut LINES_FWD_DISPATCH: unsafe fn(
    beg: *const u8,
    end: *const u8,
    line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) = lines_fwd_dispatch;
75 |
76 | #[cfg(target_arch = "x86_64")]
77 | unsafe fn lines_fwd_dispatch(
78 | beg: *const u8,
79 | end: *const u8,
80 | line: CoordType,
81 | line_stop: CoordType,
82 | ) -> (*const u8, CoordType) {
83 | let func = if is_x86_feature_detected!("avx2") { lines_fwd_avx2 } else { lines_fwd_fallback };
84 | unsafe { LINES_FWD_DISPATCH = func };
85 | unsafe { func(beg, end, line, line_stop) }
86 | }
87 |
/// AVX2 implementation of the forward line seek.
///
/// As long as `line < line_stop`, this counts the newlines in whole 128-byte and
/// then 32-byte chunks, starting at `beg`. A chunk is only skipped if `line`
/// stays below `line_stop` afterwards. The chunk that would reach `line_stop`,
/// as well as any remainder shorter than 32 bytes, is handed to
/// `lines_fwd_fallback`, which finds the exact byte position.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn lines_fwd_avx2(
    mut beg: *const u8,
    end: *const u8,
    mut line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    unsafe {
        use std::arch::x86_64::*;

        // Adds up the four 64-bit lanes of `v`.
        #[inline(always)]
        unsafe fn horizontal_sum_i64(v: __m256i) -> i64 {
            unsafe {
                // Fold the upper 128 bits onto the lower 128 bits...
                let hi = _mm256_extracti128_si256::<1>(v);
                let lo = _mm256_castsi256_si128(v);
                let sum = _mm_add_epi64(lo, hi);
                // ...then the upper 64 bits onto the lower 64 bits.
                let shuf = _mm_shuffle_epi32::<0b11_10_11_10>(sum);
                let sum = _mm_add_epi64(sum, shuf);
                _mm_cvtsi128_si64(sum)
            }
        }

        let lf = _mm256_set1_epi8(b'\n' as i8);
        let mut remaining = end.offset_from_unsigned(beg);

        if line < line_stop {
            // Unrolling the loop by 4x speeds things up by >3x.
            // It allows us to accumulate matches before doing a single `vpsadbw`.
            while remaining >= 128 {
                let v1 = _mm256_loadu_si256(beg.add(0) as *const _);
                let v2 = _mm256_loadu_si256(beg.add(32) as *const _);
                let v3 = _mm256_loadu_si256(beg.add(64) as *const _);
                let v4 = _mm256_loadu_si256(beg.add(96) as *const _);

                // `vpcmpeqb` leaves each comparison result byte as 0 or -1 (0xff).
                // This allows us to accumulate the comparisons by subtracting them.
                let mut sum = _mm256_setzero_si256();
                sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v1, lf));
                sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v2, lf));
                sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v3, lf));
                sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v4, lf));

                // Calculate the total number of matches in this chunk.
                let sum = _mm256_sad_epu8(sum, _mm256_setzero_si256());
                let sum = horizontal_sum_i64(sum);

                // Don't skip the chunk that reaches `line_stop`; it needs a finer search.
                let line_next = line + sum as CoordType;
                if line_next >= line_stop {
                    break;
                }

                beg = beg.add(128);
                remaining -= 128;
                line = line_next;
            }

            // Same idea, one 32-byte vector at a time.
            while remaining >= 32 {
                let v = _mm256_loadu_si256(beg as *const _);
                let c = _mm256_cmpeq_epi8(v, lf);

                // If you ask an LLM, the best way to do this is
                // to do a `vpmovmskb` followed by `popcnt`.
                // On contemporary hardware that's a bad idea though.
                // Instead, turn each 0xff match byte into a 1 and sum up the bytes.
                let ones = _mm256_and_si256(c, _mm256_set1_epi8(0x01));
                let sum = _mm256_sad_epu8(ones, _mm256_setzero_si256());
                let sum = horizontal_sum_i64(sum);

                let line_next = line + sum as CoordType;
                if line_next >= line_stop {
                    break;
                }

                beg = beg.add(32);
                remaining -= 32;
                line = line_next;
            }
        }

        // Let the scalar loop handle whatever is left.
        lines_fwd_fallback(beg, end, line, line_stop)
    }
}
170 |
/// NEON implementation of the forward line seek.
///
/// As long as `line < line_stop`, this counts the newlines in whole 64-byte and
/// then 16-byte chunks, starting at `beg`. A chunk is only skipped if `line`
/// stays below `line_stop` afterwards. The chunk that would reach `line_stop`,
/// as well as any remainder shorter than 16 bytes, is handed to
/// `lines_fwd_fallback`, which finds the exact byte position.
#[cfg(target_arch = "aarch64")]
unsafe fn lines_fwd_neon(
    mut beg: *const u8,
    end: *const u8,
    mut line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    unsafe {
        use std::arch::aarch64::*;

        let lf = vdupq_n_u8(b'\n');
        let mut remaining = end.offset_from_unsigned(beg);

        if line < line_stop {
            // 4x unrolled: 64 bytes per iteration.
            while remaining >= 64 {
                let v1 = vld1q_u8(beg.add(0));
                let v2 = vld1q_u8(beg.add(16));
                let v3 = vld1q_u8(beg.add(32));
                let v4 = vld1q_u8(beg.add(48));

                // `vceqq_u8` leaves each comparison result byte as 0 or -1 (0xff).
                // This allows us to accumulate the comparisons by subtracting them.
                let mut sum = vdupq_n_u8(0);
                sum = vsubq_u8(sum, vceqq_u8(v1, lf));
                sum = vsubq_u8(sum, vceqq_u8(v2, lf));
                sum = vsubq_u8(sum, vceqq_u8(v3, lf));
                sum = vsubq_u8(sum, vceqq_u8(v4, lf));

                // Total number of matches in this chunk (16 lanes * 4 = 64 at most).
                let sum = vaddvq_u8(sum);

                // Don't skip the chunk that reaches `line_stop`; it needs a finer search.
                let line_next = line + sum as CoordType;
                if line_next >= line_stop {
                    break;
                }

                beg = beg.add(64);
                remaining -= 64;
                line = line_next;
            }

            // Same idea, one 16-byte vector at a time.
            while remaining >= 16 {
                let v = vld1q_u8(beg);
                let c = vceqq_u8(v, lf);
                // Turn each 0xff match byte into a 1 so that the byte sum is the match count.
                let c = vandq_u8(c, vdupq_n_u8(0x01));
                let sum = vaddvq_u8(c);

                let line_next = line + sum as CoordType;
                if line_next >= line_stop {
                    break;
                }

                beg = beg.add(16);
                remaining -= 16;
                line = line_next;
            }
        }

        // Let the scalar loop handle whatever is left.
        lines_fwd_fallback(beg, end, line, line_stop)
    }
}
231 |
#[cfg(test)]
mod test {
    use super::*;
    use crate::helpers::CoordType;
    use crate::simd::test::*;

    /// Plain, index-based model that `lines_fwd` is compared against.
    fn reference_lines_fwd(
        haystack: &[u8],
        mut offset: usize,
        mut line: CoordType,
        line_stop: CoordType,
    ) -> (usize, CoordType) {
        if line >= line_stop {
            return (offset, line);
        }

        while offset < haystack.len() {
            let byte = haystack[offset];
            offset += 1;

            if byte == b'\n' {
                line += 1;
                if line == line_stop {
                    break;
                }
            }
        }

        (offset, line)
    }

    /// Compares `lines_fwd` with the reference model on 1000 pseudo-random inputs.
    #[test]
    fn pseudo_fuzz() {
        let text = generate_random_text(1024);
        let bytes = text.as_bytes();
        let line_count = count_lines(&text);
        let mut offset_rng = make_rng();
        let mut line_rng = make_rng();
        let mut line_distance_rng = make_rng();

        for _ in 0..1000 {
            let offset = offset_rng() % (text.len() + 1);
            let start = line_rng() % 100;
            let stop = start + line_distance_rng() % (line_count + 1);

            let line = start as CoordType;
            let line_stop = stop as CoordType;

            let expected = reference_lines_fwd(bytes, offset, line, line_stop);
            let actual = lines_fwd(bytes, offset, line, line_stop);

            assert_eq!(expected, actual);
        }
    }
}
282 |
--------------------------------------------------------------------------------
/src/simd/memchr2.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! `memchr`, but with two needles.
5 |
6 | use std::ptr;
7 |
8 | /// `memchr`, but with two needles.
9 | ///
10 | /// Returns the index of the first occurrence of either needle in the
11 | /// `haystack`. If no needle is found, `haystack.len()` is returned.
12 | /// `offset` specifies the index to start searching from.
13 | pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8], offset: usize) -> usize {
14 | unsafe {
15 | let beg = haystack.as_ptr();
16 | let end = beg.add(haystack.len());
17 | let it = beg.add(offset.min(haystack.len()));
18 | let it = memchr2_raw(needle1, needle2, it, end);
19 | it.offset_from_unsigned(beg)
20 | }
21 | }
22 |
/// Pointer-based core of `memchr2` that selects the implementation for the
/// architecture this crate is compiled for.
///
/// Searches from `beg` towards `end` and returns a pointer to the first byte
/// equal to either needle, or `end` if there is none.
unsafe fn memchr2_raw(needle1: u8, needle2: u8, beg: *const u8, end: *const u8) -> *const u8 {
    // x86 / x86_64: call through the `MEMCHR2_DISPATCH` function pointer.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    return unsafe { MEMCHR2_DISPATCH(needle1, needle2, beg, end) };

    // aarch64: call the NEON implementation directly.
    #[cfg(target_arch = "aarch64")]
    return unsafe { memchr2_neon(needle1, needle2, beg, end) };

    // Every other architecture uses the scalar loop. On the architectures
    // above this statement follows a `return`, hence the `allow`.
    #[allow(unreachable_code)]
    return unsafe { memchr2_fallback(needle1, needle2, beg, end) };
}
33 |
/// Scalar implementation of `memchr2`.
///
/// Checks one byte at a time, starting at `beg`. Returns a pointer to the first
/// byte equal to `needle1` or `needle2`, or `end` if no such byte exists.
unsafe fn memchr2_fallback(
    needle1: u8,
    needle2: u8,
    mut beg: *const u8,
    end: *const u8,
) -> *const u8 {
    loop {
        if ptr::eq(beg, end) {
            return beg;
        }

        let byte = unsafe { *beg };
        if byte == needle1 || byte == needle2 {
            return beg;
        }

        beg = unsafe { beg.add(1) };
    }
}
51 |
// In order to make `memchr2_raw` slim and fast, we use a function pointer that updates
// itself to the correct implementation on the first call. This reduces binary size.
// It would also reduce branches if we had >2 implementations (a jump still needs to be predicted).
// NOTE that this ONLY works if Control Flow Guard is disabled on Windows.
//
// The pointer initially refers to `memchr2_dispatch`, which replaces it with
// the implementation it selected.
// NOTE(review): this is a `static mut` written without synchronization; presumably
// that's fine because every writer stores the same value - confirm.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
static mut MEMCHR2_DISPATCH: unsafe fn(
    needle1: u8,
    needle2: u8,
    beg: *const u8,
    end: *const u8,
) -> *const u8 = memchr2_dispatch;
63 |
64 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
65 | unsafe fn memchr2_dispatch(needle1: u8, needle2: u8, beg: *const u8, end: *const u8) -> *const u8 {
66 | let func = if is_x86_feature_detected!("avx2") { memchr2_avx2 } else { memchr2_fallback };
67 | unsafe { MEMCHR2_DISPATCH = func };
68 | unsafe { func(needle1, needle2, beg, end) }
69 | }
70 |
// FWIW, I found that adding support for AVX512 was not useful at the time,
// as it only marginally improved file load performance by <5%.
//
/// AVX2 implementation of `memchr2`.
///
/// Compares 32 bytes at a time against both needles and returns a pointer to
/// the first match. Fewer than 32 trailing bytes are handled by `memchr2_fallback`.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn memchr2_avx2(needle1: u8, needle2: u8, mut beg: *const u8, end: *const u8) -> *const u8 {
    unsafe {
        #[cfg(target_arch = "x86")]
        use std::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use std::arch::x86_64::*;

        // Broadcast each needle into all 32 byte lanes.
        let n1 = _mm256_set1_epi8(needle1 as i8);
        let n2 = _mm256_set1_epi8(needle2 as i8);
        let mut remaining = end.offset_from_unsigned(beg);

        while remaining >= 32 {
            let v = _mm256_loadu_si256(beg as *const _);
            let a = _mm256_cmpeq_epi8(v, n1);
            let b = _mm256_cmpeq_epi8(v, n2);
            let c = _mm256_or_si256(a, b);
            // One bit per byte lane: bit `i` is set if byte `i` matched either needle.
            let m = _mm256_movemask_epi8(c) as u32;

            if m != 0 {
                // The lowest set bit belongs to the first matching byte.
                return beg.add(m.trailing_zeros() as usize);
            }

            beg = beg.add(32);
            remaining -= 32;
        }

        // Let the scalar loop handle whatever is left.
        memchr2_fallback(needle1, needle2, beg, end)
    }
}
104 |
/// NEON implementation of `memchr2`.
///
/// Compares 16 bytes at a time against both needles and returns a pointer to
/// the first match. Fewer than 16 trailing bytes are handled by `memchr2_fallback`.
#[cfg(target_arch = "aarch64")]
unsafe fn memchr2_neon(needle1: u8, needle2: u8, mut beg: *const u8, end: *const u8) -> *const u8 {
    unsafe {
        use std::arch::aarch64::*;

        if end.offset_from_unsigned(beg) >= 16 {
            // Broadcast each needle into all 16 byte lanes.
            let n1 = vdupq_n_u8(needle1);
            let n2 = vdupq_n_u8(needle2);

            loop {
                let v = vld1q_u8(beg as *const _);
                let a = vceqq_u8(v, n1);
                let b = vceqq_u8(v, n2);
                let c = vorrq_u8(a, b);

                // Narrow the 16 comparison bytes down to a 64-bit mask with 4 bits per byte.
                // https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
                let m = vreinterpretq_u16_u8(c);
                let m = vshrn_n_u16(m, 4);
                let m = vreinterpret_u64_u8(m);
                let m = vget_lane_u64(m, 0);

                if m != 0 {
                    // 4 mask bits per byte -> divide the bit index by 4 to get the byte index.
                    return beg.add(m.trailing_zeros() as usize >> 2);
                }

                beg = beg.add(16);
                // Stop as soon as there's no full 16-byte vector left to load.
                if end.offset_from_unsigned(beg) < 16 {
                    break;
                }
            }
        }

        // Let the scalar loop handle whatever is left.
        memchr2_fallback(needle1, needle2, beg, end)
    }
}
140 |
#[cfg(test)]
mod tests {
    use std::slice;

    use super::*;
    use crate::sys;

    /// An empty haystack yields `haystack.len()`, i.e. 0.
    #[test]
    fn test_empty() {
        let found = memchr2(b'a', b'b', b"", 0);
        assert_eq!(found, 0);
    }

    /// Matches at the start, in the middle, at the very end, and not at all.
    #[test]
    fn test_basic() {
        let alphabet = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
        let haystack = &alphabet[..43];

        let cases = [
            (b'a', b'z', 0),
            (b'p', b'q', 15),
            (b'Q', b'Z', 42),
            (b'0', b'9', haystack.len()),
        ];

        for (needle1, needle2, expected) in cases {
            assert_eq!(memchr2(needle1, needle2, haystack, 0), expected);
        }
    }

    // Test that it doesn't match before/after the start offset respectively.
    #[test]
    fn test_with_offset() {
        let haystack = b"abcdefghabcdefghabcdefghabcdefghabcdefgh";

        // (start offset, expected index)
        let cases = [(0, 0), (1, 1), (2, 8), (9, 9), (16, 16), (41, 40)];

        for (offset, expected) in cases {
            assert_eq!(memchr2(b'a', b'b', haystack, offset), expected);
        }
    }

    // Test memory access safety at page boundaries.
    // The test is a success if it doesn't segfault.
    #[test]
    fn test_page_boundary() {
        const PAGE_SIZE: usize = 64 * 1024; // 64 KiB to cover many architectures.

        let page = unsafe {
            // 3 pages: uncommitted, committed, uncommitted
            let base = sys::virtual_reserve(PAGE_SIZE * 3).unwrap();
            sys::virtual_commit(base.add(PAGE_SIZE), PAGE_SIZE).unwrap();
            slice::from_raw_parts_mut(base.add(PAGE_SIZE).as_ptr(), PAGE_SIZE)
        };

        page.fill(b'a');

        // Test if it seeks beyond the page boundary.
        let tail = &page[page.len() - 40..];
        assert_eq!(memchr2(b'\0', b'\0', tail, 0), 40);

        // Test if it seeks before the page boundary for the masked/partial load.
        let head = &page[..10];
        assert_eq!(memchr2(b'\0', b'\0', head, 0), 10);
    }
}
198 |
--------------------------------------------------------------------------------
/src/simd/mod.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! Provides various high-throughput utilities.
5 |
6 | pub mod lines_bwd;
7 | pub mod lines_fwd;
8 | mod memchr2;
9 | mod memset;
10 |
11 | pub use lines_bwd::*;
12 | pub use lines_fwd::*;
13 | pub use memchr2::*;
14 | pub use memset::*;
15 |
#[cfg(test)]
mod test {
    /// Returns a deterministic pseudo-random number generator (Knuth's MMIX LCG).
    ///
    /// Every generator starts from the same state and thus yields the same sequence.
    pub fn make_rng() -> impl FnMut() -> usize {
        const MULTIPLIER: u64 = 6364136223846793005;
        const INCREMENT: u64 = 1442695040888963407;

        let mut state = INCREMENT;
        move || {
            state = state.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);
            state as usize
        }
    }

    /// Generates `len` characters of deterministic text, consisting of
    /// lowercase hex digits and newlines (4 out of 20 alphabet slots are newlines).
    pub fn generate_random_text(len: usize) -> String {
        const ALPHABET: &[u8; 20] = b"0123456789abcdef\n\n\n\n";

        let mut rng = make_rng();
        (0..len).map(|_| char::from(ALPHABET[rng() % ALPHABET.len()])).collect()
    }

    /// Counts the lines in `text` the same way `str::lines` splits them.
    pub fn count_lines(text: &str) -> usize {
        text.lines().fold(0, |count, _| count + 1)
    }
}
44 |
--------------------------------------------------------------------------------
/src/sys/mod.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! Platform abstractions.
5 |
6 | #[cfg(unix)]
7 | mod unix;
8 | #[cfg(windows)]
9 | mod windows;
10 |
11 | #[cfg(not(windows))]
12 | pub use std::fs::canonicalize;
13 |
14 | #[cfg(unix)]
15 | pub use unix::*;
16 | #[cfg(windows)]
17 | pub use windows::*;
18 |
--------------------------------------------------------------------------------
/src/unicode/mod.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | //! Everything related to Unicode lives here.
5 |
6 | mod measurement;
7 | mod tables;
8 | mod utf8;
9 |
10 | pub use measurement::*;
11 | pub use utf8::*;
12 |
--------------------------------------------------------------------------------
/src/unicode/utf8.rs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation.
2 | // Licensed under the MIT License.
3 |
4 | use std::{hint, iter};
5 |
/// An iterator over UTF-8 encoded characters.
///
/// This differs from [`std::str::Chars`] in that it works on unsanitized
/// byte slices and transparently replaces invalid UTF-8 sequences with U+FFFD.
///
/// This follows ICU's bitmask approach for `U8_NEXT_OR_FFFD` relatively
/// closely. This is important for compatibility, because it implements the
/// WHATWG recommendation for UTF8 error recovery. It's also helpful, because
/// the excellent folks at ICU have probably spent a lot of time optimizing it.
#[derive(Clone, Copy)]
pub struct Utf8Chars<'a> {
    /// The (potentially invalid) UTF-8 bytes that are being decoded.
    source: &'a [u8],
    /// Index into `source` of the next byte to be decoded.
    offset: usize,
}
20 |
21 | impl<'a> Utf8Chars<'a> {
22 | /// Creates a new `Utf8Chars` iterator starting at the given `offset`.
23 | pub fn new(source: &'a [u8], offset: usize) -> Self {
24 | Self { source, offset }
25 | }
26 |
27 | /// Returns the byte slice this iterator was created with.
28 | pub fn source(&self) -> &'a [u8] {
29 | self.source
30 | }
31 |
32 | /// Checks if the source is empty.
33 | pub fn is_empty(&self) -> bool {
34 | self.source.is_empty()
35 | }
36 |
37 | /// Returns the length of the source.
38 | pub fn len(&self) -> usize {
39 | self.source.len()
40 | }
41 |
42 | /// Returns the current offset in the byte slice.
43 | ///
44 | /// This will be past the last returned character.
45 | pub fn offset(&self) -> usize {
46 | self.offset
47 | }
48 |
49 | /// Sets the offset to continue iterating from.
50 | pub fn seek(&mut self, offset: usize) {
51 | self.offset = offset;
52 | }
53 |
54 | /// Returns true if `next` will return another character.
55 | pub fn has_next(&self) -> bool {
56 | self.offset < self.source.len()
57 | }
58 |
59 | // I found that on mixed 50/50 English/Non-English text,
60 | // performance actually suffers when this gets inlined.
61 | #[cold]
62 | fn next_slow(&mut self, c: u8) -> char {
63 | if self.offset >= self.source.len() {
64 | return Self::fffd();
65 | }
66 |
67 | let mut cp = c as u32;
68 |
69 | if cp < 0xE0 {
70 | // UTF8-2 = %xC2-DF UTF8-tail
71 |
72 | if cp < 0xC2 {
73 | return Self::fffd();
74 | }
75 |
76 | // The lead byte is 110xxxxx
77 | // -> Strip off the 110 prefix
78 | cp &= !0xE0;
79 | } else if cp < 0xF0 {
80 | // UTF8-3 =
81 | // %xE0 %xA0-BF UTF8-tail
82 | // %xE1-EC UTF8-tail UTF8-tail
83 | // %xED %x80-9F UTF8-tail
84 | // %xEE-EF UTF8-tail UTF8-tail
85 |
86 | // This is a pretty neat approach seen in ICU4C, because it's a 1:1 translation of the RFC.
87 | // I don't understand why others don't do the same thing. It's rather performant.
88 | const BITS_80_9F: u8 = 1 << 0b100; // 0x80-9F, aka 0b100xxxxx
89 | const BITS_A0_BF: u8 = 1 << 0b101; // 0xA0-BF, aka 0b101xxxxx
90 | const BITS_BOTH: u8 = BITS_80_9F | BITS_A0_BF;
91 | const LEAD_TRAIL1_BITS: [u8; 16] = [
92 | // v-- lead byte
93 | BITS_A0_BF, // 0xE0
94 | BITS_BOTH, // 0xE1
95 | BITS_BOTH, // 0xE2
96 | BITS_BOTH, // 0xE3
97 | BITS_BOTH, // 0xE4
98 | BITS_BOTH, // 0xE5
99 | BITS_BOTH, // 0xE6
100 | BITS_BOTH, // 0xE7
101 | BITS_BOTH, // 0xE8
102 | BITS_BOTH, // 0xE9
103 | BITS_BOTH, // 0xEA
104 | BITS_BOTH, // 0xEB
105 | BITS_BOTH, // 0xEC
106 | BITS_80_9F, // 0xED
107 | BITS_BOTH, // 0xEE
108 | BITS_BOTH, // 0xEF
109 | ];
110 |
111 | // The lead byte is 1110xxxx
112 | // -> Strip off the 1110 prefix
113 | cp &= !0xF0;
114 |
115 | let t = self.source[self.offset] as u32;
116 | if LEAD_TRAIL1_BITS[cp as usize] & (1 << (t >> 5)) == 0 {
117 | return Self::fffd();
118 | }
119 | cp = (cp << 6) | (t & 0x3F);
120 |
121 | self.offset += 1;
122 | if self.offset >= self.source.len() {
123 | return Self::fffd();
124 | }
125 | } else {
126 | // UTF8-4 =
127 | // %xF0 %x90-BF UTF8-tail UTF8-tail
128 | // %xF1-F3 UTF8-tail UTF8-tail UTF8-tail
129 | // %xF4 %x80-8F UTF8-tail UTF8-tail
130 |
131 | // This is similar to the above, but with the indices flipped:
132 | // The trail byte is the index and the lead byte mask is the value.
133 | // This is because the split at 0x90 requires more bits than fit into an u8.
134 | const TRAIL1_LEAD_BITS: [u8; 16] = [
135 | // --------- 0xF4 lead
136 | // | ...
137 | // | +---- 0xF0 lead
138 | // v v
139 | 0b_00000, //
140 | 0b_00000, //
141 | 0b_00000, //
142 | 0b_00000, //
143 | 0b_00000, //
144 | 0b_00000, //
145 | 0b_00000, // trail bytes:
146 | 0b_00000, //
147 | 0b_11110, // 0x80-8F -> 0x80-8F can be preceded by 0xF1-F4
148 | 0b_01111, // 0x90-9F -v
149 | 0b_01111, // 0xA0-AF -> 0x90-BF can be preceded by 0xF0-F3
150 | 0b_01111, // 0xB0-BF -^
151 | 0b_00000, //
152 | 0b_00000, //
153 | 0b_00000, //
154 | 0b_00000, //
155 | ];
156 |
157 | // The lead byte *may* be 11110xxx, but could also be e.g. 11111xxx.
158 | // -> Only strip off the 1111 prefix
159 | cp &= !0xF0;
160 |
161 | // Now we can verify if it's actually <= 0xF4.
162 | // Curiously, this if condition does a lot of heavy lifting for
163 | // performance (+13%). I think it's just a coincidence though.
164 | if cp > 4 {
165 | return Self::fffd();
166 | }
167 |
168 | let t = self.source[self.offset] as u32;
169 | if TRAIL1_LEAD_BITS[(t >> 4) as usize] & (1 << cp) == 0 {
170 | return Self::fffd();
171 | }
172 | cp = (cp << 6) | (t & 0x3F);
173 |
174 | self.offset += 1;
175 | if self.offset >= self.source.len() {
176 | return Self::fffd();
177 | }
178 |
179 | // UTF8-tail = %x80-BF
180 | let t = (self.source[self.offset] as u32).wrapping_sub(0x80);
181 | if t > 0x3F {
182 | return Self::fffd();
183 | }
184 | cp = (cp << 6) | t;
185 |
186 | self.offset += 1;
187 | if self.offset >= self.source.len() {
188 | return Self::fffd();
189 | }
190 | }
191 |
192 | // SAFETY: All branches above check for `if self.offset >= self.source.len()`
193 | // one way or another. This is here because the compiler doesn't get it otherwise.
194 | unsafe { hint::assert_unchecked(self.offset < self.source.len()) };
195 |
196 | // UTF8-tail = %x80-BF
197 | let t = (self.source[self.offset] as u32).wrapping_sub(0x80);
198 | if t > 0x3F {
199 | return Self::fffd();
200 | }
201 | cp = (cp << 6) | t;
202 |
203 | self.offset += 1;
204 |
205 | // SAFETY: If `cp` wasn't a valid codepoint, we already returned U+FFFD above.
206 | unsafe { char::from_u32_unchecked(cp) }
207 | }
208 |
// Returns U+FFFD, the replacement character emitted for malformed input.
// Routing every error return through this `#[cold]` function doubles as a
// `cold_path` marker, which improves performance by ~5% and reduces code size.
#[cold]
#[inline(always)]
fn fffd() -> char {
    char::REPLACEMENT_CHARACTER
}
216 | }
217 |
218 | impl Iterator for Utf8Chars<'_> {
219 | type Item = char;
220 |
221 | #[inline]
222 | fn next(&mut self) -> Option {
223 | if self.offset >= self.source.len() {
224 | return None;
225 | }
226 |
227 | let c = self.source[self.offset];
228 | self.offset += 1;
229 |
230 | // Fast-passing ASCII allows this function to be trivially inlined everywhere,
231 | // as the full decoder is a little too large for that.
232 | if (c & 0x80) == 0 {
233 | // UTF8-1 = %x00-7F
234 | Some(c as char)
235 | } else {
236 | // Weirdly enough, adding a hint here to assert that `next_slow`
237 | // only returns codepoints >= 0x80 makes `ucd` ~5% slower.
238 | Some(self.next_slow(c))
239 | }
240 | }
241 |
242 | #[inline]
243 | fn size_hint(&self) -> (usize, Option) {
244 | // Lower bound: All remaining bytes are 4-byte sequences.
245 | // Upper bound: All remaining bytes are ASCII.
246 | let remaining = self.source.len() - self.offset;
247 | (remaining / 4, Some(remaining))
248 | }
249 | }
250 |
// Marker impl: once `offset >= source.len()`, `next` returns `None` without
// modifying `offset`, so every later call to `next` takes the same path and
// an exhausted iterator stays exhausted.
impl iter::FusedIterator for Utf8Chars<'_> {}
252 |
#[cfg(test)]
mod tests {
    use super::*;

    /// Cross-checks `Utf8Chars` against the standard library's `utf8_chunks`:
    /// valid characters must come out unchanged, every invalid chunk must
    /// produce exactly one U+FFFD, and the reported offset must track the
    /// number of bytes consumed so far.
    #[test]
    fn test_broken_utf8() {
        // 0xED 0xA0 0x80 would encode the surrogate U+D800, which UTF-8 forbids.
        let source = [b'a', 0xED, 0xA0, 0x80, b'b'];
        let mut chars = Utf8Chars::new(&source, 0);
        let mut consumed = 0;

        for chunk in source.utf8_chunks() {
            // The valid part must decode to the exact same characters.
            for expected in chunk.valid().chars() {
                consumed += expected.len_utf8();
                assert_eq!(chars.next(), Some(expected));
                assert_eq!(chars.offset(), consumed);
            }

            // The invalid part (if any) must collapse into a single U+FFFD.
            let invalid = chunk.invalid();
            if invalid.is_empty() {
                continue;
            }
            consumed += invalid.len();
            assert_eq!(chars.next(), Some('\u{FFFD}'));
            assert_eq!(chars.offset(), consumed);
        }
    }
}
276 |
--------------------------------------------------------------------------------
/tools/grapheme-table-gen/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "grapheme-table-gen"
3 | version = "0.1.0"
4 | edition = "2021"
5 |
6 | [dependencies]
7 | anyhow = "1.0.95"
8 | chrono = "0.4.39"
9 | indoc = "2.0.5"
10 | pico-args = { version = "0.5.0", features = ["eq-separator"] }
11 | rayon = "1.10.0"
12 | roxmltree = { version = "0.20.0", default-features = false, features = ["std"] }
13 |
--------------------------------------------------------------------------------
/tools/grapheme-table-gen/README.md:
--------------------------------------------------------------------------------
1 | # Grapheme Table Generator
2 |
3 | This tool processes Unicode Character Database (UCD) XML files to generate efficient, multi-stage trie lookup tables for properties relevant to terminal applications:
4 | * Grapheme cluster breaking rules
5 | * Line breaking rules (optional)
6 | * Character width properties
7 |
8 | ## Usage
9 |
10 | * Download and extract [ucd.nounihan.grouped.zip](https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip) to obtain `ucd.nounihan.grouped.xml`
11 | * Run some equivalent of:
12 | ```sh
13 | grapheme-table-gen --lang=rust --extended --no-ambiguous --line-breaks path/to/ucd.nounihan.grouped.xml
14 | ```
15 | * Place the result in `src/unicode/tables.rs`
16 |
--------------------------------------------------------------------------------