├── .cargo ├── release-windows-ms.toml └── release.toml ├── .github └── workflows │ ├── ci.yml │ └── winget.yml ├── .gitignore ├── .pipelines ├── release.yml └── tsa.json ├── .vscode ├── launch.json └── tasks.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── SECURITY.md ├── assets ├── Microsoft_logo_(1980).svg ├── com.microsoft.edit.desktop ├── edit.svg ├── edit_hero_image.png ├── editing-traces │ ├── README.md │ └── rustcode.json.zst ├── manpage │ └── edit.1 ├── microsoft.png └── microsoft.sixel ├── benches └── lib.rs ├── build.rs ├── rust-toolchain.toml ├── rustfmt.toml ├── src ├── apperr.rs ├── arena │ ├── debug.rs │ ├── mod.rs │ ├── release.rs │ ├── scratch.rs │ └── string.rs ├── base64.rs ├── bin │ └── edit │ │ ├── documents.rs │ │ ├── draw_editor.rs │ │ ├── draw_filepicker.rs │ │ ├── draw_menubar.rs │ │ ├── draw_statusbar.rs │ │ ├── edit.exe.manifest │ │ ├── localization.rs │ │ ├── main.rs │ │ └── state.rs ├── buffer │ ├── gap_buffer.rs │ ├── line_cache.rs │ ├── mod.rs │ └── navigation.rs ├── cell.rs ├── document.rs ├── framebuffer.rs ├── fuzzy.rs ├── hash.rs ├── helpers.rs ├── icu.rs ├── input.rs ├── lib.rs ├── oklab.rs ├── path.rs ├── simd │ ├── lines_bwd.rs │ ├── lines_fwd.rs │ ├── memchr2.rs │ ├── memset.rs │ └── mod.rs ├── sys │ ├── mod.rs │ ├── unix.rs │ └── windows.rs ├── tui.rs ├── unicode │ ├── measurement.rs │ ├── mod.rs │ ├── tables.rs │ └── utf8.rs └── vt.rs └── tools └── grapheme-table-gen ├── Cargo.lock ├── Cargo.toml ├── README.md └── src ├── main.rs └── rules.rs /.cargo/release-windows-ms.toml: -------------------------------------------------------------------------------- 1 | # vvv The following parts are identical to release.toml vvv 2 | 3 | # Avoid linking with vcruntime140.dll by statically linking everything, 4 | # and then explicitly linking with ucrtbase.dll dynamically. 5 | # We do this, because vcruntime140.dll is an optional Windows component. 
6 | [target.'cfg(target_os = "windows")'] 7 | rustflags = [ 8 | "-Ctarget-feature=+crt-static", 9 | "-Clink-args=/DEFAULTLIB:ucrt.lib", 10 | "-Clink-args=/NODEFAULTLIB:vcruntime.lib", 11 | "-Clink-args=/NODEFAULTLIB:msvcrt.lib", 12 | "-Clink-args=/NODEFAULTLIB:libucrt.lib", 13 | ] 14 | 15 | # The backtrace code for panics in Rust is almost as large as the entire editor. 16 | # = Huge reduction in binary size by removing all that. 17 | [unstable] 18 | build-std = ["std", "panic_abort"] 19 | build-std-features = ["panic_immediate_abort", "optimize_for_size"] 20 | 21 | # vvv The following parts are specific to official Windows builds. vvv 22 | # (The use of internal registries, security features, etc., are mandatory.) 23 | 24 | # Enable shadow stacks: https://learn.microsoft.com/en-us/cpp/build/reference/cetcompat 25 | [target.'cfg(all(target_os = "windows", any(target_arch = "x86", target_arch = "x86_64")))'] 26 | rustflags = ["-Clink-args=/DYNAMICBASE", "-Clink-args=/CETCOMPAT"] 27 | 28 | [registries.Edit_PublicPackages] 29 | index = "sparse+https://pkgs.dev.azure.com/microsoft/Dart/_packaging/Edit_PublicPackages/Cargo/index/" 30 | 31 | [source.crates-io] 32 | replace-with = "Edit_PublicPackages" 33 | -------------------------------------------------------------------------------- /.cargo/release.toml: -------------------------------------------------------------------------------- 1 | # The following is not used by default via .cargo/config.toml, 2 | # because `build-std-features` cannot be keyed by profile. 3 | # This breaks the bench profile which doesn't support panic=abort. 4 | # See: https://github.com/rust-lang/cargo/issues/11214 5 | # See: https://github.com/rust-lang/cargo/issues/13894 6 | 7 | # Avoid linking with vcruntime140.dll by statically linking everything, 8 | # and then explicitly linking with ucrtbase.dll dynamically. 9 | # We do this, because vcruntime140.dll is an optional Windows component. 
10 | [target.'cfg(all(target_os = "windows", target_env = "msvc"))'] 11 | rustflags = [ 12 | "-Ctarget-feature=+crt-static", 13 | "-Clink-args=/DEFAULTLIB:ucrt.lib", 14 | "-Clink-args=/NODEFAULTLIB:vcruntime.lib", 15 | "-Clink-args=/NODEFAULTLIB:msvcrt.lib", 16 | "-Clink-args=/NODEFAULTLIB:libucrt.lib", 17 | ] 18 | 19 | # The backtrace code for panics in Rust is almost as large as the entire editor. 20 | # = Huge reduction in binary size by removing all that. 21 | [unstable] 22 | build-std = ["std", "panic_abort"] 23 | build-std-features = ["panic_immediate_abort", "optimize_for_size"] 24 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | env: 12 | CARGO_TERM_COLOR: always 13 | 14 | jobs: 15 | check: 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | os: 21 | - ubuntu-latest 22 | - windows-latest 23 | steps: 24 | # The Windows runners have autocrlf enabled by default. 25 | - name: Disable git autocrlf 26 | run: git config --global core.autocrlf false 27 | if: matrix.os == 'windows-latest' 28 | - name: Checkout 29 | uses: actions/checkout@v4 30 | # https://github.com/actions/cache/blob/main/examples.md#rust---cargo 31 | # Depends on `Cargo.lock` --> Has to be after checkout. 
32 | - uses: actions/cache@v4 33 | with: 34 | path: | 35 | ~/.cargo/bin/ 36 | ~/.cargo/registry/index/ 37 | ~/.cargo/registry/cache/ 38 | ~/.cargo/git/db/ 39 | target/ 40 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 41 | - name: Install Rust 42 | run: rustup toolchain install nightly --no-self-update --profile minimal --component rust-src,rustfmt,clippy 43 | - name: Check formatting 44 | run: cargo fmt --all -- --check 45 | - name: Run tests 46 | run: cargo test --all-features --all-targets 47 | - name: Run clippy 48 | run: cargo clippy --all-features --all-targets -- --deny warnings 49 | -------------------------------------------------------------------------------- /.github/workflows/winget.yml: -------------------------------------------------------------------------------- 1 | name: Submit release to the WinGet community repository 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | publish-winget: 9 | name: Submit to WinGet repository 10 | 11 | # winget-create is only supported on Windows 12 | runs-on: windows-latest 13 | 14 | # Only submit stable releases 15 | if: ${{ !github.event.release.prerelease }} 16 | steps: 17 | - name: Submit package using wingetcreate 18 | run: | 19 | # Get installer info from release event 20 | $assets = '${{ toJSON(github.event.release.assets) }}' | ConvertFrom-Json 21 | $x64InstallerUrl = $assets | Where-Object -Property name -like '*x86_64-windows.zip' | Select-Object -ExpandProperty browser_download_url 22 | $arm64InstallerUrl = $assets | Where-Object -Property name -like '*aarch64-windows.zip' | Select-Object -ExpandProperty browser_download_url 23 | $packageVersion = (${{ toJSON(github.event.release.tag_name) }}).Trim('v') 24 | 25 | # Update package using wingetcreate 26 | curl.exe -JLO https://aka.ms/wingetcreate/latest 27 | .\wingetcreate.exe update Microsoft.Edit ` 28 | --version $packageVersion ` 29 | --urls $x64InstallerUrl $arm64InstallerUrl ` 30 | --token "${{ secrets.WINGET_TOKEN }}" 
` 31 | --submit 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vs 3 | *.profraw 4 | lcov.info 5 | target 6 | -------------------------------------------------------------------------------- /.pipelines/release.yml: -------------------------------------------------------------------------------- 1 | # Documentation: https://aka.ms/obpipelines 2 | 3 | trigger: none 4 | 5 | parameters: 6 | - name: debug 7 | displayName: Enable debug output 8 | type: boolean 9 | default: false 10 | - name: official 11 | displayName: Whether to build Official or NonOfficial 12 | type: string 13 | default: NonOfficial 14 | values: 15 | - NonOfficial 16 | - Official 17 | - name: createvpack 18 | displayName: Enable vpack creation 19 | type: boolean 20 | default: false 21 | - name: buildPlatforms 22 | type: object 23 | default: 24 | - x86_64-pc-windows-msvc 25 | - aarch64-pc-windows-msvc 26 | 27 | variables: 28 | system.debug: ${{parameters.debug}} 29 | WindowsContainerImage: onebranch.azurecr.io/windows/ltsc2022/vse2022:latest 30 | # CDP_DEFINITION_BUILD_COUNT is needed for onebranch.pipeline.version task. 
31 | # See: https://aka.ms/obpipelines/versioning 32 | CDP_DEFINITION_BUILD_COUNT: $[counter('', 0)] 33 | # LOAD BEARING - the vpack task fails without these 34 | ROOT: $(Build.SourcesDirectory) 35 | REPOROOT: $(Build.SourcesDirectory) 36 | OUTPUTROOT: $(REPOROOT)\out 37 | NUGET_XMLDOC_MODE: none 38 | 39 | resources: 40 | repositories: 41 | - repository: GovernedTemplates 42 | type: git 43 | name: OneBranch.Pipelines/GovernedTemplates 44 | ref: refs/heads/main 45 | 46 | extends: 47 | template: v2/Microsoft.${{parameters.official}}.yml@GovernedTemplates 48 | parameters: 49 | featureFlags: 50 | WindowsHostVersion: 51 | Version: 2022 52 | Network: R1 53 | platform: 54 | name: windows_undocked 55 | product: edit 56 | # https://aka.ms/obpipelines/cloudvault 57 | cloudvault: 58 | enabled: false 59 | # https://aka.ms/obpipelines/sdl 60 | globalSdl: 61 | binskim: 62 | # > Due to some legacy reasons, 1ES PT is scanning full sources directory 63 | # > for BinSkim tool instead of just scanning the output directory [...] 64 | scanOutputDirectoryOnly: true 65 | isNativeCode: true 66 | tsa: 67 | enabled: ${{eq(parameters.official, 'Official')}} 68 | configFile: "$(Build.SourcesDirectory)/.pipelines/tsa.json" 69 | stages: 70 | # Our Build stage will build all three targets in one job, so we don't need 71 | # to repeat most of the boilerplate work in three separate jobs. 72 | - stage: Build 73 | jobs: 74 | - job: Windows 75 | pool: 76 | type: windows 77 | variables: 78 | # Binaries will go here. 79 | # More settings at https://aka.ms/obpipelines/yaml/jobs 80 | ob_outputDirectory: "$(Build.SourcesDirectory)/out" 81 | # The vPack gets created from stuff in here. 
82 | # It will have a structure like: 83 | # .../vpack/ 84 | # - amd64/ 85 | # - edit.exe 86 | # - i386/ 87 | # - edit.exe 88 | # - arm64/ 89 | # - edit.exe 90 | ob_createvpack_enabled: ${{parameters.createvpack}} 91 | ob_createvpack_vpackdirectory: "$(ob_outputDirectory)/vpack" 92 | ob_createvpack_packagename: "windows_edit.$(Build.SourceBranchName)" 93 | ob_createvpack_owneralias: lhecker@microsoft.com 94 | ob_createvpack_description: Microsoft Edit 95 | ob_createvpack_targetDestinationDirectory: "$(Destination)" 96 | ob_createvpack_propsFile: false 97 | ob_createvpack_provData: true 98 | ob_createvpack_versionAs: string 99 | ob_createvpack_version: "$(EditVersion)-$(CDP_DEFINITION_BUILD_COUNT)" 100 | ob_createvpack_metadata: "$(Build.SourceVersion)" 101 | ob_createvpack_topLevelRetries: 0 102 | ob_createvpack_failOnStdErr: true 103 | ob_createvpack_verbose: ${{ parameters.debug }} 104 | # For details on this cargo_target_dir setting, see: 105 | # https://eng.ms/docs/more/rust/topics/onebranch-workaround 106 | CARGO_TARGET_DIR: C:\cargo_target_dir 107 | # msrustup only supports stable toolchains, but this project requires nightly. 108 | # We were told RUSTC_BOOTSTRAP=1 is a supported workaround. 109 | RUSTC_BOOTSTRAP: 1 110 | steps: 111 | # NOTE: Step objects have ordered keys and you MUST have "task" as the first key. 112 | # Objects with ordered keys... lol 113 | - task: RustInstaller@1 114 | displayName: Install Rust toolchain 115 | inputs: 116 | rustVersion: ms-stable 117 | additionalTargets: x86_64-pc-windows-msvc aarch64-pc-windows-msvc 118 | # URL of an Azure Artifacts feed configured with a crates.io upstream. Must be within the current ADO collection. 119 | # NOTE: Azure Artifacts support for Rust is not yet public, but it is enabled for internal ADO organizations. 
120 | # https://learn.microsoft.com/en-us/azure/devops/artifacts/how-to/set-up-upstream-sources?view=azure-devops 121 | cratesIoFeedOverride: sparse+https://pkgs.dev.azure.com/microsoft/Dart/_packaging/Edit_PublicPackages/Cargo/index/ 122 | # URL of an Azure Artifacts NuGet feed configured with the mscodehub Rust feed as an upstream. 123 | # * The feed must be within the current ADO collection. 124 | # * The CI account, usually "Project Collection Build Service (org-name)", must have at least "Collaborator" permission. 125 | # When setting up the upstream NuGet feed, use following Azure Artifacts feed locator: 126 | # azure-feed://mscodehub/Rust/Rust@Release 127 | toolchainFeed: https://pkgs.dev.azure.com/microsoft/_packaging/RustTools/nuget/v3/index.json 128 | - task: CargoAuthenticate@0 129 | displayName: Authenticate with Azure Artifacts 130 | inputs: 131 | configFile: ".cargo/release-windows-ms.toml" 132 | # We recommend making a separate `cargo fetch` step, as some build systems perform 133 | # fetching entirely prior to the build, and perform the build with the network disabled. 
134 | - script: cargo fetch --config .cargo/release-windows-ms.toml 135 | displayName: Fetch crates 136 | - ${{ each platform in parameters.buildPlatforms }}: 137 | - script: cargo build --config .cargo/release-windows-ms.toml --frozen --release --target ${{platform}} 138 | displayName: Build ${{platform}} Release 139 | - task: CopyFiles@2 140 | displayName: Copy files to vpack (${{platform}}) 141 | inputs: 142 | sourceFolder: "$(CARGO_TARGET_DIR)/${{platform}}/release" 143 | ${{ if eq(platform, 'i686-pc-windows-msvc') }}: 144 | targetFolder: "$(ob_createvpack_vpackdirectory)/i386" 145 | ${{ elseif eq(platform, 'x86_64-pc-windows-msvc') }}: 146 | targetFolder: "$(ob_createvpack_vpackdirectory)/amd64" 147 | ${{ else }}: # aarch64-pc-windows-msvc 148 | targetFolder: "$(ob_createvpack_vpackdirectory)/arm64" 149 | contents: | 150 | *.exe 151 | *.pdb 152 | # Extract the version for `ob_createvpack_version`. 153 | - script: |- 154 | @echo off 155 | for /f "tokens=3 delims=- " %%x in ('findstr /c:"version = " Cargo.toml') do ( 156 | echo ##vso[task.setvariable variable=EditVersion]%%~x 157 | goto :EOF 158 | ) 159 | displayName: "Set EditVersion" 160 | - task: onebranch.pipeline.signing@1 161 | displayName: "Sign files" 162 | inputs: 163 | command: "sign" 164 | signing_profile: "external_distribution" 165 | files_to_sign: "**/edit.exe" 166 | search_root: "$(ob_createvpack_vpackdirectory)" 167 | use_testsign: false 168 | in_container: true 169 | 170 | - ${{ each platform in parameters.buildPlatforms }}: 171 | - pwsh: |- 172 | $Dest = New-Item -Type Directory "_staging/${env:RELEASE_NAME}" 173 | Write-Host "Staging files from ${env:VPACK_ROOT} at $Dest" 174 | Get-ChildItem "${env:VPACK_ROOT}\*" -Include *.exe, *.pdb | Copy-Item -Destination $Dest -Verbose 175 | tar.exe -c -v --format=zip -f "$(ob_outputDirectory)\${env:RELEASE_NAME}.zip" -C _staging $env:RELEASE_NAME 176 | env: 177 | RELEASE_NAME: edit-$(EditVersion)-${{ replace(platform, 'pc-windows-msvc', 'windows') }} 178 
| ${{ if eq(platform, 'i686-pc-windows-msvc') }}: 179 | VPACK_ROOT: "$(ob_createvpack_vpackdirectory)/i386" 180 | ${{ elseif eq(platform, 'x86_64-pc-windows-msvc') }}: 181 | VPACK_ROOT: "$(ob_createvpack_vpackdirectory)/amd64" 182 | ${{ else }}: # aarch64-pc-windows-msvc 183 | VPACK_ROOT: "$(ob_createvpack_vpackdirectory)/arm64" 184 | displayName: Produce ${{platform}} release archive 185 | -------------------------------------------------------------------------------- /.pipelines/tsa.json: -------------------------------------------------------------------------------- 1 | { 2 | "instanceUrl": "https://microsoft.visualstudio.com", 3 | "projectName": "OS", 4 | "areaPath": "OS\\Windows Client and Services\\WinPD\\DFX-Developer Fundamentals and Experiences\\DEFT\\SHINE\\Commandline Tooling", 5 | "notificationAliases": ["condev@microsoft.com", "duhowett@microsoft.com"], 6 | "template": "VSTS_Microsoft_OSGS" 7 | } 8 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Launch Debug (Windows)", 6 | "preLaunchTask": "rust: cargo build", 7 | "type": "cppvsdbg", 8 | "request": "launch", 9 | "console": "externalTerminal", 10 | "program": "${workspaceFolder}/target/debug/edit", 11 | "cwd": "${workspaceFolder}", 12 | "args": [ 13 | "${workspaceFolder}/src/bin/edit/main.rs" 14 | ], 15 | }, 16 | { 17 | "name": "Launch Debug (GDB/LLDB)", 18 | "preLaunchTask": "rust: cargo build", 19 | "type": "cppdbg", 20 | "request": "launch", 21 | "program": "${workspaceFolder}/target/debug/edit", 22 | "cwd": "${workspaceFolder}", 23 | "args": [ 24 | "${workspaceFolder}/src/bin/edit/main.rs" 25 | ], 26 | } 27 | ] 28 | } 29 | -------------------------------------------------------------------------------- /.vscode/tasks.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "label": "rust: cargo build", 6 | "type": "process", 7 | "command": "cargo", 8 | "args": [ 9 | "build", 10 | "--package", 11 | "edit", 12 | "--features", 13 | "debug-latency" 14 | ], 15 | "group": { 16 | "kind": "build", 17 | "isDefault": true 18 | }, 19 | "problemMatcher": [ 20 | "$rustc" 21 | ] 22 | } 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | - Employees can reach out at [aka.ms/opensource/moderation-support](https://aka.ms/opensource/moderation-support) 11 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ## Translation improvements 4 | 5 | You can find our translations in [`src/bin/edit/localization.rs`](./src/bin/edit/localization.rs). 6 | Please feel free to open a pull request with your changes at any time. 7 | If you'd like to discuss your changes first, please feel free to open an issue. 8 | 9 | ## Bug reports 10 | 11 | If you find any bugs, we gladly accept pull requests without prior discussion. 12 | Otherwise, you can of course always open an issue for us to look into. 
13 | 14 | ## Feature requests 15 | 16 | Please open a new issue for any feature requests you have in mind. 17 | Keeping the binary size of the editor small is a priority for us and so we may need to discuss any new features first until we have support for plugins. 18 | 19 | ## Code changes 20 | 21 | The project has a focus on a small binary size and sufficient (good) performance. 22 | As such, we generally do not accept pull requests that introduce dependencies (there are always exceptions of course). 23 | Otherwise, you can consider this project a playground for trying out any cool ideas you have. 24 | 25 | The overall architecture of the project can be summarized as follows: 26 | * The underlying text buffer in `src/buffer` doesn't keep track of line breaks in the document. 27 | This is a crucial design aspect that permeates the entire codebase. 28 | 29 | To oversimplify, the *only* state that is kept is the current cursor position. 30 | When the user asks to move to another line, the editor will `O(n)` seek through the underlying document until it has found the corresponding number of line breaks. 31 | * As a result, `src/simd` contains crucial `memchr2` functions to quickly find the next or previous line break (runs at up to >100GB/s). 32 | * Furthermore, `src/unicode` implements a `Utf8Chars` iterator which transparently inserts U+FFFD replacements during iteration (runs at up to 4GB/s). 33 | * Furthermore, `src/unicode` also implements grapheme cluster segmentation and cluster width measurement via its `MeasurementConfig` (runs at up to 600MB/s). 34 | * If word wrap is disabled, `memchr2` is used for all navigation across lines, allowing us to breeze through 1GB large files as if they were 1MB. 35 | * Even if word wrap is enabled, it's still sufficiently smooth thanks to `MeasurementConfig`. This is only possible because these base functions are heavily optimized. 36 | * `src/framebuffer.rs` implements a "framebuffer" like in video games. 
37 | It allows us to draw the UI output into an intermediate buffer first, accumulating all changes and handling things like color blending. 38 | Then, it can compare the accumulated output with the previous frame and only send the necessary changes to the terminal. 39 | * `src/tui.rs` implements an immediate mode UI. Its module implementation gives an overview of how it works and I recommend reading it. 40 | * `src/vt.rs` implements our VT parser. 41 | * `src/sys` contains our platform abstractions. 42 | * Finally, `src/bin/edit` ties everything together. 43 | It's roughly 90% UI code and business logic. 44 | It contains a little bit of VT logic in `setup_terminal`. 45 | 46 | If you have an issue with your terminal, the places of interest are the aforementioned: 47 | * VT parser in `src/vt.rs` 48 | * Platform-specific code in `src/sys` 49 | * And the `setup_terminal` function in `src/bin/edit/main.rs` 50 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "edit" 3 | version = "1.1.0" 4 | edition = "2024" 5 | rust-version = "1.87" 6 | readme = "README.md" 7 | repository = "https://github.com/microsoft/edit" 8 | homepage = "https://github.com/microsoft/edit" 9 | license = "MIT" 10 | categories = ["text-editors"] 11 | 12 | [[bench]] 13 | name = "lib" 14 | harness = false 15 | 16 | [features] 17 | debug-latency = [] 18 | 19 | # We use `opt-level = "s"` as it significantly reduces binary size. 20 | # We could then use the `#[optimize(speed)]` attribute for spot optimizations. 21 | # Unfortunately, that attribute currently doesn't work on intrinsics such as memset. 
22 | [profile.release] 23 | codegen-units = 1 # reduces binary size by ~2% 24 | debug = "full" # No one needs an undebuggable release binary 25 | lto = true # reduces binary size by ~14% 26 | opt-level = "s" # reduces binary size by ~25% 27 | panic = "abort" # reduces binary size by ~50% in combination with -Zbuild-std-features=panic_immediate_abort 28 | split-debuginfo = "packed" # generates a separate *.dwp/*.dSYM so the binary can get stripped 29 | strip = "symbols" # See split-debuginfo - allows us to drop the size by ~65% 30 | incremental = true # Improves re-compile times 31 | 32 | [profile.bench] 33 | codegen-units = 16 # Make compiling criterion faster (16 is the default, but profile.release sets it to 1) 34 | lto = "thin" # Similarly, speed up linking by a ton 35 | 36 | [dependencies] 37 | 38 | [target.'cfg(unix)'.dependencies] 39 | libc = "0.2" 40 | 41 | [target.'cfg(windows)'.build-dependencies] 42 | winresource = "0.1.22" 43 | 44 | [target.'cfg(windows)'.dependencies.windows-sys] 45 | version = "0.59" 46 | features = [ 47 | "Win32_Globalization", 48 | "Win32_Security", 49 | "Win32_Storage_FileSystem", 50 | "Win32_System_Console", 51 | "Win32_System_Diagnostics_Debug", 52 | "Win32_System_IO", 53 | "Win32_System_LibraryLoader", 54 | "Win32_System_Memory", 55 | "Win32_System_Threading", 56 | ] 57 | 58 | [dev-dependencies] 59 | criterion = { version = "0.6", features = ["html_reports"] } 60 | serde = { version = "1.0", features = ["derive"] } 61 | serde_json = { version = "1.0" } 62 | zstd = { version = "0.13", default-features = false } 63 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ![Application Icon for Edit](./assets/edit.svg) Edit 2 | 3 | A simple editor for simple needs. 4 | 5 | This editor pays homage to the classic [MS-DOS Editor](https://en.wikipedia.org/wiki/MS-DOS_Editor), but with a modern interface and input controls similar to VS Code. The goal is to provide an accessible editor that even users largely unfamiliar with terminals can easily use. 
6 | 7 | ![Screenshot of Edit with the About dialog in the foreground](./assets/edit_hero_image.png) 8 | 9 | ## Installation 10 | 11 | [![Packaging status](https://repology.org/badge/vertical-allrepos/microsoft-edit.svg?exclude_unsupported=1)](https://repology.org/project/microsoft-edit/versions) 12 | 13 | You can also download binaries from [our Releases page](https://github.com/microsoft/edit/releases/latest). 14 | 15 | ### Windows 16 | 17 | You can install the latest version with WinGet: 18 | ```powershell 19 | winget install Microsoft.Edit 20 | ``` 21 | 22 | ### Notes to Package Maintainers 23 | 24 | The canonical executable name is "edit" and the alternative name is "msedit". 25 | 26 | We're aware of the potential conflict of "edit" with existing commands and as such recommend naming packages and executables "msedit". 27 | Names such as "ms-edit" should be avoided. 28 | Assigning an "edit" alias is recommended if possible. 29 | 30 | ## Build Instructions 31 | 32 | * [Install Rust](https://www.rust-lang.org/tools/install) 33 | * Install the nightly toolchain: `rustup install nightly` 34 | * Alternatively, set the environment variable `RUSTC_BOOTSTRAP=1` 35 | * Clone the repository 36 | * For a release build, run: `cargo build --config .cargo/release.toml --release` 37 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 
6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. 
Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /assets/Microsoft_logo_(1980).svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | image/svg+xml 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /assets/com.microsoft.edit.desktop: -------------------------------------------------------------------------------- 1 | [Desktop Entry] 2 | Type=Application 3 | Name=Microsoft Edit 4 | GenericName=Text Editor 5 | Comment=A simple editor for simple needs 6 | Icon=edit 7 | Exec=edit %U 8 | Terminal=true 9 | MimeType=text/plain 10 | Keywords=text;editor 11 | -------------------------------------------------------------------------------- /assets/edit.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /assets/edit_hero_image.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/edit/5c5471e9443353b0f60c5380e515c4559e0316b1/assets/edit_hero_image.png -------------------------------------------------------------------------------- /assets/editing-traces/README.md: -------------------------------------------------------------------------------- 1 | # editing-traces 2 | 3 | This directory contains Seph Gentle's ASCII-only `rustcode` editing traces from: https://github.com/josephg/editing-traces 4 | 5 | The trace was provided under the [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) license. 6 | -------------------------------------------------------------------------------- /assets/editing-traces/rustcode.json.zst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/edit/5c5471e9443353b0f60c5380e515c4559e0316b1/assets/editing-traces/rustcode.json.zst -------------------------------------------------------------------------------- /assets/manpage/edit.1: -------------------------------------------------------------------------------- 1 | .TH EDIT 1 "version 1.0" "May 2025" 2 | .SH NAME 3 | edit \- a simple text editor 4 | .SH SYNOPSIS 5 | \fBedit\fP [\fIOPTIONS\fP]... [\fIARGUMENTS\fP]... 6 | .SH DESCRIPTION 7 | edit is a simple text editor inspired by MS-DOS edit. 8 | .SH EDITING 9 | Edit is an interactive mode-less editor. Use Alt-F to access the menus. 10 | .SH ARGUMENTS 11 | .TP 12 | \fIFILE[:LINE[:COLUMN]]\fP 13 | The file to open, optionally with line and column (e.g., \fBfoo.txt:123:45\fP). 14 | .SH OPTIONS 15 | .TP 16 | \fB\-h\fP, \fB\-\-help\fP 17 | Print the help message. 18 | .TP 19 | \fB\-v\fP, \fB\-\-version\fP 20 | Print the version number. 21 | .SH COPYRIGHT 22 | Copyright (c) Microsoft Corporation. 23 | .br 24 | Licensed under the MIT License. 
25 | .SH SEE ALSO 26 | https://github.com/microsoft/edit 27 | -------------------------------------------------------------------------------- /assets/microsoft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/edit/5c5471e9443353b0f60c5380e515c4559e0316b1/assets/microsoft.png -------------------------------------------------------------------------------- /assets/microsoft.sixel: -------------------------------------------------------------------------------- 1 | P;1q"1;1;300;60#0;2;100;100;100#0!42?_ow{}!12?_ow{}!6?_ow{}}!5?_ow{{}}}!17~^NFbpw{}!8~!4}{wwo_!12?_oow{{{!4}!6~!4}{{wwo__!4?_ow{{}}}!23~^Nfrxw{{}}}!9~!4}{{woo_!12?_ow{}!15~^NFbpw{}!17~^NFB@-!36?_ow{}!6~!6?_ow{}!6~??w{}!7~?o{}!10~^^!10NFBpw{}!6~!8N^!9~{_!4?_o{}!8~^^!9N^^!9~{w}!8~^!18NFbx{}!9~^^!8N^^!9~}{o???ow{}!6~!11NFB@GKM!5N!10~!4NFB@-!30?_ow{}!12~_ow{}!12~??!20~FB@!15?!10~!10?r!9~???{!8~NB@!15?@FN!16~!4{!4wooo__!5?_}!8~^FB!16?@F^!8~{o!10~!9o!13?!10~-!24?_ow{}!35~??!19~x!18?!10~?CK[!4{}!9~^B??N!8~x!21?!10~N^^!18~}{o!10~!22?!29~!13?!10~-!18?_ow{}!8~^NFB@?!11~^NFB@?!10~??!10~F!9~}{wo__!12?!10~!5?@BFN^!9~}{wof^!7~}wo__!11?__o{!9~N@!7?!6@Bb!10~N!9~{o__!12?__o{}!8~F@!10~!9B!13?!10~-!12?_ow{}!8~^NFB@!7?!5~^NFB@!7?!10~??!10~??@FN^!20~??!10~!11?@BFN^!23~!7}!10~^NFB~!12}!12~^NB??BFN^!9~!10}!9~^NF@???!10~!22?!5~^NFB@-!6?_ow{}!8~^NFB@!13?FFB@!13?!10F??!10F!7?@@BB!15F??!10F!17?@BFN^!10~|zrfFF!10NFFFBB@@!5?!21FBB@!11?@BBFFNNN!10^NNNFFBB@!8?!10~!22?NFB@-_ow{}!8~^NFB@!119?@BFN^!9~}{wo!88?!10~-!7~^NFB@!131?@BFN^!7~!88?!7~^NF-~^NFB@!143?@BFN^~!88?~^NFB@\ 2 | -------------------------------------------------------------------------------- /benches/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | use std::hint::black_box; 5 | use std::io::Cursor; 6 | use std::{mem, vec}; 7 | 8 | use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; 9 | use edit::helpers::*; 10 | use edit::simd::MemsetSafe; 11 | use edit::{arena, buffer, hash, oklab, simd, unicode}; 12 | use serde::Deserialize; 13 | 14 | #[derive(Deserialize)] 15 | pub struct EditingTracePatch(pub usize, pub usize, pub String); 16 | 17 | #[derive(Deserialize)] 18 | pub struct EditingTraceTransaction { 19 | pub patches: Vec, 20 | } 21 | 22 | #[derive(Deserialize)] 23 | pub struct EditingTraceData { 24 | #[serde(rename = "startContent")] 25 | pub start_content: String, 26 | #[serde(rename = "endContent")] 27 | pub end_content: String, 28 | pub txns: Vec, 29 | } 30 | 31 | fn bench_buffer(c: &mut Criterion) { 32 | let data = include_bytes!("../assets/editing-traces/rustcode.json.zst"); 33 | let data = zstd::decode_all(Cursor::new(data)).unwrap(); 34 | let data: EditingTraceData = serde_json::from_slice(&data).unwrap(); 35 | let mut patches_with_coords = Vec::new(); 36 | 37 | { 38 | let mut tb = buffer::TextBuffer::new(false).unwrap(); 39 | tb.set_crlf(false); 40 | tb.write(data.start_content.as_bytes(), true); 41 | 42 | for t in &data.txns { 43 | for p in &t.patches { 44 | tb.cursor_move_to_offset(p.0); 45 | let beg = tb.cursor_logical_pos(); 46 | 47 | tb.delete(buffer::CursorMovement::Grapheme, p.1 as CoordType); 48 | 49 | tb.write(p.2.as_bytes(), true); 50 | patches_with_coords.push((beg, p.1 as CoordType, p.2.clone())); 51 | } 52 | } 53 | 54 | let mut actual = String::new(); 55 | tb.save_as_string(&mut actual); 56 | assert_eq!(actual, data.end_content); 57 | } 58 | 59 | let bench_gap_buffer = || { 60 | let mut buf = buffer::GapBuffer::new(false).unwrap(); 61 | buf.replace(0..usize::MAX, data.start_content.as_bytes()); 62 | 63 | for t in &data.txns { 64 | for p in &t.patches { 65 | buf.replace(p.0..p.0 + p.1, p.2.as_bytes()); 66 | } 67 | } 68 | 69 | buf 70 | }; 71 | 72 | 
let bench_text_buffer = || { 73 | let mut tb = buffer::TextBuffer::new(false).unwrap(); 74 | tb.set_crlf(false); 75 | tb.write(data.start_content.as_bytes(), true); 76 | 77 | for p in &patches_with_coords { 78 | tb.cursor_move_to_logical(p.0); 79 | tb.delete(buffer::CursorMovement::Grapheme, p.1); 80 | tb.write(p.2.as_bytes(), true); 81 | } 82 | 83 | tb 84 | }; 85 | 86 | // Sanity check: If this fails, the implementation is incorrect. 87 | { 88 | let buf = bench_gap_buffer(); 89 | let mut actual = Vec::new(); 90 | buf.extract_raw(0..usize::MAX, &mut actual, 0); 91 | assert_eq!(actual, data.end_content.as_bytes()); 92 | } 93 | { 94 | let mut tb = bench_text_buffer(); 95 | let mut actual = String::new(); 96 | tb.save_as_string(&mut actual); 97 | assert_eq!(actual, data.end_content); 98 | } 99 | 100 | c.benchmark_group("buffer") 101 | .bench_function(BenchmarkId::new("GapBuffer", "rustcode"), |b| { 102 | b.iter(bench_gap_buffer); 103 | }) 104 | .bench_function(BenchmarkId::new("TextBuffer", "rustcode"), |b| { 105 | b.iter(bench_text_buffer); 106 | }); 107 | } 108 | 109 | fn bench_hash(c: &mut Criterion) { 110 | c.benchmark_group("hash") 111 | .throughput(Throughput::Bytes(8)) 112 | .bench_function(BenchmarkId::new("hash", 8), |b| { 113 | let data = [0u8; 8]; 114 | b.iter(|| hash::hash(0, black_box(&data))) 115 | }) 116 | .throughput(Throughput::Bytes(16)) 117 | .bench_function(BenchmarkId::new("hash", 16), |b| { 118 | let data = [0u8; 16]; 119 | b.iter(|| hash::hash(0, black_box(&data))) 120 | }) 121 | .throughput(Throughput::Bytes(1024)) 122 | .bench_function(BenchmarkId::new("hash", 1024), |b| { 123 | let data = [0u8; 1024]; 124 | b.iter(|| hash::hash(0, black_box(&data))) 125 | }); 126 | } 127 | 128 | fn bench_oklab(c: &mut Criterion) { 129 | c.benchmark_group("oklab") 130 | .bench_function("srgb_to_oklab", |b| b.iter(|| oklab::srgb_to_oklab(black_box(0xff212cbe)))) 131 | .bench_function("oklab_blend", |b| { 132 | b.iter(|| oklab::oklab_blend(black_box(0x7f212cbe), 
black_box(0x7f3aae3f))) 133 | }); 134 | } 135 | 136 | fn bench_simd_lines_fwd(c: &mut Criterion) { 137 | let mut group = c.benchmark_group("simd"); 138 | let buf = vec![b'\n'; 128 * MEBI]; 139 | 140 | for &lines in &[1, 8, 128, KIBI, 128 * KIBI, 128 * MEBI] { 141 | group.throughput(Throughput::Bytes(lines as u64)).bench_with_input( 142 | BenchmarkId::new("lines_fwd", lines), 143 | &lines, 144 | |b, &lines| { 145 | b.iter(|| simd::lines_fwd(black_box(&buf), 0, 0, lines as CoordType)); 146 | }, 147 | ); 148 | } 149 | } 150 | 151 | fn bench_simd_memchr2(c: &mut Criterion) { 152 | let mut group = c.benchmark_group("simd"); 153 | let mut buf = vec![0u8; 128 * MEBI + KIBI]; 154 | 155 | // For small sizes we add a small offset of +8, 156 | // to ensure we also benchmark the non-SIMD tail handling. 157 | // For large sizes, its relative impact is negligible. 158 | for &bytes in &[8usize, 128 + 8, KIBI, 128 * KIBI, 128 * MEBI] { 159 | group.throughput(Throughput::Bytes(bytes as u64 + 1)).bench_with_input( 160 | BenchmarkId::new("memchr2", bytes), 161 | &bytes, 162 | |b, &size| { 163 | buf.fill(b'a'); 164 | buf[size] = b'\n'; 165 | b.iter(|| simd::memchr2(b'\n', b'\r', black_box(&buf), 0)); 166 | }, 167 | ); 168 | } 169 | } 170 | 171 | fn bench_simd_memset(c: &mut Criterion) { 172 | let mut group = c.benchmark_group("simd"); 173 | let name = format!("memset<{}>", std::any::type_name::()); 174 | let size = mem::size_of::(); 175 | let mut buf: Vec = vec![Default::default(); 128 * MEBI / size]; 176 | 177 | // For small sizes we add a small offset of +8, 178 | // to ensure we also benchmark the non-SIMD tail handling. 179 | // For large sizes, its relative impact is negligible. 
180 | for &bytes in &[8usize, 128 + 8, KIBI, 128 * KIBI, 128 * MEBI] { 181 | group.throughput(Throughput::Bytes(bytes as u64)).bench_with_input( 182 | BenchmarkId::new(&name, bytes), 183 | &bytes, 184 | |b, &bytes| { 185 | let slice = unsafe { buf.get_unchecked_mut(..bytes / size) }; 186 | b.iter(|| simd::memset(black_box(slice), Default::default())); 187 | }, 188 | ); 189 | } 190 | } 191 | 192 | fn bench_unicode(c: &mut Criterion) { 193 | let reference = concat!( 194 | "In the quiet twilight, dreams unfold, soft whispers of a story untold.\n", 195 | "月明かりが静かに照らし出し、夢を見る心の奥で詩が静かに囁かれる\n", 196 | "Stars collide in the early light of hope, echoing the silent call of the night.\n", 197 | "夜の静寂、希望と孤独が混ざり合うその中で詩が永遠に続く\n", 198 | ); 199 | let buffer = reference.repeat(10); 200 | let bytes = buffer.as_bytes(); 201 | 202 | c.benchmark_group("unicode::MeasurementConfig::goto_logical") 203 | .throughput(Throughput::Bytes(bytes.len() as u64)) 204 | .bench_function("basic", |b| { 205 | b.iter(|| unicode::MeasurementConfig::new(&bytes).goto_logical(Point::MAX)) 206 | }) 207 | .bench_function("word_wrap", |b| { 208 | b.iter(|| { 209 | unicode::MeasurementConfig::new(black_box(&bytes)) 210 | .with_word_wrap_column(50) 211 | .goto_logical(Point::MAX) 212 | }) 213 | }); 214 | 215 | c.benchmark_group("unicode::Utf8Chars") 216 | .throughput(Throughput::Bytes(bytes.len() as u64)) 217 | .bench_function("next", |b| { 218 | b.iter(|| { 219 | unicode::Utf8Chars::new(bytes, 0).fold(0u32, |acc, ch| acc.wrapping_add(ch as u32)) 220 | }) 221 | }); 222 | } 223 | 224 | fn bench(c: &mut Criterion) { 225 | arena::init(128 * MEBI).unwrap(); 226 | 227 | bench_buffer(c); 228 | bench_hash(c); 229 | bench_oklab(c); 230 | bench_simd_lines_fwd(c); 231 | bench_simd_memchr2(c); 232 | bench_simd_memset::(c); 233 | bench_simd_memset::(c); 234 | bench_unicode(c); 235 | } 236 | 237 | criterion_group!(benches, bench); 238 | criterion_main!(benches); 239 | 
-------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | fn main() { 5 | #[cfg(windows)] 6 | if std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default() == "windows" { 7 | winresource::WindowsResource::new() 8 | .set_manifest_file("src/bin/edit/edit.exe.manifest") 9 | .set("FileDescription", "Microsoft Edit") 10 | .set("LegalCopyright", "© Microsoft Corporation. All rights reserved.") 11 | .compile() 12 | .unwrap(); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "nightly" 3 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | style_edition = "2024" 2 | use_small_heuristics = "Max" 3 | group_imports = "StdExternalCrate" 4 | imports_granularity = "Module" 5 | format_code_in_doc_comments = true 6 | newline_style = "Unix" 7 | use_field_init_shorthand = true 8 | -------------------------------------------------------------------------------- /src/apperr.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | //! Provides a transparent error type for edit. 5 | 6 | use std::{io, result}; 7 | 8 | use crate::sys; 9 | 10 | pub const APP_ICU_MISSING: Error = Error::new_app(0); 11 | 12 | /// Edit's transparent `Result` type. 13 | pub type Result = result::Result; 14 | 15 | /// Edit's transparent `Error` type. 16 | /// Abstracts over system and application errors. 
17 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 18 | pub enum Error { 19 | App(u32), 20 | Icu(u32), 21 | Sys(u32), 22 | } 23 | 24 | impl Error { 25 | pub const fn new_app(code: u32) -> Self { 26 | Self::App(code) 27 | } 28 | 29 | pub const fn new_icu(code: u32) -> Self { 30 | Self::Icu(code) 31 | } 32 | 33 | pub const fn new_sys(code: u32) -> Self { 34 | Self::Sys(code) 35 | } 36 | } 37 | 38 | impl From for Error { 39 | fn from(err: io::Error) -> Self { 40 | sys::io_error_to_apperr(err) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/arena/debug.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | #![allow(clippy::missing_safety_doc, clippy::mut_from_ref)] 5 | 6 | use std::alloc::{AllocError, Allocator, Layout}; 7 | use std::mem::{self, MaybeUninit}; 8 | use std::ptr::NonNull; 9 | 10 | use super::release; 11 | use crate::apperr; 12 | 13 | /// A debug wrapper for [`release::Arena`]. 14 | /// 15 | /// The problem with [`super::ScratchArena`] is that it only "borrows" an underlying 16 | /// [`release::Arena`]. Once the [`super::ScratchArena`] is dropped it resets the watermark 17 | /// of the underlying [`release::Arena`], freeing all allocations done since borrowing it. 18 | /// 19 | /// It is completely valid for the same [`release::Arena`] to be borrowed multiple times at once, 20 | /// *as long as* you only use the most recent borrow. Bad example: 21 | /// ```should_panic 22 | /// use edit::arena::scratch_arena; 23 | /// 24 | /// let mut scratch1 = scratch_arena(None); 25 | /// let mut scratch2 = scratch_arena(None); 26 | /// 27 | /// let foo = scratch1.alloc_uninit::(); 28 | /// 29 | /// // This will also reset `scratch1`'s allocation. 30 | /// drop(scratch2); 31 | /// 32 | /// *foo; // BOOM! ...if it wasn't for our debug wrapper. 
33 | /// ``` 34 | /// 35 | /// To avoid this, this wraps the real [`release::Arena`] in a "debug" one, which pretends as if every 36 | /// instance of itself is a distinct [`release::Arena`] instance. Then we use this "debug" [`release::Arena`] 37 | /// for [`super::ScratchArena`] which allows us to track which borrow is the most recent one. 38 | pub enum Arena { 39 | // Delegate is 'static, because release::Arena requires no lifetime 40 | // annotations, and so this mere debug helper cannot use them either. 41 | Delegated { delegate: &'static release::Arena, borrow: usize }, 42 | Owned { arena: release::Arena }, 43 | } 44 | 45 | impl Drop for Arena { 46 | fn drop(&mut self) { 47 | if let Self::Delegated { delegate, borrow } = self { 48 | let borrows = delegate.borrows.get(); 49 | assert_eq!(*borrow, borrows); 50 | delegate.borrows.set(borrows - 1); 51 | } 52 | } 53 | } 54 | 55 | impl Default for Arena { 56 | fn default() -> Self { 57 | Self::empty() 58 | } 59 | } 60 | 61 | impl Arena { 62 | pub const fn empty() -> Self { 63 | Self::Owned { arena: release::Arena::empty() } 64 | } 65 | 66 | pub fn new(capacity: usize) -> apperr::Result { 67 | Ok(Self::Owned { arena: release::Arena::new(capacity)? }) 68 | } 69 | 70 | pub(super) fn delegated(delegate: &release::Arena) -> Self { 71 | let borrow = delegate.borrows.get() + 1; 72 | delegate.borrows.set(borrow); 73 | Self::Delegated { delegate: unsafe { mem::transmute(delegate) }, borrow } 74 | } 75 | 76 | #[inline] 77 | pub(super) fn delegate_target(&self) -> &release::Arena { 78 | match *self { 79 | Self::Delegated { delegate, borrow } => { 80 | assert!( 81 | borrow == delegate.borrows.get(), 82 | "Arena already borrowed by a newer ScratchArena" 83 | ); 84 | delegate 85 | } 86 | Self::Owned { ref arena } => arena, 87 | } 88 | } 89 | 90 | #[inline] 91 | pub(super) fn delegate_target_unchecked(&self) -> &release::Arena { 92 | match self { 93 | Self::Delegated { delegate, .. 
} => delegate, 94 | Self::Owned { arena } => arena, 95 | } 96 | } 97 | 98 | pub fn offset(&self) -> usize { 99 | self.delegate_target().offset() 100 | } 101 | 102 | pub unsafe fn reset(&self, to: usize) { 103 | unsafe { self.delegate_target().reset(to) } 104 | } 105 | 106 | pub fn alloc_uninit(&self) -> &mut MaybeUninit { 107 | self.delegate_target().alloc_uninit() 108 | } 109 | 110 | pub fn alloc_uninit_slice(&self, count: usize) -> &mut [MaybeUninit] { 111 | self.delegate_target().alloc_uninit_slice(count) 112 | } 113 | } 114 | 115 | unsafe impl Allocator for Arena { 116 | fn allocate(&self, layout: Layout) -> Result, AllocError> { 117 | self.delegate_target().alloc_raw(layout.size(), layout.align()) 118 | } 119 | 120 | fn allocate_zeroed(&self, layout: Layout) -> Result, AllocError> { 121 | self.delegate_target().allocate_zeroed(layout) 122 | } 123 | 124 | // While it is possible to shrink the tail end of the arena, it is 125 | // not very useful given the existence of scoped scratch arenas. 
126 | unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { 127 | unsafe { self.delegate_target().deallocate(ptr, layout) } 128 | } 129 | 130 | unsafe fn grow( 131 | &self, 132 | ptr: NonNull, 133 | old_layout: Layout, 134 | new_layout: Layout, 135 | ) -> Result, AllocError> { 136 | unsafe { self.delegate_target().grow(ptr, old_layout, new_layout) } 137 | } 138 | 139 | unsafe fn grow_zeroed( 140 | &self, 141 | ptr: NonNull, 142 | old_layout: Layout, 143 | new_layout: Layout, 144 | ) -> Result, AllocError> { 145 | unsafe { self.delegate_target().grow_zeroed(ptr, old_layout, new_layout) } 146 | } 147 | 148 | unsafe fn shrink( 149 | &self, 150 | ptr: NonNull, 151 | old_layout: Layout, 152 | new_layout: Layout, 153 | ) -> Result, AllocError> { 154 | unsafe { self.delegate_target().shrink(ptr, old_layout, new_layout) } 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /src/arena/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | //! Arena allocators. Small and fast. 5 | 6 | #[cfg(debug_assertions)] 7 | mod debug; 8 | mod release; 9 | mod scratch; 10 | mod string; 11 | 12 | #[cfg(all(not(doc), debug_assertions))] 13 | pub use self::debug::Arena; 14 | #[cfg(any(doc, not(debug_assertions)))] 15 | pub use self::release::Arena; 16 | pub use self::scratch::{ScratchArena, init, scratch_arena}; 17 | pub use self::string::ArenaString; 18 | -------------------------------------------------------------------------------- /src/arena/release.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | #![allow(clippy::mut_from_ref)] 5 | 6 | use std::alloc::{AllocError, Allocator, Layout}; 7 | use std::cell::Cell; 8 | use std::hint::cold_path; 9 | use std::mem::MaybeUninit; 10 | use std::ptr::{self, NonNull}; 11 | use std::{mem, slice}; 12 | 13 | use crate::helpers::*; 14 | use crate::{apperr, sys}; 15 | 16 | const ALLOC_CHUNK_SIZE: usize = 64 * KIBI; 17 | 18 | /// An arena allocator. 19 | /// 20 | /// If you have never used an arena allocator before, think of it as 21 | /// allocating objects on the stack, but the stack is *really* big. 22 | /// Each time you allocate, memory gets pushed at the end of the stack, 23 | /// each time you deallocate, memory gets popped from the end of the stack. 24 | /// 25 | /// One reason you'd want to use this is obviously performance: It's very simple 26 | /// and so it's also very fast, >10x faster than your system allocator. 27 | /// 28 | /// However, modern allocators such as `mimalloc` are just as fast, so why not use them? 29 | /// Because their performance comes at the cost of binary size and we can't have that. 30 | /// 31 | /// The biggest benefit though is that it sometimes massively simplifies lifetime 32 | /// and memory management. This can best be seen by this project's UI code, which 33 | /// uses an arena to allocate a tree of UI nodes. This is infamously difficult 34 | /// to do in Rust, but not so when you got an arena allocator: 35 | /// All nodes have the same lifetime, so you can just use references. 36 | /// 37 | /// # Safety 38 | /// 39 | /// **Do not** push objects into the arena that require destructors. 40 | /// Destructors are not executed. Use a pool allocator for that. 41 | pub struct Arena { 42 | base: NonNull, 43 | capacity: usize, 44 | commit: Cell, 45 | offset: Cell, 46 | 47 | /// See [`super::debug`], which uses this for borrow tracking. 
48 | #[cfg(debug_assertions)] 49 | pub(super) borrows: Cell, 50 | } 51 | 52 | impl Arena { 53 | pub const fn empty() -> Self { 54 | Self { 55 | base: NonNull::dangling(), 56 | capacity: 0, 57 | commit: Cell::new(0), 58 | offset: Cell::new(0), 59 | 60 | #[cfg(debug_assertions)] 61 | borrows: Cell::new(0), 62 | } 63 | } 64 | 65 | pub fn new(capacity: usize) -> apperr::Result { 66 | let capacity = (capacity.max(1) + ALLOC_CHUNK_SIZE - 1) & !(ALLOC_CHUNK_SIZE - 1); 67 | let base = unsafe { sys::virtual_reserve(capacity)? }; 68 | 69 | Ok(Self { 70 | base, 71 | capacity, 72 | commit: Cell::new(0), 73 | offset: Cell::new(0), 74 | 75 | #[cfg(debug_assertions)] 76 | borrows: Cell::new(0), 77 | }) 78 | } 79 | 80 | pub fn offset(&self) -> usize { 81 | self.offset.get() 82 | } 83 | 84 | /// "Deallocates" the memory in the arena down to the given offset. 85 | /// 86 | /// # Safety 87 | /// 88 | /// Obviously, this is GIGA UNSAFE. It runs no destructors and does not check 89 | /// whether the offset is valid. You better take care when using this function. 90 | pub unsafe fn reset(&self, to: usize) { 91 | // Fill the deallocated memory with 0xDD to aid debugging. 
92 | if cfg!(debug_assertions) && self.offset.get() > to { 93 | let commit = self.commit.get(); 94 | let len = (self.offset.get() + 128).min(commit) - to; 95 | unsafe { slice::from_raw_parts_mut(self.base.add(to).as_ptr(), len).fill(0xDD) }; 96 | } 97 | 98 | self.offset.replace(to); 99 | } 100 | 101 | #[inline] 102 | pub(super) fn alloc_raw( 103 | &self, 104 | bytes: usize, 105 | alignment: usize, 106 | ) -> Result, AllocError> { 107 | let commit = self.commit.get(); 108 | let offset = self.offset.get(); 109 | 110 | let beg = (offset + alignment - 1) & !(alignment - 1); 111 | let end = beg + bytes; 112 | 113 | if end > commit { 114 | return self.alloc_raw_bump(beg, end); 115 | } 116 | 117 | if cfg!(debug_assertions) { 118 | let ptr = unsafe { self.base.add(offset) }; 119 | let len = (end + 128).min(self.commit.get()) - offset; 120 | unsafe { slice::from_raw_parts_mut(ptr.as_ptr(), len).fill(0xCD) }; 121 | } 122 | 123 | self.offset.replace(end); 124 | Ok(unsafe { NonNull::slice_from_raw_parts(self.base.add(beg), bytes) }) 125 | } 126 | 127 | // With the code in `alloc_raw_bump()` out of the way, `alloc_raw()` compiles down to some super tight assembly. 
128 | #[cold] 129 | fn alloc_raw_bump(&self, beg: usize, end: usize) -> Result, AllocError> { 130 | let offset = self.offset.get(); 131 | let commit_old = self.commit.get(); 132 | let commit_new = (end + ALLOC_CHUNK_SIZE - 1) & !(ALLOC_CHUNK_SIZE - 1); 133 | 134 | if commit_new > self.capacity 135 | || unsafe { 136 | sys::virtual_commit(self.base.add(commit_old), commit_new - commit_old).is_err() 137 | } 138 | { 139 | return Err(AllocError); 140 | } 141 | 142 | if cfg!(debug_assertions) { 143 | let ptr = unsafe { self.base.add(offset) }; 144 | let len = (end + 128).min(self.commit.get()) - offset; 145 | unsafe { slice::from_raw_parts_mut(ptr.as_ptr(), len).fill(0xCD) }; 146 | } 147 | 148 | self.commit.replace(commit_new); 149 | self.offset.replace(end); 150 | Ok(unsafe { NonNull::slice_from_raw_parts(self.base.add(beg), end - beg) }) 151 | } 152 | 153 | #[allow(clippy::mut_from_ref)] 154 | pub fn alloc_uninit(&self) -> &mut MaybeUninit { 155 | let bytes = mem::size_of::(); 156 | let alignment = mem::align_of::(); 157 | let ptr = self.alloc_raw(bytes, alignment).unwrap(); 158 | unsafe { ptr.cast().as_mut() } 159 | } 160 | 161 | #[allow(clippy::mut_from_ref)] 162 | pub fn alloc_uninit_slice(&self, count: usize) -> &mut [MaybeUninit] { 163 | let bytes = mem::size_of::() * count; 164 | let alignment = mem::align_of::(); 165 | let ptr = self.alloc_raw(bytes, alignment).unwrap(); 166 | unsafe { slice::from_raw_parts_mut(ptr.cast().as_ptr(), count) } 167 | } 168 | } 169 | 170 | impl Drop for Arena { 171 | fn drop(&mut self) { 172 | if self.base != NonNull::dangling() { 173 | unsafe { sys::virtual_release(self.base, self.capacity) }; 174 | } 175 | } 176 | } 177 | 178 | impl Default for Arena { 179 | fn default() -> Self { 180 | Self::empty() 181 | } 182 | } 183 | 184 | unsafe impl Allocator for Arena { 185 | fn allocate(&self, layout: Layout) -> Result, AllocError> { 186 | self.alloc_raw(layout.size(), layout.align()) 187 | } 188 | 189 | fn allocate_zeroed(&self, layout: 
Layout) -> Result, AllocError> { 190 | let p = self.alloc_raw(layout.size(), layout.align())?; 191 | unsafe { p.cast::().as_ptr().write_bytes(0, p.len()) } 192 | Ok(p) 193 | } 194 | 195 | // While it is possible to shrink the tail end of the arena, it is 196 | // not very useful given the existence of scoped scratch arenas. 197 | unsafe fn deallocate(&self, _: NonNull, _: Layout) {} 198 | 199 | unsafe fn grow( 200 | &self, 201 | ptr: NonNull, 202 | old_layout: Layout, 203 | new_layout: Layout, 204 | ) -> Result, AllocError> { 205 | debug_assert!(new_layout.size() >= old_layout.size()); 206 | debug_assert!(new_layout.align() <= old_layout.align()); 207 | 208 | let new_ptr; 209 | 210 | // Growing the given area is possible if it is at the end of the arena. 211 | if unsafe { ptr.add(old_layout.size()) == self.base.add(self.offset.get()) } { 212 | new_ptr = ptr; 213 | let delta = new_layout.size() - old_layout.size(); 214 | // Assuming that the given ptr/length area is at the end of the arena, 215 | // we can just push more memory to the end of the arena to grow it. 216 | self.alloc_raw(delta, 1)?; 217 | } else { 218 | cold_path(); 219 | 220 | new_ptr = self.allocate(new_layout)?.cast(); 221 | 222 | // SAFETY: It's weird to me that this doesn't assert new_layout.size() >= old_layout.size(), 223 | // but neither does the stdlib code at the time of writing. 224 | // So, assuming that is not needed, this code is safe since it just copies the old data over. 225 | unsafe { 226 | ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_layout.size()); 227 | self.deallocate(ptr, old_layout); 228 | } 229 | } 230 | 231 | Ok(NonNull::slice_from_raw_parts(new_ptr, new_layout.size())) 232 | } 233 | 234 | unsafe fn grow_zeroed( 235 | &self, 236 | ptr: NonNull, 237 | old_layout: Layout, 238 | new_layout: Layout, 239 | ) -> Result, AllocError> { 240 | unsafe { 241 | // SAFETY: Same as grow(). 
242 | let ptr = self.grow(ptr, old_layout, new_layout)?; 243 | 244 | // SAFETY: At this point, `ptr` must be valid for `new_layout.size()` bytes, 245 | // allowing us to safely zero out the delta since `old_layout.size()`. 246 | ptr.cast::() 247 | .add(old_layout.size()) 248 | .write_bytes(0, new_layout.size() - old_layout.size()); 249 | 250 | Ok(ptr) 251 | } 252 | } 253 | 254 | unsafe fn shrink( 255 | &self, 256 | ptr: NonNull, 257 | old_layout: Layout, 258 | new_layout: Layout, 259 | ) -> Result, AllocError> { 260 | debug_assert!(new_layout.size() <= old_layout.size()); 261 | debug_assert!(new_layout.align() <= old_layout.align()); 262 | 263 | let mut len = old_layout.size(); 264 | 265 | // Shrinking the given area is possible if it is at the end of the arena. 266 | if unsafe { ptr.add(len) == self.base.add(self.offset.get()) } { 267 | self.offset.set(self.offset.get() - len + new_layout.size()); 268 | len = new_layout.size(); 269 | } else { 270 | debug_assert!( 271 | false, 272 | "Did you call shrink_to_fit()? Only the last allocation can be shrunk!" 273 | ); 274 | } 275 | 276 | Ok(NonNull::slice_from_raw_parts(ptr, len)) 277 | } 278 | } 279 | -------------------------------------------------------------------------------- /src/arena/scratch.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | use std::ops::Deref; 5 | 6 | #[cfg(debug_assertions)] 7 | use super::debug; 8 | use super::{Arena, release}; 9 | use crate::apperr; 10 | use crate::helpers::*; 11 | 12 | static mut S_SCRATCH: [release::Arena; 2] = 13 | const { [release::Arena::empty(), release::Arena::empty()] }; 14 | 15 | /// Initialize the scratch arenas with a given capacity. 16 | /// Call this before using [`scratch_arena`]. 17 | pub fn init(capacity: usize) -> apperr::Result<()> { 18 | unsafe { 19 | for s in &mut S_SCRATCH[..] 
{ 20 | *s = release::Arena::new(capacity)?; 21 | } 22 | } 23 | Ok(()) 24 | } 25 | 26 | /// Need an arena for temporary allocations? [`scratch_arena`] got you covered. 27 | /// Call [`scratch_arena`] and it'll return an [`Arena`] that resets when it goes out of scope. 28 | /// 29 | /// --- 30 | /// 31 | /// Most methods make just two kinds of allocations: 32 | /// * Interior: Temporary data that can be deallocated when the function returns. 33 | /// * Exterior: Data that is returned to the caller and must remain alive until the caller stops using it. 34 | /// 35 | /// Such methods only have two lifetimes, for which you consequently also only need two arenas. 36 | /// ...even if your method calls other methods recursively! This is because the exterior allocations 37 | /// of a callee are simply interior allocations to the caller, and so on, recursively. 38 | /// 39 | /// This works as long as the two arenas flip/flop between being used as interior/exterior allocator 40 | /// along the callstack. To ensure that is the case, we use a recursion counter in debug builds. 41 | /// 42 | /// This approach was described among others at: https://nullprogram.com/blog/2023/09/27/ 43 | /// 44 | /// # Safety 45 | /// 46 | /// If your function takes an [`Arena`] argument, you **MUST** pass it to `scratch_arena` as `Some(&arena)`. 47 | pub fn scratch_arena(conflict: Option<&Arena>) -> ScratchArena<'static> { 48 | unsafe { 49 | #[cfg(debug_assertions)] 50 | let conflict = conflict.map(|a| a.delegate_target_unchecked()); 51 | 52 | let index = opt_ptr_eq(conflict, Some(&S_SCRATCH[0])) as usize; 53 | let arena = &mut S_SCRATCH[index]; 54 | ScratchArena::new(arena) 55 | } 56 | } 57 | 58 | /// Borrows an [`Arena`] for temporary allocations. 59 | /// 60 | /// See [`scratch_arena`]. 
61 | #[cfg(debug_assertions)] 62 | pub struct ScratchArena<'a> { 63 | arena: debug::Arena, 64 | offset: usize, 65 | _phantom: std::marker::PhantomData<&'a ()>, 66 | } 67 | 68 | #[cfg(not(debug_assertions))] 69 | pub struct ScratchArena<'a> { 70 | arena: &'a Arena, 71 | offset: usize, 72 | } 73 | 74 | #[cfg(debug_assertions)] 75 | impl<'a> ScratchArena<'a> { 76 | fn new(arena: &'a release::Arena) -> Self { 77 | let offset = arena.offset(); 78 | ScratchArena { arena: Arena::delegated(arena), _phantom: std::marker::PhantomData, offset } 79 | } 80 | } 81 | 82 | #[cfg(not(debug_assertions))] 83 | impl<'a> ScratchArena<'a> { 84 | fn new(arena: &'a release::Arena) -> Self { 85 | let offset = arena.offset(); 86 | ScratchArena { arena, offset } 87 | } 88 | } 89 | 90 | impl Drop for ScratchArena<'_> { 91 | fn drop(&mut self) { 92 | unsafe { self.arena.reset(self.offset) }; 93 | } 94 | } 95 | 96 | #[cfg(debug_assertions)] 97 | impl Deref for ScratchArena<'_> { 98 | type Target = debug::Arena; 99 | 100 | fn deref(&self) -> &Self::Target { 101 | &self.arena 102 | } 103 | } 104 | 105 | #[cfg(not(debug_assertions))] 106 | impl Deref for ScratchArena<'_> { 107 | type Target = Arena; 108 | 109 | fn deref(&self) -> &Self::Target { 110 | self.arena 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/arena/string.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | use std::fmt; 5 | use std::ops::{Bound, Deref, DerefMut, RangeBounds}; 6 | 7 | use super::Arena; 8 | use crate::helpers::*; 9 | 10 | /// A custom string type, because `std` lacks allocator support for [`String`]. 11 | /// 12 | /// To keep things simple, this one is hardcoded to [`Arena`]. 
13 | #[derive(Clone)] 14 | pub struct ArenaString<'a> { 15 | vec: Vec, 16 | } 17 | 18 | impl<'a> ArenaString<'a> { 19 | /// Creates a new [`ArenaString`] in the given arena. 20 | #[must_use] 21 | pub const fn new_in(arena: &'a Arena) -> Self { 22 | Self { vec: Vec::new_in(arena) } 23 | } 24 | 25 | #[must_use] 26 | pub fn with_capacity_in(capacity: usize, arena: &'a Arena) -> Self { 27 | Self { vec: Vec::with_capacity_in(capacity, arena) } 28 | } 29 | 30 | /// Turns a [`str`] into an [`ArenaString`]. 31 | #[must_use] 32 | pub fn from_str(arena: &'a Arena, s: &str) -> Self { 33 | let mut res = Self::new_in(arena); 34 | res.push_str(s); 35 | res 36 | } 37 | 38 | /// It says right here that you checked if `bytes` is valid UTF-8 39 | /// and you are sure it is. Presto! Here's an `ArenaString`! 40 | /// 41 | /// # Safety 42 | /// 43 | /// You fool! It says "unchecked" right there. Now the house is burning. 44 | #[inline] 45 | #[must_use] 46 | pub unsafe fn from_utf8_unchecked(bytes: Vec) -> Self { 47 | Self { vec: bytes } 48 | } 49 | 50 | /// Checks whether `text` contains only valid UTF-8. 51 | /// If the entire string is valid, it returns `Ok(text)`. 52 | /// Otherwise, it returns `Err(ArenaString)` with all invalid sequences replaced with U+FFFD. 
53 | pub fn from_utf8_lossy<'s>(arena: &'a Arena, text: &'s [u8]) -> Result<&'s str, Self> { 54 | let mut iter = text.utf8_chunks(); 55 | let Some(mut chunk) = iter.next() else { 56 | return Ok(""); 57 | }; 58 | 59 | let valid = chunk.valid(); 60 | if chunk.invalid().is_empty() { 61 | debug_assert_eq!(valid.len(), text.len()); 62 | return Ok(unsafe { str::from_utf8_unchecked(text) }); 63 | } 64 | 65 | const REPLACEMENT: &str = "\u{FFFD}"; 66 | 67 | let mut res = Self::new_in(arena); 68 | res.reserve(text.len()); 69 | 70 | loop { 71 | res.push_str(chunk.valid()); 72 | if !chunk.invalid().is_empty() { 73 | res.push_str(REPLACEMENT); 74 | } 75 | chunk = match iter.next() { 76 | Some(chunk) => chunk, 77 | None => break, 78 | }; 79 | } 80 | 81 | Err(res) 82 | } 83 | 84 | /// Turns a [`Vec`] into an [`ArenaString`], replacing invalid UTF-8 sequences with U+FFFD. 85 | #[must_use] 86 | pub fn from_utf8_lossy_owned(v: Vec) -> Self { 87 | match Self::from_utf8_lossy(v.allocator(), &v) { 88 | Ok(..) => unsafe { Self::from_utf8_unchecked(v) }, 89 | Err(s) => s, 90 | } 91 | } 92 | 93 | /// It's empty. 94 | pub fn is_empty(&self) -> bool { 95 | self.vec.is_empty() 96 | } 97 | 98 | /// It's lengthy. 99 | pub fn len(&self) -> usize { 100 | self.vec.len() 101 | } 102 | 103 | /// It's capacity. 104 | pub fn capacity(&self) -> usize { 105 | self.vec.capacity() 106 | } 107 | 108 | /// It's a [`String`], now it's a [`str`]. Wow! 109 | pub fn as_str(&self) -> &str { 110 | unsafe { str::from_utf8_unchecked(self.vec.as_slice()) } 111 | } 112 | 113 | /// It's a [`String`], now it's a [`str`]. And it's mutable! WOW! 114 | pub fn as_mut_str(&mut self) -> &mut str { 115 | unsafe { str::from_utf8_unchecked_mut(self.vec.as_mut_slice()) } 116 | } 117 | 118 | /// Now it's bytes! 119 | pub fn as_bytes(&self) -> &[u8] { 120 | self.vec.as_slice() 121 | } 122 | 123 | /// Returns a mutable reference to the contents of this `ArenaString`.
124 | /// 125 | /// # Safety 126 | /// 127 | /// The underlying `&mut Vec` allows writing bytes which are not valid UTF-8. 128 | pub unsafe fn as_mut_vec(&mut self) -> &mut Vec { 129 | &mut self.vec 130 | } 131 | 132 | /// Reserves *additional* memory. For you old folks out there (totally not me), 133 | /// this is different from C++'s `reserve` which reserves a total size. 134 | pub fn reserve(&mut self, additional: usize) { 135 | self.vec.reserve(additional) 136 | } 137 | 138 | /// Just like [`ArenaString::reserve`], but it doesn't overallocate. 139 | pub fn reserve_exact(&mut self, additional: usize) { 140 | self.vec.reserve_exact(additional) 141 | } 142 | 143 | /// Now it's small! Alarming! 144 | /// 145 | /// *Do not* call this unless this string is the last thing on the arena. 146 | /// Arenas are stacks, they can't deallocate what's in the middle. 147 | pub fn shrink_to_fit(&mut self) { 148 | self.vec.shrink_to_fit() 149 | } 150 | 151 | /// To no surprise, this clears the string. 152 | pub fn clear(&mut self) { 153 | self.vec.clear() 154 | } 155 | 156 | /// Append some text. 157 | pub fn push_str(&mut self, string: &str) { 158 | self.vec.extend_from_slice(string.as_bytes()) 159 | } 160 | 161 | /// Append a single character. 162 | #[inline] 163 | pub fn push(&mut self, ch: char) { 164 | match ch.len_utf8() { 165 | 1 => self.vec.push(ch as u8), 166 | _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()), 167 | } 168 | } 169 | 170 | /// Same as `push(char)` but with a specified number of character copies. 171 | /// Shockingly absent from the standard library. 172 | pub fn push_repeat(&mut self, ch: char, total_copies: usize) { 173 | if total_copies == 0 { 174 | return; 175 | } 176 | 177 | let buf = unsafe { self.as_mut_vec() }; 178 | 179 | if ch.is_ascii() { 180 | // Compiles down to `memset()`.
181 | buf.extend(std::iter::repeat_n(ch as u8, total_copies)); 182 | } else { 183 | // Implements efficient string padding using exponential duplication: each iteration copies the bytes appended so far (capped at what is still missing), doubling them until `final_len` is reached. 184 | let mut utf8_buf = [0; 4]; 185 | let utf8 = ch.encode_utf8(&mut utf8_buf).as_bytes(); 186 | let initial_len = buf.len(); 187 | let added_len = utf8.len() * total_copies; 188 | let final_len = initial_len + added_len; 189 | 190 | buf.reserve(added_len); 191 | buf.extend_from_slice(utf8); 192 | 193 | while buf.len() != final_len { 194 | let end = (final_len - buf.len() + initial_len).min(buf.len()); 195 | buf.extend_from_within(initial_len..end); 196 | } 197 | } 198 | } 199 | 200 | /// Replaces a range of characters with a new string. 201 | pub fn replace_range>(&mut self, range: R, replace_with: &str) { 202 | match range.start_bound() { 203 | Bound::Included(&n) => assert!(self.is_char_boundary(n)), 204 | Bound::Excluded(&n) => assert!(self.is_char_boundary(n + 1)), 205 | Bound::Unbounded => {} 206 | }; 207 | match range.end_bound() { 208 | Bound::Included(&n) => assert!(self.is_char_boundary(n + 1)), 209 | Bound::Excluded(&n) => assert!(self.is_char_boundary(n)), 210 | Bound::Unbounded => {} 211 | }; 212 | unsafe { self.as_mut_vec() }.replace_range(range, replace_with.as_bytes()); 213 | } 214 | 215 | /// Finds `old` in the string and replaces it with `new`. 216 | /// Only performs one replacement.
217 | pub fn replace_once_in_place(&mut self, old: &str, new: &str) { 218 | if let Some(beg) = self.find(old) { 219 | unsafe { self.as_mut_vec() }.replace_range(beg..beg + old.len(), new.as_bytes()); 220 | } 221 | } 222 | } 223 | 224 | impl fmt::Debug for ArenaString<'_> { 225 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 226 | fmt::Debug::fmt(&**self, f) 227 | } 228 | } 229 | 230 | impl PartialEq<&str> for ArenaString<'_> { 231 | fn eq(&self, other: &&str) -> bool { 232 | self.as_str() == *other 233 | } 234 | } 235 | 236 | impl Deref for ArenaString<'_> { 237 | type Target = str; 238 | 239 | fn deref(&self) -> &Self::Target { 240 | self.as_str() 241 | } 242 | } 243 | 244 | impl DerefMut for ArenaString<'_> { 245 | fn deref_mut(&mut self) -> &mut Self::Target { 246 | self.as_mut_str() 247 | } 248 | } 249 | 250 | impl fmt::Display for ArenaString<'_> { 251 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 252 | f.write_str(self.as_str()) 253 | } 254 | } 255 | 256 | impl fmt::Write for ArenaString<'_> { 257 | #[inline] 258 | fn write_str(&mut self, s: &str) -> fmt::Result { 259 | self.push_str(s); 260 | Ok(()) 261 | } 262 | 263 | #[inline] 264 | fn write_char(&mut self, c: char) -> fmt::Result { 265 | self.push(c); 266 | Ok(()) 267 | } 268 | } 269 | 270 | #[macro_export] 271 | macro_rules! arena_format { 272 | ($arena:expr, $($arg:tt)*) => {{ 273 | use std::fmt::Write as _; 274 | let mut output = $crate::arena::ArenaString::new_in($arena); 275 | output.write_fmt(format_args!($($arg)*)).unwrap(); 276 | output 277 | }} 278 | } 279 | -------------------------------------------------------------------------------- /src/base64.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | //! Base64 facilities. 
5 | 6 | use crate::arena::ArenaString; 7 | 8 | const CHARSET: [u8; 64] = *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 9 | 10 | /// One aspect of base64 is that the encoded length can be 11 | /// calculated accurately in advance, which is what this returns. 12 | #[inline] 13 | pub fn encode_len(src_len: usize) -> usize { 14 | src_len.div_ceil(3) * 4 15 | } 16 | 17 | /// Encodes the given bytes as base64 and appends them to the destination string. 18 | pub fn encode(dst: &mut ArenaString, src: &[u8]) { 19 | unsafe { 20 | let mut inp = src.as_ptr(); 21 | let mut remaining = src.len(); 22 | let dst = dst.as_mut_vec(); 23 | 24 | let out_len = encode_len(src.len()); 25 | // ... we can then use this fact to reserve space all at once. 26 | dst.reserve(out_len); 27 | 28 | // SAFETY: Getting a pointer to the reserved space is only safe 29 | // *after* calling `reserve()` as it may change the pointer. 30 | let mut out = dst.as_mut_ptr().add(dst.len()); 31 | 32 | if remaining != 0 { 33 | // Translate chunks of 3 source bytes into 4 base64-encoded bytes. 34 | while remaining > 3 { 35 | // SAFETY: Thanks to `remaining > 3`, reading 4 bytes at once is safe. 36 | // This improves performance massively over a byte-by-byte approach, 37 | // because it allows us to byte-swap the read and use simple bit-shifts below. 38 | let val = u32::from_be((inp as *const u32).read_unaligned()); 39 | inp = inp.add(3); 40 | remaining -= 3; 41 | 42 | *out = CHARSET[(val >> 26) as usize]; 43 | out = out.add(1); 44 | *out = CHARSET[(val >> 20) as usize & 0x3f]; 45 | out = out.add(1); 46 | *out = CHARSET[(val >> 14) as usize & 0x3f]; 47 | out = out.add(1); 48 | *out = CHARSET[(val >> 8) as usize & 0x3f]; 49 | out = out.add(1); 50 | } 51 | 52 | // Convert the remaining 1-3 bytes. 53 | let mut in1 = 0; 54 | let mut in2 = 0; 55 | 56 | // We can simplify the following logic by assuming that there's only 1 57 | // byte left. 
If there's >1 byte left, these two '=' will be overwritten. 58 | *out.add(3) = b'='; 59 | *out.add(2) = b'='; 60 | 61 | if remaining >= 3 { 62 | in2 = inp.add(2).read() as usize; 63 | *out.add(3) = CHARSET[in2 & 0x3f]; 64 | } 65 | 66 | if remaining >= 2 { 67 | in1 = inp.add(1).read() as usize; 68 | *out.add(2) = CHARSET[(in1 << 2 | in2 >> 6) & 0x3f]; 69 | } 70 | 71 | let in0 = inp.add(0).read() as usize; 72 | *out.add(1) = CHARSET[(in0 << 4 | in1 >> 4) & 0x3f]; 73 | *out.add(0) = CHARSET[in0 >> 2]; 74 | } 75 | 76 | dst.set_len(dst.len() + out_len); 77 | } 78 | } 79 | 80 | #[cfg(test)] 81 | mod tests { 82 | use super::encode; 83 | use crate::arena::{Arena, ArenaString}; 84 | 85 | #[test] 86 | fn test_basic() { 87 | let arena = Arena::new(4 * 1024).unwrap(); 88 | let enc = |s: &[u8]| { 89 | let mut dst = ArenaString::new_in(&arena); 90 | encode(&mut dst, s); 91 | dst 92 | }; 93 | assert_eq!(enc(b""), ""); 94 | assert_eq!(enc(b"a"), "YQ=="); 95 | assert_eq!(enc(b"ab"), "YWI="); 96 | assert_eq!(enc(b"abc"), "YWJj"); 97 | assert_eq!(enc(b"abcd"), "YWJjZA=="); 98 | assert_eq!(enc(b"abcde"), "YWJjZGU="); 99 | assert_eq!(enc(b"abcdef"), "YWJjZGVm"); 100 | assert_eq!(enc(b"abcdefg"), "YWJjZGVmZw=="); 101 | assert_eq!(enc(b"abcdefgh"), "YWJjZGVmZ2g="); 102 | assert_eq!(enc(b"abcdefghi"), "YWJjZGVmZ2hp"); 103 | assert_eq!(enc(b"abcdefghij"), "YWJjZGVmZ2hpag=="); 104 | assert_eq!(enc(b"abcdefghijk"), "YWJjZGVmZ2hpams="); 105 | assert_eq!(enc(b"abcdefghijkl"), "YWJjZGVmZ2hpamts"); 106 | assert_eq!(enc(b"abcdefghijklm"), "YWJjZGVmZ2hpamtsbQ=="); 107 | assert_eq!(enc(b"abcdefghijklmN"), "YWJjZGVmZ2hpamtsbU4="); 108 | assert_eq!(enc(b"abcdefghijklmNO"), "YWJjZGVmZ2hpamtsbU5P"); 109 | assert_eq!(enc(b"abcdefghijklmNOP"), "YWJjZGVmZ2hpamtsbU5PUA=="); 110 | assert_eq!(enc(b"abcdefghijklmNOPQ"), "YWJjZGVmZ2hpamtsbU5PUFE="); 111 | assert_eq!(enc(b"abcdefghijklmNOPQR"), "YWJjZGVmZ2hpamtsbU5PUFFS"); 112 | assert_eq!(enc(b"abcdefghijklmNOPQRS"), "YWJjZGVmZ2hpamtsbU5PUFFSUw=="); 113 | 
assert_eq!(enc(b"abcdefghijklmNOPQRST"), "YWJjZGVmZ2hpamtsbU5PUFFSU1Q="); 114 | assert_eq!(enc(b"abcdefghijklmNOPQRSTU"), "YWJjZGVmZ2hpamtsbU5PUFFSU1RV"); 115 | assert_eq!(enc(b"abcdefghijklmNOPQRSTUV"), "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVg=="); 116 | assert_eq!(enc(b"abcdefghijklmNOPQRSTUVW"), "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVlc="); 117 | assert_eq!(enc(b"abcdefghijklmNOPQRSTUVWX"), "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVldY"); 118 | assert_eq!(enc(b"abcdefghijklmNOPQRSTUVWXY"), "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVldYWQ=="); 119 | assert_eq!(enc(b"abcdefghijklmNOPQRSTUVWXYZ"), "YWJjZGVmZ2hpamtsbU5PUFFSU1RVVldYWVo="); 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/bin/edit/documents.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | use std::collections::LinkedList; 5 | use std::ffi::OsStr; 6 | use std::fs::File; 7 | use std::path::{Path, PathBuf}; 8 | 9 | use edit::buffer::{RcTextBuffer, TextBuffer}; 10 | use edit::helpers::{CoordType, Point}; 11 | use edit::{apperr, path, sys}; 12 | 13 | use crate::state::DisplayablePathBuf; 14 | 15 | pub struct Document { 16 | pub buffer: RcTextBuffer, 17 | pub path: Option, 18 | pub dir: Option, 19 | pub filename: String, 20 | pub file_id: Option, 21 | pub new_file_counter: usize, 22 | } 23 | 24 | impl Document { 25 | pub fn save(&mut self, new_path: Option) -> apperr::Result<()> { 26 | let path = new_path.as_deref().unwrap_or_else(|| self.path.as_ref().unwrap().as_path()); 27 | let mut file = DocumentManager::open_for_writing(path)?; 28 | 29 | { 30 | let mut tb = self.buffer.borrow_mut(); 31 | tb.write_file(&mut file)?; 32 | } 33 | 34 | if let Ok(id) = sys::file_id(None, path) { 35 | self.file_id = Some(id); 36 | } 37 | 38 | if let Some(path) = new_path { 39 | self.set_path(path); 40 | } 41 | 42 | Ok(()) 43 | } 44 | 45 | pub fn reread(&mut self, encoding: 
Option<&'static str>) -> apperr::Result<()> { 46 | let path = self.path.as_ref().unwrap().as_path(); 47 | let mut file = DocumentManager::open_for_reading(path)?; 48 | 49 | { 50 | let mut tb = self.buffer.borrow_mut(); 51 | tb.read_file(&mut file, encoding)?; 52 | } 53 | 54 | if let Ok(id) = sys::file_id(None, path) { 55 | self.file_id = Some(id); 56 | } 57 | 58 | Ok(()) 59 | } 60 | 61 | fn set_path(&mut self, path: PathBuf) { 62 | let filename = path.file_name().unwrap_or_default().to_string_lossy().into_owned(); 63 | let dir = path.parent().map(ToOwned::to_owned).unwrap_or_default(); 64 | self.filename = filename; 65 | self.dir = Some(DisplayablePathBuf::from_path(dir)); 66 | self.path = Some(path); 67 | self.update_file_mode(); 68 | } 69 | 70 | fn update_file_mode(&mut self) { 71 | let mut tb = self.buffer.borrow_mut(); 72 | tb.set_ruler(if self.filename == "COMMIT_EDITMSG" { 72 } else { 0 }); 73 | } 74 | } 75 | 76 | #[derive(Default)] 77 | pub struct DocumentManager { 78 | list: LinkedList, 79 | } 80 | 81 | impl DocumentManager { 82 | #[inline] 83 | pub fn len(&self) -> usize { 84 | self.list.len() 85 | } 86 | 87 | #[inline] 88 | pub fn active(&self) -> Option<&Document> { 89 | self.list.front() 90 | } 91 | 92 | #[inline] 93 | pub fn active_mut(&mut self) -> Option<&mut Document> { 94 | self.list.front_mut() 95 | } 96 | 97 | #[inline] 98 | pub fn update_active bool>(&mut self, mut func: F) -> bool { 99 | let mut cursor = self.list.cursor_front_mut(); 100 | while let Some(doc) = cursor.current() { 101 | if func(doc) { 102 | let list = cursor.remove_current_as_list().unwrap(); 103 | self.list.cursor_front_mut().splice_before(list); 104 | return true; 105 | } 106 | cursor.move_next(); 107 | } 108 | false 109 | } 110 | 111 | pub fn remove_active(&mut self) { 112 | self.list.pop_front(); 113 | } 114 | 115 | pub fn add_untitled(&mut self) -> apperr::Result<&mut Document> { 116 | let buffer = Self::create_buffer()?; 117 | let mut doc = Document { 118 | buffer, 119 | 
path: None, 120 | dir: Default::default(), 121 | filename: Default::default(), 122 | file_id: None, 123 | new_file_counter: 0, 124 | }; 125 | self.gen_untitled_name(&mut doc); 126 | 127 | self.list.push_front(doc); 128 | Ok(self.list.front_mut().unwrap()) 129 | } 130 | 131 | pub fn gen_untitled_name(&self, doc: &mut Document) { 132 | let mut new_file_counter = 0; 133 | for doc in &self.list { 134 | new_file_counter = new_file_counter.max(doc.new_file_counter); 135 | } 136 | new_file_counter += 1; 137 | 138 | doc.filename = format!("Untitled-{new_file_counter}.txt"); 139 | doc.new_file_counter = new_file_counter; 140 | } 141 | 142 | pub fn add_file_path(&mut self, path: &Path) -> apperr::Result<&mut Document> { 143 | let (path, goto) = Self::parse_filename_goto(path); 144 | let path = path::normalize(path); 145 | 146 | let mut file = match Self::open_for_reading(&path) { 147 | Ok(file) => Some(file), 148 | Err(err) if sys::apperr_is_not_found(err) => None, 149 | Err(err) => return Err(err), 150 | }; 151 | 152 | let file_id = if file.is_some() { Some(sys::file_id(file.as_ref(), &path)?) } else { None }; 153 | 154 | // Check if the file is already open. 
155 | if file_id.is_some() && self.update_active(|doc| doc.file_id == file_id) { 156 | let doc = self.active_mut().unwrap(); 157 | if let Some(goto) = goto { 158 | doc.buffer.borrow_mut().cursor_move_to_logical(goto); 159 | } 160 | return Ok(doc); 161 | } 162 | 163 | let buffer = Self::create_buffer()?; 164 | { 165 | if let Some(file) = &mut file { 166 | let mut tb = buffer.borrow_mut(); 167 | tb.read_file(file, None)?; 168 | 169 | if let Some(goto) = goto 170 | && goto != Default::default() 171 | { 172 | tb.cursor_move_to_logical(goto); 173 | } 174 | } 175 | } 176 | 177 | let mut doc = Document { 178 | buffer, 179 | path: None, 180 | dir: None, 181 | filename: Default::default(), 182 | file_id, 183 | new_file_counter: 0, 184 | }; 185 | doc.set_path(path); 186 | 187 | if let Some(active) = self.active() 188 | && active.path.is_none() 189 | && active.file_id.is_none() 190 | && !active.buffer.borrow().is_dirty() 191 | { 192 | // If the current document is a pristine Untitled document with no 193 | // name and no ID, replace it with the new document. 194 | self.remove_active(); 195 | } 196 | 197 | self.list.push_front(doc); 198 | Ok(self.list.front_mut().unwrap()) 199 | } 200 | 201 | pub fn reflow_all(&self) { 202 | for doc in &self.list { 203 | let mut tb = doc.buffer.borrow_mut(); 204 | tb.reflow(); 205 | } 206 | } 207 | 208 | pub fn open_for_reading(path: &Path) -> apperr::Result { 209 | File::open(path).map_err(apperr::Error::from) 210 | } 211 | 212 | pub fn open_for_writing(path: &Path) -> apperr::Result { 213 | File::create(path).map_err(apperr::Error::from) 214 | } 215 | 216 | fn create_buffer() -> apperr::Result { 217 | let buffer = TextBuffer::new_rc(false)?; 218 | { 219 | let mut tb = buffer.borrow_mut(); 220 | tb.set_insert_final_newline(!cfg!(windows)); // As mandated by POSIX. 221 | tb.set_margin_enabled(true); 222 | tb.set_line_highlight_enabled(true); 223 | } 224 | Ok(buffer) 225 | } 226 | 227 | // Parse a filename in the form of "filename:line:char". 
228 | // Returns the path with the ":line:char" suffix stripped, plus the zero-based line/char position if such a suffix was found. 229 | fn parse_filename_goto(path: &Path) -> (&Path, Option) { 230 | fn parse(s: &[u8]) -> Option { 231 | if s.is_empty() { 232 | return None; 233 | } 234 | 235 | let mut num: CoordType = 0; 236 | for &b in s { 237 | if !b.is_ascii_digit() { 238 | return None; 239 | } 240 | let digit = (b - b'0') as CoordType; 241 | num = num.checked_mul(10)?.checked_add(digit)?; 242 | } 243 | Some(num) 244 | } 245 | 246 | fn find_colon_rev(bytes: &[u8], offset: usize) -> Option { 247 | (0..offset.min(bytes.len())).rev().find(|&i| bytes[i] == b':') 248 | } 249 | 250 | let bytes = path.as_os_str().as_encoded_bytes(); 251 | let colend = match find_colon_rev(bytes, bytes.len()) { 252 | // Reject filenames that would result in an empty filename after stripping off the :line:char suffix. 253 | // For instance, a filename like ":123" is returned as-is, without a goto position. 254 | Some(colend) if colend > 0 => colend, 255 | _ => return (path, None), 256 | }; 257 | 258 | let last = match parse(&bytes[colend + 1..]) { 259 | Some(last) => last, 260 | None => return (path, None), 261 | }; 262 | let last = (last - 1).max(0); 263 | let mut len = colend; 264 | let mut goto = Point { x: 0, y: last }; 265 | 266 | if let Some(colbeg) = find_colon_rev(bytes, colend) { 267 | // Same here: Don't allow empty filenames. 268 | if colbeg != 0 269 | && let Some(first) = parse(&bytes[colbeg + 1..colend]) 270 | { 271 | let first = (first - 1).max(0); 272 | len = colbeg; 273 | goto = Point { x: last, y: first }; 274 | } 275 | } 276 | 277 | // Strip off the :line:char suffix.
278 | let path = &bytes[..len]; 279 | let path = unsafe { OsStr::from_encoded_bytes_unchecked(path) }; 280 | let path = Path::new(path); 281 | (path, Some(goto)) 282 | } 283 | } 284 | 285 | #[cfg(test)] 286 | mod tests { 287 | use super::*; 288 | 289 | #[test] 290 | fn test_parse_last_numbers() { 291 | fn parse(s: &str) -> (&str, Option) { 292 | let (p, g) = DocumentManager::parse_filename_goto(Path::new(s)); 293 | (p.to_str().unwrap(), g) 294 | } 295 | 296 | assert_eq!(parse("123"), ("123", None)); 297 | assert_eq!(parse("abc"), ("abc", None)); 298 | assert_eq!(parse(":123"), (":123", None)); 299 | assert_eq!(parse("abc:123"), ("abc", Some(Point { x: 0, y: 122 }))); 300 | assert_eq!(parse("45:123"), ("45", Some(Point { x: 0, y: 122 }))); 301 | assert_eq!(parse(":45:123"), (":45", Some(Point { x: 0, y: 122 }))); 302 | assert_eq!(parse("abc:45:123"), ("abc", Some(Point { x: 122, y: 44 }))); 303 | assert_eq!(parse("abc:def:123"), ("abc:def", Some(Point { x: 0, y: 122 }))); 304 | assert_eq!(parse("1:2:3"), ("1", Some(Point { x: 2, y: 1 }))); 305 | assert_eq!(parse("::3"), (":", Some(Point { x: 0, y: 2 }))); 306 | assert_eq!(parse("1::3"), ("1:", Some(Point { x: 0, y: 2 }))); 307 | assert_eq!(parse(""), ("", None)); 308 | assert_eq!(parse(":"), (":", None)); 309 | assert_eq!(parse("::"), ("::", None)); 310 | assert_eq!(parse("a:1"), ("a", Some(Point { x: 0, y: 0 }))); 311 | assert_eq!(parse("1:a"), ("1:a", None)); 312 | assert_eq!(parse("file.txt:10"), ("file.txt", Some(Point { x: 0, y: 9 }))); 313 | assert_eq!(parse("file.txt:10:5"), ("file.txt", Some(Point { x: 4, y: 9 }))); 314 | } 315 | } 316 | -------------------------------------------------------------------------------- /src/bin/edit/draw_filepicker.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | use std::cmp::Ordering; 5 | use std::fs; 6 | use std::path::{Path, PathBuf}; 7 | 8 | use edit::framebuffer::IndexedColor; 9 | use edit::helpers::*; 10 | use edit::input::vk; 11 | use edit::tui::*; 12 | use edit::{icu, path}; 13 | 14 | use crate::localization::*; 15 | use crate::state::*; 16 | 17 | pub fn draw_file_picker(ctx: &mut Context, state: &mut State) { 18 | // The save dialog is pre-filled with the current document filename. 19 | if state.wants_file_picker == StateFilePicker::SaveAs { 20 | state.wants_file_picker = StateFilePicker::SaveAsShown; 21 | 22 | if state.file_picker_pending_name.as_os_str().is_empty() { 23 | state.file_picker_pending_name = 24 | state.documents.active().map_or("Untitled.txt", |doc| doc.filename.as_str()).into(); 25 | } 26 | } 27 | 28 | let width = (ctx.size().width - 20).max(10); 29 | let height = (ctx.size().height - 10).max(10); 30 | let mut doit = None; 31 | let mut done = false; 32 | 33 | ctx.modal_begin( 34 | "file-picker", 35 | if state.wants_file_picker == StateFilePicker::Open { 36 | loc(LocId::FileOpen) 37 | } else { 38 | loc(LocId::FileSaveAs) 39 | }, 40 | ); 41 | ctx.attr_intrinsic_size(Size { width, height }); 42 | { 43 | let mut activated = false; 44 | 45 | ctx.table_begin("path"); 46 | ctx.table_set_columns(&[0, COORD_TYPE_SAFE_MAX]); 47 | ctx.table_set_cell_gap(Size { width: 1, height: 0 }); 48 | ctx.attr_padding(Rect::two(1, 1)); 49 | ctx.inherit_focus(); 50 | { 51 | ctx.table_next_row(); 52 | 53 | ctx.label("dir-label", loc(LocId::SaveAsDialogPathLabel)); 54 | ctx.label("dir", state.file_picker_pending_dir.as_str()); 55 | ctx.attr_overflow(Overflow::TruncateMiddle); 56 | 57 | ctx.table_next_row(); 58 | ctx.inherit_focus(); 59 | 60 | ctx.label("name-label", loc(LocId::SaveAsDialogNameLabel)); 61 | ctx.editline("name", &mut state.file_picker_pending_name); 62 | ctx.inherit_focus(); 63 | if ctx.is_focused() && ctx.consume_shortcut(vk::RETURN) { 64 | activated = true; 65 | } 66 | } 67 | ctx.table_end(); 68 | 69 
| if state.file_picker_entries.is_none() { 70 | draw_dialog_saveas_refresh_files(state); 71 | } 72 | 73 | let files = state.file_picker_entries.as_ref().unwrap(); 74 | 75 | ctx.scrollarea_begin( 76 | "directory", 77 | Size { 78 | width: 0, 79 | // -1 for the label (top) 80 | // -1 for the label (bottom) 81 | // -1 for the editline (bottom) 82 | height: height - 3, 83 | }, 84 | ); 85 | ctx.attr_background_rgba(ctx.indexed_alpha(IndexedColor::Black, 1, 4)); 86 | ctx.next_block_id_mixin(state.file_picker_pending_dir_revision); 87 | { 88 | ctx.list_begin("files"); 89 | ctx.inherit_focus(); 90 | for entry in files { 91 | match ctx.list_item(false, entry.as_str()) { 92 | ListSelection::Unchanged => {} 93 | ListSelection::Selected => { 94 | state.file_picker_pending_name = entry.as_path().into() 95 | } 96 | ListSelection::Activated => activated = true, 97 | } 98 | ctx.attr_overflow(Overflow::TruncateMiddle); 99 | } 100 | ctx.list_end(); 101 | 102 | if ctx.contains_focus() && ctx.consume_shortcut(vk::BACK) { 103 | state.file_picker_pending_name = "..".into(); 104 | activated = true; 105 | } 106 | } 107 | ctx.scrollarea_end(); 108 | 109 | if activated { 110 | doit = draw_file_picker_update_path(state); 111 | 112 | // Check if the file already exists and show an overwrite warning in that case. 
113 | if state.wants_file_picker != StateFilePicker::Open 114 | && let Some(path) = doit.as_deref() 115 | && path.exists() 116 | { 117 | state.file_picker_overwrite_warning = doit.take(); 118 | } 119 | } 120 | } 121 | if ctx.modal_end() { 122 | done = true; 123 | } 124 | 125 | if state.file_picker_overwrite_warning.is_some() { 126 | let mut save; 127 | 128 | ctx.modal_begin("overwrite", loc(LocId::FileOverwriteWarning)); 129 | ctx.attr_background_rgba(ctx.indexed(IndexedColor::Red)); 130 | ctx.attr_foreground_rgba(ctx.indexed(IndexedColor::BrightWhite)); 131 | { 132 | let contains_focus = ctx.contains_focus(); 133 | 134 | ctx.label("description", loc(LocId::FileOverwriteWarningDescription)); 135 | ctx.attr_overflow(Overflow::TruncateTail); 136 | ctx.attr_padding(Rect::three(1, 2, 1)); 137 | 138 | ctx.table_begin("choices"); 139 | ctx.inherit_focus(); 140 | ctx.attr_padding(Rect::three(0, 2, 1)); 141 | ctx.attr_position(Position::Center); 142 | ctx.table_set_cell_gap(Size { width: 2, height: 0 }); 143 | { 144 | ctx.table_next_row(); 145 | ctx.inherit_focus(); 146 | 147 | save = ctx.button("yes", loc(LocId::Yes), ButtonStyle::default()); 148 | ctx.inherit_focus(); 149 | 150 | if ctx.button("no", loc(LocId::No), ButtonStyle::default()) { 151 | state.file_picker_overwrite_warning = None; 152 | } 153 | } 154 | ctx.table_end(); 155 | 156 | if contains_focus { 157 | save |= ctx.consume_shortcut(vk::Y); 158 | if ctx.consume_shortcut(vk::N) { 159 | state.file_picker_overwrite_warning = None; 160 | } 161 | } 162 | } 163 | if ctx.modal_end() { 164 | state.file_picker_overwrite_warning = None; 165 | } 166 | 167 | if save { 168 | doit = state.file_picker_overwrite_warning.take(); 169 | } 170 | } 171 | 172 | if let Some(path) = doit { 173 | let res = if state.wants_file_picker == StateFilePicker::Open { 174 | state.documents.add_file_path(&path).map(|_| ()) 175 | } else if let Some(doc) = state.documents.active_mut() { 176 | doc.save(Some(path)) 177 | } else { 178 | Ok(()) 179 | 
}; 180 | match res { 181 | Ok(..) => { 182 | ctx.needs_rerender(); 183 | done = true; 184 | } 185 | Err(err) => error_log_add(ctx, state, err), 186 | } 187 | } 188 | 189 | if done { 190 | state.wants_file_picker = StateFilePicker::None; 191 | state.file_picker_pending_name = Default::default(); 192 | state.file_picker_entries = Default::default(); 193 | state.file_picker_overwrite_warning = Default::default(); 194 | } 195 | } 196 | 197 | // Returns Some(path) if the path refers to a file. 198 | fn draw_file_picker_update_path(state: &mut State) -> Option { 199 | let old_path = state.file_picker_pending_dir.as_path(); 200 | let path = old_path.join(&state.file_picker_pending_name); 201 | let path = path::normalize(&path); 202 | 203 | let (dir, name) = if path.is_dir() { 204 | // If the current path is C:\ and the user selects "..", we want to 205 | // navigate to the drive picker. Since `path::normalize` will turn C:\.. into C:\, 206 | // we can detect this by checking if the length of the path didn't change. 207 | let dir = if cfg!(windows) 208 | && state.file_picker_pending_name == Path::new("..") 209 | // It's unnecessary to check the contents of the paths. 
210 | && old_path.as_os_str().len() == path.as_os_str().len() 211 | { 212 | Path::new("") 213 | } else { 214 | path.as_path() 215 | }; 216 | (dir, PathBuf::new()) 217 | } else { 218 | let dir = path.parent().unwrap_or(&path); 219 | let name = path.file_name().map_or(Default::default(), |s| s.into()); 220 | (dir, name) 221 | }; 222 | if dir != state.file_picker_pending_dir.as_path() { 223 | state.file_picker_pending_dir = DisplayablePathBuf::from_path(dir.to_path_buf()); 224 | state.file_picker_entries = None; 225 | } 226 | 227 | state.file_picker_pending_name = name; 228 | if state.file_picker_pending_name.as_os_str().is_empty() { None } else { Some(path) } 229 | } 230 | 231 | fn draw_dialog_saveas_refresh_files(state: &mut State) { 232 | let dir = state.file_picker_pending_dir.as_path(); 233 | let mut files = Vec::new(); 234 | let mut off = 0; 235 | 236 | #[cfg(windows)] 237 | if dir.as_os_str().is_empty() { 238 | // If the path is empty, we are at the drive picker. 239 | // Add all drives as entries. 240 | for drive in edit::sys::drives() { 241 | files.push(DisplayablePathBuf::from_string(format!("{drive}:\\"))); 242 | } 243 | 244 | state.file_picker_entries = Some(files); 245 | return; 246 | } 247 | 248 | if cfg!(windows) || dir.parent().is_some() { 249 | files.push(DisplayablePathBuf::from("..")); 250 | off = 1; 251 | } 252 | 253 | if let Ok(iter) = fs::read_dir(dir) { 254 | for entry in iter.flatten() { 255 | if let Ok(metadata) = entry.metadata() { 256 | let mut name = entry.file_name(); 257 | if metadata.is_dir() 258 | || (metadata.is_symlink() 259 | && fs::metadata(entry.path()).is_ok_and(|m| m.is_dir())) 260 | { 261 | name.push("/"); 262 | } 263 | files.push(DisplayablePathBuf::from(name)); 264 | } 265 | } 266 | } 267 | 268 | // Sort directories first, then by name, case-insensitive. 
269 | files[off..].sort_by(|a, b| { 270 | let a = a.as_bytes(); 271 | let b = b.as_bytes(); 272 | 273 | let a_is_dir = a.last() == Some(&b'/'); 274 | let b_is_dir = b.last() == Some(&b'/'); 275 | 276 | match b_is_dir.cmp(&a_is_dir) { 277 | Ordering::Equal => icu::compare_strings(a, b), 278 | other => other, 279 | } 280 | }); 281 | 282 | state.file_picker_entries = Some(files); 283 | } 284 | -------------------------------------------------------------------------------- /src/bin/edit/draw_menubar.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | use edit::arena_format; 5 | use edit::helpers::*; 6 | use edit::input::{kbmod, vk}; 7 | use edit::tui::*; 8 | 9 | use crate::localization::*; 10 | use crate::state::*; 11 | 12 | pub fn draw_menubar(ctx: &mut Context, state: &mut State) { 13 | ctx.menubar_begin(); 14 | ctx.attr_background_rgba(state.menubar_color_bg); 15 | ctx.attr_foreground_rgba(state.menubar_color_fg); 16 | { 17 | let contains_focus = ctx.contains_focus(); 18 | 19 | if ctx.menubar_menu_begin(loc(LocId::File), 'F') { 20 | draw_menu_file(ctx, state); 21 | } 22 | if !contains_focus && ctx.consume_shortcut(vk::F10) { 23 | ctx.steal_focus(); 24 | } 25 | if state.documents.active().is_some() && ctx.menubar_menu_begin(loc(LocId::Edit), 'E') { 26 | draw_menu_edit(ctx, state); 27 | } 28 | if ctx.menubar_menu_begin(loc(LocId::View), 'V') { 29 | draw_menu_view(ctx, state); 30 | } 31 | if ctx.menubar_menu_begin(loc(LocId::Help), 'H') { 32 | draw_menu_help(ctx, state); 33 | } 34 | } 35 | ctx.menubar_end(); 36 | } 37 | 38 | fn draw_menu_file(ctx: &mut Context, state: &mut State) { 39 | if ctx.menubar_menu_button(loc(LocId::FileNew), 'N', kbmod::CTRL | vk::N) { 40 | draw_add_untitled_document(ctx, state); 41 | } 42 | if ctx.menubar_menu_button(loc(LocId::FileOpen), 'O', kbmod::CTRL | vk::O) { 43 | state.wants_file_picker = StateFilePicker::Open; 
44 | } 45 | if state.documents.active().is_some() { 46 | if ctx.menubar_menu_button(loc(LocId::FileSave), 'S', kbmod::CTRL | vk::S) { 47 | state.wants_save = true; 48 | } 49 | if ctx.menubar_menu_button(loc(LocId::FileSaveAs), 'A', vk::NULL) { 50 | state.wants_file_picker = StateFilePicker::SaveAs; 51 | } 52 | if ctx.menubar_menu_button(loc(LocId::FileClose), 'C', kbmod::CTRL | vk::W) { 53 | state.wants_close = true; 54 | } 55 | } 56 | if ctx.menubar_menu_button(loc(LocId::FileExit), 'X', kbmod::CTRL | vk::Q) { 57 | state.wants_exit = true; 58 | } 59 | ctx.menubar_menu_end(); 60 | } 61 | 62 | fn draw_menu_edit(ctx: &mut Context, state: &mut State) { 63 | let doc = state.documents.active().unwrap(); 64 | let mut tb = doc.buffer.borrow_mut(); 65 | 66 | if ctx.menubar_menu_button(loc(LocId::EditUndo), 'U', kbmod::CTRL | vk::Z) { 67 | tb.undo(); 68 | ctx.needs_rerender(); 69 | } 70 | if ctx.menubar_menu_button(loc(LocId::EditRedo), 'R', kbmod::CTRL | vk::Y) { 71 | tb.redo(); 72 | ctx.needs_rerender(); 73 | } 74 | if ctx.menubar_menu_button(loc(LocId::EditCut), 'T', kbmod::CTRL | vk::X) { 75 | ctx.set_clipboard(tb.extract_selection(true)); 76 | } 77 | if ctx.menubar_menu_button(loc(LocId::EditCopy), 'C', kbmod::CTRL | vk::C) { 78 | ctx.set_clipboard(tb.extract_selection(false)); 79 | } 80 | if ctx.menubar_menu_button(loc(LocId::EditPaste), 'P', kbmod::CTRL | vk::V) { 81 | tb.write(ctx.clipboard(), true); 82 | ctx.needs_rerender(); 83 | } 84 | if state.wants_search.kind != StateSearchKind::Disabled { 85 | if ctx.menubar_menu_button(loc(LocId::EditFind), 'F', kbmod::CTRL | vk::F) { 86 | state.wants_search.kind = StateSearchKind::Search; 87 | state.wants_search.focus = true; 88 | } 89 | if ctx.menubar_menu_button(loc(LocId::EditReplace), 'L', kbmod::CTRL | vk::R) { 90 | state.wants_search.kind = StateSearchKind::Replace; 91 | state.wants_search.focus = true; 92 | } 93 | } 94 | if ctx.menubar_menu_button(loc(LocId::EditSelectAll), 'A', kbmod::CTRL | vk::A) { 95 | 
tb.select_all(); 96 | ctx.needs_rerender(); 97 | } 98 | ctx.menubar_menu_end(); 99 | } 100 | 101 | fn draw_menu_view(ctx: &mut Context, state: &mut State) { 102 | if ctx.menubar_menu_button(loc(LocId::ViewFocusStatusbar), 'S', vk::NULL) { 103 | state.wants_statusbar_focus = true; 104 | } 105 | 106 | if let Some(doc) = state.documents.active() { 107 | let mut tb = doc.buffer.borrow_mut(); 108 | let word_wrap = tb.is_word_wrap_enabled(); 109 | 110 | if ctx.menubar_menu_button(loc(LocId::ViewDocumentPicker), 'P', kbmod::CTRL | vk::P) { 111 | state.wants_document_picker = true; 112 | } 113 | if ctx.menubar_menu_button(loc(LocId::FileGoto), 'G', kbmod::CTRL | vk::G) { 114 | state.wants_goto = true; 115 | } 116 | if ctx.menubar_menu_checkbox(loc(LocId::ViewWordWrap), 'W', kbmod::ALT | vk::Z, word_wrap) { 117 | tb.set_word_wrap(!word_wrap); 118 | ctx.needs_rerender(); 119 | } 120 | } 121 | 122 | ctx.menubar_menu_end(); 123 | } 124 | 125 | fn draw_menu_help(ctx: &mut Context, state: &mut State) { 126 | if ctx.menubar_menu_button(loc(LocId::HelpAbout), 'A', vk::NULL) { 127 | state.wants_about = true; 128 | } 129 | ctx.menubar_menu_end(); 130 | } 131 | 132 | pub fn draw_dialog_about(ctx: &mut Context, state: &mut State) { 133 | ctx.modal_begin("about", loc(LocId::AboutDialogTitle)); 134 | { 135 | ctx.block_begin("content"); 136 | ctx.inherit_focus(); 137 | ctx.attr_padding(Rect::three(1, 2, 1)); 138 | { 139 | ctx.label("description", "Microsoft Edit"); 140 | ctx.attr_overflow(Overflow::TruncateTail); 141 | ctx.attr_position(Position::Center); 142 | 143 | ctx.label( 144 | "version", 145 | &arena_format!( 146 | ctx.arena(), 147 | "{}{}", 148 | loc(LocId::AboutDialogVersion), 149 | env!("CARGO_PKG_VERSION") 150 | ), 151 | ); 152 | ctx.attr_overflow(Overflow::TruncateHead); 153 | ctx.attr_position(Position::Center); 154 | 155 | ctx.label("copyright", "Copyright (c) Microsoft Corp 2025"); 156 | ctx.attr_overflow(Overflow::TruncateTail); 157 | ctx.attr_position(Position::Center); 
158 | 159 | ctx.block_begin("choices"); 160 | ctx.inherit_focus(); 161 | ctx.attr_padding(Rect::three(1, 2, 0)); 162 | ctx.attr_position(Position::Center); 163 | { 164 | if ctx.button("ok", loc(LocId::Ok), ButtonStyle::default()) { 165 | state.wants_about = false; 166 | } 167 | ctx.inherit_focus(); 168 | } 169 | ctx.block_end(); 170 | } 171 | ctx.block_end(); 172 | } 173 | if ctx.modal_end() { 174 | state.wants_about = false; 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/bin/edit/edit.exe.manifest: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | true 13 | UTF-8 14 | SegmentHeap 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /src/bin/edit/state.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
/// An owned path bundled with a string rendition of it, so that callers can
/// get a `&str` for display without converting the path again each time.
pub struct DisplayablePathBuf {
    value: PathBuf,
    // Not truly `'static`: when this is `Cow::Borrowed` it points into the
    // buffer owned by `value`. It must never be handed out with a lifetime
    // longer than `&self` (see `as_str`), and `value` must never be mutated.
    str: Cow<'static, str>,
}

impl DisplayablePathBuf {
    /// Builds the pair from an already valid UTF-8 `String`, which then backs
    /// both the path and the display string.
    #[allow(dead_code, reason = "only used on Windows")]
    pub fn from_string(string: String) -> Self {
        let str = Cow::Borrowed(string.as_str());
        // SAFETY: This only erases the borrow's lifetime. It relies on
        // `PathBuf::from(String)` taking over the string's heap buffer rather
        // than copying it, so the borrowed slice stays valid for as long as
        // `value` is alive and unmodified.
        let str = unsafe { mem::transmute::<Cow<'_, str>, Cow<'_, str>>(str) };
        let value = PathBuf::from(string);
        Self { value, str }
    }

    /// Builds the pair from a path. The display string is the lossy UTF-8
    /// conversion of the path: borrowed from `value` if the path is valid
    /// UTF-8 and an owned copy otherwise.
    pub fn from_path(value: PathBuf) -> Self {
        let str = value.to_string_lossy();
        // SAFETY: Same as in `from_string`. Moving `value` into the struct
        // below moves the `PathBuf` handle, not the heap buffer that a
        // borrowed `str` points into.
        let str = unsafe { mem::transmute::<Cow<'_, str>, Cow<'_, str>>(str) };
        Self { value, str }
    }

    /// The path itself.
    pub fn as_path(&self) -> &Path {
        &self.value
    }

    /// The display string, tied to the lifetime of `self`.
    pub fn as_str(&self) -> &str {
        &self.str
    }

    /// The platform-encoded bytes of the path (not of the display string).
    pub fn as_bytes(&self) -> &[u8] {
        self.value.as_os_str().as_encoded_bytes()
    }
}

impl Default for DisplayablePathBuf {
    /// An empty path with an empty display string.
    fn default() -> Self {
        Self { value: Default::default(), str: Cow::Borrowed("") }
    }
}
DisplayablePathBuf { 77 | fn clone(&self) -> Self { 78 | Self::from_path(self.value.clone()) 79 | } 80 | } 81 | 82 | impl From for DisplayablePathBuf { 83 | fn from(s: OsString) -> Self { 84 | Self::from_path(PathBuf::from(s)) 85 | } 86 | } 87 | 88 | impl> From<&T> for DisplayablePathBuf { 89 | fn from(s: &T) -> Self { 90 | Self::from_path(PathBuf::from(s)) 91 | } 92 | } 93 | 94 | pub struct StateSearch { 95 | pub kind: StateSearchKind, 96 | pub focus: bool, 97 | } 98 | 99 | #[derive(Clone, Copy, PartialEq, Eq)] 100 | pub enum StateSearchKind { 101 | Hidden, 102 | Disabled, 103 | Search, 104 | Replace, 105 | } 106 | 107 | #[derive(Clone, Copy, PartialEq, Eq)] 108 | pub enum StateFilePicker { 109 | None, 110 | Open, 111 | SaveAs, 112 | 113 | SaveAsShown, // Transitioned from SaveAs 114 | } 115 | 116 | #[derive(Clone, Copy, PartialEq, Eq)] 117 | pub enum StateEncodingChange { 118 | None, 119 | Convert, 120 | Reopen, 121 | } 122 | 123 | pub struct State { 124 | pub menubar_color_bg: u32, 125 | pub menubar_color_fg: u32, 126 | 127 | pub documents: DocumentManager, 128 | 129 | // A ring buffer of the last 10 errors. 130 | pub error_log: [String; 10], 131 | pub error_log_index: usize, 132 | pub error_log_count: usize, 133 | 134 | pub wants_file_picker: StateFilePicker, 135 | pub file_picker_pending_dir: DisplayablePathBuf, 136 | pub file_picker_pending_dir_revision: u64, // Bumped every time `file_picker_pending_dir` changes. 137 | pub file_picker_pending_name: PathBuf, 138 | pub file_picker_entries: Option>, 139 | pub file_picker_overwrite_warning: Option, // The path the warning is about. 
140 | 141 | pub wants_search: StateSearch, 142 | pub search_needle: String, 143 | pub search_replacement: String, 144 | pub search_options: buffer::SearchOptions, 145 | pub search_success: bool, 146 | 147 | pub wants_encoding_picker: bool, 148 | pub encoding_picker_needle: String, 149 | pub encoding_picker_results: Option>, 150 | 151 | pub wants_save: bool, 152 | pub wants_statusbar_focus: bool, 153 | pub wants_encoding_change: StateEncodingChange, 154 | pub wants_indentation_picker: bool, 155 | pub wants_document_picker: bool, 156 | pub wants_about: bool, 157 | pub wants_close: bool, 158 | pub wants_exit: bool, 159 | pub wants_goto: bool, 160 | pub goto_target: String, 161 | pub goto_invalid: bool, 162 | 163 | pub osc_title_filename: String, 164 | pub osc_clipboard_seen_generation: u32, 165 | pub osc_clipboard_send_generation: u32, 166 | pub osc_clipboard_always_send: bool, 167 | pub exit: bool, 168 | } 169 | 170 | impl State { 171 | pub fn new() -> apperr::Result { 172 | Ok(Self { 173 | menubar_color_bg: 0, 174 | menubar_color_fg: 0, 175 | 176 | documents: Default::default(), 177 | 178 | error_log: [const { String::new() }; 10], 179 | error_log_index: 0, 180 | error_log_count: 0, 181 | 182 | wants_file_picker: StateFilePicker::None, 183 | file_picker_pending_dir: Default::default(), 184 | file_picker_pending_dir_revision: 0, 185 | file_picker_pending_name: Default::default(), 186 | file_picker_entries: None, 187 | file_picker_overwrite_warning: None, 188 | 189 | wants_search: StateSearch { kind: StateSearchKind::Hidden, focus: false }, 190 | search_needle: Default::default(), 191 | search_replacement: Default::default(), 192 | search_options: Default::default(), 193 | search_success: true, 194 | 195 | wants_encoding_picker: false, 196 | encoding_picker_needle: Default::default(), 197 | encoding_picker_results: Default::default(), 198 | 199 | wants_save: false, 200 | wants_statusbar_focus: false, 201 | wants_encoding_change: StateEncodingChange::None, 202 | 
wants_indentation_picker: false, 203 | wants_document_picker: false, 204 | wants_about: false, 205 | wants_close: false, 206 | wants_exit: false, 207 | wants_goto: false, 208 | goto_target: Default::default(), 209 | goto_invalid: false, 210 | 211 | osc_title_filename: Default::default(), 212 | osc_clipboard_seen_generation: 0, 213 | osc_clipboard_send_generation: 0, 214 | osc_clipboard_always_send: false, 215 | exit: false, 216 | }) 217 | } 218 | } 219 | 220 | pub fn draw_add_untitled_document(ctx: &mut Context, state: &mut State) { 221 | if let Err(err) = state.documents.add_untitled() { 222 | error_log_add(ctx, state, err); 223 | } 224 | } 225 | 226 | pub fn error_log_add(ctx: &mut Context, state: &mut State, err: apperr::Error) { 227 | let msg = format!("{}", FormatApperr::from(err)); 228 | if !msg.is_empty() { 229 | state.error_log[state.error_log_index] = msg; 230 | state.error_log_index = (state.error_log_index + 1) % state.error_log.len(); 231 | state.error_log_count = state.error_log.len().min(state.error_log_count + 1); 232 | ctx.needs_rerender(); 233 | } 234 | } 235 | 236 | pub fn draw_error_log(ctx: &mut Context, state: &mut State) { 237 | ctx.modal_begin("error", loc(LocId::ErrorDialogTitle)); 238 | ctx.attr_background_rgba(ctx.indexed(IndexedColor::Red)); 239 | ctx.attr_foreground_rgba(ctx.indexed(IndexedColor::BrightWhite)); 240 | { 241 | ctx.block_begin("content"); 242 | ctx.attr_padding(Rect::three(0, 2, 1)); 243 | { 244 | let off = state.error_log_index + state.error_log.len() - state.error_log_count; 245 | 246 | for i in 0..state.error_log_count { 247 | let idx = (off + i) % state.error_log.len(); 248 | let msg = &state.error_log[idx][..]; 249 | 250 | if !msg.is_empty() { 251 | ctx.next_block_id_mixin(i as u64); 252 | ctx.label("error", msg); 253 | ctx.attr_overflow(Overflow::TruncateTail); 254 | } 255 | } 256 | } 257 | ctx.block_end(); 258 | 259 | if ctx.button("ok", loc(LocId::Ok), ButtonStyle::default()) { 260 | state.error_log_count = 0; 261 | 
} 262 | ctx.attr_position(Position::Center); 263 | ctx.inherit_focus(); 264 | } 265 | if ctx.modal_end() { 266 | state.error_log_count = 0; 267 | } 268 | } 269 | -------------------------------------------------------------------------------- /src/buffer/line_cache.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Range; 2 | 3 | use crate::{document::ReadableDocument, simd::memchr2}; 4 | 5 | /// Cache a line/offset pair every CACHE_EVERY lines to speed up line/offset calculations 6 | const CACHE_EVERY: usize = 1024 * 64; 7 | 8 | #[derive(Clone)] 9 | pub struct CachePoint { 10 | pub index: usize, 11 | pub line: usize, 12 | // pub snapshot: ParserSnapshot 13 | } 14 | 15 | pub struct LineCache { 16 | cache: Vec, 17 | } 18 | 19 | impl LineCache { 20 | pub fn new() -> Self { 21 | Self { cache: vec![] } 22 | } 23 | 24 | pub fn from_document(&mut self, document: &T) { 25 | self.cache.clear(); 26 | 27 | let mut offset = 0; 28 | let mut line = 0; 29 | loop { 30 | let text = document.read_forward(offset); 31 | if text.is_empty() { return; } 32 | 33 | let mut off = 0; 34 | loop { 35 | off = memchr2(b'\n', b'\n', text, off); 36 | if off == text.len() { break; } 37 | 38 | if line % CACHE_EVERY == 0 { 39 | self.cache.push(CachePoint { index: offset+off, line }); 40 | } 41 | line += 1; 42 | off += 1; 43 | } 44 | 45 | offset += text.len(); 46 | } 47 | } 48 | 49 | /// Updates the cache after a deletion. 50 | /// `range` is the deleted byte range, and `text` is the content that was deleted. 
51 | pub fn delete(&mut self, range: Range, text: &Vec) { 52 | let mut newlines = 0; 53 | for c in text { 54 | if *c == b'\n' { 55 | newlines += 1; 56 | } 57 | } 58 | 59 | let mut beg_del = None; 60 | let mut end_del = None; 61 | for (i, point) in self.cache.iter_mut().enumerate() { 62 | if point.index >= range.start { 63 | if point.index < range.end { 64 | // cache point is within the deleted range 65 | if beg_del.is_none() { beg_del = Some(i); } 66 | end_del = Some(i + 1); 67 | } 68 | else { 69 | point.index -= text.len(); 70 | point.line -= newlines; 71 | } 72 | } 73 | } 74 | 75 | if let (Some(beg), Some(end)) = (beg_del, end_del) { 76 | self.cache.drain(beg..end); 77 | } 78 | } 79 | 80 | /// Updates the cache after an insertion. 81 | /// `offset` is where the insertion occurs, and `text` is the inserted content. 82 | pub fn insert(&mut self, offset: usize, text: &[u8]) { 83 | // Count how many newlines were inserted 84 | let mut newlines = 0; 85 | for c in text { 86 | if *c == b'\n' { 87 | newlines += 1; 88 | } 89 | } 90 | 91 | let len = text.len(); 92 | for point in &mut self.cache { 93 | if point.index > offset { 94 | point.index += len; 95 | point.line += newlines; 96 | } 97 | } 98 | 99 | // TODO: This also needs to insert new cache points 100 | } 101 | 102 | /// Finds the nearest cached line-offset pair relative to a target line. 103 | /// If `reverse` is false, it returns the closest *before* the target. 104 | /// If `reverse` is true, it returns the closest *after or at* the target. 
105 | pub fn nearest_offset(&self, target_count: usize, reverse: bool) -> Option { 106 | match self.cache.binary_search_by_key(&target_count, |p| p.line) { 107 | Ok(i) => Some(self.cache[i].clone()), 108 | Err(i) => { 109 | if i == 0 || i == self.cache.len() { None } // target < lowest cache point || target > highest cache point 110 | else { 111 | Some(self.cache[ if reverse {i} else {i-1} ].clone()) 112 | } 113 | } 114 | } 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/buffer/navigation.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | use std::ops::Range; 5 | 6 | use crate::document::ReadableDocument; 7 | 8 | #[derive(Clone, Copy, PartialEq, Eq)] 9 | enum CharClass { 10 | Whitespace, 11 | Newline, 12 | Separator, 13 | Word, 14 | } 15 | 16 | const fn construct_classifier(separators: &[u8]) -> [CharClass; 256] { 17 | let mut classifier = [CharClass::Word; 256]; 18 | 19 | classifier[b' ' as usize] = CharClass::Whitespace; 20 | classifier[b'\t' as usize] = CharClass::Whitespace; 21 | classifier[b'\n' as usize] = CharClass::Newline; 22 | classifier[b'\r' as usize] = CharClass::Newline; 23 | 24 | let mut i = 0; 25 | let len = separators.len(); 26 | while i < len { 27 | let ch = separators[i]; 28 | assert!(ch < 128, "Only ASCII separators are supported."); 29 | classifier[ch as usize] = CharClass::Separator; 30 | i += 1; 31 | } 32 | 33 | classifier 34 | } 35 | 36 | const WORD_CLASSIFIER: [CharClass; 256] = 37 | construct_classifier(br#"`~!@#$%^&*()-=+[{]}\|;:'",.<>/?"#); 38 | 39 | /// Finds the next word boundary given a document cursor offset. 40 | /// Returns the offset of the next word boundary. 
41 | pub fn word_forward(doc: &dyn ReadableDocument, offset: usize) -> usize { 42 | word_navigation(WordForward { doc, offset, chunk: &[], chunk_off: 0 }) 43 | } 44 | 45 | /// The backward version of `word_forward`. 46 | pub fn word_backward(doc: &dyn ReadableDocument, offset: usize) -> usize { 47 | word_navigation(WordBackward { doc, offset, chunk: &[], chunk_off: 0 }) 48 | } 49 | 50 | /// Word navigation implementation. Matches the behavior of VS Code. 51 | fn word_navigation(mut nav: T) -> usize { 52 | // First, fill `self.chunk` with at least 1 grapheme. 53 | nav.read(); 54 | 55 | // Skip one newline, if any. 56 | nav.skip_newline(); 57 | 58 | // Skip any whitespace. 59 | nav.skip_class(CharClass::Whitespace); 60 | 61 | // Skip one word or separator and take note of the class. 62 | let class = nav.peek(CharClass::Whitespace); 63 | if matches!(class, CharClass::Separator | CharClass::Word) { 64 | nav.next(); 65 | 66 | let off = nav.offset(); 67 | 68 | // Continue skipping the same class. 69 | nav.skip_class(class); 70 | 71 | // If the class was a separator and we only moved one character, 72 | // continue skipping characters of the word class. 
73 | if off == nav.offset() && class == CharClass::Separator { 74 | nav.skip_class(CharClass::Word); 75 | } 76 | } 77 | 78 | nav.offset() 79 | } 80 | 81 | trait WordNavigation { 82 | fn read(&mut self); 83 | fn skip_newline(&mut self); 84 | fn skip_class(&mut self, class: CharClass); 85 | fn peek(&self, default: CharClass) -> CharClass; 86 | fn next(&mut self); 87 | fn offset(&self) -> usize; 88 | } 89 | 90 | struct WordForward<'a> { 91 | doc: &'a dyn ReadableDocument, 92 | offset: usize, 93 | chunk: &'a [u8], 94 | chunk_off: usize, 95 | } 96 | 97 | impl WordNavigation for WordForward<'_> { 98 | fn read(&mut self) { 99 | self.chunk = self.doc.read_forward(self.offset); 100 | self.chunk_off = 0; 101 | } 102 | 103 | fn skip_newline(&mut self) { 104 | // We can rely on the fact that the document does not split graphemes across chunks. 105 | // = If there's a newline it's wholly contained in this chunk. 106 | // Unlike with `WordBackward`, we can't check for CR and LF separately as only a CR followed 107 | // by a LF is a newline. A lone CR in the document is just a regular control character. 
108 | self.chunk_off += match self.chunk.get(self.chunk_off) { 109 | Some(&b'\n') => 1, 110 | Some(&b'\r') if self.chunk.get(self.chunk_off + 1) == Some(&b'\n') => 2, 111 | _ => 0, 112 | } 113 | } 114 | 115 | fn skip_class(&mut self, class: CharClass) { 116 | while !self.chunk.is_empty() { 117 | while self.chunk_off < self.chunk.len() { 118 | if WORD_CLASSIFIER[self.chunk[self.chunk_off] as usize] != class { 119 | return; 120 | } 121 | self.chunk_off += 1; 122 | } 123 | 124 | self.offset += self.chunk.len(); 125 | self.chunk = self.doc.read_forward(self.offset); 126 | self.chunk_off = 0; 127 | } 128 | } 129 | 130 | fn peek(&self, default: CharClass) -> CharClass { 131 | if self.chunk_off < self.chunk.len() { 132 | WORD_CLASSIFIER[self.chunk[self.chunk_off] as usize] 133 | } else { 134 | default 135 | } 136 | } 137 | 138 | fn next(&mut self) { 139 | self.chunk_off += 1; 140 | } 141 | 142 | fn offset(&self) -> usize { 143 | self.offset + self.chunk_off 144 | } 145 | } 146 | 147 | struct WordBackward<'a> { 148 | doc: &'a dyn ReadableDocument, 149 | offset: usize, 150 | chunk: &'a [u8], 151 | chunk_off: usize, 152 | } 153 | 154 | impl WordNavigation for WordBackward<'_> { 155 | fn read(&mut self) { 156 | self.chunk = self.doc.read_backward(self.offset); 157 | self.chunk_off = self.chunk.len(); 158 | } 159 | 160 | fn skip_newline(&mut self) { 161 | // We can rely on the fact that the document does not split graphemes across chunks. 162 | // = If there's a newline it's wholly contained in this chunk. 
163 | if self.chunk_off > 0 && self.chunk[self.chunk_off - 1] == b'\n' { 164 | self.chunk_off -= 1; 165 | } 166 | if self.chunk_off > 0 && self.chunk[self.chunk_off - 1] == b'\r' { 167 | self.chunk_off -= 1; 168 | } 169 | } 170 | 171 | fn skip_class(&mut self, class: CharClass) { 172 | while !self.chunk.is_empty() { 173 | while self.chunk_off > 0 { 174 | if WORD_CLASSIFIER[self.chunk[self.chunk_off - 1] as usize] != class { 175 | return; 176 | } 177 | self.chunk_off -= 1; 178 | } 179 | 180 | self.offset -= self.chunk.len(); 181 | self.chunk = self.doc.read_backward(self.offset); 182 | self.chunk_off = self.chunk.len(); 183 | } 184 | } 185 | 186 | fn peek(&self, default: CharClass) -> CharClass { 187 | if self.chunk_off > 0 { 188 | WORD_CLASSIFIER[self.chunk[self.chunk_off - 1] as usize] 189 | } else { 190 | default 191 | } 192 | } 193 | 194 | fn next(&mut self) { 195 | self.chunk_off -= 1; 196 | } 197 | 198 | fn offset(&self) -> usize { 199 | self.offset - self.chunk.len() + self.chunk_off 200 | } 201 | } 202 | 203 | /// Returns the offset range of the "word" at the given offset. 204 | /// Does not cross newlines. Works similar to VS Code. 205 | pub fn word_select(doc: &dyn ReadableDocument, offset: usize) -> Range { 206 | let mut beg = offset; 207 | let mut end = offset; 208 | let mut class = CharClass::Newline; 209 | 210 | let mut chunk = doc.read_forward(end); 211 | if !chunk.is_empty() { 212 | // Not at the end of the document? Great! 213 | // We default to using the next char as the class, because in terminals 214 | // the cursor is usually always to the left of the cell you clicked on. 215 | class = WORD_CLASSIFIER[chunk[0] as usize]; 216 | 217 | let mut chunk_off = 0; 218 | 219 | // Select the word, unless we hit a newline. 
220 | if class != CharClass::Newline { 221 | loop { 222 | chunk_off += 1; 223 | end += 1; 224 | 225 | if chunk_off >= chunk.len() { 226 | chunk = doc.read_forward(end); 227 | chunk_off = 0; 228 | if chunk.is_empty() { 229 | break; 230 | } 231 | } 232 | 233 | if WORD_CLASSIFIER[chunk[chunk_off] as usize] != class { 234 | break; 235 | } 236 | } 237 | } 238 | } 239 | 240 | let mut chunk = doc.read_backward(beg); 241 | if !chunk.is_empty() { 242 | let mut chunk_off = chunk.len(); 243 | 244 | // If we failed to determine the class, because we hit the end of the document 245 | // or a newline, we fall back to using the previous character, of course. 246 | if class == CharClass::Newline { 247 | class = WORD_CLASSIFIER[chunk[chunk_off - 1] as usize]; 248 | } 249 | 250 | // Select the word, unless we hit a newline. 251 | if class != CharClass::Newline { 252 | loop { 253 | if WORD_CLASSIFIER[chunk[chunk_off - 1] as usize] != class { 254 | break; 255 | } 256 | 257 | chunk_off -= 1; 258 | beg -= 1; 259 | 260 | if chunk_off == 0 { 261 | chunk = doc.read_backward(beg); 262 | chunk_off = chunk.len(); 263 | if chunk.is_empty() { 264 | break; 265 | } 266 | } 267 | } 268 | } 269 | } 270 | 271 | beg..end 272 | } 273 | 274 | #[cfg(test)] 275 | mod test { 276 | use super::*; 277 | 278 | #[test] 279 | fn test_word_navigation() { 280 | assert_eq!(word_forward(&"Hello World".as_bytes(), 0), 5); 281 | assert_eq!(word_forward(&"Hello,World".as_bytes(), 0), 5); 282 | assert_eq!(word_forward(&" Hello".as_bytes(), 0), 8); 283 | assert_eq!(word_forward(&"\n\nHello".as_bytes(), 0), 1); 284 | 285 | assert_eq!(word_backward(&"Hello World".as_bytes(), 11), 6); 286 | assert_eq!(word_backward(&"Hello,World".as_bytes(), 10), 6); 287 | assert_eq!(word_backward(&"Hello ".as_bytes(), 7), 0); 288 | assert_eq!(word_backward(&"Hello\n\n".as_bytes(), 7), 6); 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /src/cell.rs: 
#[cfg(debug_assertions)]
pub use debug::*;
#[cfg(not(debug_assertions))]
pub use release::*;

/// Debug builds: plain aliases for the checked `std::cell` types.
#[allow(unused)]
#[cfg(debug_assertions)]
mod debug {
    pub type SemiRefCell<T> = std::cell::RefCell<T>;
    pub type Ref<'b, T> = std::cell::Ref<'b, T>;
    pub type RefMut<'b, T> = std::cell::RefMut<'b, T>;
}

/// Release builds: the same API surface as used from `RefCell`, but built on
/// `UnsafeCell` with no borrow tracking at all. Borrow-rule violations are
/// therefore only caught by the debug variant.
#[cfg(not(debug_assertions))]
mod release {
    #[derive(Default)]
    #[repr(transparent)]
    pub struct SemiRefCell<T>(std::cell::UnsafeCell<T>);

    impl<T> SemiRefCell<T> {
        /// Wraps `value`.
        #[inline(always)]
        pub const fn new(value: T) -> Self {
            Self(std::cell::UnsafeCell::new(value))
        }

        /// Raw pointer to the wrapped value.
        #[inline(always)]
        pub const fn as_ptr(&self) -> *mut T {
            self.0.get()
        }

        /// Shared access to the wrapped value. Unchecked in this variant.
        #[inline(always)]
        pub const fn borrow(&self) -> Ref<'_, T> {
            // SAFETY: Not verified here. Callers must uphold the aliasing
            // rules that the debug variant enforces at runtime.
            Ref(unsafe { &*self.0.get() })
        }

        /// Exclusive access to the wrapped value. Unchecked in this variant.
        #[inline(always)]
        pub const fn borrow_mut(&self) -> RefMut<'_, T> {
            // SAFETY: See `borrow`.
            RefMut(unsafe { &mut *self.0.get() })
        }
    }

    /// Stand-in for `std::cell::Ref`: a thin wrapper around `&T`.
    #[repr(transparent)]
    pub struct Ref<'b, T>(&'b T);

    impl<'b, T> Ref<'b, T> {
        /// Associated function (not a method), like `std::cell::Ref::clone`.
        #[inline(always)]
        pub fn clone(orig: &Self) -> Self {
            Ref(orig.0)
        }
    }

    impl<'b, T> std::ops::Deref for Ref<'b, T> {
        type Target = T;

        #[inline(always)]
        fn deref(&self) -> &Self::Target {
            self.0
        }
    }

    /// Stand-in for `std::cell::RefMut`: a thin wrapper around `&mut T`.
    #[repr(transparent)]
    pub struct RefMut<'b, T>(&'b mut T);

    impl<'b, T> std::ops::Deref for RefMut<'b, T> {
        type Target = T;

        #[inline(always)]
        fn deref(&self) -> &Self::Target {
            self.0
        }
    }

    impl<'b, T> std::ops::DerefMut for RefMut<'b, T> {
        #[inline(always)]
        fn deref_mut(&mut self) -> &mut Self::Target {
            self.0
        }
    }
}
44 | /// 45 | /// # Warning 46 | /// 47 | /// * The given range may be out of bounds and you MUST clamp it. 48 | /// * The replacement may not be valid UTF8. 49 | fn replace(&mut self, range: Range, replacement: &[u8]); 50 | } 51 | 52 | impl ReadableDocument for &[u8] { 53 | fn read_forward(&self, off: usize) -> &[u8] { 54 | let s = *self; 55 | &s[off.min(s.len())..] 56 | } 57 | 58 | fn read_backward(&self, off: usize) -> &[u8] { 59 | let s = *self; 60 | &s[..off.min(s.len())] 61 | } 62 | } 63 | 64 | impl ReadableDocument for String { 65 | fn read_forward(&self, off: usize) -> &[u8] { 66 | let s = self.as_bytes(); 67 | &s[off.min(s.len())..] 68 | } 69 | 70 | fn read_backward(&self, off: usize) -> &[u8] { 71 | let s = self.as_bytes(); 72 | &s[..off.min(s.len())] 73 | } 74 | } 75 | 76 | impl WriteableDocument for String { 77 | fn replace(&mut self, range: Range, replacement: &[u8]) { 78 | // `replacement` is not guaranteed to be valid UTF-8, so we need to sanitize it. 79 | let scratch = scratch_arena(None); 80 | let utf8 = ArenaString::from_utf8_lossy(&scratch, replacement); 81 | let src = match &utf8 { 82 | Ok(s) => s, 83 | Err(s) => s.as_str(), 84 | }; 85 | 86 | // SAFETY: `range` is guaranteed to be on codepoint boundaries. 87 | unsafe { self.as_mut_vec() }.replace_range(range, src.as_bytes()); 88 | } 89 | } 90 | 91 | impl ReadableDocument for PathBuf { 92 | fn read_forward(&self, off: usize) -> &[u8] { 93 | let s = self.as_os_str().as_encoded_bytes(); 94 | &s[off.min(s.len())..] 
95 | } 96 | 97 | fn read_backward(&self, off: usize) -> &[u8] { 98 | let s = self.as_os_str().as_encoded_bytes(); 99 | &s[..off.min(s.len())] 100 | } 101 | } 102 | 103 | impl WriteableDocument for PathBuf { 104 | fn replace(&mut self, range: Range, replacement: &[u8]) { 105 | let mut vec = mem::take(self).into_os_string().into_encoded_bytes(); 106 | vec.replace_range(range, replacement); 107 | *self = unsafe { Self::from(OsString::from_encoded_bytes_unchecked(vec)) }; 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/fuzzy.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | //! Fuzzy search algorithm based on the one used in VS Code (`/src/vs/base/common/fuzzyScorer.ts`). 5 | //! Other algorithms exist, such as Sublime Text's, or the one used in `fzf`, 6 | //! but I figured that this one is what lots of people may be familiar with. 
7 | 8 | use std::vec; 9 | 10 | use crate::arena::{Arena, scratch_arena}; 11 | use crate::icu; 12 | 13 | const NO_MATCH: i32 = 0; 14 | 15 | pub fn score_fuzzy<'a>( 16 | arena: &'a Arena, 17 | haystack: &str, 18 | needle: &str, 19 | allow_non_contiguous_matches: bool, 20 | ) -> (i32, Vec) { 21 | if haystack.is_empty() || needle.is_empty() { 22 | // return early if target or query are empty 23 | return (NO_MATCH, Vec::new_in(arena)); 24 | } 25 | 26 | let scratch = scratch_arena(Some(arena)); 27 | let target = map_chars(&scratch, haystack); 28 | let query = map_chars(&scratch, needle); 29 | 30 | if target.len() < query.len() { 31 | // impossible for query to be contained in target 32 | return (NO_MATCH, Vec::new_in(arena)); 33 | } 34 | 35 | let target_lower = icu::fold_case(&scratch, haystack); 36 | let query_lower = icu::fold_case(&scratch, needle); 37 | let target_lower = map_chars(&scratch, &target_lower); 38 | let query_lower = map_chars(&scratch, &query_lower); 39 | 40 | let area = query.len() * target.len(); 41 | let mut scores = vec::from_elem_in(0, area, &*scratch); 42 | let mut matches = vec::from_elem_in(0, area, &*scratch); 43 | 44 | // 45 | // Build Scorer Matrix: 46 | // 47 | // The matrix is composed of query q and target t. For each index we score 48 | // q[i] with t[i] and compare that with the previous score. If the score is 49 | // equal or larger, we keep the match. In addition to the score, we also keep 50 | // the length of the consecutive matches to use as boost for the score. 
51 | // 52 | // t a r g e t 53 | // q 54 | // u 55 | // e 56 | // r 57 | // y 58 | // 59 | for query_index in 0..query.len() { 60 | let query_index_offset = query_index * target.len(); 61 | let query_index_previous_offset = 62 | if query_index > 0 { (query_index - 1) * target.len() } else { 0 }; 63 | 64 | for target_index in 0..target.len() { 65 | let current_index = query_index_offset + target_index; 66 | let diag_index = if query_index > 0 && target_index > 0 { 67 | query_index_previous_offset + target_index - 1 68 | } else { 69 | 0 70 | }; 71 | let left_score = if target_index > 0 { scores[current_index - 1] } else { 0 }; 72 | let diag_score = 73 | if query_index > 0 && target_index > 0 { scores[diag_index] } else { 0 }; 74 | let matches_sequence_len = 75 | if query_index > 0 && target_index > 0 { matches[diag_index] } else { 0 }; 76 | 77 | // If we are not matching on the first query character anymore, we only produce a 78 | // score if we had a score previously for the last query index (by looking at the diagScore). 79 | // This makes sure that the query always matches in sequence on the target. For example 80 | // given a target of "ede" and a query of "de", we would otherwise produce a wrong high score 81 | // for query[1] ("e") matching on target[0] ("e") because of the "beginning of word" boost. 
82 | let score = if diag_score == 0 && query_index != 0 { 83 | 0 84 | } else { 85 | compute_char_score( 86 | query[query_index], 87 | query_lower[query_index], 88 | if target_index != 0 { Some(target[target_index - 1]) } else { None }, 89 | target[target_index], 90 | target_lower[target_index], 91 | matches_sequence_len, 92 | ) 93 | }; 94 | 95 | // We have a score and its equal or larger than the left score 96 | // Match: sequence continues growing from previous diag value 97 | // Score: increases by diag score value 98 | let is_valid_score = score != 0 && diag_score + score >= left_score; 99 | if is_valid_score 100 | && ( 101 | // We don't need to check if it's contiguous if we allow non-contiguous matches 102 | allow_non_contiguous_matches || 103 | // We must be looking for a contiguous match. 104 | // Looking at an index above 0 in the query means we must have already 105 | // found out this is contiguous otherwise there wouldn't have been a score 106 | query_index > 0 || 107 | // lastly check if the query is completely contiguous at this index in the target 108 | target_lower[target_index..].starts_with(&query_lower) 109 | ) 110 | { 111 | matches[current_index] = matches_sequence_len + 1; 112 | scores[current_index] = diag_score + score; 113 | } else { 114 | // We either have no score or the score is lower than the left score 115 | // Match: reset to 0 116 | // Score: pick up from left hand side 117 | matches[current_index] = NO_MATCH; 118 | scores[current_index] = left_score; 119 | } 120 | } 121 | } 122 | 123 | // Restore Positions (starting from bottom right of matrix) 124 | let mut positions = Vec::new_in(arena); 125 | 126 | if !query.is_empty() && !target.is_empty() { 127 | let mut query_index = query.len() - 1; 128 | let mut target_index = target.len() - 1; 129 | 130 | loop { 131 | let current_index = query_index * target.len() + target_index; 132 | if matches[current_index] == NO_MATCH { 133 | if target_index == 0 { 134 | break; 135 | } 136 | target_index 
-= 1; // go left 137 | } else { 138 | positions.push(target_index); 139 | 140 | // go up and left 141 | if query_index == 0 || target_index == 0 { 142 | break; 143 | } 144 | query_index -= 1; 145 | target_index -= 1; 146 | } 147 | } 148 | 149 | positions.reverse(); 150 | } 151 | 152 | (scores[area - 1], positions) 153 | } 154 | 155 | fn compute_char_score( 156 | query: char, 157 | query_lower: char, 158 | target_prev: Option, 159 | target_curr: char, 160 | target_curr_lower: char, 161 | matches_sequence_len: i32, 162 | ) -> i32 { 163 | let mut score = 0; 164 | 165 | if !consider_as_equal(query_lower, target_curr_lower) { 166 | return score; // no match of characters 167 | } 168 | 169 | // Character match bonus 170 | score += 1; 171 | 172 | // Consecutive match bonus 173 | if matches_sequence_len > 0 { 174 | score += matches_sequence_len * 5; 175 | } 176 | 177 | // Same case bonus 178 | if query == target_curr { 179 | score += 1; 180 | } 181 | 182 | if let Some(target_prev) = target_prev { 183 | // After separator bonus 184 | let separator_bonus = score_separator_at_pos(target_prev); 185 | if separator_bonus > 0 { 186 | score += separator_bonus; 187 | } 188 | // Inside word upper case bonus (camel case). We only give this bonus if we're not in a contiguous sequence. 189 | // For example: 190 | // NPE => NullPointerException = boost 191 | // HTTP => HTTP = not boost 192 | else if target_curr != target_curr_lower && matches_sequence_len == 0 { 193 | score += 2; 194 | } 195 | } else { 196 | // Start of word bonus 197 | score += 8; 198 | } 199 | 200 | score 201 | } 202 | 203 | fn consider_as_equal(a: char, b: char) -> bool { 204 | // Special case path separators: ignore platform differences 205 | a == b || (a == '/' && b == '\\') || (a == '\\' && b == '/') 206 | } 207 | 208 | fn score_separator_at_pos(ch: char) -> i32 { 209 | match ch { 210 | '/' | '\\' => 5, // prefer path separators... 211 | '_' | '-' | '.' 
| ' ' | '\'' | '"' | ':' => 4, // ...over other separators 212 | _ => 0, 213 | } 214 | } 215 | 216 | fn map_chars<'a>(arena: &'a Arena, s: &str) -> Vec { 217 | let mut chars = Vec::with_capacity_in(s.len(), arena); 218 | chars.extend(s.chars()); 219 | chars.shrink_to_fit(); 220 | chars 221 | } 222 | -------------------------------------------------------------------------------- /src/hash.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | //! Provides fast, non-cryptographic hash functions. 5 | 6 | /// The venerable wyhash hash function. 7 | /// 8 | /// It's fast, has good statistical properties, and is in the public domain. 9 | /// See: 10 | /// If you visit the link, you'll find that it was superseded by "rapidhash", 11 | /// but that's not particularly interesting for this project. rapidhash results 12 | /// in way larger assembly and isn't faster when hashing small amounts of data. 
13 | pub fn hash(mut seed: u64, data: &[u8]) -> u64 { 14 | unsafe { 15 | const S0: u64 = 0xa0761d6478bd642f; 16 | const S1: u64 = 0xe7037ed1a0b428db; 17 | const S2: u64 = 0x8ebc6af09c88c6e3; 18 | const S3: u64 = 0x589965cc75374cc3; 19 | 20 | let len = data.len(); 21 | let mut p = data.as_ptr(); 22 | let a; 23 | let b; 24 | 25 | seed ^= S0; 26 | 27 | if len <= 16 { 28 | if len >= 4 { 29 | a = (wyr4(p) << 32) | wyr4(p.add((len >> 3) << 2)); 30 | b = (wyr4(p.add(len - 4)) << 32) | wyr4(p.add(len - 4 - ((len >> 3) << 2))); 31 | } else if len > 0 { 32 | a = wyr3(p, len); 33 | b = 0; 34 | } else { 35 | a = 0; 36 | b = 0; 37 | } 38 | } else { 39 | let mut i = len; 40 | if i > 48 { 41 | let mut seed1 = seed; 42 | let mut seed2 = seed; 43 | while { 44 | seed = wymix(wyr8(p) ^ S1, wyr8(p.add(8)) ^ seed); 45 | seed1 = wymix(wyr8(p.add(16)) ^ S2, wyr8(p.add(24)) ^ seed1); 46 | seed2 = wymix(wyr8(p.add(32)) ^ S3, wyr8(p.add(40)) ^ seed2); 47 | p = p.add(48); 48 | i -= 48; 49 | i > 48 50 | } {} 51 | seed ^= seed1 ^ seed2; 52 | } 53 | while i > 16 { 54 | seed = wymix(wyr8(p) ^ S1, wyr8(p.add(8)) ^ seed); 55 | i -= 16; 56 | p = p.add(16); 57 | } 58 | a = wyr8(p.offset(i as isize - 16)); 59 | b = wyr8(p.offset(i as isize - 8)); 60 | } 61 | 62 | wymix(S1 ^ (len as u64), wymix(a ^ S1, b ^ seed)) 63 | } 64 | } 65 | 66 | unsafe fn wyr3(p: *const u8, k: usize) -> u64 { 67 | let p0 = unsafe { p.read() as u64 }; 68 | let p1 = unsafe { p.add(k >> 1).read() as u64 }; 69 | let p2 = unsafe { p.add(k - 1).read() as u64 }; 70 | (p0 << 16) | (p1 << 8) | p2 71 | } 72 | 73 | unsafe fn wyr4(p: *const u8) -> u64 { 74 | unsafe { (p as *const u32).read_unaligned() as u64 } 75 | } 76 | 77 | unsafe fn wyr8(p: *const u8) -> u64 { 78 | unsafe { (p as *const u64).read_unaligned() } 79 | } 80 | 81 | // This is a weak mix function on its own. It may be worth considering 82 | // replacing external uses of this function with a stronger one. 83 | // On the other hand, it's very fast. 
84 | pub fn wymix(lhs: u64, rhs: u64) -> u64 { 85 | let lhs = lhs as u128; 86 | let rhs = rhs as u128; 87 | let r = lhs * rhs; 88 | (r >> 64) as u64 ^ (r as u64) 89 | } 90 | 91 | pub fn hash_str(seed: u64, s: &str) -> u64 { 92 | hash(seed, s.as_bytes()) 93 | } 94 | -------------------------------------------------------------------------------- /src/helpers.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | //! Random assortment of helpers I didn't know where to put. 5 | 6 | use std::alloc::Allocator; 7 | use std::cmp::Ordering; 8 | use std::io::Read; 9 | use std::mem::{self, MaybeUninit}; 10 | use std::ops::{Bound, Range, RangeBounds}; 11 | use std::{fmt, ptr, slice, str}; 12 | 13 | use crate::apperr; 14 | 15 | pub const KILO: usize = 1000; 16 | pub const MEGA: usize = 1000 * 1000; 17 | pub const GIGA: usize = 1000 * 1000 * 1000; 18 | 19 | pub const KIBI: usize = 1024; 20 | pub const MEBI: usize = 1024 * 1024; 21 | pub const GIBI: usize = 1024 * 1024 * 1024; 22 | 23 | pub struct MetricFormatter(pub T); 24 | 25 | impl fmt::Display for MetricFormatter { 26 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 27 | let mut value = self.0; 28 | let mut suffix = "B"; 29 | if value >= GIGA { 30 | value /= GIGA; 31 | suffix = "GB"; 32 | } else if value >= MEGA { 33 | value /= MEGA; 34 | suffix = "MB"; 35 | } else if value >= KILO { 36 | value /= KILO; 37 | suffix = "kB"; 38 | } 39 | write!(f, "{value}{suffix}") 40 | } 41 | } 42 | 43 | /// A viewport coordinate type used throughout the application. 44 | pub type CoordType = isize; 45 | 46 | /// To avoid overflow issues because you're adding two [`CoordType::MAX`] 47 | /// values together, you can use [`COORD_TYPE_SAFE_MAX`] instead. 48 | /// 49 | /// It equates to half the bits contained in [`CoordType`], which 50 | /// for instance is 32767 (0x7FFF) when [`CoordType`] is a [`i32`]. 
51 | pub const COORD_TYPE_SAFE_MAX: CoordType = (1 << (CoordType::BITS / 2 - 1)) - 1; 52 | 53 | /// A 2D point. Uses [`CoordType`]. 54 | #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] 55 | pub struct Point { 56 | pub x: CoordType, 57 | pub y: CoordType, 58 | } 59 | 60 | impl Point { 61 | pub const MIN: Self = Self { x: CoordType::MIN, y: CoordType::MIN }; 62 | pub const MAX: Self = Self { x: CoordType::MAX, y: CoordType::MAX }; 63 | } 64 | 65 | impl PartialOrd for Point { 66 | fn partial_cmp(&self, other: &Self) -> Option { 67 | Some(self.cmp(other)) 68 | } 69 | } 70 | 71 | impl Ord for Point { 72 | fn cmp(&self, other: &Self) -> Ordering { 73 | self.y.cmp(&other.y).then(self.x.cmp(&other.x)) 74 | } 75 | } 76 | 77 | /// A 2D size. Uses [`CoordType`]. 78 | #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] 79 | pub struct Size { 80 | pub width: CoordType, 81 | pub height: CoordType, 82 | } 83 | 84 | impl Size { 85 | pub fn as_rect(&self) -> Rect { 86 | Rect { left: 0, top: 0, right: self.width, bottom: self.height } 87 | } 88 | } 89 | 90 | /// A 2D rectangle. Uses [`CoordType`]. 91 | #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] 92 | pub struct Rect { 93 | pub left: CoordType, 94 | pub top: CoordType, 95 | pub right: CoordType, 96 | pub bottom: CoordType, 97 | } 98 | 99 | impl Rect { 100 | /// Mimics CSS's `padding` property where `padding: a` is `a a a a`. 101 | pub fn one(value: CoordType) -> Self { 102 | Self { left: value, top: value, right: value, bottom: value } 103 | } 104 | 105 | /// Mimics CSS's `padding` property where `padding: a b` is `a b a b`, 106 | /// and `a` is top/bottom and `b` is left/right. 107 | pub fn two(top_bottom: CoordType, left_right: CoordType) -> Self { 108 | Self { left: left_right, top: top_bottom, right: left_right, bottom: top_bottom } 109 | } 110 | 111 | /// Mimics CSS's `padding` property where `padding: a b c` is `a b c b`, 112 | /// and `a` is top, `b` is left/right, and `c` is bottom. 
113 | pub fn three(top: CoordType, left_right: CoordType, bottom: CoordType) -> Self { 114 | Self { left: left_right, top, right: left_right, bottom } 115 | } 116 | 117 | /// Is the rectangle empty? 118 | pub fn is_empty(&self) -> bool { 119 | self.left >= self.right || self.top >= self.bottom 120 | } 121 | 122 | /// Width of the rectangle. 123 | pub fn width(&self) -> CoordType { 124 | self.right - self.left 125 | } 126 | 127 | /// Height of the rectangle. 128 | pub fn height(&self) -> CoordType { 129 | self.bottom - self.top 130 | } 131 | 132 | /// Check if it contains a point. 133 | pub fn contains(&self, point: Point) -> bool { 134 | point.x >= self.left && point.x < self.right && point.y >= self.top && point.y < self.bottom 135 | } 136 | 137 | /// Intersect two rectangles. 138 | pub fn intersect(&self, rhs: Self) -> Self { 139 | let l = self.left.max(rhs.left); 140 | let t = self.top.max(rhs.top); 141 | let r = self.right.min(rhs.right); 142 | let b = self.bottom.min(rhs.bottom); 143 | 144 | // Ensure that the size is non-negative. This avoids bugs, 145 | // because some height/width is negative all of a sudden. 146 | let r = l.max(r); 147 | let b = t.max(b); 148 | 149 | Self { left: l, top: t, right: r, bottom: b } 150 | } 151 | } 152 | 153 | /// [`std::cmp::minmax`] is unstable, as per usual. 154 | pub fn minmax(v1: T, v2: T) -> [T; 2] 155 | where 156 | T: Ord, 157 | { 158 | if v2 < v1 { [v2, v1] } else { [v1, v2] } 159 | } 160 | 161 | #[inline(always)] 162 | #[allow(clippy::ptr_eq)] 163 | fn opt_ptr(a: Option<&T>) -> *const T { 164 | unsafe { mem::transmute(a) } 165 | } 166 | 167 | /// Surprisingly, there's no way in Rust to do a `ptr::eq` on `Option<&T>`. 168 | /// Uses `unsafe` so that the debug performance isn't too bad. 169 | #[inline(always)] 170 | #[allow(clippy::ptr_eq)] 171 | pub fn opt_ptr_eq(a: Option<&T>, b: Option<&T>) -> bool { 172 | opt_ptr(a) == opt_ptr(b) 173 | } 174 | 175 | /// Creates a `&str` from a pointer and a length. 
176 | /// Exists, because `std::str::from_raw_parts` is unstable, par for the course. 177 | /// 178 | /// # Safety 179 | /// 180 | /// The given data must be valid UTF-8. 181 | /// The given data must outlive the returned reference. 182 | #[inline] 183 | #[must_use] 184 | pub const unsafe fn str_from_raw_parts<'a>(ptr: *const u8, len: usize) -> &'a str { 185 | unsafe { str::from_utf8_unchecked(slice::from_raw_parts(ptr, len)) } 186 | } 187 | 188 | /// [`<[T]>::copy_from_slice`] panics if the two slices have different lengths. 189 | /// This one just returns the copied amount. 190 | pub fn slice_copy_safe(dst: &mut [T], src: &[T]) -> usize { 191 | let len = src.len().min(dst.len()); 192 | unsafe { ptr::copy_nonoverlapping(src.as_ptr(), dst.as_mut_ptr(), len) }; 193 | len 194 | } 195 | 196 | /// [`Vec::splice`] results in really bad assembly. 197 | /// This doesn't. Don't use [`Vec::splice`]. 198 | pub trait ReplaceRange { 199 | fn replace_range>(&mut self, range: R, src: &[T]); 200 | } 201 | 202 | impl ReplaceRange for Vec { 203 | fn replace_range>(&mut self, range: R, src: &[T]) { 204 | let start = match range.start_bound() { 205 | Bound::Included(&start) => start, 206 | Bound::Excluded(start) => start + 1, 207 | Bound::Unbounded => 0, 208 | }; 209 | let end = match range.end_bound() { 210 | Bound::Included(end) => end + 1, 211 | Bound::Excluded(&end) => end, 212 | Bound::Unbounded => usize::MAX, 213 | }; 214 | vec_replace_impl(self, start..end, src); 215 | } 216 | } 217 | 218 | fn vec_replace_impl(dst: &mut Vec, range: Range, src: &[T]) { 219 | unsafe { 220 | let dst_len = dst.len(); 221 | let src_len = src.len(); 222 | let off = range.start.min(dst_len); 223 | let del_len = range.end.saturating_sub(off).min(dst_len - off); 224 | 225 | if del_len == 0 && src_len == 0 { 226 | return; // nothing to do 227 | } 228 | 229 | let tail_len = dst_len - off - del_len; 230 | let new_len = dst_len - del_len + src_len; 231 | 232 | if src_len > del_len { 233 | 
dst.reserve(src_len - del_len); 234 | } 235 | 236 | // NOTE: drop_in_place() is not needed here, because T is constrained to Copy. 237 | 238 | // SAFETY: as_mut_ptr() must called after reserve() to ensure that the pointer is valid. 239 | let ptr = dst.as_mut_ptr().add(off); 240 | 241 | // Shift the tail. 242 | if tail_len > 0 && src_len != del_len { 243 | ptr::copy(ptr.add(del_len), ptr.add(src_len), tail_len); 244 | } 245 | 246 | // Copy in the replacement. 247 | ptr::copy_nonoverlapping(src.as_ptr(), ptr, src_len); 248 | dst.set_len(new_len); 249 | } 250 | } 251 | 252 | /// [`Read`] but with [`MaybeUninit`] buffers. 253 | pub fn file_read_uninit( 254 | file: &mut T, 255 | buf: &mut [MaybeUninit], 256 | ) -> apperr::Result { 257 | unsafe { 258 | let buf_slice = slice::from_raw_parts_mut(buf.as_mut_ptr() as *mut u8, buf.len()); 259 | let n = file.read(buf_slice)?; 260 | Ok(n) 261 | } 262 | } 263 | 264 | /// Turns a [`&[u8]`] into a [`&[MaybeUninit]`]. 265 | #[inline(always)] 266 | pub const fn slice_as_uninit_ref(slice: &[T]) -> &[MaybeUninit] { 267 | unsafe { slice::from_raw_parts(slice.as_ptr() as *const MaybeUninit, slice.len()) } 268 | } 269 | 270 | /// Turns a [`&mut [T]`] into a [`&mut [MaybeUninit]`]. 271 | #[inline(always)] 272 | pub const fn slice_as_uninit_mut(slice: &mut [T]) -> &mut [MaybeUninit] { 273 | unsafe { slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut MaybeUninit, slice.len()) } 274 | } 275 | 276 | /// Helpers for ASCII string comparisons. 277 | pub trait AsciiStringHelpers { 278 | /// Tests if a string starts with a given ASCII prefix. 279 | /// 280 | /// This function name really is a mouthful, but it's a combination 281 | /// of [`str::starts_with`] and [`str::eq_ignore_ascii_case`]. 
282 | fn starts_with_ignore_ascii_case(&self, prefix: &str) -> bool; 283 | } 284 | 285 | impl AsciiStringHelpers for str { 286 | fn starts_with_ignore_ascii_case(&self, prefix: &str) -> bool { 287 | // Casting to bytes first ensures we skip any UTF8 boundary checks. 288 | // Since the comparison is ASCII, we don't need to worry about that. 289 | let s = self.as_bytes(); 290 | let p = prefix.as_bytes(); 291 | p.len() <= s.len() && s[..p.len()].eq_ignore_ascii_case(p) 292 | } 293 | } 294 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | #![feature( 5 | allocator_api, 6 | breakpoint, 7 | cold_path, 8 | let_chains, 9 | linked_list_cursors, 10 | maybe_uninit_fill, 11 | maybe_uninit_slice, 12 | maybe_uninit_uninit_array_transpose 13 | )] 14 | #![allow(clippy::missing_transmute_annotations, clippy::new_without_default, stable_features)] 15 | 16 | #[macro_use] 17 | pub mod arena; 18 | 19 | pub mod apperr; 20 | pub mod base64; 21 | pub mod buffer; 22 | pub mod cell; 23 | pub mod document; 24 | pub mod framebuffer; 25 | pub mod fuzzy; 26 | pub mod hash; 27 | pub mod helpers; 28 | pub mod icu; 29 | pub mod input; 30 | pub mod oklab; 31 | pub mod path; 32 | pub mod simd; 33 | pub mod sys; 34 | pub mod tui; 35 | pub mod unicode; 36 | pub mod vt; 37 | -------------------------------------------------------------------------------- /src/oklab.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | //! Oklab colorspace conversions. 5 | //! 6 | //! Implements Oklab as defined at: 7 | 8 | #![allow(clippy::excessive_precision)] 9 | 10 | /// An Oklab color with alpha. 
11 | pub struct Lab { 12 | pub l: f32, 13 | pub a: f32, 14 | pub b: f32, 15 | pub alpha: f32, 16 | } 17 | 18 | /// Converts a 32-bit sRGB color to Oklab. 19 | pub fn srgb_to_oklab(color: u32) -> Lab { 20 | let r = SRGB_TO_RGB_LUT[(color & 0xff) as usize]; 21 | let g = SRGB_TO_RGB_LUT[((color >> 8) & 0xff) as usize]; 22 | let b = SRGB_TO_RGB_LUT[((color >> 16) & 0xff) as usize]; 23 | let alpha = (color >> 24) as f32 * (1.0 / 255.0); 24 | 25 | let l = 0.4122214708 * r + 0.5363325363 * g + 0.0514459929 * b; 26 | let m = 0.2119034982 * r + 0.6806995451 * g + 0.1073969566 * b; 27 | let s = 0.0883024619 * r + 0.2817188376 * g + 0.6299787005 * b; 28 | 29 | let l_ = cbrtf_est(l); 30 | let m_ = cbrtf_est(m); 31 | let s_ = cbrtf_est(s); 32 | 33 | Lab { 34 | l: 0.2104542553 * l_ + 0.7936177850 * m_ - 0.0040720468 * s_, 35 | a: 1.9779984951 * l_ - 2.4285922050 * m_ + 0.4505937099 * s_, 36 | b: 0.0259040371 * l_ + 0.7827717662 * m_ - 0.8086757660 * s_, 37 | alpha, 38 | } 39 | } 40 | 41 | /// Converts an Oklab color to a 32-bit sRGB color. 42 | pub fn oklab_to_srgb(c: Lab) -> u32 { 43 | let l_ = c.l + 0.3963377774 * c.a + 0.2158037573 * c.b; 44 | let m_ = c.l - 0.1055613458 * c.a - 0.0638541728 * c.b; 45 | let s_ = c.l - 0.0894841775 * c.a - 1.2914855480 * c.b; 46 | 47 | let l = l_ * l_ * l_; 48 | let m = m_ * m_ * m_; 49 | let s = s_ * s_ * s_; 50 | 51 | let r = 4.0767416621 * l - 3.3077115913 * m + 0.2309699292 * s; 52 | let g = -1.2684380046 * l + 2.6097574011 * m - 0.3413193965 * s; 53 | let b = -0.0041960863 * l - 0.7034186147 * m + 1.7076147010 * s; 54 | 55 | let r = r.clamp(0.0, 1.0); 56 | let g = g.clamp(0.0, 1.0); 57 | let b = b.clamp(0.0, 1.0); 58 | let alpha = c.alpha.clamp(0.0, 1.0); 59 | 60 | let r = linear_to_srgb(r); 61 | let g = linear_to_srgb(g); 62 | let b = linear_to_srgb(b); 63 | let a = (alpha * 255.0) as u32; 64 | 65 | r | (g << 8) | (b << 16) | (a << 24) 66 | } 67 | 68 | /// Blends two 32-bit sRGB colors in the Oklab color space. 
69 | pub fn oklab_blend(dst: u32, src: u32) -> u32 { 70 | let dst = srgb_to_oklab(dst); 71 | let src = srgb_to_oklab(src); 72 | 73 | let inv_a = 1.0 - src.alpha; 74 | let l = src.l + dst.l * inv_a; 75 | let a = src.a + dst.a * inv_a; 76 | let b = src.b + dst.b * inv_a; 77 | let alpha = src.alpha + dst.alpha * inv_a; 78 | 79 | oklab_to_srgb(Lab { l, a, b, alpha }) 80 | } 81 | 82 | fn linear_to_srgb(c: f32) -> u32 { 83 | (if c > 0.0031308 { 84 | 255.0 * 1.055 * c.powf(1.0 / 2.4) - 255.0 * 0.055 85 | } else { 86 | 255.0 * 12.92 * c 87 | }) as u32 88 | } 89 | 90 | #[inline] 91 | fn cbrtf_est(a: f32) -> f32 { 92 | // http://metamerist.com/cbrt/cbrt.htm showed a great estimator for the cube root: 93 | // f32_as_uint32_t / 3 + 709921077 94 | // It's similar to the well known "fast inverse square root" trick. 95 | // Lots of numbers around 709921077 perform at least equally well to 709921077, 96 | // and it is unknown how and why 709921077 was chosen specifically. 97 | let u: u32 = f32::to_bits(a); // evil f32ing point bit level hacking 98 | let u = u / 3 + 709921077; // what the fuck? 99 | let x: f32 = f32::from_bits(u); 100 | 101 | // One round of Newton's method. It follows the Wikipedia article at 102 | // https://en.wikipedia.org/wiki/Cube_root#Numerical_methods 103 | // For `a`s in the range between 0 and 1, this results in a maximum error of 104 | // less than 6.7e-4f, which is not good, but good enough for us, because 105 | // we're not an image editor. The benefit is that it's really fast. 
106 | (1.0 / 3.0) * (a / (x * x) + (x + x)) // 1st iteration 107 | } 108 | 109 | #[rustfmt::skip] 110 | #[allow(clippy::excessive_precision)] 111 | const SRGB_TO_RGB_LUT: [f32; 256] = [ 112 | 0.0000000000, 0.0003035270, 0.0006070540, 0.0009105810, 0.0012141080, 0.0015176350, 0.0018211619, 0.0021246888, 0.0024282159, 0.0027317430, 0.0030352699, 0.0033465356, 0.0036765069, 0.0040247170, 0.0043914421, 0.0047769533, 113 | 0.0051815170, 0.0056053917, 0.0060488326, 0.0065120910, 0.0069954102, 0.0074990317, 0.0080231922, 0.0085681248, 0.0091340570, 0.0097212177, 0.0103298230, 0.0109600937, 0.0116122449, 0.0122864870, 0.0129830306, 0.0137020806, 114 | 0.0144438436, 0.0152085144, 0.0159962922, 0.0168073755, 0.0176419523, 0.0185002182, 0.0193823613, 0.0202885624, 0.0212190095, 0.0221738834, 0.0231533647, 0.0241576303, 0.0251868572, 0.0262412224, 0.0273208916, 0.0284260381, 115 | 0.0295568332, 0.0307134409, 0.0318960287, 0.0331047624, 0.0343398079, 0.0356013142, 0.0368894450, 0.0382043645, 0.0395462364, 0.0409151986, 0.0423114114, 0.0437350273, 0.0451862030, 0.0466650836, 0.0481718220, 0.0497065634, 116 | 0.0512694679, 0.0528606549, 0.0544802807, 0.0561284944, 0.0578054339, 0.0595112406, 0.0612460710, 0.0630100295, 0.0648032799, 0.0666259527, 0.0684781820, 0.0703601092, 0.0722718611, 0.0742135793, 0.0761853904, 0.0781874284, 117 | 0.0802198276, 0.0822827145, 0.0843762159, 0.0865004659, 0.0886556059, 0.0908417329, 0.0930589810, 0.0953074843, 0.0975873619, 0.0998987406, 0.1022417471, 0.1046164930, 0.1070231125, 0.1094617173, 0.1119324341, 0.1144353822, 118 | 0.1169706732, 0.1195384338, 0.1221387982, 0.1247718409, 0.1274376959, 0.1301364899, 0.1328683347, 0.1356333494, 0.1384316236, 0.1412633061, 0.1441284865, 0.1470272839, 0.1499598026, 0.1529261619, 0.1559264660, 0.1589608639, 119 | 0.1620294005, 0.1651322246, 0.1682693958, 0.1714410931, 0.1746473908, 0.1778884083, 0.1811642349, 0.1844749898, 0.1878207624, 0.1912016720, 0.1946178079, 0.1980693042, 0.2015562356, 0.2050787061, 
0.2086368501, 0.2122307271, 120 | 0.2158605307, 0.2195262313, 0.2232279778, 0.2269658893, 0.2307400703, 0.2345506549, 0.2383976579, 0.2422811985, 0.2462013960, 0.2501583695, 0.2541521788, 0.2581829131, 0.2622507215, 0.2663556635, 0.2704978585, 0.2746773660, 121 | 0.2788943350, 0.2831487954, 0.2874408960, 0.2917706966, 0.2961383164, 0.3005438447, 0.3049873710, 0.3094689548, 0.3139887452, 0.3185468316, 0.3231432438, 0.3277781308, 0.3324515820, 0.3371636569, 0.3419144452, 0.3467040956, 122 | 0.3515326977, 0.3564002514, 0.3613068759, 0.3662526906, 0.3712377846, 0.3762622178, 0.3813261092, 0.3864295185, 0.3915725648, 0.3967553079, 0.4019778669, 0.4072403014, 0.4125427008, 0.4178851545, 0.4232677519, 0.4286905527, 123 | 0.4341537058, 0.4396572411, 0.4452012479, 0.4507858455, 0.4564110637, 0.4620770514, 0.4677838385, 0.4735315442, 0.4793202281, 0.4851499796, 0.4910208881, 0.4969330430, 0.5028865933, 0.5088814497, 0.5149177909, 0.5209956765, 124 | 0.5271152258, 0.5332764983, 0.5394796133, 0.5457245708, 0.5520114899, 0.5583404899, 0.5647116303, 0.5711249113, 0.5775805116, 0.5840784907, 0.5906189084, 0.5972018838, 0.6038274169, 0.6104956269, 0.6172066331, 0.6239604354, 125 | 0.6307572126, 0.6375969648, 0.6444797516, 0.6514056921, 0.6583748460, 0.6653873324, 0.6724432111, 0.6795425415, 0.6866854429, 0.6938719153, 0.7011020184, 0.7083759308, 0.7156936526, 0.7230552435, 0.7304608822, 0.7379105687, 126 | 0.7454043627, 0.7529423237, 0.7605246305, 0.7681512833, 0.7758223414, 0.7835379243, 0.7912980318, 0.7991028428, 0.8069523573, 0.8148466945, 0.8227858543, 0.8307699561, 0.8387991190, 0.8468732834, 0.8549926877, 0.8631572723, 127 | 0.8713672161, 0.8796223402, 0.8879231811, 0.8962693810, 0.9046613574, 0.9130986929, 0.9215820432, 0.9301108718, 0.9386858940, 0.9473065734, 0.9559735060, 0.9646862745, 0.9734454751, 0.9822505713, 0.9911022186, 1.0000000000, 128 | ]; 129 | -------------------------------------------------------------------------------- /src/path.rs: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | //! Path related helpers. 5 | 6 | use std::ffi::{OsStr, OsString}; 7 | use std::path::{Component, MAIN_SEPARATOR_STR, Path, PathBuf}; 8 | 9 | /// Normalizes a given path by removing redundant components. 10 | /// The given path must be absolute (e.g. by joining it with the current working directory). 11 | pub fn normalize(path: &Path) -> PathBuf { 12 | debug_assert!(path.is_absolute()); 13 | 14 | let mut res = PathBuf::with_capacity(path.as_os_str().as_encoded_bytes().len()); 15 | let mut root_len = 0; 16 | 17 | for component in path.components() { 18 | match component { 19 | Component::Prefix(p) => res.push(p.as_os_str()), 20 | Component::RootDir => { 21 | res.push(OsStr::new(MAIN_SEPARATOR_STR)); 22 | root_len = res.as_os_str().as_encoded_bytes().len(); 23 | } 24 | Component::CurDir => {} 25 | Component::ParentDir => { 26 | // Get the length up to the parent directory 27 | if let Some(len) = res 28 | .parent() 29 | .map(|p| p.as_os_str().as_encoded_bytes().len()) 30 | // Ensure we don't pop the root directory 31 | && len >= root_len 32 | { 33 | // Pop the last component from `res`. 34 | // 35 | // This can be replaced with a plain `res.as_mut_os_string().truncate(len)` 36 | // once `os_string_truncate` is stabilized (#133262). 
37 | let mut bytes = res.into_os_string().into_encoded_bytes(); 38 | bytes.truncate(len); 39 | res = PathBuf::from(unsafe { OsString::from_encoded_bytes_unchecked(bytes) }); 40 | } 41 | } 42 | Component::Normal(p) => res.push(p), 43 | } 44 | } 45 | 46 | res 47 | } 48 | 49 | #[cfg(test)] 50 | mod tests { 51 | use std::ffi::OsString; 52 | use std::path::Path; 53 | 54 | use super::*; 55 | 56 | fn norm(s: &str) -> OsString { 57 | normalize(Path::new(s)).into_os_string() 58 | } 59 | 60 | #[cfg(unix)] 61 | #[test] 62 | fn test_unix() { 63 | assert_eq!(norm("/a/b/c"), "/a/b/c"); 64 | assert_eq!(norm("/a/b/c/"), "/a/b/c"); 65 | assert_eq!(norm("/a/./b"), "/a/b"); 66 | assert_eq!(norm("/a/b/../c"), "/a/c"); 67 | assert_eq!(norm("/../../a"), "/a"); 68 | assert_eq!(norm("/../"), "/"); 69 | assert_eq!(norm("/a//b/c"), "/a/b/c"); 70 | assert_eq!(norm("/a/b/c/../../../../d"), "/d"); 71 | assert_eq!(norm("//"), "/"); 72 | } 73 | 74 | #[cfg(windows)] 75 | #[test] 76 | fn test_windows() { 77 | assert_eq!(norm(r"C:\a\b\c"), r"C:\a\b\c"); 78 | assert_eq!(norm(r"C:\a\b\c\"), r"C:\a\b\c"); 79 | assert_eq!(norm(r"C:\a\.\b"), r"C:\a\b"); 80 | assert_eq!(norm(r"C:\a\b\..\c"), r"C:\a\c"); 81 | assert_eq!(norm(r"C:\..\..\a"), r"C:\a"); 82 | assert_eq!(norm(r"C:\..\"), r"C:\"); 83 | assert_eq!(norm(r"C:\a\\b\c"), r"C:\a\b\c"); 84 | assert_eq!(norm(r"C:/a\b/c"), r"C:\a\b\c"); 85 | assert_eq!(norm(r"C:\a\b\c\..\..\..\..\d"), r"C:\d"); 86 | assert_eq!(norm(r"\\server\share\path"), r"\\server\share\path"); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/simd/lines_bwd.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
use std::ptr;

use crate::helpers::CoordType;

/// Starting from the `offset` in `haystack` with a current line index of
/// `line`, this seeks backwards to the `line_stop`-nth line and returns the
/// new offset and the line index at that point.
///
/// Note that this function differs from `lines_fwd` in that it
/// seeks backwards even if the `line` is already at `line_stop`.
/// This allows you to ensure (or test) whether `offset` is at a line start.
///
/// It returns an offset *past* a newline and thus at the start of a line.
/// If no suitable newline is found, the returned offset is 0.
///
/// An `offset` larger than `haystack.len()` is clamped to `haystack.len()`.
pub fn lines_bwd(
    haystack: &[u8],
    offset: usize,
    line: CoordType,
    line_stop: CoordType,
) -> (usize, CoordType) {
    unsafe {
        let beg = haystack.as_ptr();
        // Clamping keeps the starting pointer inside (or one past) the slice.
        let it = beg.add(offset.min(haystack.len()));
        let (it, line) = lines_bwd_raw(beg, it, line, line_stop);
        (it.offset_from_unsigned(beg), line)
    }
}

/// Pointer-based entry point that picks the implementation for the target:
/// the self-updating dispatch pointer on x86_64, NEON on aarch64,
/// and the scalar fallback everywhere else.
unsafe fn lines_bwd_raw(
    beg: *const u8,
    end: *const u8,
    line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    #[cfg(target_arch = "x86_64")]
    return unsafe { LINES_BWD_DISPATCH(beg, end, line, line_stop) };

    #[cfg(target_arch = "aarch64")]
    return unsafe { lines_bwd_neon(beg, end, line, line_stop) };

    #[allow(unreachable_code)]
    return unsafe { lines_bwd_fallback(beg, end, line, line_stop) };
}

/// Scalar implementation, also used by the SIMD variants for the final bytes.
///
/// Walks from `end` towards `beg` one byte at a time. On every `\n` it either
/// stops (if `line <= line_stop`) or decrements `line` and keeps going.
/// The returned pointer is just past the newline it stopped at, or `beg`.
unsafe fn lines_bwd_fallback(
    beg: *const u8,
    mut end: *const u8,
    mut line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    unsafe {
        while !ptr::eq(end, beg) {
            let n = end.sub(1);
            if *n == b'\n' {
                if line <= line_stop {
                    // `end` still points past this newline, i.e. at a line start.
                    break;
                }
                line -= 1;
            }
            end = n;
        }
        (end, line)
    }
}

// Starts out pointing at `lines_bwd_dispatch`, which overwrites this
// pointer with the selected implementation during the first call.
#[cfg(target_arch = "x86_64")]
static mut LINES_BWD_DISPATCH: unsafe fn(
    beg: *const u8,
    end: *const u8,
    line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) = lines_bwd_dispatch;

/// First-call trampoline: detects AVX2 at runtime, stores the chosen
/// implementation in `LINES_BWD_DISPATCH` and forwards the current call to it.
#[cfg(target_arch = "x86_64")]
unsafe fn lines_bwd_dispatch(
    beg: *const u8,
    end: *const u8,
    line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    let func = if is_x86_feature_detected!("avx2") { lines_bwd_avx2 } else { lines_bwd_fallback };
    unsafe { LINES_BWD_DISPATCH = func };
    unsafe { func(beg, end, line, line_stop) }
}

/// AVX2 implementation. Counts newlines in 128-byte and then 32-byte chunks
/// (moving backwards) for as long as a whole chunk can be skipped without
/// reaching `line_stop`, then lets `lines_bwd_fallback` finish byte by byte.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn lines_bwd_avx2(
    beg: *const u8,
    mut end: *const u8,
    mut line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    unsafe {
        use std::arch::x86_64::*;

        // Adds up the four 64-bit lanes of `v`.
        #[inline(always)]
        unsafe fn horizontal_sum_i64(v: __m256i) -> i64 {
            unsafe {
                let hi = _mm256_extracti128_si256::<1>(v);
                let lo = _mm256_castsi256_si128(v);
                let sum = _mm_add_epi64(lo, hi);
                // Moves the upper 64 bits into the lower 64 bits.
                let shuf = _mm_shuffle_epi32::<0b11_10_11_10>(sum);
                let sum = _mm_add_epi64(sum, shuf);
                _mm_cvtsi128_si64(sum)
            }
        }

        let lf = _mm256_set1_epi8(b'\n' as i8);
        // Clamp the target so that it is never greater than the current line.
        let line_stop = line_stop.min(line);
        let mut remaining = end.offset_from_unsigned(beg);

        while remaining >= 128 {
            let chunk_start = end.sub(128);

            let v1 = _mm256_loadu_si256(chunk_start.add(0) as *const _);
            let v2 = _mm256_loadu_si256(chunk_start.add(32) as *const _);
            let v3 = _mm256_loadu_si256(chunk_start.add(64) as *const _);
            let v4 = _mm256_loadu_si256(chunk_start.add(96) as *const _);

            // A match is 0xff (-1) per byte, so subtracting it counts up.
            // After four subtractions every byte lane holds a value in 0..=4.
            let mut sum = _mm256_setzero_si256();
            sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v1, lf));
            sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v2, lf));
            sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v3, lf));
            sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v4, lf));

            // Total number of newlines in this 128-byte chunk.
            let sum = _mm256_sad_epu8(sum, _mm256_setzero_si256());
            let sum = horizontal_sum_i64(sum);

            // The chunk contains the stop position (or may contain it):
            // leave it to the next, finer-grained stage.
            let line_next = line - sum as CoordType;
            if line_next <= line_stop {
                break;
            }

            end = chunk_start;
            remaining -= 128;
            line = line_next;
        }

        while remaining >= 32 {
            let chunk_start = end.sub(32);
            let v = _mm256_loadu_si256(chunk_start as *const _);
            let c = _mm256_cmpeq_epi8(v, lf);

            // Turn 0xff matches into 0x01 so that `vpsadbw` yields the count.
            let ones = _mm256_and_si256(c, _mm256_set1_epi8(0x01));
            let sum = _mm256_sad_epu8(ones, _mm256_setzero_si256());
            let sum = horizontal_sum_i64(sum);

            let line_next = line - sum as CoordType;
            if line_next <= line_stop {
                break;
            }

            end = chunk_start;
            remaining -= 32;
            line = line_next;
        }

        lines_bwd_fallback(beg, end, line, line_stop)
    }
}

/// NEON implementation. Counts newlines in 64-byte and then 16-byte chunks
/// (moving backwards) for as long as a whole chunk can be skipped without
/// reaching `line_stop`, then lets `lines_bwd_fallback` finish byte by byte.
#[cfg(target_arch = "aarch64")]
unsafe fn lines_bwd_neon(
    beg: *const u8,
    mut end: *const u8,
    mut line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    unsafe {
        use std::arch::aarch64::*;

        let lf = vdupq_n_u8(b'\n');
        // Clamp the target so that it is never greater than the current line.
        let line_stop = line_stop.min(line);
        let mut remaining = end.offset_from_unsigned(beg);

        while remaining >= 64 {
            let chunk_start = end.sub(64);

            let v1 = vld1q_u8(chunk_start.add(0));
            let v2 = vld1q_u8(chunk_start.add(16));
            let v3 = vld1q_u8(chunk_start.add(32));
            let v4 = vld1q_u8(chunk_start.add(48));

            // A match is 0xff (-1) per byte, so subtracting it counts up.
            // After four subtractions every byte lane holds a value in 0..=4.
            let mut sum = vdupq_n_u8(0);
            sum = vsubq_u8(sum, vceqq_u8(v1, lf));
            sum = vsubq_u8(sum, vceqq_u8(v2, lf));
            sum = vsubq_u8(sum, vceqq_u8(v3, lf));
            sum = vsubq_u8(sum, vceqq_u8(v4, lf));

            // 16 lanes of at most 4 each: the horizontal sum is at most 64.
            let sum = vaddvq_u8(sum);

            let line_next = line - sum as CoordType;
            if line_next <= line_stop {
                break;
            }

            end = chunk_start;
            remaining -= 64;
            line = line_next;
        }

        while remaining >= 16 {
            let chunk_start = end.sub(16);
            let v = vld1q_u8(chunk_start);
            let c = vceqq_u8(v, lf);
            // Turn 0xff matches into 0x01 so that the horizontal add yields the count.
            let c = vandq_u8(c, vdupq_n_u8(0x01));
            let sum = vaddvq_u8(c);

            let line_next = line - sum as CoordType;
            if line_next <= line_stop {
                break;
            }

            end = chunk_start;
            remaining -= 16;
            line = line_next;
        }

        lines_bwd_fallback(beg, end, line, line_stop)
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::helpers::CoordType;
    use crate::simd::test::*;

    // Compares `lines_bwd` against the straightforward reference below
    // for 1000 pseudo-random (offset, line, line_stop) combinations.
    #[test]
    fn pseudo_fuzz() {
        let text = generate_random_text(1024);
        let lines = count_lines(&text);
        let mut offset_rng = make_rng();
        let mut line_rng = make_rng();
        let mut line_distance_rng = make_rng();

        for _ in 0..1000 {
            let offset = offset_rng() % (text.len() + 1);
            let line_stop = line_distance_rng() % (lines + 1);
            let line = line_stop + line_rng() % 100;

            let line = line as CoordType;
            let line_stop = line_stop as CoordType;

            let expected = reference_lines_bwd(text.as_bytes(), offset, line, line_stop);
            let actual = lines_bwd(text.as_bytes(), offset, line, line_stop);

            assert_eq!(expected, actual);
        }
    }

    // Index-based reference implementation. The fuzz test above only
    // generates inputs with `line >= line_stop`.
    fn reference_lines_bwd(
        haystack: &[u8],
        mut offset: usize,
        mut line: CoordType,
        line_stop: CoordType,
    ) -> (usize, CoordType) {
        if line >= line_stop {
            while offset > 0 {
                let c = haystack[offset - 1];
                if c == b'\n' {
                    if line == line_stop {
                        break;
                    }
                    line -= 1;
                }
                offset -= 1;
            }
        }
        (offset, line)
    }

    // Even with `line < line_stop` the function seeks back to the line start.
    #[test]
    fn seeks_to_start() {
        for i in 6..=11 {
            let (off, line) = lines_bwd(b"Hello\nWorld\n", i, 123, 456);
            assert_eq!(off, 6); // After "Hello\n"
            assert_eq!(line, 123); // Still on the same line
        }
    }
}

--------------------------------------------------------------------------------
/src/simd/lines_fwd.rs:
--------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

use std::ptr;

use crate::helpers::CoordType;

/// Starting from the `offset` in `haystack` with a current line index of
/// `line`, this seeks to the `line_stop`-nth line and returns the
/// new offset and the line index at that point.
///
/// It returns an offset *past* the newline.
/// If `line` is already at or past `line_stop`, it returns immediately.
/// If the haystack ends before `line_stop` is reached, the returned offset
/// is `haystack.len()` and the returned line is the last one that was reached.
///
/// An `offset` larger than `haystack.len()` is clamped to `haystack.len()`.
pub fn lines_fwd(
    haystack: &[u8],
    offset: usize,
    line: CoordType,
    line_stop: CoordType,
) -> (usize, CoordType) {
    unsafe {
        let beg = haystack.as_ptr();
        let end = beg.add(haystack.len());
        // Clamping keeps the starting pointer inside (or one past) the slice.
        let it = beg.add(offset.min(haystack.len()));
        let (it, line) = lines_fwd_raw(it, end, line, line_stop);
        (it.offset_from_unsigned(beg), line)
    }
}

/// Pointer-based entry point that picks the implementation for the target:
/// the self-updating dispatch pointer on x86_64, NEON on aarch64,
/// and the scalar fallback everywhere else.
unsafe fn lines_fwd_raw(
    beg: *const u8,
    end: *const u8,
    line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    #[cfg(target_arch = "x86_64")]
    return unsafe { LINES_FWD_DISPATCH(beg, end, line, line_stop) };

    #[cfg(target_arch = "aarch64")]
    return unsafe { lines_fwd_neon(beg, end, line, line_stop) };

    #[allow(unreachable_code)]
    return unsafe { lines_fwd_fallback(beg, end, line, line_stop) };
}

/// Scalar implementation, also used by the SIMD variants for the final bytes.
///
/// Does nothing unless `line < line_stop`. Otherwise it consumes bytes until
/// it has stepped over the newline that makes `line` equal to `line_stop`,
/// or until `end` is reached.
unsafe fn lines_fwd_fallback(
    mut beg: *const u8,
    end: *const u8,
    mut line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    unsafe {
        if line < line_stop {
            while !ptr::eq(beg, end) {
                let c = *beg;
                // Advance first, so that we stop *past* the newline.
                beg = beg.add(1);
                if c == b'\n' {
                    line += 1;
                    if line == line_stop {
                        break;
                    }
                }
            }
        }
        (beg, line)
    }
}

// Starts out pointing at `lines_fwd_dispatch`, which overwrites this
// pointer with the selected implementation during the first call.
#[cfg(target_arch = "x86_64")]
static mut LINES_FWD_DISPATCH: unsafe fn(
    beg: *const u8,
    end: *const u8,
    line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) = lines_fwd_dispatch;

/// First-call trampoline: detects AVX2 at runtime, stores the chosen
/// implementation in `LINES_FWD_DISPATCH` and forwards the current call to it.
#[cfg(target_arch = "x86_64")]
unsafe fn lines_fwd_dispatch(
    beg: *const u8,
    end: *const u8,
    line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    let func = if is_x86_feature_detected!("avx2") { lines_fwd_avx2 } else { lines_fwd_fallback };
    unsafe { LINES_FWD_DISPATCH = func };
    unsafe { func(beg, end, line, line_stop) }
}

/// AVX2 implementation. Counts newlines in 128-byte and then 32-byte chunks
/// for as long as a whole chunk can be skipped without reaching `line_stop`,
/// then lets `lines_fwd_fallback` finish byte by byte.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn lines_fwd_avx2(
    mut beg: *const u8,
    end: *const u8,
    mut line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    unsafe {
        use std::arch::x86_64::*;

        // Adds up the four 64-bit lanes of `v`.
        #[inline(always)]
        unsafe fn horizontal_sum_i64(v: __m256i) -> i64 {
            unsafe {
                let hi = _mm256_extracti128_si256::<1>(v);
                let lo = _mm256_castsi256_si128(v);
                let sum = _mm_add_epi64(lo, hi);
                // Moves the upper 64 bits into the lower 64 bits.
                let shuf = _mm_shuffle_epi32::<0b11_10_11_10>(sum);
                let sum = _mm_add_epi64(sum, shuf);
                _mm_cvtsi128_si64(sum)
            }
        }

        let lf = _mm256_set1_epi8(b'\n' as i8);
        let mut remaining = end.offset_from_unsigned(beg);

        if line < line_stop {
            // Unrolling the loop by 4x speeds things up by >3x.
            // It allows us to accumulate matches before doing a single `vpsadbw`.
            while remaining >= 128 {
                let v1 = _mm256_loadu_si256(beg.add(0) as *const _);
                let v2 = _mm256_loadu_si256(beg.add(32) as *const _);
                let v3 = _mm256_loadu_si256(beg.add(64) as *const _);
                let v4 = _mm256_loadu_si256(beg.add(96) as *const _);

                // `vpcmpeqb` leaves each comparison result byte as 0 or -1 (0xff).
                // This allows us to accumulate the comparisons by subtracting them.
                let mut sum = _mm256_setzero_si256();
                sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v1, lf));
                sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v2, lf));
                sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v3, lf));
                sum = _mm256_sub_epi8(sum, _mm256_cmpeq_epi8(v4, lf));

                // Calculate the total number of matches in this chunk.
                let sum = _mm256_sad_epu8(sum, _mm256_setzero_si256());
                let sum = horizontal_sum_i64(sum);

                // The target newline lies inside this chunk:
                // leave it to the next, finer-grained stage.
                let line_next = line + sum as CoordType;
                if line_next >= line_stop {
                    break;
                }

                beg = beg.add(128);
                remaining -= 128;
                line = line_next;
            }

            while remaining >= 32 {
                let v = _mm256_loadu_si256(beg as *const _);
                let c = _mm256_cmpeq_epi8(v, lf);

                // If you ask an LLM, the best way to do this is
                // to do a `vpmovmskb` followed by `popcnt`.
                // On contemporary hardware that's a bad idea though.
                let ones = _mm256_and_si256(c, _mm256_set1_epi8(0x01));
                let sum = _mm256_sad_epu8(ones, _mm256_setzero_si256());
                let sum = horizontal_sum_i64(sum);

                let line_next = line + sum as CoordType;
                if line_next >= line_stop {
                    break;
                }

                beg = beg.add(32);
                remaining -= 32;
                line = line_next;
            }
        }

        lines_fwd_fallback(beg, end, line, line_stop)
    }
}

/// NEON implementation. Counts newlines in 64-byte and then 16-byte chunks
/// for as long as a whole chunk can be skipped without reaching `line_stop`,
/// then lets `lines_fwd_fallback` finish byte by byte.
#[cfg(target_arch = "aarch64")]
unsafe fn lines_fwd_neon(
    mut beg: *const u8,
    end: *const u8,
    mut line: CoordType,
    line_stop: CoordType,
) -> (*const u8, CoordType) {
    unsafe {
        use std::arch::aarch64::*;

        let lf = vdupq_n_u8(b'\n');
        let mut remaining = end.offset_from_unsigned(beg);

        if line < line_stop {
            while remaining >= 64 {
                let v1 = vld1q_u8(beg.add(0));
                let v2 = vld1q_u8(beg.add(16));
                let v3 = vld1q_u8(beg.add(32));
                let v4 = vld1q_u8(beg.add(48));

                // `vceqq_u8` leaves each comparison result byte as 0 or -1 (0xff).
                // This allows us to accumulate the comparisons by subtracting them.
                let mut sum = vdupq_n_u8(0);
                sum = vsubq_u8(sum, vceqq_u8(v1, lf));
                sum = vsubq_u8(sum, vceqq_u8(v2, lf));
                sum = vsubq_u8(sum, vceqq_u8(v3, lf));
                sum = vsubq_u8(sum, vceqq_u8(v4, lf));

                // 16 lanes of at most 4 each: the horizontal sum is at most 64.
                let sum = vaddvq_u8(sum);

                let line_next = line + sum as CoordType;
                if line_next >= line_stop {
                    break;
                }

                beg = beg.add(64);
                remaining -= 64;
                line = line_next;
            }

            while remaining >= 16 {
                let v = vld1q_u8(beg);
                let c = vceqq_u8(v, lf);
                // Turn 0xff matches into 0x01 so that the horizontal add yields the count.
                let c = vandq_u8(c, vdupq_n_u8(0x01));
                let sum = vaddvq_u8(c);

                let line_next = line + sum as CoordType;
                if line_next >= line_stop {
                    break;
                }

                beg = beg.add(16);
                remaining -= 16;
                line = line_next;
            }
        }

        lines_fwd_fallback(beg, end, line, line_stop)
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::helpers::CoordType;
    use crate::simd::test::*;

    // Compares `lines_fwd` against the straightforward reference below
    // for 1000 pseudo-random (offset, line, line_stop) combinations.
    #[test]
    fn pseudo_fuzz() {
        let text = generate_random_text(1024);
        let lines = count_lines(&text);
        let mut offset_rng = make_rng();
        let mut line_rng = make_rng();
        let mut line_distance_rng = make_rng();

        for _ in 0..1000 {
            let offset = offset_rng() % (text.len() + 1);
            let line = line_rng() % 100;
            let line_stop = line + line_distance_rng() % (lines + 1);

            let line = line as CoordType;
            let line_stop = line_stop as CoordType;

            let expected = reference_lines_fwd(text.as_bytes(), offset, line, line_stop);
            let actual = lines_fwd(text.as_bytes(), offset, line, line_stop);

            assert_eq!(expected, actual);
        }
    }

    // Index-based reference implementation.
    fn reference_lines_fwd(
        haystack: &[u8],
        mut offset: usize,
        mut line: CoordType,
        line_stop: CoordType,
    ) -> (usize, CoordType) {
        if line < line_stop {
            while offset < haystack.len() {
                let c = haystack[offset];
                offset += 1;
                if c == b'\n' {
                    line += 1;
                    if line == line_stop {
                        break;
                    }
                }
            }
        }
        (offset, line)
    }
}

--------------------------------------------------------------------------------
/src/simd/memchr2.rs:
--------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! `memchr`, but with two needles.

use std::ptr;

/// `memchr`, but with two needles.
///
/// Returns the index of the first occurrence of either needle in the
/// `haystack`. If no needle is found, `haystack.len()` is returned.
/// `offset` specifies the index to start searching from.
/// An `offset` larger than `haystack.len()` is clamped to `haystack.len()`.
pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8], offset: usize) -> usize {
    unsafe {
        let beg = haystack.as_ptr();
        let end = beg.add(haystack.len());
        // Clamping keeps the starting pointer inside (or one past) the slice.
        let it = beg.add(offset.min(haystack.len()));
        let it = memchr2_raw(needle1, needle2, it, end);
        it.offset_from_unsigned(beg)
    }
}

/// Pointer-based entry point that picks the implementation for the target:
/// the self-updating dispatch pointer on x86/x86_64, NEON on aarch64,
/// and the scalar fallback everywhere else.
unsafe fn memchr2_raw(needle1: u8, needle2: u8, beg: *const u8, end: *const u8) -> *const u8 {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    return unsafe { MEMCHR2_DISPATCH(needle1, needle2, beg, end) };

    #[cfg(target_arch = "aarch64")]
    return unsafe { memchr2_neon(needle1, needle2, beg, end) };

    #[allow(unreachable_code)]
    return unsafe { memchr2_fallback(needle1, needle2, beg, end) };
}

/// Scalar implementation, also used by the SIMD variants for the final bytes.
/// Returns a pointer to the first matching byte, or `end` if there is none.
unsafe fn memchr2_fallback(
    needle1: u8,
    needle2: u8,
    mut beg: *const u8,
    end: *const u8,
) -> *const u8 {
    unsafe {
        while !ptr::eq(beg, end) {
            let ch = *beg;
            if ch == needle1 || ch == needle2 {
                break;
            }
            beg = beg.add(1);
        }
        beg
    }
}

// In order to make `memchr2_raw` slim and fast, we use a function pointer that updates
// itself to the correct implementation on the first call. This reduces binary size.
// It would also reduce branches if we had >2 implementations (a jump still needs to be predicted).
// NOTE that this ONLY works if Control Flow Guard is disabled on Windows.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
static mut MEMCHR2_DISPATCH: unsafe fn(
    needle1: u8,
    needle2: u8,
    beg: *const u8,
    end: *const u8,
) -> *const u8 = memchr2_dispatch;

/// First-call trampoline: detects AVX2 at runtime, stores the chosen
/// implementation in `MEMCHR2_DISPATCH` and forwards the current call to it.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn memchr2_dispatch(needle1: u8, needle2: u8, beg: *const u8, end: *const u8) -> *const u8 {
    let func = if is_x86_feature_detected!("avx2") { memchr2_avx2 } else { memchr2_fallback };
    unsafe { MEMCHR2_DISPATCH = func };
    unsafe { func(needle1, needle2, beg, end) }
}

// FWIW, I found that adding support for AVX512 was not useful at the time,
// as it only marginally improved file load performance by <5%.
/// AVX2 implementation: tests 32 bytes per iteration against both needles
/// and hands the remaining (<32) bytes to `memchr2_fallback`.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn memchr2_avx2(needle1: u8, needle2: u8, mut beg: *const u8, end: *const u8) -> *const u8 {
    unsafe {
        #[cfg(target_arch = "x86")]
        use std::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use std::arch::x86_64::*;

        let n1 = _mm256_set1_epi8(needle1 as i8);
        let n2 = _mm256_set1_epi8(needle2 as i8);
        let mut remaining = end.offset_from_unsigned(beg);

        while remaining >= 32 {
            let v = _mm256_loadu_si256(beg as *const _);
            let a = _mm256_cmpeq_epi8(v, n1);
            let b = _mm256_cmpeq_epi8(v, n2);
            let c = _mm256_or_si256(a, b);
            // One mask bit per byte; bit 0 corresponds to the byte at `beg`.
            let m = _mm256_movemask_epi8(c) as u32;

            if m != 0 {
                return beg.add(m.trailing_zeros() as usize);
            }

            beg = beg.add(32);
            remaining -= 32;
        }

        memchr2_fallback(needle1, needle2, beg, end)
    }
}

/// NEON implementation: tests 16 bytes per iteration against both needles
/// and hands the remaining (<16) bytes to `memchr2_fallback`.
#[cfg(target_arch = "aarch64")]
unsafe fn memchr2_neon(needle1: u8, needle2: u8, mut beg: *const u8, end: *const u8) -> *const u8 {
    unsafe {
        use std::arch::aarch64::*;

        if end.offset_from_unsigned(beg) >= 16 {
            let n1 = vdupq_n_u8(needle1);
            let n2 = vdupq_n_u8(needle2);

            loop {
                let v = vld1q_u8(beg as *const _);
                let a = vceqq_u8(v, n1);
                let b = vceqq_u8(v, n2);
                let c = vorrq_u8(a, b);

                // https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
                // The narrowing shift leaves 4 mask bits per input byte,
                // which is why the bit index is divided by 4 (`>> 2`) below.
                let m = vreinterpretq_u16_u8(c);
                let m = vshrn_n_u16(m, 4);
                let m = vreinterpret_u64_u8(m);
                let m = vget_lane_u64(m, 0);

                if m != 0 {
                    return beg.add(m.trailing_zeros() as usize >> 2);
                }

                beg = beg.add(16);
                if end.offset_from_unsigned(beg) < 16 {
                    break;
                }
            }
        }

        memchr2_fallback(needle1, needle2, beg, end)
    }
}

#[cfg(test)]
mod tests {
    use std::slice;

    use super::*;
    use crate::sys;

    #[test]
    fn test_empty() {
        assert_eq!(memchr2(b'a', b'b', b"", 0), 0);
    }

    #[test]
    fn test_basic() {
        let haystack = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
        // 43 bytes: one full 32-byte chunk plus a scalar tail ending in 'Q'.
        let haystack = &haystack[..43];

        assert_eq!(memchr2(b'a', b'z', haystack, 0), 0);
        assert_eq!(memchr2(b'p', b'q', haystack, 0), 15);
        assert_eq!(memchr2(b'Q', b'Z', haystack, 0), 42);
        assert_eq!(memchr2(b'0', b'9', haystack, 0), haystack.len());
    }

    // Test that it doesn't match before/after the start offset respectively.
    #[test]
    fn test_with_offset() {
        let haystack = b"abcdefghabcdefghabcdefghabcdefghabcdefgh";

        assert_eq!(memchr2(b'a', b'b', haystack, 0), 0);
        assert_eq!(memchr2(b'a', b'b', haystack, 1), 1);
        assert_eq!(memchr2(b'a', b'b', haystack, 2), 8);
        assert_eq!(memchr2(b'a', b'b', haystack, 9), 9);
        assert_eq!(memchr2(b'a', b'b', haystack, 16), 16);
        // An out-of-range offset is clamped to `haystack.len()`.
        assert_eq!(memchr2(b'a', b'b', haystack, 41), 40);
    }

    // Test memory access safety at page boundaries.
    // The test is a success if it doesn't segfault.
    #[test]
    fn test_page_boundary() {
        let page = unsafe {
            const PAGE_SIZE: usize = 64 * 1024; // 64 KiB to cover many architectures.

            // 3 pages: uncommitted, committed, uncommitted
            let ptr = sys::virtual_reserve(PAGE_SIZE * 3).unwrap();
            sys::virtual_commit(ptr.add(PAGE_SIZE), PAGE_SIZE).unwrap();
            slice::from_raw_parts_mut(ptr.add(PAGE_SIZE).as_ptr(), PAGE_SIZE)
        };

        page.fill(b'a');

        // Test if it seeks beyond the page boundary.
        assert_eq!(memchr2(b'\0', b'\0', &page[page.len() - 40..], 0), 40);
        // Test if it seeks before the page boundary for the masked/partial load.
        assert_eq!(memchr2(b'\0', b'\0', &page[..10], 0), 10);
    }
}

--------------------------------------------------------------------------------
/src/simd/mod.rs:
--------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Provides various high-throughput utilities.

pub mod lines_bwd;
pub mod lines_fwd;
mod memchr2;
mod memset;

pub use lines_bwd::*;
pub use lines_fwd::*;
pub use memchr2::*;
pub use memset::*;

// Helpers shared by the unit tests of the submodules.
#[cfg(test)]
mod test {
    // Knuth's MMIX LCG
    // The seed is fixed, so every generator yields the same sequence.
    pub fn make_rng() -> impl FnMut() -> usize {
        let mut state = 1442695040888963407u64;
        move || {
            state = state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
            state as usize
        }
    }

    // Produces `len` characters drawn from hex digits and newlines.
    // 4 of the 20 alphabet entries are `\n`.
    pub fn generate_random_text(len: usize) -> String {
        const ALPHABET: &[u8; 20] = b"0123456789abcdef\n\n\n\n";

        let mut rng = make_rng();
        let mut res = String::new();

        for _ in 0..len {
            res.push(ALPHABET[rng() % ALPHABET.len()] as char);
        }

        res
    }

    // Number of lines as reported by `str::lines`.
    pub fn count_lines(text: &str) -> usize {
        text.lines().count()
    }
}

--------------------------------------------------------------------------------
/src/sys/mod.rs:
--------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Platform abstractions.
5 | 6 | #[cfg(unix)] 7 | mod unix; 8 | #[cfg(windows)] 9 | mod windows; 10 | 11 | #[cfg(not(windows))] 12 | pub use std::fs::canonicalize; 13 | 14 | #[cfg(unix)] 15 | pub use unix::*; 16 | #[cfg(windows)] 17 | pub use windows::*; 18 | -------------------------------------------------------------------------------- /src/unicode/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | //! Everything related to Unicode lives here. 5 | 6 | mod measurement; 7 | mod tables; 8 | mod utf8; 9 | 10 | pub use measurement::*; 11 | pub use utf8::*; 12 | -------------------------------------------------------------------------------- /src/unicode/utf8.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | use std::{hint, iter}; 5 | 6 | /// An iterator over UTF-8 encoded characters. 7 | /// 8 | /// This differs from [`std::str::Chars`] in that it works on unsanitized 9 | /// byte slices and transparently replaces invalid UTF-8 sequences with U+FFFD. 10 | /// 11 | /// This follows ICU's bitmask approach for `U8_NEXT_OR_FFFD` relatively 12 | /// closely. This is important for compatibility, because it implements the 13 | /// WHATWG recommendation for UTF8 error recovery. It's also helpful, because 14 | /// the excellent folks at ICU have probably spent a lot of time optimizing it. 15 | #[derive(Clone, Copy)] 16 | pub struct Utf8Chars<'a> { 17 | source: &'a [u8], 18 | offset: usize, 19 | } 20 | 21 | impl<'a> Utf8Chars<'a> { 22 | /// Creates a new `Utf8Chars` iterator starting at the given `offset`. 23 | pub fn new(source: &'a [u8], offset: usize) -> Self { 24 | Self { source, offset } 25 | } 26 | 27 | /// Returns the byte slice this iterator was created with. 
28 | pub fn source(&self) -> &'a [u8] { 29 | self.source 30 | } 31 | 32 | /// Checks if the source is empty. 33 | pub fn is_empty(&self) -> bool { 34 | self.source.is_empty() 35 | } 36 | 37 | /// Returns the length of the source. 38 | pub fn len(&self) -> usize { 39 | self.source.len() 40 | } 41 | 42 | /// Returns the current offset in the byte slice. 43 | /// 44 | /// This will be past the last returned character. 45 | pub fn offset(&self) -> usize { 46 | self.offset 47 | } 48 | 49 | /// Sets the offset to continue iterating from. 50 | pub fn seek(&mut self, offset: usize) { 51 | self.offset = offset; 52 | } 53 | 54 | /// Returns true if `next` will return another character. 55 | pub fn has_next(&self) -> bool { 56 | self.offset < self.source.len() 57 | } 58 | 59 | // I found that on mixed 50/50 English/Non-English text, 60 | // performance actually suffers when this gets inlined. 61 | #[cold] 62 | fn next_slow(&mut self, c: u8) -> char { 63 | if self.offset >= self.source.len() { 64 | return Self::fffd(); 65 | } 66 | 67 | let mut cp = c as u32; 68 | 69 | if cp < 0xE0 { 70 | // UTF8-2 = %xC2-DF UTF8-tail 71 | 72 | if cp < 0xC2 { 73 | return Self::fffd(); 74 | } 75 | 76 | // The lead byte is 110xxxxx 77 | // -> Strip off the 110 prefix 78 | cp &= !0xE0; 79 | } else if cp < 0xF0 { 80 | // UTF8-3 = 81 | // %xE0 %xA0-BF UTF8-tail 82 | // %xE1-EC UTF8-tail UTF8-tail 83 | // %xED %x80-9F UTF8-tail 84 | // %xEE-EF UTF8-tail UTF8-tail 85 | 86 | // This is a pretty neat approach seen in ICU4C, because it's a 1:1 translation of the RFC. 87 | // I don't understand why others don't do the same thing. It's rather performant. 
88 | const BITS_80_9F: u8 = 1 << 0b100; // 0x80-9F, aka 0b100xxxxx 89 | const BITS_A0_BF: u8 = 1 << 0b101; // 0xA0-BF, aka 0b101xxxxx 90 | const BITS_BOTH: u8 = BITS_80_9F | BITS_A0_BF; 91 | const LEAD_TRAIL1_BITS: [u8; 16] = [ 92 | // v-- lead byte 93 | BITS_A0_BF, // 0xE0 94 | BITS_BOTH, // 0xE1 95 | BITS_BOTH, // 0xE2 96 | BITS_BOTH, // 0xE3 97 | BITS_BOTH, // 0xE4 98 | BITS_BOTH, // 0xE5 99 | BITS_BOTH, // 0xE6 100 | BITS_BOTH, // 0xE7 101 | BITS_BOTH, // 0xE8 102 | BITS_BOTH, // 0xE9 103 | BITS_BOTH, // 0xEA 104 | BITS_BOTH, // 0xEB 105 | BITS_BOTH, // 0xEC 106 | BITS_80_9F, // 0xED 107 | BITS_BOTH, // 0xEE 108 | BITS_BOTH, // 0xEF 109 | ]; 110 | 111 | // The lead byte is 1110xxxx 112 | // -> Strip off the 1110 prefix 113 | cp &= !0xF0; 114 | 115 | let t = self.source[self.offset] as u32; 116 | if LEAD_TRAIL1_BITS[cp as usize] & (1 << (t >> 5)) == 0 { 117 | return Self::fffd(); 118 | } 119 | cp = (cp << 6) | (t & 0x3F); 120 | 121 | self.offset += 1; 122 | if self.offset >= self.source.len() { 123 | return Self::fffd(); 124 | } 125 | } else { 126 | // UTF8-4 = 127 | // %xF0 %x90-BF UTF8-tail UTF8-tail 128 | // %xF1-F3 UTF8-tail UTF8-tail UTF8-tail 129 | // %xF4 %x80-8F UTF8-tail UTF8-tail 130 | 131 | // This is similar to the above, but with the indices flipped: 132 | // The trail byte is the index and the lead byte mask is the value. 133 | // This is because the split at 0x90 requires more bits than fit into an u8. 134 | const TRAIL1_LEAD_BITS: [u8; 16] = [ 135 | // --------- 0xF4 lead 136 | // | ... 
137 | // | +---- 0xF0 lead 138 | // v v 139 | 0b_00000, // 140 | 0b_00000, // 141 | 0b_00000, // 142 | 0b_00000, // 143 | 0b_00000, // 144 | 0b_00000, // 145 | 0b_00000, // trail bytes: 146 | 0b_00000, // 147 | 0b_11110, // 0x80-8F -> 0x80-8F can be preceded by 0xF1-F4 148 | 0b_01111, // 0x90-9F -v 149 | 0b_01111, // 0xA0-AF -> 0x90-BF can be preceded by 0xF0-F3 150 | 0b_01111, // 0xB0-BF -^ 151 | 0b_00000, // 152 | 0b_00000, // 153 | 0b_00000, // 154 | 0b_00000, // 155 | ]; 156 | 157 | // The lead byte *may* be 11110xxx, but could also be e.g. 11111xxx. 158 | // -> Only strip off the 1111 prefix 159 | cp &= !0xF0; 160 | 161 | // Now we can verify if it's actually <= 0xF4. 162 | // Curiously, this if condition does a lot of heavy lifting for 163 | // performance (+13%). I think it's just a coincidence though. 164 | if cp > 4 { 165 | return Self::fffd(); 166 | } 167 | 168 | let t = self.source[self.offset] as u32; 169 | if TRAIL1_LEAD_BITS[(t >> 4) as usize] & (1 << cp) == 0 { 170 | return Self::fffd(); 171 | } 172 | cp = (cp << 6) | (t & 0x3F); 173 | 174 | self.offset += 1; 175 | if self.offset >= self.source.len() { 176 | return Self::fffd(); 177 | } 178 | 179 | // UTF8-tail = %x80-BF 180 | let t = (self.source[self.offset] as u32).wrapping_sub(0x80); 181 | if t > 0x3F { 182 | return Self::fffd(); 183 | } 184 | cp = (cp << 6) | t; 185 | 186 | self.offset += 1; 187 | if self.offset >= self.source.len() { 188 | return Self::fffd(); 189 | } 190 | } 191 | 192 | // SAFETY: All branches above check for `if self.offset >= self.source.len()` 193 | // one way or another. This is here because the compiler doesn't get it otherwise. 
194 | unsafe { hint::assert_unchecked(self.offset < self.source.len()) }; 195 | 196 | // UTF8-tail = %x80-BF 197 | let t = (self.source[self.offset] as u32).wrapping_sub(0x80); 198 | if t > 0x3F { 199 | return Self::fffd(); 200 | } 201 | cp = (cp << 6) | t; 202 | 203 | self.offset += 1; 204 | 205 | // SAFETY: If `cp` wasn't a valid codepoint, we already returned U+FFFD above. 206 | unsafe { char::from_u32_unchecked(cp) } 207 | } 208 | 209 | // This simultaneously serves as a `cold_path` marker. 210 | // It improves performance by ~5% and reduces code size. 211 | #[cold] 212 | #[inline(always)] 213 | fn fffd() -> char { 214 | '\u{FFFD}' 215 | } 216 | } 217 | 218 | impl Iterator for Utf8Chars<'_> { 219 | type Item = char; 220 | 221 | #[inline] 222 | fn next(&mut self) -> Option { 223 | if self.offset >= self.source.len() { 224 | return None; 225 | } 226 | 227 | let c = self.source[self.offset]; 228 | self.offset += 1; 229 | 230 | // Fast-passing ASCII allows this function to be trivially inlined everywhere, 231 | // as the full decoder is a little too large for that. 232 | if (c & 0x80) == 0 { 233 | // UTF8-1 = %x00-7F 234 | Some(c as char) 235 | } else { 236 | // Weirdly enough, adding a hint here to assert that `next_slow` 237 | // only returns codepoints >= 0x80 makes `ucd` ~5% slower. 238 | Some(self.next_slow(c)) 239 | } 240 | } 241 | 242 | #[inline] 243 | fn size_hint(&self) -> (usize, Option) { 244 | // Lower bound: All remaining bytes are 4-byte sequences. 245 | // Upper bound: All remaining bytes are ASCII. 
246 | let remaining = self.source.len() - self.offset; 247 | (remaining / 4, Some(remaining)) 248 | } 249 | } 250 | 251 | impl iter::FusedIterator for Utf8Chars<'_> {} 252 | 253 | #[cfg(test)] 254 | mod tests { 255 | use super::*; 256 | 257 | #[test] 258 | fn test_broken_utf8() { 259 | let source = [b'a', 0xED, 0xA0, 0x80, b'b']; 260 | let mut chars = Utf8Chars::new(&source, 0); 261 | let mut offset = 0; 262 | for chunk in source.utf8_chunks() { 263 | for ch in chunk.valid().chars() { 264 | offset += ch.len_utf8(); 265 | assert_eq!(chars.next(), Some(ch)); 266 | assert_eq!(chars.offset(), offset); 267 | } 268 | if !chunk.invalid().is_empty() { 269 | offset += chunk.invalid().len(); 270 | assert_eq!(chars.next(), Some('\u{FFFD}')); 271 | assert_eq!(chars.offset(), offset); 272 | } 273 | } 274 | } 275 | } 276 | -------------------------------------------------------------------------------- /tools/grapheme-table-gen/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "grapheme-table-gen" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | anyhow = "1.0.95" 8 | chrono = "0.4.39" 9 | indoc = "2.0.5" 10 | pico-args = { version = "0.5.0", features = ["eq-separator"] } 11 | rayon = "1.10.0" 12 | roxmltree = { version = "0.20.0", default-features = false, features = ["std"] } 13 | -------------------------------------------------------------------------------- /tools/grapheme-table-gen/README.md: -------------------------------------------------------------------------------- 1 | # Grapheme Table Generator 2 | 3 | This tool processes Unicode Character Database (UCD) XML files to generate efficient, multi-stage trie lookup tables for properties relevant to terminal applications: 4 | * Grapheme cluster breaking rules 5 | * Line breaking rules (optional) 6 | * Character width properties 7 | 8 | ## Usage 9 | 10 | * Download 
[ucd.nounihan.grouped.zip](https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip) 11 | * Run some equivalent of: 12 | ```sh 13 | grapheme-table-gen --lang=rust --extended --no-ambiguous --line-breaks path/to/ucd.nounihan.grouped.xml 14 | ``` 15 | * Place the result in `src/unicode/tables.rs` 16 | --------------------------------------------------------------------------------