├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── actions-rs │ └── grcov.yml ├── dependabot.yml └── workflows │ ├── README.md │ ├── cargo-check.yml │ ├── cargo-clippy.yml │ ├── cargo-test.yml │ ├── codecov-io.yml │ ├── coveralls.yml │ ├── pages.yml │ └── release.yml ├── .gitignore ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── Cargo.toml ├── FUNDING.yml ├── LICENSE ├── README.md ├── examples ├── README.md ├── deprecated │ ├── constraint.wr │ ├── generic.wr │ ├── represent.wr │ ├── types.wr │ └── union.wr ├── fizzbuzz.wr └── hello-world.wr ├── pages ├── book │ └── src │ │ ├── INTRODUCTION.md │ │ ├── SUMMARY.md │ │ └── design-notes │ │ ├── backend.md │ │ ├── language-constructs.md │ │ ├── threads.md │ │ └── user-defined-optimizations.md └── static │ ├── assets │ ├── favicon.png │ ├── transparent_logo.png │ ├── white_logo.png │ └── wright_logo.svg │ └── index.html └── wright ├── Cargo.toml ├── benches ├── lexer.rs └── parser.rs ├── build.rs ├── rustfmt.toml ├── src ├── ast.rs ├── ast │ ├── decl.rs │ ├── decl │ │ └── import.rs │ ├── identifier.rs │ ├── literal.rs │ ├── old │ │ ├── astOld.rs │ │ ├── expression.rs │ │ ├── expression │ │ │ ├── primary.rs │ │ │ ├── primary │ │ │ │ ├── integer_literal.rs │ │ │ │ └── parens.rs │ │ │ └── unary.rs │ │ ├── test_utils.rs │ │ └── ty.rs │ ├── path.rs │ └── ty.rs ├── bin │ └── wright.rs ├── lexer.rs ├── lexer │ ├── comments.rs │ ├── identifier.rs │ ├── integer_literal.rs │ ├── quoted.rs │ ├── token.rs │ └── trivial.rs ├── lib.rs ├── parser.rs ├── parser │ ├── decl.rs │ ├── decl │ │ └── import.rs │ ├── error.rs │ ├── identifier.rs │ ├── literal.rs │ ├── literal │ │ ├── boolean.rs │ │ └── integer.rs │ ├── old │ │ ├── ast.rs │ │ ├── ast │ │ │ ├── declaration.rs │ │ │ ├── declaration │ │ │ │ ├── class.rs │ │ │ │ ├── enum.rs │ │ │ │ ├── function.rs │ │ │ │ ├── generics.rs │ │ │ │ ├── import.rs │ │ │ │ ├── module.rs │ │ │ │ ├── type.rs │ │ │ │ ├── union.rs │ │ │ │ ├── visibility.rs │ │ │ │ └── where_clause.rs │ │ │ ├── expression.rs │ │ │ ├── expression │ │ │ │ ├── block.rs │ │ │ │ ├── literal.rs │ │ │ │ ├── literal │ │ │ │ │ ├── boolean.rs │ │ │ │ │ ├── character.rs │ │ │ │ │ ├── escapes.rs │ │ │ │ │ ├── integer.rs │ │ │ │ │ └── string.rs │ │ │ │ ├── parentheses.rs │ │ │ │ └── primary.rs │ │ │ ├── identifier.rs │ │ │ ├── metadata.rs │ │ │ ├── path.rs │ │ │ ├── statement.rs │ │ │ ├── statement │ │ │ │ └── bind.rs │ │ │ └── types.rs │ │ ├── error.rs │ │ ├── state.rs │ │ ├── util.rs │ │ └── util │ │ │ ├── discard_error.rs │ │ │ ├── erase.rs │ │ │ ├── first_successful.rs │ │ │ ├── ignore.rs │ │ │ └── map.rs │ ├── path.rs │ ├── ty.rs │ ├── ty │ │ ├── primitive.rs │ │ └── reference.rs │ └── whitespace.rs ├── repl.rs ├── reporting.rs ├── source_tracking.rs ├── source_tracking │ ├── filename.rs │ ├── fragment.rs │ ├── immutable_string.rs │ └── source.rs ├── util.rs └── util │ └── supports_unicode.rs └── tests ├── lexer.rs └── parser.rs /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 
12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Screenshots/Logs** 20 | If applicable, add screenshots or logs to help explain your problem. 21 | 22 | **System Information** 23 | - OS: [e.g. Ubuntu 18.04] 24 | - Wright Version: [e.g. 1.0.0] 25 | 26 | **Additional context** 27 | Add any other context about the problem here. 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[FEATURE REQUEST]" 5 | labels: feature request 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/actions-rs/grcov.yml: -------------------------------------------------------------------------------- 1 | branch: true 2 | output-type: lcov 3 | output-path: ./lcov.info 4 | ignore-not-existing: true 5 | llvm: true 6 | ignore: 7 | - "/*" 8 | - "../*" 9 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/wright" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | -------------------------------------------------------------------------------- /.github/workflows/README.md: -------------------------------------------------------------------------------- 1 | # Note 2 | If you edit any of the files in this directory that include an LLVM build on multiple platforms, please make sure to 3 | update the other ones to match unless you have a reason not to. 4 | -------------------------------------------------------------------------------- /.github/workflows/cargo-check.yml: -------------------------------------------------------------------------------- 1 | # Combined cargo check with LLVM installation for the three major platforms. 2 | 3 | name: Cargo Check 4 | on: ["push", "pull_request"] 5 | 6 | # Cancel in-progress runs for previous commits if there are any that haven't completed yet. 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 9 | cancel-in-progress: true 10 | 11 | jobs: 12 | check: 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | # Different features to check against on all platforms. Currently each group is one feature, since each 17 | # depends on the previous, but more combinations could be added in the future.
18 | features: 19 | - none 20 | - std 21 | - supports-unicode 22 | - source-tracking 23 | - reporting 24 | - file_memmap 25 | - ast-models 26 | - lexer 27 | - parser 28 | - wright_library_defaults 29 | - wright_binary 30 | - default 31 | os: [ubuntu-24.04, windows-latest, macos-latest] 32 | shell: ["bash", "msys2 {0}"] 33 | include: 34 | - os: macos-latest 35 | llvm-install-dir: /opt/homebrew/opt/llvm 36 | - targets: --tests 37 | - features: wright_binary 38 | targets: --bins --tests 39 | - features: default 40 | targets: --bins --tests 41 | exclude: 42 | - os: windows-latest 43 | shell: bash 44 | - os: macos-latest 45 | shell: 'msys2 {0}' 46 | - os: ubuntu-24.04 47 | shell: 'msys2 {0}' 48 | 49 | runs-on: ${{ matrix.os }} 50 | continue-on-error: ${{ matrix.allow-failure || false }} 51 | 52 | defaults: 53 | run: 54 | shell: ${{ matrix.shell }} 55 | 56 | steps: 57 | - name: Checkout Wright source 58 | uses: actions/checkout@v4 59 | 60 | # Use MSYS2 on windows to install and check LLVM 61 | - uses: msys2/setup-msys2@v2 62 | if: ${{ matrix.os == 'windows-latest' }} 63 | with: 64 | update: true 65 | # Use special mingw LLVM package. 66 | # Also install the current stable rust 67 | install: >- 68 | mingw-w64-x86_64-llvm 69 | mingw-w64-x86_64-rust 70 | 71 | # Use stable Rust toolchain 72 | - uses: actions-rs/toolchain@v1 73 | with: 74 | toolchain: stable 75 | 76 | - name: Install LLVM (Ubuntu Only) 77 | if: ${{ matrix.os == 'ubuntu-24.04' }} 78 | # See: https://apt.llvm.org/ 79 | # Last line: https://gitlab.com/taricorp/llvm-sys.rs/-/issues/13 80 | run: | 81 | wget https://apt.llvm.org/llvm.sh 82 | chmod +x llvm.sh 83 | sudo ./llvm.sh 18 all 84 | sudo apt install libpolly-18-dev libz-dev 85 | 86 | - name: Install LLVM 18 (Mac Only) 87 | if: ${{ matrix.os == 'macos-latest' }} 88 | run: brew install llvm@18 89 | 90 | - name: Get the LLVM version (Windows Only) 91 | if: ${{ matrix.os == 'windows-latest' }} 92 | run: llvm-config --version 93 | 94 | - name: Get the LLVM version (Mac Only) 95 | if: ${{ matrix.os == 'macos-latest' }} 96 | run: ${{ matrix.llvm-install-dir }}/bin/llvm-config --version 97 | # For some reason, this seems to error even when llvm-config is available somewhere. Leaving it in for now. 98 | continue-on-error: true 99 | 100 | - name: Get the LLVM version (Ubuntu Only) 101 | if: ${{ matrix.os == 'ubuntu-24.04' }} 102 | run: llvm-config --version 103 | # For some reason, this seems to error even when llvm-config is available somewhere. Leaving it in for now. 104 | continue-on-error: true 105 | 106 | - name: Run cargo check (Mac Only) 107 | if: ${{ matrix.os == 'macos-latest' }} 108 | run: cargo check --no-default-features -F ${{ matrix.features }} ${{ matrix.targets }} 109 | env: 110 | LLVM_SYS_180_PREFIX: ${{ matrix.llvm-install-dir }} 111 | 112 | - name: Run cargo check (Ubuntu & Windows) 113 | if: ${{ matrix.os != 'macos-latest' }} 114 | run: cargo check --no-default-features -F ${{ matrix.features }} ${{ matrix.targets }} 115 | -------------------------------------------------------------------------------- /.github/workflows/cargo-clippy.yml: -------------------------------------------------------------------------------- 1 | name: Clippy 2 | 3 | on: ["push", "pull_request"] 4 | 5 | # Cancel in-progress runs for previous commits if there are any that haven't completed yet. 
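# The `group` key below combines the workflow name with the PR number (or the branch ref for plain pushes), so each new push cancels any older run still in progress for the same branch or PR.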
6 | concurrency: 7 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 8 | cancel-in-progress: true 9 | 10 | jobs: 11 | clippy: 12 | runs-on: ubuntu-24.04 13 | steps: 14 | - uses: actions/checkout@v4 15 | # Use stable Rust toolchain 16 | - uses: actions-rs/toolchain@v1 17 | with: 18 | toolchain: stable 19 | - name: Install LLVM 20 | # See: https://apt.llvm.org/ 21 | # Last line: https://gitlab.com/taricorp/llvm-sys.rs/-/issues/13 22 | run: | 23 | wget https://apt.llvm.org/llvm.sh 24 | chmod +x llvm.sh 25 | sudo ./llvm.sh 18 all 26 | sudo apt install libpolly-18-dev libz-dev 27 | 28 | - name: Run Clippy 29 | run: cargo clippy -- --deny clippy::all --deny warnings 30 | -------------------------------------------------------------------------------- /.github/workflows/cargo-test.yml: -------------------------------------------------------------------------------- 1 | # Combined cargo test with LLVM installation for the three major platforms. 2 | 3 | name: Cargo Test 4 | on: ["push", "pull_request"] 5 | 6 | # Cancel in-progress runs for previous commits if there are any that haven't completed yet. 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 9 | cancel-in-progress: true 10 | 11 | jobs: 12 | test: 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | # Different features to check against on all platforms. Currently each group is one feature, since each 17 | # depends on the previous, but more combinations could be added in the future. 18 | features: 19 | - none 20 | - std 21 | - supports-unicode 22 | - source-tracking 23 | - reporting 24 | - file_memmap 25 | - ast-models 26 | - lexer 27 | - parser 28 | - wright_library_defaults 29 | - wright_binary 30 | - default 31 | os: [ubuntu-24.04, windows-latest, macos-latest] 32 | shell: ["bash", "msys2 {0}"] 33 | include: 34 | - os: macos-latest 35 | llvm-install-dir: /opt/homebrew/opt/llvm 36 | exclude: 37 | - os: windows-latest 38 | shell: bash 39 | - os: macos-latest 40 | shell: 'msys2 {0}' 41 | - os: ubuntu-24.04 42 | shell: 'msys2 {0}' 43 | 44 | runs-on: ${{ matrix.os }} 45 | continue-on-error: ${{ matrix.allow-failure || false }} 46 | 47 | defaults: 48 | run: 49 | shell: ${{ matrix.shell }} 50 | 51 | steps: 52 | - name: Checkout Wright source 53 | uses: actions/checkout@v4 54 | 55 | # Use MSYS2 on windows to install and check LLVM 56 | - uses: msys2/setup-msys2@v2 57 | if: ${{ matrix.os == 'windows-latest' }} 58 | with: 59 | update: true 60 | # Use special mingw LLVM package. 
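# (Presumably the Rust toolchain is also installed from MSYS2 so that rustc and these LLVM libraries share the same mingw-w64/GNU ABI; a stock MSVC rustc would be unlikely to link against them cleanly.)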
61 | # Also install the current stable rust 62 | install: >- 63 | mingw-w64-x86_64-llvm 64 | mingw-w64-x86_64-rust 65 | 66 | # Use stable Rust toolchain 67 | - uses: actions-rs/toolchain@v1 68 | with: 69 | toolchain: stable 70 | 71 | - name: Install LLVM (Ubuntu Only) 72 | if: ${{ matrix.os == 'ubuntu-24.04' }} 73 | # See: https://apt.llvm.org/ 74 | # Last line: https://gitlab.com/taricorp/llvm-sys.rs/-/issues/13 75 | run: | 76 | wget https://apt.llvm.org/llvm.sh 77 | chmod +x llvm.sh 78 | sudo ./llvm.sh 18 all 79 | sudo apt install libpolly-18-dev libz-dev 80 | 81 | - name: Install LLVM 18 (Mac Only) 82 | if: ${{ matrix.os == 'macos-latest' }} 83 | run: brew install llvm@18 84 | 85 | - name: Get the LLVM version (Windows Only) 86 | if: ${{ matrix.os == 'windows-latest' }} 87 | run: llvm-config --version 88 | 89 | - name: Get the LLVM version (Mac Only) 90 | if: ${{ matrix.os == 'macos-latest' }} 91 | run: ${{ matrix.llvm-install-dir }}/bin/llvm-config --version 92 | # For some reason, this seems to error even when llvm-config is available somewhere. Leaving it in for now. 93 | continue-on-error: true 94 | 95 | - name: Get the LLVM version (Ubuntu Only) 96 | if: ${{ matrix.os == 'ubuntu-24.04' }} 97 | run: llvm-config --version 98 | # For some reason, this seems to error even when llvm-config is available somewhere. Leaving it in for now. 99 | continue-on-error: true 100 | 101 | - name: Run cargo test (Mac Only) 102 | if: ${{ matrix.os == 'macos-latest' }} 103 | run: cargo test --no-default-features -F ${{ matrix.features }} --lib 104 | env: 105 | LLVM_SYS_180_PREFIX: ${{ matrix.llvm-install-dir }} 106 | 107 | - name: Run cargo test (Ubuntu & Windows) 108 | if: ${{ matrix.os != 'macos-latest' }} 109 | run: cargo test --no-default-features -F ${{ matrix.features }} --lib 110 | -------------------------------------------------------------------------------- /.github/workflows/codecov-io.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | pull_request: 5 | branches: 6 | - "main" 7 | env: 8 | CARGO_TERM_COLOR: always 9 | 10 | # Cancel in-progress runs for previous commits if there are any that haven't completed yet. 
11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 13 | cancel-in-progress: true 14 | 15 | name: codecov.io Code Coverage 16 | jobs: 17 | coverage: 18 | runs-on: ubuntu-24.04 19 | steps: 20 | - uses: actions/checkout@v4 21 | # - name: Install LLVM 22 | # # See: https://apt.llvm.org/ 23 | # # Last line: https://gitlab.com/taricorp/llvm-sys.rs/-/issues/13 24 | # run: | 25 | # wget https://apt.llvm.org/llvm.sh 26 | # chmod +x llvm.sh 27 | # sudo ./llvm.sh 18 all 28 | # sudo apt install libpolly-18-dev libz-dev 29 | - uses: dtolnay/rust-toolchain@nightly 30 | - name: Install cargo-llvm-cov 31 | uses: taiki-e/install-action@cargo-llvm-cov 32 | - name: Generate code coverage 33 | run: cargo llvm-cov --workspace --no-fail-fast --lcov --output-path lcov.info 34 | - name: Upload coverage reports to Codecov 35 | uses: codecov/codecov-action@v4.0.1 36 | with: 37 | token: ${{ secrets.CODECOV_TOKEN }} 38 | slug: vcfxb/wright-lang 39 | -------------------------------------------------------------------------------- /.github/workflows/coveralls.yml: -------------------------------------------------------------------------------- 1 | on: ["push", "pull_request"] 2 | 3 | name: coveralls Code Coverage 4 | 5 | # Cancel in-progress runs for previous commits if there are any that haven't completed yet. 6 | concurrency: 7 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 8 | cancel-in-progress: true 9 | 10 | jobs: 11 | coverage: 12 | runs-on: ubuntu-24.04 13 | steps: 14 | - uses: actions/checkout@v4 15 | # - name: Install LLVM 16 | # # See: https://apt.llvm.org/ 17 | # # Last line: https://gitlab.com/taricorp/llvm-sys.rs/-/issues/13 18 | # run: | 19 | # wget https://apt.llvm.org/llvm.sh 20 | # chmod +x llvm.sh 21 | # sudo ./llvm.sh 18 all 22 | # sudo apt install libpolly-18-dev libz-dev 23 | - uses: dtolnay/rust-toolchain@nightly 24 | - name: Install cargo-llvm-cov 25 | uses: taiki-e/install-action@cargo-llvm-cov 26 | - name: Generate code coverage 27 | run: cargo llvm-cov --workspace --no-fail-fast --lcov --output-path lcov.info 28 | - name: Coveralls upload 29 | uses: coverallsapp/github-action@v2.3.6 30 | with: 31 | github-token: ${{ secrets.GITHUB_TOKEN }} 32 | file: lcov.info 33 | format: lcov 34 | -------------------------------------------------------------------------------- /.github/workflows/pages.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Pages 2 | 3 | on: 4 | # Runs on pushes targeting the default branch 5 | push: 6 | branches: ["main"] 7 | 8 | # Allows you to run this workflow manually from the Actions tab 9 | workflow_dispatch: 10 | 11 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 12 | permissions: 13 | contents: read 14 | pages: write 15 | id-token: write 16 | 17 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 18 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 19 | concurrency: 20 | group: "pages" 21 | cancel-in-progress: false 22 | 23 | jobs: 24 | # Build job 25 | build: 26 | runs-on: ubuntu-24.04 27 | steps: 28 | - uses: actions/checkout@v4 29 | # Use nightly Rust toolchain since that's what docs.rs uses and some docs for features/compiler flags 30 | # only work on nightly. 
31 | - uses: actions-rs/toolchain@v1 32 | with: 33 | toolchain: nightly 34 | - name: Install LLVM 35 | # See: https://apt.llvm.org/ 36 | # Last line: https://gitlab.com/taricorp/llvm-sys.rs/-/issues/13 37 | run: | 38 | wget https://apt.llvm.org/llvm.sh 39 | chmod +x llvm.sh 40 | sudo ./llvm.sh 18 all 41 | sudo apt install libpolly-18-dev libz-dev 42 | - name: Install mdBook 43 | run: cargo install mdbook 44 | - name: Setup Pages 45 | id: pages 46 | uses: actions/configure-pages@v3 47 | - name: Build rust docs 48 | run: | 49 | mkdir tmp 50 | cargo +nightly doc 51 | cp -rv target/doc tmp 52 | - name: Build mdBook 53 | run: | 54 | mdbook build pages/book 55 | cp -rv pages/book/book tmp 56 | - name: Copy static files 57 | run: | 58 | cp -rv pages/static/* tmp 59 | - name: Upload artifact 60 | uses: actions/upload-pages-artifact@v3 61 | with: 62 | path: tmp 63 | 64 | # Deployment job 65 | deploy: 66 | environment: 67 | name: github-pages 68 | url: ${{ steps.deployment.outputs.page_url }} 69 | runs-on: ubuntu-24.04 70 | needs: build 71 | steps: 72 | - name: Deploy to GitHub Pages 73 | id: deployment 74 | uses: actions/deploy-pages@v4 75 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | permissions: 4 | contents: write 5 | 6 | on: 7 | push: 8 | tags: 9 | - v[0-9]+.* 10 | 11 | jobs: 12 | create-release: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: taiki-e/create-gh-release-action@v1 17 | with: 18 | # (optional) Path to changelog. 19 | changelog: CHANGELOG.md 20 | # (required) GitHub token for creating GitHub Releases. 21 | token: ${{ secrets.GITHUB_TOKEN }} 22 | 23 | upload-assets: 24 | needs: create-release 25 | strategy: 26 | matrix: 27 | include: 28 | - target: x86_64-unknown-linux-gnu 29 | os: ubuntu-latest 30 | - target: x86_64-apple-darwin 31 | os: macos-latest 32 | - target: x86_64-pc-windows-msvc 33 | os: windows-latest 34 | runs-on: ${{ matrix.os }} 35 | steps: 36 | - uses: actions/checkout@v4 37 | - run: rustup update stable 38 | - uses: taiki-e/upload-rust-binary-action@v1 39 | with: 40 | # (required) Comma-separated list of binary names (non-extension portion of filename) to build and upload. 41 | # Note that glob pattern is not supported yet. 42 | bin: wright 43 | # (optional) Target triple, default is host triple. 44 | # This is optional but it is recommended that this always be set to 45 | # clarify which target you are building for if macOS is included in 46 | # the matrix because GitHub Actions changed the default architecture 47 | # of macos-latest since macos-14. 48 | target: ${{ matrix.target }} 49 | # (optional) On which platform to distribute the `.tar.gz` file. 50 | # [default value: unix] 51 | # [possible values: all, unix, windows, none] 52 | tar: unix 53 | # (optional) On which platform to distribute the `.zip` file. 54 | # [default value: windows] 55 | # [possible values: all, unix, windows, none] 56 | zip: windows 57 | # (required) GitHub token for uploading assets to GitHub Releases. 
58 | token: ${{ secrets.GITHUB_TOKEN }} 59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | wright/target/ 2 | wright/Cargo.lock 3 | Cargo.lock 4 | .idea/ 5 | wright/.idea/ 6 | target/ 7 | scratchpad/ 8 | pages/book/book -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.11.0 2 | - Atomic primitive type signature parsing 3 | - Reference type signature parsing 4 | - Tweaks to printing/formatting of help messages in diagnostics 5 | - Tweaks/updates to links on website 6 | - Fix bug with import declaration parsing not accepting comments in certain places 7 | 8 | ## 0.10.1 9 | - Fix bug in release workflow that prevented wright binaries from being built 10 | 11 | ## 0.10.0 12 | - Boolean literal parsing 13 | - Import declaration parsing 14 | - Fixes to CI workflows 15 | - Updates to a variety of dependencies 16 | - Update rust edition to 2024 and minimum rust version to 1.85.1 17 | 18 | ## Changelog not kept before version 0.10.0 19 | 20 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## 1. Purpose 4 | 5 | A primary goal of the Wright Programming language community is to be inclusive to the largest number of contributors, with the most varied and diverse backgrounds possible. As such, we are committed to providing a friendly, safe and welcoming environment for all, regardless of gender, sexual orientation, ability, ethnicity, socioeconomic status, and religion (or lack thereof). 6 | 7 | This code of conduct outlines our expectations for all those who participate in our community, as well as the consequences for unacceptable behavior. 8 | 9 | We invite all those who participate in the Wright Programming language community to help us create safe and positive experiences for everyone. 10 | 11 | ## 2. Open Source Citizenship 12 | 13 | A supplemental goal of this Code of Conduct is to increase open source citizenship by encouraging participants to recognize and strengthen the relationships between our actions and their effects on our community. 14 | 15 | Communities mirror the societies in which they exist and positive action is essential to counteract the many forms of inequality and abuses of power that exist in society. 16 | 17 | If you see someone who is making an extra effort to ensure our community is welcoming, friendly, and encourages all participants to contribute to the fullest extent, we want to know. 18 | 19 | ## 3. Expected Behavior 20 | 21 | The following behaviors are expected and requested of all community members: 22 | 23 | * Participate in an authentic and active way. In doing so, you contribute to the health and longevity of this community. 24 | * Exercise consideration and respect in your speech and actions. 25 | * Attempt collaboration before conflict. 26 | * Refrain from demeaning, discriminatory, or harassing behavior and speech. 27 | * Be mindful of your surroundings and of your fellow participants. Alert community leaders if you notice a dangerous situation, someone in distress, or violations of this Code of Conduct, even if they seem inconsequential.
28 | * Remember that community event venues may be shared with members of the public; please be respectful to all patrons of these locations. 29 | 30 | ## 4. Unacceptable Behavior 31 | 32 | The following behaviors are considered harassment and are unacceptable within our community: 33 | 34 | * Violence, threats of violence or violent language directed against another person. 35 | * Sexist, racist, homophobic, transphobic, ableist or otherwise discriminatory jokes and language. 36 | * Posting or displaying sexually explicit or violent material. 37 | * Posting or threatening to post other people’s personally identifying information ("doxing"). 38 | * Personal insults, particularly those related to gender, sexual orientation, race, religion, or disability. 39 | * Inappropriate photography or recording. 40 | * Inappropriate physical contact. You should have someone’s consent before touching them. 41 | * Unwelcome sexual attention. This includes, sexualized comments or jokes; inappropriate touching, groping, and unwelcomed sexual advances. 42 | * Deliberate intimidation, stalking or following (online or in person). 43 | * Advocating for, or encouraging, any of the above behavior. 44 | * Sustained disruption of community events, including talks and presentations. 45 | 46 | ## 5. Consequences of Unacceptable Behavior 47 | 48 | Unacceptable behavior from any community member, including sponsors and those with decision-making authority, will not be tolerated. 49 | 50 | Anyone asked to stop unacceptable behavior is expected to comply immediately. 51 | 52 | If a community member engages in unacceptable behavior, the community organizers may take any action they deem appropriate, up to and including a temporary ban or permanent expulsion from the community without warning (and without refund in the case of a paid event). 53 | 54 | ## 6. Reporting Guidelines 55 | 56 | If you are subject to or witness unacceptable behavior, or have any other concerns, please notify a community organizer as soon as possible. venusflameblonde@gmail.com. 57 | 58 | 59 | 60 | Additionally, community organizers are available to help community members engage with local law enforcement or to otherwise help those experiencing unacceptable behavior feel safe. In the context of in-person events, organizers will also provide escorts as desired by the person experiencing distress. 61 | 62 | ## 7. Addressing Grievances 63 | 64 | If you feel you have been falsely or unfairly accused of violating this Code of Conduct, you should notify Venus Xeon-Blonde with a concise description of your grievance. Your grievance will be handled in accordance with our existing governing policies. 65 | 66 | 67 | 68 | ## 8. Scope 69 | 70 | We expect all community participants (contributors, paid or otherwise; sponsors; and other guests) to abide by this Code of Conduct in all community venues–online and in-person–as well as in all one-on-one communications pertaining to community business. 71 | 72 | This code of conduct and its related procedures also applies to unacceptable behavior occurring outside the scope of community activities when such behavior has the potential to adversely affect the safety and well-being of community members. 73 | 74 | ## 9. Contact info 75 | 76 | venusflameblonde@gmail.com 77 | 78 | ## 10. License and attribution 79 | 80 | This Code of Conduct is distributed under a [Creative Commons Attribution-ShareAlike license](http://creativecommons.org/licenses/by-sa/3.0/). 
81 | 82 | Portions of text derived from the [Django Code of Conduct](https://www.djangoproject.com/conduct/) and the [Geek Feminism Anti-Harassment Policy](http://geekfeminism.wikia.com/wiki/Conference_anti-harassment/Policy). 83 | 84 | Retrieved on November 22, 2016 from [http://citizencodeofconduct.org/](http://citizencodeofconduct.org/) 85 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["wright"] 3 | # Use the Rust 2021 resolver. 4 | resolver = "2" 5 | 6 | [workspace.package] 7 | edition = "2024" 8 | # Use [cargo msrv](https://crates.io/crates/cargo-msrv) to make sure we get an accurate value for this. 9 | rust-version = "1.85.1" 10 | -------------------------------------------------------------------------------- /FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: vcfxb 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Venus Xeon-Blonde 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Wright has not yet reached version 1.0.0, and is currently in an incomplete/experimental state. 2 | 3 | # The Wright Programming Language 4 | ## *A language that flies* 5 | 6 | *Wright is an all-purpose programming language inspired by Rust, Ada, and TypeScript.
7 | Pulling from all three of these excellent languages, Wright intends to offer a combination of speed, ergonomics, and precision.* 8 | 9 | ### Badges 10 | *Wright is automatically checked and tested using the latest available github runners for Ubuntu, MacOS, and Windows* 11 | | Service | Badge | 12 | |:---:|:---:| 13 | | Cargo Check Status | ![Cargo Check status](https://github.com/vcfxb/wright-lang/actions/workflows/cargo-check.yml/badge.svg?branch=main) | 14 | | Cargo Test Status | ![Cargo Test status](https://github.com/vcfxb/wright-lang/actions/workflows/cargo-test.yml/badge.svg?branch=main) | 15 | | Cargo Clippy Status | ![Cargo Clippy status](https://github.com/vcfxb/wright-lang/actions/workflows/cargo-clippy.yml/badge.svg?branch=main) | 16 | | Code Coverage (Coveralls) | [![Coverage Status](https://coveralls.io/repos/github/vcfxb/wright-lang/badge.svg?branch=main)](https://coveralls.io/github/vcfxb/wright-lang?branch=main) | 17 | | Code Coverage (Codecov.io) | [![codecov](https://codecov.io/github/vcfxb/wright-lang/branch/main/graph/badge.svg?token=HO07JEYMIH)](https://codecov.io/github/vcfxb/wright-lang/commits?branch=main) | 18 | | Docs.rs | [![Documentation](https://docs.rs/wright/badge.svg)](https://docs.rs/wright) | 19 | | Crates.io | [![Crates.io](https://img.shields.io/crates/v/wright.svg)](https://crates.io/crates/wright) | 20 | | GitHub release | [![GitHub release](https://img.shields.io/github/release/vcfxb/wright-lang.svg)](https://github.com/vcfxb/wright-lang/releases) | 21 | | GitHub (pre-)release | [![GitHub (pre-)release](https://img.shields.io/github/release/vcfxb/wright-lang/all.svg)](https://github.com/vcfxb/wright-lang/releases) | 22 | | Development Status | ![Status](https://img.shields.io/badge/status-actively--developed-green.svg) | 23 | 24 | 25 | 26 | | | Downloads| 27 | |:---:|:---:| 28 | | Total |![Github All Releases](https://img.shields.io/github/downloads/vcfxb/wright-lang/total.svg) | 29 | | Releases | ![Github Releases](https://img.shields.io/github/downloads/vcfxb/wright-lang/latest/total.svg) | 30 | | Pre-Releases| ![Github Pre-Releases](https://img.shields.io/github/downloads-pre/vcfxb/wright-lang/latest/total.svg) | 31 | | Crates.io | [![Crates.io](https://img.shields.io/crates/d/wright.svg)](https://crates.io/crates/wright) | 32 | | Crates.io (Latest) | [![Crates.io](https://img.shields.io/crates/dv/wright.svg)](https://crates.io/crates/wright/0.10.1) | 33 | 34 | ### Syntax Samples 35 | ``` 36 | // Hello World! 37 | use wright::io::println; 38 | 39 | func main() { 40 | println("Hello World!"); 41 | } 42 | ``` 43 | 44 | ``` 45 | // FizzBuzz 1 through 100 46 | use wright::io::println; 47 | 48 | type FizzBuzzInteger = integer constrain |i| { i <= 100 && i >= 0 }; 49 | 50 | func fizzbuzz(i: FizzBuzzInteger) { 51 | if i % 15 == 0 { println("FizzBuzz"); } 52 | else if i % 5 == 0 { println("Buzz"); } 53 | else if i % 3 == 0 { println("Fizz"); } 54 | else { println(i); } 55 | } 56 | 57 | func main() { 58 | // Compiler error here if we use a range iterator that contains a value violating the constraints of 59 | // `FizzBuzzInteger`. 60 | (1..=100).for_each(fizzbuzz); 61 | } 62 | ``` 63 | 64 | ### The core goals of the language: 65 | * __Developer experience__ -- Every error message, syntax choice, and standard library function should be friendly and well 66 | documented. 
67 | * __Robustness__ -- Wright's type system should be expressive enough to appropriately capture the domain, representation, 68 | and functionality of every symbol the programmer interacts with. 69 | * __Speed__ -- Wright leverages the newest major version of LLVM (at the time of writing, LLVM 18), to compile code 70 | directly to assembly, avoiding the overhead of an interpreter, garbage collector, and other associated tools 71 | by default. 72 | * __Memory Safety__ -- Wright pulls significant inspiration from Rust's lifetime system, with some modifications. 73 | 74 | ### Installation: 75 | There are several installation options. 76 | - Get the latest stable version from [the releases page](https://github.com/vcfxb/wright-lang/releases). 77 | - If you have rust, via `cargo install wright`. 78 | - Building from source, by cloning this repository, and running `cargo build --release` in the wright directory, and 79 | then adding `wright/target/release` to your system path. You will need LLVM 18 installed and appropriately 80 | configured to compile Wright. See the [llvm-sys crate docs](https://crates.io/crates/llvm-sys) for tips on how to do 81 | this. 82 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples of wright source code. 2 | 3 | These examples are used to showcase different syntax and design patterns featured in the wright programming language. 4 | 5 | Some of these have been moved to the `deprecated` folder to indicate that they have not been maintained, and may not 6 | reflect the current state of wright and its syntax. 7 | -------------------------------------------------------------------------------- /examples/deprecated/constraint.wr: -------------------------------------------------------------------------------- 1 | 2 | import wright.cmp; 3 | 4 | 5 | 6 | type KnownMin is integer constrain { self >= MIN }; 7 | 8 | 9 | type KnownMax 10 | where T: cmp.Ord 11 | is T constrain T <= MAX; 12 | 13 | type KnownMin 14 | where T: cmp.Ord 15 | is T constrain T >= MIN; 16 | 17 | ## --- OR --- 18 | 19 | constraint KnownMax(t: T) as T 20 | where T: cmp.Ord { 21 | t <= self 22 | } 23 | 24 | constraint KnownMin(t: T) as T 25 | where T: cmp.Ord { 26 | t >= self 27 | } 28 | 29 | constraint KnownRange(t: T) as void 30 | where T: KnownMin + KnownMax 31 | { 32 | ## The maximum value that the constrained T could be. 33 | max_inclusive: T, 34 | ## The minimum value that the constrained T could be. 35 | min_inclusive: T, 36 | } 37 | 38 | 39 | 40 | constraint KnownMax(a: T, b: T) 41 | 42 | 43 | ... not sure yet what I want syntax to look like here. 44 | 45 | -------------------------------------------------------------------------------- /examples/deprecated/generic.wr: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /examples/deprecated/represent.wr: -------------------------------------------------------------------------------- 1 | 2 | 3 | import wright::box::Box; 4 | import wright::box::NullableBox; 5 | 6 | type Option { 7 | func some(t: T) -> Self; 8 | func none() -> Self; 9 | func is_some(&self) -> bool; 10 | func is_none(&self) -> bool; 11 | # ... 
etc 12 | } 13 | 14 | union DefaultOptionRepresentation { some: T | none: void }; 15 | 16 | implement Option as DefaultOptionRepresentation { 17 | const func some(t: T) -> Self { 18 | DefaultOptionRepresentation { some: t } 19 | } 20 | 21 | const func none() -> Self { 22 | DefaultOptionRepresentation { none: void } 23 | } 24 | 25 | const func is_some(&self) -> bool { 26 | self is DefaultOptionRepresentation.some 27 | } 28 | 29 | const func is_none(&self) -> bool { 30 | self is DefaultOptionRepresentation.none 31 | } 32 | 33 | # ... etc 34 | } 35 | 36 | implement Option> as NullableBox { 37 | func some(t: T) -> Self { 38 | Box::new(t) as NullableBox 39 | } 40 | 41 | const func none() -> Self { 42 | NullableBox::null() 43 | } 44 | 45 | const fn is_some(&self) -> bool { 46 | !self.is_null() 47 | } 48 | 49 | const fn is_none(&self) -> bool { 50 | self.is_null() 51 | } 52 | } 53 | 54 | -------------------------------------------------------------------------------- /examples/deprecated/types.wr: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Some types in wright. 4 | 5 | type MyUnion = void | String constrain not String::empty; 6 | type TaggedUnion = { None: void | Name: String constrain not String::empty }; 7 | 8 | type MyRecord = { field1: String, field2: integer, field3: integer constrain in 0..=255 }; 9 | 10 | type MyEnum = enum of void { VariantA, VariantB, VariantC }; 11 | 12 | record MyRecord2 { 13 | field1: String constrain not String::empty, 14 | 15 | } 16 | -------------------------------------------------------------------------------- /examples/deprecated/union.wr: -------------------------------------------------------------------------------- 1 | 2 | type PerhapsUrl = union { url: Url | not_url: String }; 3 | 4 | func main() { 5 | let google: PerhapsUrl = { url: Url::from("https://google.com") }; 6 | 7 | if google is PerhapsUrl::url { // could also do `if google is Url`. 8 | println(f"{type of google}"); 9 | # prints "PerhapsUrl constrain union.variant = PerhapsUrl=>google and union.type = Url and union.state = "google.com" ..." 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /examples/fizzbuzz.wr: -------------------------------------------------------------------------------- 1 | // FizzBuzz 1 through 100 2 | use wright::io::println; 3 | 4 | type FizzBuzzInteger = integer constrain |i| { i <= 100 && i >= 0 }; 5 | 6 | func fizzbuzz(i: FizzBuzzInteger) { 7 | if i % 15 == 0 { println("FizzBuzz"); } 8 | else if i % 5 == 0 { println("Buzz"); } 9 | else if i % 3 == 0 { println("Fizz"); } 10 | else { println(i); } 11 | } 12 | 13 | func main() { 14 | // Compiler error here if we use a range iterator that contains a value violating the constraints of 15 | // `FizzBuzzInteger`. 16 | (1..=100).for_each(fizzbuzz); 17 | } 18 | -------------------------------------------------------------------------------- /examples/hello-world.wr: -------------------------------------------------------------------------------- 1 | // Hello World! 
2 | use wright::io::println; 3 | 4 | func main() { 5 | println("Hello World!"); 6 | } 7 | -------------------------------------------------------------------------------- /pages/book/src/INTRODUCTION.md: -------------------------------------------------------------------------------- 1 | Test -------------------------------------------------------------------------------- /pages/book/src/SUMMARY.md: -------------------------------------------------------------------------------- 1 | 2 | # Summary 3 | 4 | [Introduction](./INTRODUCTION.md) 5 | 6 | # Design Notes 7 | 8 | - [Language Constructs](./design-notes/language-constructs.md) 9 | - [User Defined Optimizations](./design-notes/user-defined-optimizations.md) 10 | - [Threads](./design-notes/threads.md) 11 | - [Backend](./design-notes/backend.md) 12 | 13 | -------------------------------------------------------------------------------- /pages/book/src/design-notes/backend.md: -------------------------------------------------------------------------------- 1 | 2 | # The Backend(s) of the Wright Compiler and Interpreter. 3 | 4 | I have had many thoughts, opinions, and different stances on what I wanted to build for Wright's backend. I have 5 | changed my mind more times than I can count, and I'm certain I will continue to change my mind on this several more 6 | times. 7 | 8 | So far it appears there are a few main target options: 9 | 10 | | | LLVM | Cranelift | JVM / Java Bytecode | Bespoke bytecode compiler & interpreter | Bespoke bytecode compiler & transpiler | 11 | |--- | --- | --- | --- | --- | --- | 12 | | Output | Machine code | Machine code | .class file | Custom bytecode | Custom bytecode & transpiler targets | 13 | | Targets | very many | `x86_64`, `aarch64` (ARM64), `s390x` (IBM Z), `riscv64` | JVM | Anything that the rust based interpreter can run on | very many (assuming transpile to LLVM) | 14 | 15 | Right now I'm largely tempted to target both a bespoke bytecode interpreter (perhaps in addition to a transpiler) and 16 | LLVM. I like the idea of compiling to Cranelift as well, but the additional work for it may be more than it's worth. 17 | Compiling to the JVM would be cool for interoperability with Java/Scala/Kotlin/etc programs, but my language is so 18 | different from them that there would be a significant amount lost in translation from Wright to the JVM. I will start 19 | with the bespoke interpreter/transpiler. 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /pages/book/src/design-notes/language-constructs.md: -------------------------------------------------------------------------------- 1 | 2 | # Language constructs 3 | 4 | There are a variety of constructed abstractions in a language that not only give it logical/expressive power, but help 5 | guide and define the user-friendliness of the language. A good example of this is Rust's lifetime and borrowing rules. 6 | While these rules make it possible to write and express programs that would be difficult to keep track of in a language 7 | like C, they also steepen the language's learning curve. 8 | 9 | In designing Wright, I want to make a wide variety of language constructs available to the user to help make the language 10 | more elegant, without making it too much more difficult to use. In designing these language constructs, a few principles 11 | should be kept in mind. 12 | 13 | 1. Wright aims to be a relatively simple, easy to use language. 14 | 2. 
Wright aims to protect the user, to the greatest extent possible, from writing buggy code. 15 | 3. Wright aims to show an appropriate amount of the language's internals. Users should be able to reason about how their 16 | code runs and what allocates, or doesn't. 17 | 4. Wright is a multipurpose programming language. No one paradigm or style should be expressly promoted over others. 18 | 19 | With those principles in mind, we can start to establish a set of features to guide the language's design. 20 | 21 | 1. Wright is strongly typed, and will infer types wherever possible. 22 | 2. ~~Wright is garbage collected.~~ -- I changed my mind on this -- Wright will have lifetimes and borrowing similar to 23 | Rust. 24 | 3. Wright has traits. 25 | 4. Wright has enumerations. 26 | 5. Wright has tagged unions. 27 | 6. Wright has ~~classes~~ record types. 28 | 7. Wright has type aliases. 29 | 8. Wright has constraints (to be discussed further). 30 | 9. Wright has inheritance for traits, enumerations, tagged unions, and constraints. 31 | 10. Functions are first class language constructs in Wright. 32 | 11. Wright does not have macros -- most macro-style meta programming should be achievable with generics. 33 | 12. Wright has abstract types -- representation & implementation can be dependent on the generic used. 34 | 35 | ## On Constraints: 36 | 37 | Wright will be one of the few multipurpose languages that I know of to use constraints. Constraints can be a very 38 | powerful tool for logical induction. They allow a programmer to define and check parts of their program at compile time. 39 | Wright constraints will be invokable both at compile time and at runtime. There may be some exceptions if we ever decide 40 | to allow definition of compile-time only (`const constraint`) constraints. Constraints will be strongly bound to a type, 41 | but that type may be generic (so constraints on lists and arrays will be possible). Constraints will act very similarly 42 | to functions, carrying zero sense of state or instantiation like a class might. 43 | 44 | ## Note 45 | 46 | This document is a work in progress, and may be changed or updated further at a later date. 47 | -------------------------------------------------------------------------------- /pages/book/src/design-notes/threads.md: -------------------------------------------------------------------------------- 1 | 2 | For many languages, threading can be a point of tension. When to use it (especially now that single-threaded async is more common), 3 | how to use it, and how to optimize it are all common issues. 4 | 5 | In building wright, I decided it would be best to separate async and synchronous code/threads to avoid unnecessarily 6 | compiling/linking/running an async runtime to manage futures. 7 | -------------------------------------------------------------------------------- /pages/book/src/design-notes/user-defined-optimizations.md: -------------------------------------------------------------------------------- 1 | 2 | # User defined optimizations 3 | 4 | One of the hardest things for me to reconcile as I build this language is how to make it high-level, while still 5 | providing the ability to do relatively low-level things. I would make it completely low-level, however Rust already 6 | exists as a well-liked, mature, production-ready, memory-safe language with many of the same features I hope to build
Building Wright as another low-level language with a borrow checker and functional programming elements 8 | would not only make it completely derivative of Rust, but also introduce many of the same drawbacks that Rust has in 9 | terms of expressing Futures & other complex memory-related types and in terms of learning-curve (especially around 10 | the borrow checker). 11 | 12 | In order to do both, the vast majority of programming in wright will be covered under a garbage 13 | collector. Programmers will write classes, enums, and unions, without ever thinking too hard about memory allocation or 14 | management. 15 | 16 | ... TBD 17 | -------------------------------------------------------------------------------- /pages/static/assets/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vcfxb/wright-lang/35d28bb47428ee3bac7d4552b2d0bf98e1efd499/pages/static/assets/favicon.png -------------------------------------------------------------------------------- /pages/static/assets/transparent_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vcfxb/wright-lang/35d28bb47428ee3bac7d4552b2d0bf98e1efd499/pages/static/assets/transparent_logo.png -------------------------------------------------------------------------------- /pages/static/assets/white_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vcfxb/wright-lang/35d28bb47428ee3bac7d4552b2d0bf98e1efd499/pages/static/assets/white_logo.png -------------------------------------------------------------------------------- /pages/static/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Wright 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |

The Wright Programming Language

21 | The wright language logo 22 | 23 | API docs (latest release) 24 | 25 | 26 | API docs (main) 27 | 28 | 29 | Book 30 | 31 | 32 | Github 33 | 34 | 35 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /wright/Cargo.toml: -------------------------------------------------------------------------------- 1 | # PACKAGE METADATA 2 | [package] 3 | name = "wright" 4 | description = "The wright programming language compiler and tooling." 5 | license = "MIT" 6 | version = "0.11.0" 7 | authors = ["Venus Xeon-Blonde "] 8 | repository = "https://github.com/vcfxb/wright-lang" 9 | documentation = "https://docs.rs/wright" 10 | readme = "../README.md" 11 | keywords = ["wright", "language", "bytecode", "compiler", "interpreter"] 12 | edition.workspace = true 13 | rust-version.workspace = true 14 | 15 | # DOC.RS SPECIFIC METADATA 16 | [package.metadata.docs.rs] 17 | features = ["wright_library_defaults"] 18 | 19 | # CRATES.IO BADGES 20 | [badges] 21 | maintenance = {status = "actively-developed"} 22 | 23 | # LIBRARY METADATA 24 | [lib] 25 | name = "wright" 26 | test = true 27 | doctest = true 28 | doc = true 29 | 30 | # BINARIES 31 | [[bin]] 32 | name = "wright" 33 | test = false 34 | doc = false 35 | doctest = false 36 | required-features = [] 37 | 38 | # BENCHMARKS 39 | [[bench]] 40 | name = "lexer" 41 | harness = false 42 | 43 | [[bench]] 44 | name = "parser" 45 | harness = false 46 | 47 | # FEATURE FLAGS 48 | # These are used to determine which parts of the crate are compiled/available. 49 | [features] 50 | 51 | # By default include everything required for building the wright binary, which includes everything used for building 52 | # wright as a library. 53 | # Also include support for detecting unicode capabilities on the host terminal. 54 | default = [ 55 | "wright_binary", 56 | "supports-unicode" 57 | ] 58 | 59 | # Features and dependencies required for the wright binary (currently just the library defaults and `clap`). 60 | wright_binary = [ 61 | "wright_library_defaults", 62 | "dep:clap" 63 | ] 64 | 65 | # Features and dependencies useful when the wright binary is not being built or used. 66 | wright_library_defaults = [ 67 | "file_memmap", 68 | "parser" 69 | ] 70 | 71 | # Wright's parser depends on the ability to report parsing errors and construct AST models. 72 | parser = [ 73 | "reporting", 74 | "ast-models", 75 | "lexer" 76 | ] 77 | 78 | # Wright's abstract syntax tree model is built on types from the "source_tracking" module. 79 | ast-models = [ 80 | "source-tracking", 81 | "dep:num" 82 | ] 83 | 84 | # Wright's lexical analyzer is built using types from the "source_tracking" module. 85 | lexer = [ 86 | "source-tracking", 87 | "dep:unicode-ident" 88 | ] 89 | 90 | # Loading memory mapped files from the disk requires memmap2, fs4, and the reporting feature to correctly and efficiently 91 | # read from disk. We also use `anyhow` to make error handling easier. 92 | file_memmap = [ 93 | "reporting", 94 | "dep:memmap2", 95 | "dep:fs4" 96 | ] 97 | 98 | # Reporting errors requires source tracking, codespan-reporting (for rendering diagnostics), and 99 | # termcolor (for pretty output). 100 | reporting = [ 101 | "source-tracking", 102 | "dep:termcolor", 103 | "dep:codespan-reporting" 104 | ] 105 | 106 | # Source tracking requires just a few dependencies and standard library. 107 | source-tracking = [ 108 | "std", 109 | "dep:dashmap", 110 | "derive_more/display", 111 | ] 112 | 113 | # Optional dependency that enables terminal unicode support selection.
# There are fallbacks -- this is not required for anything else. 115 | supports-unicode = [ 116 | "dep:supports-unicode" 117 | ] 118 | 119 | # Feature flag to indicate use of the standard library. 120 | std = [ 121 | "derive_more?/std" 122 | ] 123 | 124 | # Feature flag indicating no features are enabled. 125 | none = [] 126 | 127 | # SIMPLE DEPENDENCIES: 128 | [dependencies] 129 | 130 | # DEPENDENCIES: 131 | 132 | # Use supports-unicode to determine how we display tokens to the user in debug commands. 133 | # Optional -- can be used in debugging token outputs. 134 | [dependencies.supports-unicode] 135 | version = "3.0.0" 136 | optional = true 137 | 138 | # Num gives us integer types of unbounded size/domain. 139 | # Used in AST node representations for integer literals. 140 | [dependencies.num] 141 | version = "0.4" 142 | optional = true 143 | 144 | # Unicode identifier functions. 145 | # Used by: 146 | # - "parser" 147 | [dependencies.unicode-ident] 148 | version = "1.0" 149 | optional = true 150 | 151 | # derive_more is used for allowing us to derive additional traits like From and Display. 152 | # Currently used by features: 153 | # - "source-tracking" 154 | [dependencies.derive_more] 155 | version = "2" 156 | default-features = false 157 | optional = true 158 | 159 | # dashmap is used as a fast, concurrent hash map implementation 160 | # Optional since it's used for source tracking, which can be turned off. 161 | [dependencies.dashmap] 162 | version = "6.0.1" 163 | features = ["rayon"] 164 | optional = true 165 | 166 | # codespan-reporting is the internal engine used to render diagnostics. 167 | # Optional since it's only used when error reporting is required. 168 | [dependencies.codespan-reporting] 169 | version = "0.11.1" 170 | optional = true 171 | 172 | # Terminal output colors 173 | # Optional: Required for reporting. 174 | [dependencies.termcolor] 175 | version = "1.4.1" 176 | optional = true 177 | 178 | # Memory mapped files. 179 | # Optional: Required for memory mapped file access. 180 | [dependencies.memmap2] 181 | version = "0.9.3" 182 | optional = true 183 | 184 | # Portable (windows, mac, linux) file locking 185 | # Optional: Required for memory mapped file access. 186 | [dependencies.fs4] 187 | version = "0.12.0" 188 | features = ["sync"] 189 | optional = true 190 | 191 | # Command-line interface generator 192 | # Optional: Used only by the wright binary. 193 | [dependencies.clap] 194 | version = "4" 195 | features = ["derive"] 196 | optional = true 197 | 198 | # TODO: LLVM has been removed until I'm actually using it and have a better build system to go against it. 199 | # (currently the state of it breaking docs.rs builds and complicating everything else makes me kinda sad). 200 | # # Unsafe bindings to LLVM 201 | # # See https://llvm.org/. 202 | # # Optional: Currently not required by anything yet. 203 | # [dependencies.llvm-sys] 204 | # version = "181" 205 | # features = ["force-static"] 206 | # optional = true 207 | 208 | # TEST DEPENDENCIES 209 | [dev-dependencies] 210 | 211 | # Criterion is used for benchmarking. 212 | criterion = "0.5.1" 213 | 214 | # Rayon is used to do various brute-force tests in parallel 215 | rayon = "1.8.0" 216 | 217 | # indoc is used for indentation in tests 218 | indoc = "2.0.5" 219 | 220 | # For creating in memory buffers to test reporting. 221 | termcolor = "1.4.1" 222 | 223 | # BUILD DEPENDENCIES 224 | [build-dependencies] 225 | 226 | # Used for showing feature/cfg info on rustdoc/docs.rs.
227 | rustc_version = "0.4.0" 228 | 229 | # Used for capturing build time and platform information and making it available at runtime. 230 | built = "0.7" 231 | -------------------------------------------------------------------------------- /wright/benches/lexer.rs: -------------------------------------------------------------------------------- 1 | //! Lexer benchmarks. 2 | 3 | use std::sync::Arc; 4 | 5 | use criterion::{Bencher, Criterion, black_box, criterion_group, criterion_main}; 6 | use wright::{ 7 | lexer::Lexer, 8 | source_tracking::{filename::FileName, source::Source}, 9 | }; 10 | 11 | fn make_test_lexer(s: &str) -> Lexer { 12 | let source = Source::new_from_string(FileName::None, s.to_owned()); 13 | Lexer::new(Arc::new(source)) 14 | } 15 | 16 | fn bench_symbol_tokens(c: &mut Criterion) { 17 | // Make a benchmark group. 18 | let mut group = c.benchmark_group("lexer symbol benchmarks"); 19 | 20 | // Function to make a lexer and get a token from it. 21 | fn make_lexer_and_get_token(b: &mut Bencher, input: &str) { 22 | b.iter(|| black_box(make_test_lexer(input).next_token())); 23 | } 24 | 25 | let inputs = ["+", "+=", "*", "@", "?"]; 26 | 27 | for i in inputs { 28 | group.bench_with_input(format!("lexer {i}"), i, make_lexer_and_get_token); 29 | } 30 | } 31 | 32 | fn bench_block_doc_comment(c: &mut Criterion) { 33 | c.bench_function("lexer block style doc comment", move |b: &mut Bencher| { 34 | b.iter(move || { 35 | black_box(make_test_lexer("/*! \n this is a block-style comment \n\n */").next_token()) 36 | }); 37 | }); 38 | } 39 | 40 | criterion_group!(benches, bench_symbol_tokens, bench_block_doc_comment); 41 | criterion_main!(benches); 42 | -------------------------------------------------------------------------------- /wright/benches/parser.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use criterion::{Bencher, Criterion, black_box, criterion_group, criterion_main}; 4 | use wright::{ 5 | ast::identifier::Identifier, 6 | lexer::Lexer, 7 | parser::Parser, 8 | source_tracking::{SourceMap, filename::FileName, source::Source}, 9 | }; 10 | 11 | fn bench_parse_identifier(c: &mut Criterion) { 12 | c.bench_function("parse identifier", move |b: &mut Bencher| { 13 | let map = SourceMap::new(); 14 | let source_ref = map.add(Source::new_from_static_str(FileName::None, "test_ident")); 15 | b.iter(|| { 16 | let parser = Parser::new(Lexer::new(Arc::clone(&source_ref))); 17 | Identifier::parse(&mut black_box(parser)).unwrap() 18 | }); 19 | }); 20 | } 21 | 22 | criterion_group!(benches, bench_parse_identifier); 23 | criterion_main!(benches); 24 | -------------------------------------------------------------------------------- /wright/build.rs: -------------------------------------------------------------------------------- 1 | //! Build script for wright. 2 | //! This is used for capturing build environment info which is used at runtime. 3 | 4 | use rustc_version::{Channel, version_meta}; 5 | 6 | fn main() { 7 | // Set a cfg flag if we're on the nightly channel. 8 | 9 | println!("cargo::rustc-check-cfg=cfg(CHANNEL_NIGHTLY)"); 10 | if version_meta().unwrap().channel == Channel::Nightly { 11 | println!("cargo:rustc-cfg=CHANNEL_NIGHTLY"); 12 | } 13 | 14 | // Save build info. 15 | // See https://docs.rs/built/0.7.4/built/index.html. 
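// The file generated by `built` lands in $OUT_DIR/built.rs; src/lib.rs pulls it into the
// crate with `include!` as the `build_info` module.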
16 |     built::write_built_file().expect("Failed to acquire build-time information");
17 | }
-------------------------------------------------------------------------------- /wright/rustfmt.toml: --------------------------------------------------------------------------------
1 | # Unstable currently
2 | #merge_imports = true
3 |
4 | newline_style = "Native"
5 | max_width = 100
6 | fn_call_width = 80
7 | match_arm_leading_pipes = "Preserve"
8 |
9 |
-------------------------------------------------------------------------------- /wright/src/ast.rs: --------------------------------------------------------------------------------
1 | //! [Abstract syntax tree] modeling.
2 | //!
3 | //! [Abstract syntax tree]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
4 |
5 | pub mod decl;
6 | pub mod identifier;
7 | pub mod literal;
8 | pub mod path;
9 | pub mod ty;
-------------------------------------------------------------------------------- /wright/src/ast/decl.rs: --------------------------------------------------------------------------------
1 | //! Abstract syntax trees related to top-level declarations in source code.
2 |
3 | pub mod import;
-------------------------------------------------------------------------------- /wright/src/ast/decl/import.rs: --------------------------------------------------------------------------------
1 | //! Import declarations.
2 | //!
3 | //! These are similar to Rust's `use ...;` style declarations, with the caveat that wright currently
4 | //! only supports a single path per declaration, rather than a tree of items with curly braces. (We also don't support
5 | //! starting with a `::` prefix yet.)
6 |
7 | use crate::{
8 |     ast::{identifier::Identifier, path::Path},
9 |     source_tracking::fragment::Fragment,
10 | };
11 |
12 | /// A `use item::from::elsewhere [as name];` declaration in a wright source file.
13 | #[derive(Debug)]
14 | pub struct ImportDecl {
15 |     /// The full matching source of the declaration, whitespace and all.
16 |     pub matching_source: Fragment,
17 |
18 |     /// The item being imported.
19 |     pub imported_item: Path,
20 |
21 |     /// The name it's imported as (usually [None]).
22 |     pub imported_as: Option<Identifier>,
23 | }
-------------------------------------------------------------------------------- /wright/src/ast/identifier.rs: --------------------------------------------------------------------------------
1 | //! [Identifier]s are used throughout wright as variable names, type names, function names, etc.
2 | //! Their modeling is pretty simple, and is defined here.
3 | //!
4 | //! [Identifier]: https://en.wikipedia.org/wiki/Identifier
5 |
6 | use crate::source_tracking::fragment::Fragment;
7 |
8 | /// Identifiers are used as names for variables, functions, modules, etc.
9 | /// These are defined using [Fragment]s of source code, which will contain the identifier itself.
10 | #[derive(Debug, Clone)]
11 | pub struct Identifier {
12 |     /// The fragment of source code containing the identifier.
13 |     pub fragment: Fragment,
14 | }
-------------------------------------------------------------------------------- /wright/src/ast/literal.rs: --------------------------------------------------------------------------------
1 | //! AST node models representing literal values in source code.
2 |
3 | use num::BigUint;
4 |
5 | use crate::source_tracking::fragment::Fragment;
6 |
7 | /// An integer literal from source. This only contains unsigned integers, as a negative number is considered
8 | /// to be a combination of an integer literal and a unary negation.
9 | #[derive(Debug)]
10 | pub struct IntegerLiteral {
11 |     /// The [Fragment] of source code containing this integer literal.
12 |     pub fragment: Fragment,
13 |
14 |     /// The value of the integer parsed from the matching source.
15 |     pub value: BigUint,
16 | }
17 |
18 | /// A boolean literal from source.
19 | #[derive(Debug)]
20 | pub struct BooleanLiteral {
21 |     /// The [Fragment] of source code containing this boolean literal.
22 |     pub fragment: Fragment,
23 |
24 |     /// The value of the boolean literal.
25 |     pub value: bool,
26 | }
-------------------------------------------------------------------------------- /wright/src/ast/old/expression.rs: --------------------------------------------------------------------------------
1 | //! Expression parsing in Wright source code.
2 |
3 | use self::primary::{PrimaryExpression, PrimaryExpressionParsingError};
4 | use super::AstNode;
5 | use crate::parser::fragment::Fragment;
6 |
7 | pub mod primary;
8 | pub mod unary;
9 |
10 | /// An expression in Wright source code is anything that can be evaluated to a value.
11 | #[derive(Debug)]
12 | pub enum Expression<'src> {
13 |     Primary(PrimaryExpression<'src>),
14 | }
15 |
16 | /// An error that occurs while parsing an expression.
17 | #[derive(Debug, Clone)]
18 | pub enum ExpressionParsingError<'src> {
19 |     /// An expression was expected but not found.
20 |     ExpectedExpression {
21 |         /// Where the expression was expected.
22 |         at: Fragment<'src>,
23 |     },
24 |
25 |     /// An error parsing a primary expression not caused by unavailability.
26 |     PrimaryExpressionParsingError(PrimaryExpressionParsingError<'src>),
27 | }
28 |
29 | #[rustfmt::skip] // Do not auto-reformat this block -- the match arms get too mangled.
30 | impl<'src> AstNode<'src> for Expression<'src> {
31 |     type Error = ExpressionParsingError<'src>;
32 |
33 |     fn fragment(&self) -> Fragment<'src> {
34 |         match self {
35 |             Expression::Primary(primary) => primary.fragment(),
36 |         }
37 |     }
38 |
39 |     fn try_parse(ctx: &mut super::AstGeneratorContext<'src>) -> Result<Self, Self::Error>
40 |     where
41 |         Self: Sized,
42 |     {
43 |         // We need to go in reverse order of strength here (from weakest to strongest) to avoid under-parsing
44 |         // (i.e. parsing a primary when the primary expression was the left side of a binary expression).
45 |
46 |         // Try parsing a primary expression.
47 |         match PrimaryExpression::try_parse(ctx) {
48 |             // If we get a primary, early return it.
49 |             Ok(primary) => return Ok(Expression::Primary(primary)),
50 |
51 |             // If we get an error that is not unavailability, return it early too.
52 |             Err(err @ (
53 |                 | PrimaryExpressionParsingError::OtherIntegerLiteralParsingError(_)
54 |                 | PrimaryExpressionParsingError::OtherParensExpressionParsingError(_)
55 |             )) => {
56 |                 return Err(ExpressionParsingError::PrimaryExpressionParsingError(err));
57 |             }
58 |
59 |             // If we get an error that is unavailability, just ignore it and keep going.
60 |             Err(PrimaryExpressionParsingError::ExpectedPrimaryExpression { .. }) => {}
61 |         }
62 |
63 |         // If we get to the end of the function with no successful parse, we error.
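        // (Since [Expression] currently has only the `Primary` variant, reaching this point
        // simply means no primary expression could be parsed at the current position.)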
64 |         Err(ExpressionParsingError::ExpectedExpression {
65 |             at: ctx.peek_fragment(),
66 |         })
67 |     }
68 | }
69 |
-------------------------------------------------------------------------------- /wright/src/ast/old/expression/primary.rs: --------------------------------------------------------------------------------
1 | //! Primary expression parsing in Wright source code.
2 | //!
3 | //! Primary expressions are considered the atoms of most expressions; most primary expressions are literals,
4 | //! which cannot be broken up into sub-expressions.
5 |
6 | use self::{
7 |     integer_literal::{IntegerLiteral, IntegerLiteralParsingError},
8 |     parens::{ParensExpression, ParensParsingError},
9 | };
10 | use crate::parser::{ast::{AstGeneratorContext, AstNode}, fragment::Fragment};
11 |
12 | pub mod integer_literal;
13 | pub mod parens;
14 |
15 | /// A primary expression in Wright source code. These are the atoms of expressions, and can be combined using operators
16 | /// to form more complicated expressions.
17 | #[derive(Debug)]
18 | pub enum PrimaryExpression<'src> {
19 |     /// An integer literal in wright source code.
20 |     IntegerLiteral(IntegerLiteral<'src>),
21 |
22 |     /// An expression in parentheses.
23 |     ParensExpression(ParensExpression<'src>),
24 | }
25 |
26 | /// Error occurring when someone attempts to parse a primary expression and there is not one.
27 | #[derive(Clone, Debug)]
28 | pub enum PrimaryExpressionParsingError<'src> {
29 |     /// An attempt was made to parse a primary expression and there was not one available.
30 |     ExpectedPrimaryExpression {
31 |         /// The location in source code where a primary expression was expected.
32 |         at: Fragment<'src>,
33 |     },
34 |
35 |     /// An error in parsing an integer literal besides unavailability.
36 |     OtherIntegerLiteralParsingError(IntegerLiteralParsingError<'src>),
37 |
38 |     /// An error parsing an expression in parentheses besides lack of an opening parenthesis.
39 |     OtherParensExpressionParsingError(ParensParsingError<'src>),
40 | }
41 |
42 | #[rustfmt::skip] // Do not auto-reformat this block -- the match arms get too mangled.
43 | impl<'src> AstNode<'src> for PrimaryExpression<'src> {
44 |     type Error = PrimaryExpressionParsingError<'src>;
45 |
46 |     fn fragment(&self) -> Fragment<'src> {
47 |         match self {
48 |             PrimaryExpression::IntegerLiteral(integer_literal) => integer_literal.fragment(),
49 |             PrimaryExpression::ParensExpression(parens_expr) => parens_expr.fragment(),
50 |         }
51 |     }
52 |
53 |     fn try_parse(ctx: &mut AstGeneratorContext<'src>) -> Result<Self, Self::Error>
54 |     where
55 |         Self: Sized,
56 |     {
57 |         // If it's a successful parse, return Ok. `num` errors also fast-return. If it's an unavailability
58 |         // error, keep trying other types of primary.
59 |         match IntegerLiteral::try_parse(ctx) {
60 |             Ok(int_lit) => return Ok(PrimaryExpression::IntegerLiteral(int_lit)),
61 |
62 |             Err(num_err @ IntegerLiteralParsingError::NumParsingError { .. }) => {
63 |                 return Err(PrimaryExpressionParsingError::OtherIntegerLiteralParsingError(
64 |                     num_err,
65 |                 ));
66 |             }
67 |
68 |             Err(IntegerLiteralParsingError::ExpectedIntegerLiteral { .. }) => {}
69 |         }
70 |
71 |         // Do the same with a parens expression.
72 |         match ParensExpression::try_parse(ctx) {
73 |             Ok(parens_expr) => return Ok(PrimaryExpression::ParensExpression(parens_expr)),
74 |
75 |             Err(err @ (
76 |                 | ParensParsingError::ClosingParenNotFound { .. }
77 |                 | ParensParsingError::ErrorInParentheses { .. }
78 |             )) => return Err(PrimaryExpressionParsingError::OtherParensExpressionParsingError(err)),
79 |
80 |             // Do nothing -- try parsing other primaries, or let this become an "expected primary expression" error.
81 |             Err(ParensParsingError::ExpectedParensExpression { .. }) => {}
82 |         }
83 |
84 |         // If we get to the end of the function, it's an error.
85 |         Err(PrimaryExpressionParsingError::ExpectedPrimaryExpression {
86 |             at: ctx.peek_fragment(),
87 |         })
88 |     }
89 | }
-------------------------------------------------------------------------------- /wright/src/ast/old/expression/primary/integer_literal.rs: --------------------------------------------------------------------------------
1 | //! Parsing and AST node model for integer literals in wright source code.
2 |
3 | use crate::parser::ast::AstGeneratorContext;
4 | use crate::parser::lexer::token::TokenTy;
5 | use crate::parser::{ast::AstNode, fragment::Fragment};
6 | use num::bigint::ParseBigIntError;
7 | use num::{BigUint, Num};
8 |
9 | /// An integer literal in wright source code. Currently these are very simple.
10 | /// The format for integers is currently:
11 | ///
12 | /// `("0x" | "0X" | "0o" | "0b" | "0B")? (digit of given radix or underscore)+`
13 | ///
14 | /// See the [lexer module] for more details.
15 | ///
16 | /// [lexer module]: crate::parser::lexer::integer_literal
17 | #[derive(Debug)]
18 | pub struct IntegerLiteral<'src> {
19 |     /// The associated [Fragment] of source code. This is generally pulled directly from the
20 |     /// matched [TokenTy::IntegerLiteral] token.
21 |     pub fragment: Fragment<'src>,
22 |
23 |     /// The value that is represented by this integer literal -- this is represented using a [BigUint]
24 |     /// so that the actual type of the literal may be ascertained later on depending on its value. Wright may
25 |     /// or may not support integer literals larger than [`u64`] eventually, so we do this to keep our options
26 |     /// open/flexible.
27 |     pub value: BigUint,
28 | }
29 |
30 | /// Errors that can occur when parsing an integer literal.
31 | #[derive(Clone, Debug)]
32 | pub enum IntegerLiteralParsingError<'src> {
33 |     /// Expected to find a [TokenTy::IntegerLiteral] [Token] and didn't.
34 |     ExpectedIntegerLiteral {
35 |         /// The fragment we expected to see an integer literal at.
36 |         at: Fragment<'src>,
37 |     },
38 |
39 |     /// Error after passing string to [`num`].
40 |     NumParsingError {
41 |         /// The error from [`num`].
42 |         error: ParseBigIntError,
43 |
44 |         /// The fragment we were trying to parse to an integer literal.
45 |         at: Fragment<'src>,
46 |     },
47 | }
48 |
49 | impl<'src> AstNode<'src> for IntegerLiteral<'src> {
50 |     type Error = IntegerLiteralParsingError<'src>;
51 |
52 |     fn fragment(&self) -> Fragment<'src> {
53 |         self.fragment
54 |     }
55 |
56 |     fn try_parse(ctx: &mut AstGeneratorContext<'src>) -> Result<Self, Self::Error>
57 |     where
58 |         Self: Sized,
59 |     {
60 |         // Get the next token from the context if it is an integer literal.
61 |         // Otherwise error.
62 |         // We only care about the fragment from the token, so extract that.
63 |         let fragment: Fragment = ctx
64 |             .next_if_is(TokenTy::IntegerLiteral)
65 |             .ok_or(IntegerLiteralParsingError::ExpectedIntegerLiteral {
66 |                 at: ctx.peek_fragment(),
67 |             })?
68 |             .fragment;
69 |
70 |         // Get the fragment's internal string so we can slice it up and pass it to the num crate for
71 |         // heavy lifting.
72 |         let literal: &str = fragment.inner;
73 |
74 |         // Make a list of prefixes with their radixes to try.
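        // (Note that only lowercase `0o` is listed for octal -- there is no uppercase `0O`
        // prefix, presumably because it is too easily confused with a zero; the lexer's
        // prefix list matches this.)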
75 |         let prefixes = [("0x", 16), ("0X", 16), ("0o", 8), ("0b", 2), ("0B", 2)];
76 |
77 |         for (prefix, radix) in prefixes {
78 |             if let Some(prefix_stripped) = literal.strip_prefix(prefix) {
79 |                 // Strip any leading underscores, since `num` errors on those but we're less strict.
80 |                 let fully_stripped: &str = prefix_stripped.trim_start_matches('_');
81 |                 // Pass the rest of the parsing to `num`.
82 |                 // If this errors, pass it upwards -- it shouldn't, because the lexer should radix-check
83 |                 // for us and we just removed all leading underscores, but on the off chance that it does, just
84 |                 // report it.
85 |                 let value: BigUint = BigUint::from_str_radix(fully_stripped, radix).map_err(
86 |                     |err: ParseBigIntError| IntegerLiteralParsingError::NumParsingError {
87 |                         error: err,
88 |                         at: fragment,
89 |                     },
90 |                 )?;
91 |
92 |                 return Ok(IntegerLiteral { fragment, value });
93 |             }
94 |         }
95 |
96 |         // If no prefixes matched, it's a decimal number -- pass it right to `num`.
97 |         // Deal with any errors the same way as above, but this time don't bother stripping underscores,
98 |         // since the lexer enforces starting with an ascii digit.
99 |         let value: BigUint = BigUint::from_str_radix(literal, 10).map_err(|err| {
100 |             IntegerLiteralParsingError::NumParsingError {
101 |                 error: err,
102 |                 at: fragment,
103 |             }
104 |         })?;
105 |
106 |         Ok(IntegerLiteral { fragment, value })
107 |     }
108 | }
109 |
-------------------------------------------------------------------------------- /wright/src/ast/old/expression/primary/parens.rs: --------------------------------------------------------------------------------
1 | //! Expressions grouped in parentheses in Wright.
2 |
3 | use crate::parser::{
4 |     ast::{
5 |         expression::{Expression, ExpressionParsingError},
6 |         AstGeneratorContext, AstNode,
7 |     },
8 |     fragment::Fragment,
9 |     lexer::token::TokenTy,
10 | };
11 |
12 | /// An expression enclosed in parentheses in wright source code.
13 | #[derive(Debug)]
14 | pub struct ParensExpression<'src> {
15 |     /// The matching fragment of source code.
16 |     pub fragment: Fragment<'src>,
17 |
18 |     /// The expression enclosed in parentheses.
19 |     pub expression: Box<Expression<'src>>,
20 | }
21 |
22 | /// Error parsing a parentheses expression.
23 | #[derive(Debug, Clone)]
24 | pub enum ParensParsingError<'src> {
25 |     /// A closing parenthesis was not found.
26 |     ClosingParenNotFound {
27 |         /// The location a closing parenthesis was expected.
28 |         at: Fragment<'src>,
29 |     },
30 |
31 |     /// An error occurred while parsing within the parentheses.
32 |     ErrorInParentheses(Box<ExpressionParsingError<'src>>),
33 |
34 |     /// A parentheses expression was expected and was not found.
35 |     ExpectedParensExpression {
36 |         /// The location a parentheses expression was expected.
37 |         at: Fragment<'src>,
38 |     },
39 | }
40 |
41 | #[rustfmt::skip] // Do not auto-reformat this block -- parser code gets too mangled.
42 | impl<'src> AstNode<'src> for ParensExpression<'src> {
43 |     type Error = ParensParsingError<'src>;
44 |
45 |     fn fragment(&self) -> Fragment<'src> {
46 |         self.fragment
47 |     }
48 |
49 |     fn try_parse(ctx: &mut AstGeneratorContext<'src>) -> Result<Self, Self::Error>
50 |     where
51 |         Self: Sized
52 |     {
53 |         // Fork the parser and attempt to parse on the fork.
54 |         let mut fork: AstGeneratorContext = ctx.fork();
55 |
56 |         // Parse through the left paren.
57 |         fork
58 |             .next_if_is(TokenTy::LeftParen)
59 |             .ok_or_else(|| ParensParsingError::ExpectedParensExpression { at: fork.peek_fragment() })?;
60 |
61 |         // Parse the expression in the parentheses. Error if there is not one.
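        // (The inner error is boxed because `ExpressionParsingError` and `ParensParsingError`
        // refer to each other and would otherwise be infinitely sized.)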
62 |         let expr: Expression = Expression::try_parse(&mut fork)
63 |             // Box up the error and then wrap it in the correct variant.
64 |             .map_err(Box::new)
65 |             .map_err(ParensParsingError::ErrorInParentheses)?;
66 |
67 |         // Parse the closing parenthesis.
68 |         fork
69 |             .next_if_is(TokenTy::RightParen)
70 |             .ok_or_else(|| ParensParsingError::ClosingParenNotFound { at: fork.peek_fragment() })?;
71 |
72 |         // Update the parsing context. Use the trimmed fragment since the lexer may have consumed whitespace
73 |         // before the first paren.
74 |         let consumed_source: Fragment = ctx.update(&fork).trimmed();
75 |         // Return the parens expression.
76 |         Ok(Self { fragment: consumed_source, expression: Box::new(expr) })
77 |     }
78 | }
79 |
-------------------------------------------------------------------------------- /wright/src/ast/old/expression/unary.rs: --------------------------------------------------------------------------------
1 | //! Unary expressions in Wright source code.
-------------------------------------------------------------------------------- /wright/src/ast/old/test_utils.rs: --------------------------------------------------------------------------------
1 | //! Test utilities for checking AST generation code.
2 | //! The functions in this module are only available when running cargo test.
3 |
4 | use crate::filemap::{FileId, FileMap, FileName};
5 | use super::{AstGeneratorContext, AstNode};
6 |
7 | /// Run a parser against a given test string of source code.
8 | pub fn test_parser<'file_map, 'src: 'file_map, T: AstNode<'file_map, 'src>>(test_src: &'src str) -> Result<T, T::Error>
9 |     where T: Sized + 'src
10 | {
11 |     // Construct a new file map.
12 |     let mut file_map: FileMap = FileMap::new();
13 |
14 |     // Add the test string to create a file ID.
15 |     let file_id: FileId = file_map.add_str_ref(FileName::None, test_src);
16 |
17 |     // Create the ast generator context.
18 |     let mut ctx: AstGeneratorContext = AstGeneratorContext::new(file_id, &file_map);
19 |
20 |     T::try_parse(&mut ctx)
21 | }
-------------------------------------------------------------------------------- /wright/src/ast/old/ty.rs: --------------------------------------------------------------------------------
1 | //! AST nodes and modeling for types in source code.
2 |
3 |
-------------------------------------------------------------------------------- /wright/src/ast/path.rs: --------------------------------------------------------------------------------
1 | //! [Path]s are used in import statements, and can take the place of an [Identifier] in many places.
2 |
3 | use super::identifier::Identifier;
4 | use crate::source_tracking::fragment::Fragment;
5 |
6 | /// A double-colon separated path/reference to a module/function. This can be used in an `import` declaration and
7 | /// some other places. [Path]s with length of 1 are just [Identifier]s -- [Identifier]s can be considered paths in some
8 | /// instances.
9 | #[derive(Debug, Clone)]
10 | pub struct Path {
11 |     /// The [Fragment] of source code containing the full source of this path (including the double-colon separators).
12 |     pub full_path: Fragment,
13 |
14 |     /// The first (left-most) identifier in this [Path]. This can also be considered the "root" of the path --
15 |     /// the module that the following item/identifier can be found in.
16 |     pub head: Identifier,
17 |
18 |     /// The rest of the [Path], following the first separator.
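    /// For a path like `foo::bar::baz`, `head` holds `foo` and this field holds `bar` and `baz`.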
19 |     pub tail: Vec<Identifier>,
20 | }
-------------------------------------------------------------------------------- /wright/src/ast/ty.rs: --------------------------------------------------------------------------------
1 | //! AST models for type signatures in wright source.
2 |
3 | use crate::source_tracking::fragment::Fragment;
4 |
5 | /// A type signature in source code.
6 | #[derive(Debug, Clone)]
7 | #[allow(missing_docs)]
8 | pub enum Type {
9 |     Atomic(AtomicTy),
10 |     Reference(ReferenceTy),
11 | }
12 |
13 | impl Type {
14 |     /// Get the matching source for this type signature in source code.
15 |     pub fn matching_source(&self) -> &Fragment {
16 |         match self {
17 |             Type::Atomic(atomic_ty) => &atomic_ty.matching_source,
18 |             Type::Reference(reference_ty) => &reference_ty.matching_source,
19 |         }
20 |     }
21 |
22 |     /// Attempt to "downcast" this to an atomic type signature if it is one.
23 |     pub fn downcast_primitive(&self) -> Option<&AtomicTy> {
24 |         match self {
25 |             Type::Atomic(atomic) => Some(atomic),
26 |             _ => None,
27 |         }
28 |     }
29 |
30 |     /// Attempt to "downcast" this to a reference type signature if it is one.
31 |     pub fn downcast_reference(&self) -> Option<&ReferenceTy> {
32 |         match self {
33 |             Type::Reference(reference) => Some(reference),
34 |             _ => None,
35 |         }
36 |     }
37 | }
38 |
39 | /// The atomic types of wright -- primitive numeric types, boolean, char, etc.
40 | #[derive(Clone, Copy, Debug, PartialEq, Eq)]
41 | #[allow(missing_docs)]
42 | pub enum AtomicTyVariant {
43 |     Bool,
44 |     U8,
45 |     I8,
46 |     U16,
47 |     I16,
48 |     U32,
49 |     I32,
50 |     U64,
51 |     I64,
52 |     F32,
53 |     F64,
54 |     Char,
55 | }
56 |
57 | /// An atomic type signature in wright source code.
58 | #[derive(Clone, Debug)]
59 | #[allow(missing_docs)]
60 | pub struct AtomicTy {
61 |     pub variant: AtomicTyVariant,
62 |     pub matching_source: Fragment,
63 | }
64 |
65 | /// Source code for a reference type signature, such as `@u64`.
66 | #[derive(Debug, Clone)]
67 | pub struct ReferenceTy {
68 |     /// The source code of the target type.
69 |     pub target_ty: Box<Type>,
70 |     /// The fragment of the whole reference.
71 |     pub matching_source: Fragment,
72 | }
-------------------------------------------------------------------------------- /wright/src/bin/wright.rs: --------------------------------------------------------------------------------
1 | //! Command line interface for wright.
2 |
3 | use clap::{Parser, Subcommand};
4 | use std::{io, path::PathBuf};
5 | use wright::{
6 |     lexer::Lexer,
7 |     source_tracking::{SourceMap, SourceRef, source::Source},
8 | };
9 |
10 | /// The wright cli.
11 | #[derive(Parser, Debug)]
12 | #[command(author, version, about, long_about = None, arg_required_else_help = true)]
13 | struct Cli {
14 |     /// Whether the output should be only ASCII characters (default auto-detected, if the `supports-unicode`
15 |     /// crate is compiled in).
16 |     ///
17 |     /// This option does nothing if the `supports-unicode` crate was not enabled at compile time (in that case all
18 |     /// output will be ASCII regardless).
19 |     #[arg(short = 'A', long = "ascii")]
20 |     force_ascii: bool,
21 |     /// The subcommand passed to the wright cli.
22 |     #[command(subcommand)]
23 |     command: Command,
24 | }
25 |
26 | /// Different sub-commands that the wright cli supports.
27 | #[derive(Subcommand, Debug)]
28 | enum Command {
29 |     /// Subcommand for debugging wright's source code and interpreter.
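    /// (e.g. `wright debug tokens main.wr` prints every token lexed from `main.wr`;
    /// the file name here is just illustrative.)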
30 |     Debug {
31 |         #[command(subcommand)]
32 |         command: DebugCommand,
33 |     },
34 |
35 |     /// Subcommand for showing information about this version of wright.
36 |     Show {
37 |         #[command(subcommand)]
38 |         command: ShowCommand,
39 |     },
40 | }
41 |
42 | /// Different sub-commands that the debug sub-command supports.
43 | #[derive(Subcommand, Debug)]
44 | enum DebugCommand {
45 |     /// Debug the tokens/lexemes for a source file.
46 |     Tokens {
47 |         /// A file of wright source code.
48 |         file: PathBuf,
49 |         // /// Pretty print the source code with the tokens lined under them.
50 |         // /// If not used, a list of tokens will be printed with their metadata.
51 |         // #[arg(short, long)]
52 |         // pretty: bool,
53 |     },
54 | }
55 |
56 | /// Different subcommands that can be used to get info about a copy of the wright CLI/compiler/etc.
57 | #[derive(Subcommand, Debug)]
58 | enum ShowCommand {
59 |     /// Get the version string of this copy of the wright compiler.
60 |     Version,
61 |
62 |     /// Get the full list of feature names/strings that were enabled when this copy of wright was compiled.
63 |     Features,
64 | }
65 |
66 | fn main() -> io::Result<()> {
67 |     // Parse the command line arguments.
68 |     let cli: Cli = Cli::parse();
69 |
70 |     #[cfg(feature = "supports-unicode")]
71 |     {
72 |         wright::util::supports_unicode::set_force_ascii(cli.force_ascii);
73 |     }
74 |
75 |     match cli.command {
76 |         // Print all the tokens for a given file.
77 |         Command::Debug {
78 |             command: DebugCommand::Tokens { file },
79 |         } => {
80 |             let source_map: SourceMap = SourceMap::new();
81 |             // Add the given file to the file map.
82 |             let source_ref: SourceRef = source_map.add(Source::new_mapped_or_read(file)?);
83 |             // Make a lexer over the entirety of the given file.
84 |             let mut lexer: Lexer = Lexer::new(source_ref);
85 |             // Get all the tokens from the lexer and print each of them.
86 |             while let Some(token) = lexer.next_token() {
87 |                 println!("{token}");
88 |             }
89 |         }
90 |
91 |         Command::Show {
92 |             command: ShowCommand::Version,
93 |         } => {
94 |             println!("wright {}", wright::build_info::PKG_VERSION);
95 |         }
96 |
97 |         Command::Show {
98 |             command: ShowCommand::Features,
99 |         } => {
100 |             for feature in wright::build_info::FEATURES {
101 |                 println!("{feature}");
102 |             }
103 |         }
104 |     }
105 |
106 |     Ok(())
107 | }
-------------------------------------------------------------------------------- /wright/src/lexer/comments.rs: --------------------------------------------------------------------------------
1 | //! Implementation of comment token lexing.
2 |
3 | use super::{Lexer, token::TokenTy};
4 |
5 | /// The pattern that begins any single line comments (including doc comments).
6 | pub const SINGLE_LINE_COMMENT_PREFIX: &str = "//";
7 |
8 | /// The pattern that starts any multi-line comments (including doc comments).
9 | pub const MULTI_LINE_COMMENT_START: &str = "/*";
10 |
11 | /// The pattern that ends any multi-line comments (including doc comments).
12 | pub const MULTI_LINE_COMMENT_END: &str = "*/";
13 |
14 | /// Attempt to match a single line comment from the start of the [Lexer::remaining] fragment.
15 | /// Return a [usize] and optionally a [TokenTy]. The [usize] indicates how many bytes were in the comment.
16 | /// The [TokenTy] (if it's not [None]) should be either [TokenTy::InnerDocComment] or [TokenTy::OuterDocComment].
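/// For example, under this scheme a `/// ...` comment produces [TokenTy::InnerDocComment],
/// a `//! ...` comment produces [TokenTy::OuterDocComment], and a plain `// ...` comment
/// produces no token at all.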
17 | ///
18 | /// If the [TokenTy] is not [None], the lexer should consume the specified number of bytes (per the [usize]) and
19 | /// produce a token with the [variant](super::token::Token::variant) from this function.
20 | ///
21 | /// Generally I'm trying to follow the [rust comment spec] here.
22 | ///
23 | /// [rust comment spec]: https://doc.rust-lang.org/reference/comments.html
24 | pub fn try_match_single_line_comment(lexer: &Lexer) -> (usize, Option<TokenTy>) {
25 |     // Fork the lexer so we can do all the parsing on the fork without worrying about modifying the original
26 |     // unnecessarily.
27 |     let mut fork: Lexer = lexer.fork();
28 |
29 |     // Try to consume the single line comment prefix from the fork.
30 |     if fork.consume(SINGLE_LINE_COMMENT_PREFIX) {
31 |         // We consumed it successfully; read through a newline or the end of the forked lexer if we get there.
32 |
33 |         // First determine if this is a doc comment of some kind.
34 |         let is_inner_doc: bool = fork.matches("/") && !fork.matches("//");
35 |         let is_outer_doc: bool = fork.matches("!");
36 |
37 |         // Then consume until a newline, carriage return, or the end of the source fragment.
38 |         while !fork.remaining.is_empty() && !fork.matches("\r") && !fork.matches("\n") {
39 |             fork.consume_any();
40 |         }
41 |
42 |         // Determine the kind of token to produce (if any).
43 |         let variant: Option<TokenTy> = match (is_inner_doc, is_outer_doc) {
44 |             (true, false) => Some(TokenTy::InnerDocComment),
45 |             (false, true) => Some(TokenTy::OuterDocComment),
46 |             (false, false) => None,
47 |             (true, true) => unreachable!(
48 |                 "It is impossible for the `remaining` fragment to start with an `!` and a `/` simultaneously."
49 |             ),
50 |         };
51 |
52 |         // Return the number of bytes consumed and the type of token to
53 |         // produce if any.
54 |         return (fork.offset_from(lexer), variant);
55 |     }
56 |
57 |     // If the single line comment prefix was not immediately available, there is no comment.
58 |     (0, None)
59 | }
60 |
61 | /// Attempt to match a block comment from the start of the [Lexer::remaining] fragment.
62 | /// Return a [usize] and optionally a [TokenTy]. The [usize] indicates how many bytes were in the comment.
63 | /// The [TokenTy] (if it's not [None]) should be [TokenTy::InnerBlockDocComment], [TokenTy::OuterBlockDocComment], or
64 | /// [TokenTy::UnterminatedBlockComment].
65 | ///
66 | /// If the [TokenTy] is not [None], the lexer should consume the specified number of bytes (per the [usize]) and
67 | /// produce a token with the [variant](super::token::Token::variant) from this function.
68 | pub fn try_match_block_comment(lexer: &Lexer) -> (usize, Option<TokenTy>) {
69 |     // Handle corner cases here so we don't have to below.
70 |     // These are both considered empty non-documenting comments.
71 |     if lexer.matches("/***/") {
72 |         return (5, None);
73 |     }
74 |
75 |     if lexer.matches("/**/") {
76 |         return (4, None);
77 |     }
78 |
79 |     // Make a fork of the lexer to avoid modifying this lexer if we fail to parse.
80 |     let mut fork: Lexer = lexer.fork();
81 |
82 |     // Try to parse the start of a multi-line comment.
83 |     if fork.consume(MULTI_LINE_COMMENT_START) {
84 |         // Check if this is a doc comment.
85 |         let is_outer_doc: bool = fork.matches("!");
86 |         // Use this to indicate that more than one following asterisk is not a doc comment.
87 |         let is_inner_doc: bool = fork.matches("*") && !fork.matches("**");
88 |
89 |         // Consume until we see the end of the doc comment. If we run out of characters, consider the
90 |         // comment unterminated.
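        // e.g. `/* outer /* nested */ still outer */` should be consumed as one comment --
        // the recursive call below handles the nesting.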
91 |         while !fork.matches(MULTI_LINE_COMMENT_END) {
92 |             // Handle nested comments here:
93 |             if fork.matches(MULTI_LINE_COMMENT_START) {
94 |                 // Discard the output -- we don't care about doc comments in other comments.
95 |                 let (nested_comment_bytes, _) = try_match_block_comment(&fork);
96 |
97 |                 // SAFETY: the return from this function should never be on a char boundary or out of bounds.
98 |                 // This is because the return value is always either 0 or calculated using `offset_from`.
99 |                 unsafe { fork.advance_unchecked(nested_comment_bytes) };
100 |
101 |                 // Restart the loop to keep consuming this comment.
102 |                 continue;
103 |             }
104 |
105 |             // Handle unterminated comments here.
106 |             if fork.remaining.is_empty() {
107 |                 // If we have not hit a "*/" before the end of the input, return an unterminated block comment.
108 |                 let bytes_consumed: usize = fork.offset_from(lexer);
109 |                 return (bytes_consumed, Some(TokenTy::UnterminatedBlockComment));
110 |             }
111 |
112 |             // If there's still input, and not a nested comment, consume it.
113 |             fork.consume_any();
114 |         }
115 |
116 |         // If we get here, the comment was terminated. Consume the terminating characters, and return.
117 |         // Use a debug assert here to make sure that the comment is actually terminated.
118 |         let consumed_comment_terminator: bool = fork.consume(MULTI_LINE_COMMENT_END);
119 |         debug_assert!(consumed_comment_terminator, "comment is actually terminated");
120 |
121 |         // Determine the kind of token to produce (if any).
122 |         let variant: Option<TokenTy> = match (is_inner_doc, is_outer_doc) {
123 |             (true, false) => Some(TokenTy::InnerBlockDocComment),
124 |             (false, true) => Some(TokenTy::OuterBlockDocComment),
125 |             (false, false) => None,
126 |             (true, true) => {
127 |                 unreachable!("Lexer should not match multiple comment types at once.")
128 |             }
129 |         };
130 |
131 |         return (fork.offset_from(lexer), variant);
132 |     }
133 |
134 |     (0, None)
135 | }
136 |
137 | #[cfg(test)]
138 | mod tests {
139 |     use super::Lexer;
140 |
141 |     #[test]
142 |     fn ignored_single_line_comment() {
143 |         let mut lexer = Lexer::new_test("// test comment ");
144 |         assert!(lexer.next_token().is_none());
145 |         assert_eq!(lexer.remaining.len(), 0);
146 |     }
147 | }
-------------------------------------------------------------------------------- /wright/src/lexer/identifier.rs: --------------------------------------------------------------------------------
1 | //! Implementation related to parsing keywords and identifiers.
2 |
3 | use super::{Lexer, token::Token, token::TokenTy};
4 | use crate::source_tracking::fragment::Fragment;
5 | use std::str::Chars;
6 | use unicode_ident::{is_xid_continue, is_xid_start};
7 |
8 | /// Try to match a fragment recognized to be an identifier or keyword to
9 | /// a keyword, or return [TokenTy::Identifier].
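/// e.g. a fragment containing `const` maps to [TokenTy::KwConst], while `constant` falls
/// through to [TokenTy::Identifier].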
10 | fn identifier_or_keyword(fragment: Fragment) -> TokenTy {
11 |     use TokenTy::*;
12 |
13 |     match fragment.as_str() {
14 |         "record" => KwRecord,
15 |         "type" => KwType,
16 |         "enum" => KwEnum,
17 |         "union" => KwUnion,
18 |         "func" => KwFunc,
19 |         "pure" => KwPure,
20 |         "repr" => KwRepr,
21 |         "impl" => KwImpl,
22 |         "constraint" => KwConstraint,
23 |         "references" => KwReferences,
24 |         "trait" => KwTrait,
25 |         "const" => KwConst,
26 |         "where" => KwWhere,
27 |
28 |         "use" => KwUse,
29 |         "as" => KwAs,
30 |         "mod" => KwMod,
31 |         "pub" => KwPub,
32 |
33 |         "if" => KwIf,
34 |         "else" => KwElse,
35 |         "match" => KwMatch,
36 |
37 |         "for" => KwFor,
38 |         "in" => KwIn,
39 |         "while" => KwWhile,
40 |         "loop" => KwLoop,
41 |
42 |         "let" => KwLet,
43 |         "var" => KwVar,
44 |
45 |         "true" => KwTrue,
46 |         "false" => KwFalse,
47 |
48 |         "bool" => KwBool,
49 |         "u8" => KwU8,
50 |         "i8" => KwI8,
51 |         "u16" => KwU16,
52 |         "i16" => KwI16,
53 |         "u32" => KwU32,
54 |         "i32" => KwI32,
55 |         "f32" => KwF32,
56 |         "u64" => KwU64,
57 |         "i64" => KwI64,
58 |         "f64" => KwF64,
59 |         "char" => KwChar,
60 |
61 |         "_" => Underscore,
62 |
63 |         _ => Identifier,
64 |     }
65 | }
66 |
67 | /// Attempt to consume a keyword/[identifier](TokenTy::Identifier)/[underscore](TokenTy::Underscore) from the lexer.
68 | pub fn try_consume_keyword_or_identifier(lexer: &mut Lexer) -> Option<Token> {
69 |     // Get a character iterator that we can pull from.
70 |     let mut chars: Chars = lexer.remaining.chars();
71 |     // Get the next character from the iterator, and consider it the first char of any potential match.
72 |     // Make sure it's a valid identifier start (which includes the start of all keywords) or an underscore.
73 |     // If it does not exist or does not match the predicates, return None.
74 |     let next: char = chars.next().filter(|c| is_xid_start(*c) || *c == '_')?;
75 |     // Store/track the number of bytes consumed so far.
76 |     let mut bytes_consumed: usize = next.len_utf8();
77 |
78 |     // Take the remaining chars and add them to the sum.
79 |     bytes_consumed += chars
80 |         .take_while(|c| is_xid_continue(*c))
81 |         .map(char::len_utf8)
82 |         .sum::<usize>();
83 |
84 |     // Split the token and the new remaining fragment.
85 |     // VALIDITY: The character iterator should guarantee that we land on a valid character boundary within the bounds
86 |     // of the fragment.
87 |     let (token_fragment, new_remaining): (Fragment, Fragment) =
88 |         lexer.remaining.split_at_unchecked(bytes_consumed);
89 |
90 |     // Get the variant of token to produce.
91 |     let variant: TokenTy = identifier_or_keyword(token_fragment.clone());
92 |
93 |     // Update the lexer's remaining fragment.
94 |     lexer.remaining = new_remaining;
95 |
96 |     // Return the token.
97 |     Some(Token {
98 |         variant,
99 |         fragment: token_fragment,
100 |     })
101 | }
102 |
103 | #[cfg(test)]
104 | mod tests {
105 |     use super::{Lexer, TokenTy};
106 |
107 |     #[test]
108 |     fn identifiers_and_keywords() {
109 |         let mut lexer = Lexer::new_test("const TEST");
110 |
111 |         assert_eq!(lexer.next_token().unwrap().variant, TokenTy::KwConst);
112 |         assert_eq!(lexer.next_token().unwrap().variant, TokenTy::Whitespace);
113 |         assert_eq!(lexer.next_token().unwrap().variant, TokenTy::Identifier);
114 |     }
115 | }
-------------------------------------------------------------------------------- /wright/src/lexer/integer_literal.rs: --------------------------------------------------------------------------------
1 | //! Implementation for lexing integer literals.
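//!
//! e.g. `123_456`, `0xFF`, `0b1010`, and `0o77` all lex as a single integer literal token
//! (see the radix-prefix and underscore handling below).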
2 |
3 | use super::{
4 |     Lexer,
5 |     token::{Token, TokenTy},
6 | };
7 | use std::{iter::Peekable, str::Chars};
8 |
9 | /// Attempt to lex and consume a [TokenTy::IntegerLiteral] from the lexer.
10 | pub fn try_consume_integer_literal(lexer: &mut Lexer) -> Option<Token> {
11 |     // Make a peekable character iterator.
12 |     let mut chars: Peekable<Chars> = lexer.remaining.chars().peekable();
13 |     // Get the first character from the iterator. We can only continue lexing if one exists and is an ascii
14 |     // decimal digit.
15 |     let next: char = chars.next().filter(char::is_ascii_digit)?;
16 |     // Track the number of bytes consumed. We use the length of the parsed first char here but we could probably
17 |     // assume it to be 1.
18 |     let mut bytes_consumed: usize = next.len_utf8();
19 |     // Track the radix.
20 |     let mut radix: u32 = 10;
21 |
22 |     // Change the radix if necessary.
23 |     if next == '0' {
24 |         if let Some(prefix) = chars.next_if(|x| ['x', 'o', 'b', 'X', 'B'].contains(x)) {
25 |             // All the possible prefix chars are 1 byte ascii characters.
26 |             bytes_consumed += 1;
27 |
28 |             radix = match prefix {
29 |                 'x' | 'X' => 16,
30 |                 'b' | 'B' => 2,
31 |                 'o' => 8,
32 |                 _ => unreachable!("the prefix byte is checked above"),
33 |             };
34 |         }
35 |     }
36 |
37 |     // The first character after the optional prefix is required to be a digit, not an underscore.
38 |     bytes_consumed += chars.next_if(|c| c.is_digit(radix))?.len_utf8();
39 |
40 |     // Add the rest of the integer literal.
41 |     bytes_consumed += chars
42 |         .take_while(|c| c.is_digit(radix) || *c == '_')
43 |         .map(char::len_utf8)
44 |         .sum::<usize>();
45 |
46 |     Some(lexer.split_token(bytes_consumed, TokenTy::IntegerLiteral))
47 | }
48 |
49 | #[cfg(test)]
50 | mod tests {
51 |     use crate::lexer::integer_literal::try_consume_integer_literal;
52 |
53 |     use super::{Lexer, TokenTy};
54 |
55 |     #[test]
56 |     fn integer_literal() {
57 |         let mut lexer = Lexer::new_test("123_456_789.");
58 |
59 |         let token = lexer.next_token().unwrap();
60 |
61 |         assert_eq!(token.fragment.as_str(), "123_456_789");
62 |         assert_eq!(token.variant, TokenTy::IntegerLiteral);
63 |         assert_eq!(lexer.remaining.as_str(), ".");
64 |     }
65 |
66 |     #[test]
67 |     fn cant_start_with_underscore() {
68 |         let mut lexer = Lexer::new_test("0x__10");
69 |
70 |         assert!(try_consume_integer_literal(&mut lexer).is_none());
71 |
72 |         assert_eq!(lexer.remaining.as_str(), "0x__10");
73 |     }
74 | }
-------------------------------------------------------------------------------- /wright/src/lexer/quoted.rs: --------------------------------------------------------------------------------
1 | //! Lexing implementation for quoted literals.
2 |
3 | use super::{Lexer, token::Token, token::TokenTy};
4 | use std::str::Chars;
5 |
6 | /// Attempt to parse a quoted literal. This includes [TokenTy::StringLiteral], [TokenTy::CharLiteral], and
7 | /// [TokenTy::FormatStringLiteral].
8 | pub fn try_consume_quoted_literal(lexer: &mut Lexer) -> Option<Token> {
9 |     // Make a chars iterator to lex from.
10 |     let mut chars: Chars = lexer.remaining.chars();
11 |     // Get the first char from the character iterator.
12 |     // Return [None] if the first character doesn't exist or is not one of the quote characters.
13 |     let first: char = chars.next().filter(|c| ['\'', '"', '`'].contains(c))?;
14 |     // Track the number of bytes consumed.
15 |     let mut bytes_consumed: usize = first.len_utf8();
16 |     // Track whether the quoted literal is terminated.
17 |     let mut is_terminated: bool = false;
18 |
19 |     // Consume from the iterator while possible.
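    // e.g. in `"say \"hi\""` the escaped inner quotes are skipped by the backslash branch
    // below, so they do not terminate the literal early.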
20 | while let Some(consumed) = chars.next() { 21 | // Update the number of bytes consumed. 22 | bytes_consumed += consumed.len_utf8(); 23 | 24 | // Check if the character matches the starting char. 25 | // If so, record the literal as terminated and break this loop. 26 | if consumed == first { 27 | is_terminated = true; 28 | break; 29 | } 30 | 31 | // If the character we just consumed is a backslash. 32 | // We only handle escaped terminators here, rather than parsing actual meaning. 33 | // Consume the next character if there is one, regardless of what it is. 34 | // This prevents an escaped terminator from ending the literal. 35 | if consumed == '\\' { 36 | // If there is no next char, do not add anything to the number of bytes consumed. 37 | bytes_consumed += chars.next().map(char::len_utf8).unwrap_or(0); 38 | } 39 | } 40 | 41 | // Return when we have either reached a terminator or run out of characters. 42 | // First determine the variant to return. 43 | let variant: TokenTy = match first { 44 | '\'' => TokenTy::CharLiteral { 45 | terminated: is_terminated, 46 | }, 47 | 48 | '\"' => TokenTy::StringLiteral { 49 | terminated: is_terminated, 50 | }, 51 | 52 | '`' => TokenTy::FormatStringLiteral { 53 | terminated: is_terminated, 54 | }, 55 | 56 | _ => unreachable!("There are no other quoted literals"), 57 | }; 58 | 59 | // Summing char lengths from the iterator should never give us an invalid or out of bounds index. 60 | Some(lexer.split_token_unchecked(bytes_consumed, variant)) 61 | } 62 | 63 | #[cfg(test)] 64 | mod tests { 65 | use super::super::{Lexer, token::TokenTy}; 66 | 67 | #[test] 68 | fn string_literal() { 69 | let mut lexer = Lexer::new_test(r#""Test string literal""#); 70 | let token = lexer.next_token().unwrap(); 71 | assert_eq!(token.variant, TokenTy::StringLiteral { terminated: true }); 72 | assert_eq!(token.fragment.as_str(), "\"Test string literal\""); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /wright/src/lexer/token.rs: -------------------------------------------------------------------------------- 1 | //! Token models. 2 | 3 | use crate::source_tracking::fragment::Fragment; 4 | use std::fmt::{self, Display}; 5 | 6 | /// A token in wright source code. 7 | #[derive(Debug)] 8 | pub struct Token { 9 | /// What type of token this is. 10 | pub variant: TokenTy, 11 | /// The matching fragment of source code -- this contains the location and length data for the token. 12 | pub fragment: Fragment, 13 | } 14 | 15 | /// The different types of tokens in wright source. 16 | #[rustfmt::skip] // Turn off auto reformat. 17 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] 18 | // Allow missing docs (most of these should be self-evident). 19 | #[allow(missing_docs)] 20 | pub enum TokenTy { 21 | LeftCurly, RightCurly, 22 | LeftBracket, RightBracket, 23 | LeftParen, RightParen, 24 | 25 | Plus, PlusEq, 26 | Star, StarEq, 27 | Div, DivEq, 28 | Xor, XorEq, 29 | Mod, ModEq, 30 | Bang, BangEq, 31 | 32 | Minus, MinusEq, SingleArrow, 33 | Eq, EqEq, DoubleArrow, 34 | 35 | Lt, LtEq, LtLt, 36 | Gt, GtEq, GtGt, 37 | And, AndEq, AndAnd, 38 | Or, OrEq, OrOr, 39 | Colon, ColonEq, ColonColon, 40 | 41 | At, 42 | Tilde, 43 | Semi, 44 | Dot, 45 | Comma, 46 | Hash, 47 | Question, 48 | Dollar, 49 | 50 | // Not in the same group as the other ones there since it can be used at the start of identifiers. 
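    // (A lone `_` lexes as this variant; `_foo` keeps going and becomes an identifier.)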
51 | Underscore, 52 | 53 | Identifier, 54 | 55 | OuterDocComment, OuterBlockDocComment, 56 | InnerDocComment, InnerBlockDocComment, 57 | 58 | /// Indicates a block style comment without termination. 59 | /// Separate from [TokenTy::InnerDocComment] and [TokenTy::OuterDocComment] to indicate that 60 | /// unterminated comments will be handled differently (produce errors eventually). 61 | UnterminatedBlockComment, 62 | 63 | KwRecord, 64 | KwType, 65 | KwEnum, 66 | KwUnion, 67 | KwFunc, 68 | KwPure, 69 | KwRepr, 70 | KwImpl, 71 | KwConstraint, 72 | KwReferences, 73 | KwTrait, 74 | KwUse, 75 | KwAs, 76 | KwConst, 77 | KwMod, 78 | KwIf, 79 | KwElse, 80 | KwMatch, 81 | KwFor, 82 | KwIn, 83 | KwWhile, 84 | KwTrue, 85 | KwFalse, 86 | KwLoop, 87 | KwWhere, 88 | KwPub, 89 | 90 | KwLet, 91 | KwVar, 92 | 93 | // Keyword primitive types. 94 | KwBool, 95 | KwU8, 96 | KwI8, 97 | KwU16, 98 | KwI16, 99 | KwU32, 100 | KwI32, 101 | KwF32, 102 | KwU64, 103 | KwI64, 104 | KwF64, 105 | KwChar, 106 | 107 | IntegerLiteral, 108 | StringLiteral { terminated: bool }, 109 | FormatStringLiteral { terminated: bool }, 110 | CharLiteral { terminated: bool }, 111 | 112 | /// Whitespace counts as a token. 113 | Whitespace, 114 | 115 | /// Unknown character in lexer fragment. 116 | Unknown 117 | } 118 | 119 | impl Display for Token { 120 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 121 | // If the host terminal supports unicode, replace the newline & carriage return characters with pictures, 122 | // otherwise use ascii. 123 | let replacements = match crate::util::supports_unicode::supports_unicode() { 124 | true => &[("\n", "\u{240A}"), ("\r", "\u{240D}")], 125 | false => &[("\n", "[nl]"), ("\r", "[cr]")], 126 | }; 127 | 128 | let mut with_replacements = self.fragment.as_str().to_owned(); 129 | 130 | for (replace, replace_with) in replacements { 131 | with_replacements = with_replacements.replace(replace, replace_with); 132 | } 133 | 134 | write!(f, "\"{with_replacements}\" ({:?})", self.variant) 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /wright/src/lexer/trivial.rs: -------------------------------------------------------------------------------- 1 | //! Trivial tokens and their implementation. 2 | 3 | use super::{ 4 | Lexer, 5 | token::{Token, TokenTy}, 6 | }; 7 | 8 | /// Trivial tokens that are two ASCII characters and can be matched directly 9 | /// against the input source code. 10 | pub const TWO_ASCII_TRIVIAL_TOKENS: &[(&[u8; 2], TokenTy)] = &[ 11 | (b"->", TokenTy::SingleArrow), 12 | (b"-=", TokenTy::MinusEq), 13 | (b"=>", TokenTy::DoubleArrow), 14 | (b"==", TokenTy::EqEq), 15 | (b"&&", TokenTy::AndAnd), 16 | (b"||", TokenTy::OrOr), 17 | (b"<<", TokenTy::LtLt), 18 | (b">>", TokenTy::GtGt), 19 | (b"::", TokenTy::ColonColon), 20 | (b"|=", TokenTy::OrEq), 21 | (b"&=", TokenTy::AndEq), 22 | (b":=", TokenTy::ColonEq), 23 | (b">=", TokenTy::GtEq), 24 | (b"<=", TokenTy::LtEq), 25 | (b"!=", TokenTy::BangEq), 26 | (b"%=", TokenTy::ModEq), 27 | (b"^=", TokenTy::XorEq), 28 | (b"*=", TokenTy::StarEq), 29 | (b"+=", TokenTy::PlusEq), 30 | (b"/=", TokenTy::DivEq), 31 | ]; 32 | 33 | /// Single ASCII character trivial tokens that can be matched directly against 34 | /// the source code. 
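/// These are only tried after the two-byte table above fails to match, so e.g. `+=`
/// is never mistaken for `+` followed by `=`.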
35 | pub const SINGLE_ASCII_CHAR_TRIVIAL_TOKENS: &[(u8, TokenTy)] = &[
36 |     (b'(', TokenTy::LeftParen),
37 |     (b')', TokenTy::RightParen),
38 |     (b'[', TokenTy::LeftBracket),
39 |     (b']', TokenTy::RightBracket),
40 |     (b'{', TokenTy::LeftCurly),
41 |     (b'}', TokenTy::RightCurly),
42 |     (b'@', TokenTy::At),
43 |     (b';', TokenTy::Semi),
44 |     (b'?', TokenTy::Question),
45 |     (b',', TokenTy::Comma),
46 |     (b'#', TokenTy::Hash),
47 |     (b'$', TokenTy::Dollar),
48 |     (b'>', TokenTy::Gt),
49 |     (b'<', TokenTy::Lt),
50 |     (b'-', TokenTy::Minus),
51 |     (b':', TokenTy::Colon),
52 |     (b'!', TokenTy::Bang),
53 |     (b'=', TokenTy::Eq),
54 |     (b'&', TokenTy::And),
55 |     (b'|', TokenTy::Or),
56 |     (b'/', TokenTy::Div),
57 |     (b'+', TokenTy::Plus),
58 |     (b'^', TokenTy::Xor),
59 |     (b'*', TokenTy::Star),
60 |     (b'%', TokenTy::Mod),
61 | ];
62 |
63 | /// Attempt to consume a "trivial" token from the start of the [Lexer]'s [Lexer::remaining] fragment.
64 | ///
65 | /// Leave the lexer unmodified if one is not available.
66 | pub fn try_consume_trivial_token(lexer: &mut Lexer) -> Option<Token> {
67 |     // Get the number of bytes remaining, since we need at least 1 to parse anything.
68 |     let bytes_remaining: usize = lexer.bytes_remaining();
69 |
70 |     // No token if there are no bytes of source left.
71 |     if bytes_remaining == 0 {
72 |         return None;
73 |     }
74 |
75 |     // Attempt to match any two-byte ASCII trivial tokens.
76 |     // This must be done before single-ascii-byte tokens since matching is greedy.
77 |     if bytes_remaining >= 2 {
78 |         // Get the first two bytes of the remaining fragment.
79 |         // SAFETY: We just checked the length.
80 |         let bytes: &[u8] = unsafe { lexer.remaining.as_str().as_bytes().get_unchecked(0..2) };
81 |
82 |         // Match against each possible token pattern.
83 |         for (pattern, kind) in TWO_ASCII_TRIVIAL_TOKENS {
84 |             if bytes == *pattern {
85 |                 // We have already done bounds checking, and the split cannot land inside a
86 |                 // multi-byte character since we just matched against ASCII characters.
87 |                 return Some(lexer.split_token_unchecked(2, *kind));
88 |             }
89 |         }
90 |     }
91 |
92 |     // Do the same for single byte patterns.
93 |     // SAFETY: We checked that the number of bytes remaining is not 0 above.
94 |     let byte: &u8 = unsafe { lexer.remaining.as_str().as_bytes().get_unchecked(0) };
95 |
96 |     for (pattern, kind) in SINGLE_ASCII_CHAR_TRIVIAL_TOKENS {
97 |         if byte == pattern {
98 |             // If we matched, then the first byte is ASCII, and therefore we don't have to worry
99 |             // about bounds or unicode boundaries.
100 |             return Some(lexer.split_token_unchecked(1, *kind));
101 |         }
102 |     }
103 |
104 |     // If nothing else has matched, there is no trivial token available.
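    // (Bytes like `a` or `0` fall through here and are presumably handled by the
    // identifier/keyword and literal lexers instead.)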
105 |     None
106 | }
107 |
108 | #[cfg(test)]
109 | mod tests {
110 |     use super::{Lexer, TokenTy};
111 |
112 |     #[test]
113 |     fn plus_and_plus_eq_tokens() {
114 |         let mut plus = Lexer::new_test("+");
115 |         let mut plus_eq = Lexer::new_test("+=");
116 |
117 |         let plus_token = plus.next_token().unwrap();
118 |         let plus_eq_token = plus_eq.next_token().unwrap();
119 |
120 |         assert_eq!(plus.bytes_remaining(), 0);
121 |         assert_eq!(plus_eq.bytes_remaining(), 0);
122 |         assert_eq!(plus_token.variant, TokenTy::Plus);
123 |         assert_eq!(plus_eq_token.variant, TokenTy::PlusEq);
124 |     }
125 |
126 |     #[test]
127 |     fn plus_one_token() {
128 |         let mut plus_one = Lexer::new_test("+1");
129 |         let plus_token = plus_one.next_token().unwrap();
130 |         assert_eq!(plus_one.bytes_remaining(), 1);
131 |         assert_eq!(plus_token.variant, TokenTy::Plus);
132 |         assert_eq!(plus_token.fragment.len(), 1);
133 |     }
134 | }
-------------------------------------------------------------------------------- /wright/src/lib.rs: --------------------------------------------------------------------------------
1 | //! The wright programming language crate. This is being re-written from the ground up as of September 2022.
2 |
3 | // Compile without the standard library if the user chooses to do so.
4 | #![cfg_attr(not(any(feature = "std", test)), no_std)]
5 | // We want to enforce good stuff by default.
6 | #![deny(missing_copy_implementations, missing_debug_implementations)]
7 | #![deny(rustdoc::broken_intra_doc_links)]
8 | #![warn(missing_docs)]
9 | // Compiler directive to get docs.rs (which uses the nightly version of the rust compiler) to show
10 | // info about the features required for various modules and functionality.
11 | //
12 | // See: https://stackoverflow.com/a/70914430.
13 | #![cfg_attr(all(doc, CHANNEL_NIGHTLY), feature(doc_auto_cfg))]
14 |
15 | // We cannot use memory mapped files on architectures that do not support memmap2.
16 | #[cfg(all(
17 |     feature = "file_memmap",
18 |     any(target_arch = "wasm32", target_arch = "wasm64")
19 | ))]
20 | compile_error!("Memory mapped files not available on WASM targets");
21 |
22 | // If the "none" feature is enabled, make sure the user has no other features enabled.
23 | //
24 | // Currently all of the features besides "none" depend on "std", so if both "none" and "std"
25 | // are present, raise an error at compile time.
26 | //
27 | // Make sure to keep this updated as more features are added.
28 | #[cfg(all(feature = "none", feature = "std"))]
29 | compile_error!(
30 |     "feature \"none\" is enabled, which restricts the usage of any other features including \"std\"."
31 | );
32 |
33 | /// Build information about this copy of wright, provided using [built](https://docs.rs/built/).
34 | pub mod build_info {
35 |     include!(concat!(env!("OUT_DIR"), "/built.rs"));
36 | }
37 |
38 | #[cfg(feature = "source-tracking")]
39 | pub mod source_tracking;
40 |
41 | #[cfg(feature = "reporting")]
42 | pub mod reporting;
43 |
44 | #[cfg(feature = "lexer")]
45 | pub mod lexer;
46 |
47 | #[cfg(feature = "ast-models")]
48 | pub mod ast;
49 |
50 | #[cfg(feature = "parser")]
51 | pub mod parser;
52 |
53 | pub mod util;
54 |
55 | // pub mod repl;
-------------------------------------------------------------------------------- /wright/src/parser.rs: --------------------------------------------------------------------------------
1 | //! This parser module is responsible for turning the stream of [Token]s from the [Lexer] into a tree of [AST] nodes.
2 | //!
3 | //! [AST]: crate::ast
4 | //! [Token]: crate::lexer::token::Token
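//!
//! A minimal sketch of driving the parser, mirroring `benches/parser.rs` (the `FileName::None`
//! and `"test_ident"` inputs here are just illustrative):
//!
//! ```ignore
//! use std::sync::Arc;
//! use wright::{
//!     ast::identifier::Identifier,
//!     lexer::Lexer,
//!     parser::Parser,
//!     source_tracking::{SourceMap, filename::FileName, source::Source},
//! };
//!
//! // Register a source, wrap it in a lexer, and wrap that in a parser.
//! let map = SourceMap::new();
//! let source_ref = map.add(Source::new_from_static_str(FileName::None, "test_ident"));
//! let mut parser = Parser::new(Lexer::new(Arc::clone(&source_ref)));
//!
//! // Parse an AST node from the token stream.
//! let ident = Identifier::parse(&mut parser).unwrap();
//! ```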
5 |
6 | use error::{ParserError, ParserErrorKind};
7 |
8 | use super::lexer::Lexer;
9 | use crate::{
10 |     lexer::token::{Token, TokenTy},
11 |     source_tracking::fragment::Fragment,
12 | };
13 | use std::collections::VecDeque;
14 |
15 | mod decl;
16 | pub mod error;
17 | mod identifier;
18 | mod literal;
19 | mod path;
20 | mod ty;
21 | pub mod whitespace;
22 |
23 | /// The [Parser] struct wraps a [Lexer] and adds lookahead and functions that are useful for parsing.
24 | #[derive(Debug)]
25 | pub struct Parser {
26 |     lexer: Lexer,
27 |     lookahead: VecDeque<Token>,
28 | }
29 |
30 | impl Parser {
31 |     /// Construct a new parser around a given [Lexer].
32 |     pub fn new(lexer: Lexer) -> Self {
33 |         Parser {
34 |             lexer,
35 |             lookahead: VecDeque::new(),
36 |         }
37 |     }
38 |
39 |     /// Get the number of remaining bytes on this parser. This is potentially useful for checking
40 |     /// if a parser has advanced between two calls (or checking if a parser has reached the end of input).
41 |     pub fn bytes_remaining(&self) -> usize {
42 |         let bytes_remaining_in_lookahead_buffer = self
43 |             .lookahead
44 |             .iter()
45 |             .map(|t| t.fragment.len())
46 |             .sum::<usize>();
47 |
48 |         let bytes_remaining_in_lexer = self.lexer.bytes_remaining();
49 |
50 |         bytes_remaining_in_lexer + bytes_remaining_in_lookahead_buffer
51 |     }
52 |
53 |     /// Get the next [Token] from this [Parser]. This may be a token that's already been peeked.
54 |     ///
55 |     /// Skips any non-document comments encountered via the lexer implementation.
56 |     ///
57 |     /// Return an error if a [Token] with [TokenTy::Unknown] is encountered.
58 |     pub fn next_token(&mut self) -> Result<Option<Token>, ParserError> {
59 |         let token = self
60 |             .lookahead
61 |             .pop_front()
62 |             .or_else(|| self.lexer.next_token());
63 |
64 |         // Check for unknown tokens, which should always convert to an error.
65 |         match token {
66 |             Some(Token {
67 |                 variant: TokenTy::Unknown,
68 |                 fragment,
69 |             }) => Err(ParserErrorKind::EncounteredUnknownToken.at(fragment)),
70 |             known_token_or_none => Ok(known_token_or_none),
71 |         }
72 |     }
73 |
74 |     /// Advance this [Parser] by `n` [Token]s. If this [Parser] runs out of [Token]s, panic.
75 |     ///
76 |     /// # Panics
77 |     /// - If `n` is greater than the number of remaining tokens.
78 |     pub fn advance(&mut self, n: usize) {
79 |         // Add tokens to the lookahead buffer until we have enough to split off.
80 |         while self.lookahead.len() < n {
81 |             let token = self
82 |                 .lexer
83 |                 .next_token()
84 |                 .expect("advance: `n` <= number of remaining tokens");
85 |
86 |             self.lookahead.push_back(token);
87 |         }
88 |
89 |         // Split them off.
90 |         self.lookahead = self.lookahead.split_off(n);
91 |     }
92 |
93 |     /// Peek at the next token from the [Lexer] (cached in the lookahead queue if peeked before).
94 |     pub fn peek(&mut self) -> Option<&Token> {
95 |         if self.lookahead.is_empty() {
96 |             self.lookahead.push_back(self.lexer.next_token()?);
97 |         }
98 |
99 |         self.lookahead.front()
100 |     }
101 |
102 |     /// Peek the [Fragment] of the next [Token].
103 |     pub fn peek_fragment(&mut self) -> Option<&Fragment> {
104 |         self.peek().map(|token| &token.fragment)
105 |     }
106 |
107 |     /// Peek the [TokenTy] of the next [Token].
108 |     pub fn peek_variant(&mut self) -> Option<TokenTy> {
109 |         self.peek().map(|token| token.variant)
110 |     }
111 |
112 |     /// Peek the [Fragment] of the next [Token] and clone it, or return a clone of the
113 |     /// remainder [Fragment] of the internal [Lexer]
114 |     /// (which will be empty, since there wasn't a [Token] to peek).
115 | /// 116 | /// This is likely only useful for error reporting -- a clone of a potentially empty fragment is 117 | /// rarely ever useful otherwise. 118 | pub fn peek_fragment_or_rest_cloned(&mut self) -> Fragment { 119 | match self.peek() { 120 | Some(Token { fragment, .. }) => fragment.clone(), 121 | None => { 122 | let rest = self.lexer.remaining.clone(); 123 | 124 | // Assert that we're making the right assumptions about the remaining fragment. 125 | // These are (unidiomatically) done using debug_assert -- perhaps that changes eventually, 126 | // however it should be fine for now, since this can only produce logic bugs (never memory or 127 | // concurrency bugs). 128 | debug_assert!(rest.is_valid()); 129 | debug_assert!(rest.is_empty()); 130 | debug_assert!(rest.is_empty_at_end_of_source()); 131 | 132 | rest 133 | } 134 | } 135 | } 136 | 137 | /// Get a reference to the wrapped [Lexer]. 138 | pub fn lexer(&self) -> &Lexer { 139 | &self.lexer 140 | } 141 | 142 | /// Lookahead `k` [Token]s. 143 | /// 144 | /// If `k == 0` then this is effectively peeking at the next [Token] from the wrapped [Lexer]. 145 | pub fn lookahead(&mut self, k: usize) -> Option<&Token> { 146 | while self.lookahead.len() <= k { 147 | self.lookahead.push_back(self.lexer.next_token()?); 148 | } 149 | 150 | self.lookahead.get(k) 151 | } 152 | 153 | /// Similar to [Parser::lookahead] but instead returns a slice of `n` [Token]s, starting with the next [Token]. 154 | /// 155 | /// Returns [None] if `n` is greater than the number of remaining [Token]s for this [Parser]. 156 | pub fn lookahead_window(&mut self, n: usize) -> Option<&[Token]> { 157 | while self.lookahead.len() < n { 158 | self.lookahead.push_back(self.lexer.next_token()?); 159 | } 160 | 161 | // Use make_contiguous here to get a unified/single slice. 162 | Some(&self.lookahead.make_contiguous()[..n]) 163 | } 164 | 165 | /// Get the next [Token] from this parser if its [Token::variant] is the given `token_ty`. 166 | pub fn next_if_is(&mut self, token_ty: TokenTy) -> Option<Token> { 167 | // Peeking successfully first means that the lookahead vec will never be empty here. 168 | (self.peek()?.variant == token_ty) 169 | // SAFETY: We just peeked a token to check its variant so this unwrap is always ok. 170 | .then(|| unsafe { self.lookahead.pop_front().unwrap_unchecked() }) 171 | } 172 | 173 | /// Peek at the next [Token]s of this [Parser] and determine if the [Token::variant]s match this 174 | /// sequence of [TokenTy]s. 175 | pub fn matches(&mut self, seq: &[TokenTy]) -> bool { 176 | // Use the rare let-else to ensure there are, at minimum, the given number of tokens remaining. 177 | let Some(lookahead_window) = self.lookahead_window(seq.len()) else { 178 | return false; 179 | }; 180 | 181 | // Use a zipped iterator to compare all the token variants. 182 | lookahead_window 183 | .iter() 184 | .zip(seq) 185 | .all(|(token, matches)| token.variant == *matches) 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /wright/src/parser/decl.rs: -------------------------------------------------------------------------------- 1 | //! Declaration parsing. 2 | 3 | mod import; 4 | -------------------------------------------------------------------------------- /wright/src/parser/decl/import.rs: -------------------------------------------------------------------------------- 1 | //! Parser implementation for `use path::to::thing;` declarations. 
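//!
//! A minimal usage sketch (illustrative only, not part of the original file; `Lexer::new_test`
//! is the test-only constructor used throughout this crate's unit tests):
//!
//! ```ignore
//! use wright::{ast::decl::import::ImportDecl, lexer::Lexer, parser::Parser};
//!
//! // Parse `use wright::util as u;` and inspect the resulting AST node.
//! let mut parser = Parser::new(Lexer::new_test("use wright::util as u;"));
//! let decl = ImportDecl::parse(&mut parser).expect("well-formed import");
//! assert_eq!(decl.imported_as.unwrap().fragment.as_str(), "u");
//! ```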
2 | 3 | use crate::{ 4 | ast::{decl::import::ImportDecl, identifier::Identifier, path::Path}, 5 | lexer::token::{Token, TokenTy}, 6 | parser::{ 7 | Parser, 8 | error::{ParserError, ParserErrorKind}, 9 | whitespace, 10 | }, 11 | source_tracking::fragment::Fragment, 12 | }; 13 | 14 | impl ImportDecl { 15 | /// Parse an import declaration. 16 | /// 17 | /// This will advance the parser if `use` is seen -- if a validly formed import does not follow, 18 | /// the parser may be left in the middle of a malformed declaration. 19 | pub fn parse(parser: &mut Parser) -> Result<Self, ParserError> { 20 | let use_kw: Token = parser.next_if_is(TokenTy::KwUse).ok_or( 21 | ParserErrorKind::ExpectedImportDeclaration.at(parser.peek_fragment_or_rest_cloned()), 22 | )?; 23 | 24 | // Require whitespace after the keyword. 25 | whitespace::require_whitespace(parser)?; 26 | // Parse the path. 27 | let path: Path = Path::parse(parser)?; 28 | 29 | // Whitespace and then "as ...;" or optional whitespace and semi ";". 30 | 31 | // The "as ...;" requires whitespace. 32 | let imported_as = match parser.next_if_is(TokenTy::Whitespace) { 33 | // If there's no whitespace after the path, we expect it to be followed by a semicolon (no renaming). 34 | None => None, 35 | 36 | // If there is whitespace, then it could be followed by `as ...;` or just `;`. 37 | Some(_) => { 38 | // Either way, consume any additional whitespace/comments. 39 | whitespace::optional_whitespace(parser); 40 | 41 | // Check if we have an `as` and if so read the renaming clause. 42 | // Otherwise pass on to reading the semicolon. 43 | match parser.next_if_is(TokenTy::KwAs) { 44 | // No `as` -- do nothing (return no renaming clause). 45 | None => None, 46 | 47 | // `as ...;` -- consume the ` ...` part. 48 | Some(_) => { 49 | whitespace::require_whitespace(parser).map_err(|e| { 50 | e.with_help("whitespace needed between \"as\" and binding.") 51 | })?; 52 | 53 | let imported_as = Identifier::parse(parser).map_err(|e| { 54 | e.with_help("expected binding in \"use ... 
as\" declaration.") 55 | })?; 56 | 57 | Some(imported_as) 58 | } 59 | } 60 | } 61 | }; 62 | 63 | whitespace::optional_whitespace(parser); 64 | 65 | if let Some(semi) = parser.next_if_is(TokenTy::Semi) { 66 | Ok(ImportDecl { 67 | matching_source: Fragment::cover(&use_kw.fragment, &semi.fragment), 68 | imported_item: path, 69 | imported_as, 70 | }) 71 | } else { 72 | Err(ParserErrorKind::ImportMustEndWithSemicolon 73 | .at(parser.peek_fragment_or_rest_cloned())) 74 | } 75 | } 76 | } 77 | 78 | #[cfg(test)] 79 | mod tests { 80 | use crate::{ast::decl::import::ImportDecl, lexer::Lexer, parser::Parser}; 81 | 82 | #[test] 83 | fn test_import() { 84 | let mut parser = Parser::new(Lexer::new_test("use wright::util;")); 85 | let import_decl = ImportDecl::parse(&mut parser).unwrap(); 86 | assert!(parser.lexer.remaining.is_empty()); 87 | assert_eq!(import_decl.imported_item.head.fragment.as_str(), "wright"); 88 | assert_eq!(import_decl.imported_item.tail[0].fragment.as_str(), "util"); 89 | } 90 | 91 | #[test] 92 | fn test_import_with_whitespace() { 93 | let mut parser = Parser::new(Lexer::new_test("use wright :: util ;")); 94 | let import_decl = ImportDecl::parse(&mut parser).unwrap(); 95 | assert!(parser.lexer.remaining.is_empty()); 96 | assert_eq!(import_decl.imported_item.head.fragment.as_str(), "wright"); 97 | assert_eq!(import_decl.imported_item.tail[0].fragment.as_str(), "util"); 98 | } 99 | 100 | #[test] 101 | fn test_import_as() { 102 | let mut parser = Parser::new(Lexer::new_test("use wright::util as u;")); 103 | let import_decl = ImportDecl::parse(&mut parser).unwrap(); 104 | assert!(parser.lexer.remaining.is_empty()); 105 | assert_eq!(import_decl.imported_item.head.fragment.as_str(), "wright"); 106 | assert_eq!(import_decl.imported_item.tail[0].fragment.as_str(), "util"); 107 | assert_eq!(import_decl.imported_as.unwrap().fragment.as_str(), "u"); 108 | } 109 | 110 | #[test] 111 | fn test_import_as_with_comment() { 112 | let mut parser = Parser::new(Lexer::new_test("use wright::util as /* old_name */ u;")); 113 | let import_decl = ImportDecl::parse(&mut parser).unwrap(); 114 | assert!(parser.lexer.remaining.is_empty()); 115 | assert_eq!(import_decl.imported_item.head.fragment.as_str(), "wright"); 116 | assert_eq!(import_decl.imported_item.tail[0].fragment.as_str(), "util"); 117 | assert_eq!(import_decl.imported_as.unwrap().fragment.as_str(), "u"); 118 | } 119 | 120 | #[test] 121 | fn test_import_as_with_preceding_comment() { 122 | let mut parser = Parser::new(Lexer::new_test("use wright::util /* as old_name */ as u;")); 123 | let import_decl = ImportDecl::parse(&mut parser).unwrap(); 124 | assert!(parser.lexer.remaining.is_empty()); 125 | assert_eq!(import_decl.imported_item.head.fragment.as_str(), "wright"); 126 | assert_eq!(import_decl.imported_item.tail[0].fragment.as_str(), "util"); 127 | assert_eq!(import_decl.imported_as.unwrap().fragment.as_str(), "u"); 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /wright/src/parser/error.rs: -------------------------------------------------------------------------------- 1 | //! Representation and implementation relating to errors that may be encountered in parsing. 2 | 3 | use crate::{ 4 | reporting::{Diagnostic, Highlight}, 5 | source_tracking::fragment::Fragment, 6 | }; 7 | use std::borrow::Cow; 8 | 9 | /// All the different errors that can be produced in the process of parsing. 
10 | /// The names of these should be self-describing, but in cases when one of these needs to appear in a diagnostic, 11 | /// use [ParserErrorKind::describe]. 12 | #[allow(missing_docs)] 13 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] 14 | pub enum ParserErrorKind { 15 | EncounteredUnknownToken, 16 | EncounteredUnterminatedComment, 17 | EncounteredUnterminatedString, 18 | ExpectedAtomicTypeSignature, 19 | ExpectedBooleanLiteral, 20 | ExpectedIdentifier, 21 | ExpectedImportDeclaration, 22 | ExpectedIntegerLiteral, 23 | ExpectedPath, 24 | ExpectedReferenceTypeSignature, 25 | ExpectedTypeSignature, 26 | ExpectedWhitespace, 27 | ImportMustEndWithSemicolon, 28 | } 29 | 30 | impl ParserErrorKind { 31 | /// Get a short description of this kind of error. 32 | pub const fn describe(self) -> &'static str { 33 | use ParserErrorKind::*; 34 | 35 | match self { 36 | EncounteredUnknownToken => "encountered unknown token", 37 | EncounteredUnterminatedComment => { 38 | "encountered unterminated multiline comment while parsing" 39 | } 40 | EncounteredUnterminatedString => { 41 | "encountered unterminated string literal while parsing" 42 | } 43 | ExpectedAtomicTypeSignature => "expected atomic primitive type", 44 | ExpectedBooleanLiteral => "expected boolean literal", 45 | ExpectedIdentifier => "expected identifier", 46 | ExpectedImportDeclaration => "expected import declaration", 47 | ExpectedIntegerLiteral => "expected integer literal", 48 | ExpectedPath => "expected path or identifier", 49 | ExpectedReferenceTypeSignature => "expected reference type signature", 50 | ExpectedTypeSignature => "expected type signature", 51 | ExpectedWhitespace => "expected whitespace character(s)", 52 | ImportMustEndWithSemicolon => "import declarations must end with a semicolon", 53 | } 54 | } 55 | 56 | /// Construct a [ParserError] by adding a location [Fragment] to this error variant. 57 | pub const fn at(self, f: Fragment) -> ParserError { 58 | ParserError { 59 | kind: self, 60 | location: f, 61 | help: Vec::new(), 62 | } 63 | } 64 | } 65 | 66 | /// An error that occurred while parsing. 67 | /// This error structure is pretty simple compared to what can be represented using a diagnostic. That's fine, 68 | /// since most of the more complex errors arise when typechecking, rather than checking syntax. 69 | #[derive(Debug)] 70 | pub struct ParserError { 71 | /// What type/cause there is for this error. 72 | pub kind: ParserErrorKind, 73 | 74 | /// Where this error occurred. 75 | pub location: Fragment, 76 | 77 | /// Optional help strings that can be printed with this error. 78 | pub help: Vec<Cow<'static, str>>, 79 | } 80 | 81 | impl ParserError { 82 | /// Builder-style method to add a help string to a [ParserError]. 83 | pub fn with_help(mut self, help: impl Into<Cow<'static, str>>) -> Self { 84 | self.help.push(help.into()); 85 | self 86 | } 87 | 88 | /// Turn this parser error into a full-blown compiler error. 89 | pub fn as_diagnostic(self) -> Diagnostic { 90 | let description = self.kind.describe(); 91 | 92 | // Put a little clarification if the parser reached end of source and then produced an error. 
93 | let message = if self.location.is_empty_at_end_of_source() { 94 | Cow::Borrowed("found end of source here") 95 | } else { 96 | Cow::Borrowed("") 97 | }; 98 | 99 | let mut diagnostic = Diagnostic::error() 100 | .with_message(description) 101 | .with_highlights([Highlight::primary(self.location.clone(), message)]); 102 | 103 | if !self.help.is_empty() { 104 | diagnostic = diagnostic.with_notes(self.help); 105 | } 106 | 107 | diagnostic 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /wright/src/parser/identifier.rs: -------------------------------------------------------------------------------- 1 | //! Parsing implementation for [Identifier]. 2 | 3 | use super::{ 4 | Parser, 5 | error::{ParserError, ParserErrorKind}, 6 | }; 7 | use crate::{ast::identifier::Identifier, lexer::token::TokenTy}; 8 | 9 | impl Identifier { 10 | /// Parse an [Identifier] from a [Parser]. Leaves the [Parser] unadvanced on failure. 11 | pub fn parse(parser: &mut Parser) -> Result<Self, ParserError> { 12 | parser 13 | .next_if_is(TokenTy::Identifier) 14 | .map(|token| Identifier { 15 | fragment: token.fragment, 16 | }) 17 | .ok_or_else(|| { 18 | ParserErrorKind::ExpectedIdentifier.at(parser.peek_fragment_or_rest_cloned()) 19 | }) 20 | } 21 | } 22 | 23 | #[cfg(test)] 24 | mod tests { 25 | use crate::{ 26 | ast::identifier::Identifier, 27 | lexer::Lexer, 28 | parser::{Parser, error::ParserErrorKind}, 29 | }; 30 | 31 | #[test] 32 | fn test_parse_ident() { 33 | let mut parser = Parser::new(Lexer::new_test("source")); 34 | let ident = Identifier::parse(&mut parser).unwrap(); 35 | assert_eq!(ident.fragment.as_str(), "source"); 36 | assert_eq!(parser.lexer().remaining.len(), 0); 37 | } 38 | 39 | #[test] 40 | fn test_parse_ident_fail() { 41 | for fail in ["12", "+", " ", " test", "_", "record"] { 42 | let mut parser = Parser::new(Lexer::new_test(fail)); 43 | let error = Identifier::parse(&mut parser).unwrap_err(); 44 | assert_eq!(error.kind, ParserErrorKind::ExpectedIdentifier); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /wright/src/parser/literal.rs: -------------------------------------------------------------------------------- 1 | //! Literal parser implementations. 2 | 3 | mod boolean; 4 | mod integer; 5 | -------------------------------------------------------------------------------- /wright/src/parser/literal/boolean.rs: -------------------------------------------------------------------------------- 1 | //! Boolean literal parsing logic. 2 | 3 | use crate::{ 4 | ast::literal::BooleanLiteral, 5 | lexer::token::TokenTy, 6 | parser::{ 7 | Parser, 8 | error::{ParserError, ParserErrorKind}, 9 | }, 10 | }; 11 | 12 | impl BooleanLiteral { 13 | /// Parse a boolean literal from the given [Parser]. 
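///
/// Sketch of the expected behavior (illustrative; this mirrors the unit test below and
/// assumes the test-only `Lexer::new_test` constructor):
///
/// ```ignore
/// let mut parser = Parser::new(Lexer::new_test("true"));
/// let lit = BooleanLiteral::parse(&mut parser).expect("`true` parses as a boolean literal");
/// assert!(lit.value);
/// ```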
14 | pub fn parse(parser: &mut Parser) -> Result<Self, ParserError> { 15 | if let Some(token) = parser.next_if_is(TokenTy::KwTrue) { 16 | return Ok(BooleanLiteral { 17 | fragment: token.fragment, 18 | value: true, 19 | }); 20 | } 21 | 22 | if let Some(token) = parser.next_if_is(TokenTy::KwFalse) { 23 | return Ok(BooleanLiteral { 24 | fragment: token.fragment, 25 | value: false, 26 | }); 27 | } 28 | 29 | Err(ParserErrorKind::ExpectedBooleanLiteral.at(parser.peek_fragment_or_rest_cloned())) 30 | } 31 | } 32 | 33 | #[cfg(test)] 34 | mod tests { 35 | use crate::{ast::literal::BooleanLiteral, lexer::Lexer, parser::Parser}; 36 | 37 | #[test] 38 | fn works() { 39 | for s in ["true", "false"] { 40 | let mut p = Parser::new(Lexer::new_test(s)); 41 | 42 | assert!(BooleanLiteral::parse(&mut p).is_ok()); 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /wright/src/parser/literal/integer.rs: -------------------------------------------------------------------------------- 1 | //! Integer literal parsing implementation. 2 | 3 | use crate::parser::Parser; 4 | use crate::parser::error::{ParserError, ParserErrorKind}; 5 | use crate::{ast::literal::IntegerLiteral, lexer::token::TokenTy}; 6 | use num::{BigUint, Num}; 7 | 8 | impl IntegerLiteral { 9 | /// Parse an integer literal from the given [Parser]. 10 | pub fn parse(parser: &mut Parser) -> Result<Self, ParserError> { 11 | // Get the token containing the integer literal from the parser. 12 | let int_lit_token = parser.next_if_is(TokenTy::IntegerLiteral).ok_or_else(|| { 13 | ParserErrorKind::ExpectedIntegerLiteral.at(parser.peek_fragment_or_rest_cloned()) 14 | })?; 15 | 16 | // Get the string to pass to num for the rest of parsing. 17 | let mut parse_str: &str = int_lit_token.fragment.as_str(); 18 | let mut chars = parse_str.chars(); 19 | 20 | // Unwrap: Integer literals must be at minimum 1 character, enforced by the lexer. 21 | // Use a null byte as a sentinel value for the second one, since we're just using the prefix to check for 22 | // a radix to pass to num. 23 | let prefix: [char; 2] = [chars.next().unwrap(), chars.next().unwrap_or('\0')]; 24 | 25 | // Determine the radix and remove any prefix in the process. 26 | let radix: u32 = match prefix { 27 | // Hexadecimal. 28 | ['0', 'x' | 'X'] => { 29 | parse_str = &parse_str[2..]; 30 | 16 31 | } 32 | 33 | // Binary. 34 | ['0', 'b' | 'B'] => { 35 | parse_str = &parse_str[2..]; 36 | 2 37 | } 38 | 39 | // Octal. 40 | ['0', 'o'] => { 41 | parse_str = &parse_str[2..]; 42 | 8 43 | } 44 | 45 | // All other patterns are not radix-prefixes. 46 | _ => 10, 47 | }; 48 | 49 | // Pass the remainder of parsing off to num. 50 | let value = BigUint::from_str_radix(parse_str, radix) 51 | // We can use expect here for now since we have validated the format of the string 52 | // on our own before passing it off. 
53 | .expect("num should successfully parse"); 54 | 55 | Ok(IntegerLiteral { 56 | fragment: int_lit_token.fragment, 57 | value, 58 | }) 59 | } 60 | } 61 | 62 | #[cfg(test)] 63 | mod tests { 64 | use num::BigUint; 65 | 66 | use crate::{ast::literal::IntegerLiteral, lexer::Lexer, parser::Parser}; 67 | 68 | #[test] 69 | fn normal() { 70 | let mut parser = Parser::new(Lexer::new_test("1000")); 71 | 72 | let int_lit = IntegerLiteral::parse(&mut parser).unwrap(); 73 | 74 | assert_eq!(int_lit.value, BigUint::new(vec![1000])); 75 | assert_eq!(parser.lexer.remaining.as_str(), ""); 76 | assert_eq!(int_lit.fragment.as_str(), "1000"); 77 | } 78 | 79 | // #[test] 80 | // fn ignore_underscores 81 | } 82 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast.rs: -------------------------------------------------------------------------------- 1 | //! Various [AST] (abstract syntax tree) constructs used in Wright. 2 | //! 3 | //! [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree 4 | 5 | pub mod declaration; 6 | pub mod expression; 7 | pub mod identifier; 8 | pub mod metadata; 9 | pub mod path; 10 | pub mod statement; 11 | pub mod types; 12 | 13 | /// Trait implemented by all AST node types. 14 | pub trait AstNode<'src> {} 15 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/declaration.rs: -------------------------------------------------------------------------------- 1 | //! AST Node structure for declarations in a Wright source file. 2 | 3 | pub mod class; 4 | pub mod r#enum; 5 | pub mod function; 6 | pub mod generics; 7 | pub mod import; 8 | pub mod module; 9 | pub mod r#type; 10 | pub mod union; 11 | pub mod visibility; 12 | pub mod where_clause; 13 | 14 | use self::visibility::Visibility; 15 | use super::{identifier::Identifier, metadata::AstNodeMeta}; 16 | 17 | /// A top-level declaration in source code. 18 | #[derive(Debug)] 19 | pub enum Declaration<'src> { 20 | Module(module::ModuldeDeclaration<'src>), 21 | Import(import::ImportDeclaration<'src>), 22 | Class(class::ClassDeclaration<'src>), 23 | Union(union::UnionDeclaration<'src>), 24 | Type(r#type::TypeDeclaration<'src>), 25 | Enum(r#enum::EnumDeclaration<'src>), 26 | 27 | Function, 28 | Trait, 29 | Implementation, 30 | Struct, 31 | Record, 32 | Constraint, 33 | } 34 | 35 | /// A struct declaration in source code. 36 | #[derive(Debug)] 37 | pub struct StructDeclaration<'src> { 38 | /// The metadata for this node. 39 | pub meta: AstNodeMeta<'src>, 40 | /// The struct's visibility. 41 | pub vis: Visibility<'src>, 42 | /// The struct's name. 43 | pub name: Identifier<'src>, 44 | } 45 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/declaration/class.rs: -------------------------------------------------------------------------------- 1 | //! Class declarations in wright source code. 2 | 3 | use crate::parser::ast::{ 4 | declaration::generics::{GenericConstArg, GenericTypeArg}, 5 | declaration::visibility::Visibility, 6 | identifier::Identifier, 7 | metadata::AstNodeMeta, 8 | types::TypeInstantiation, 9 | }; 10 | 11 | /// A class declaration in source code. 12 | #[derive(Debug)] 13 | pub struct ClassDeclaration<'src> { 14 | /// The metadata for this node. 15 | pub meta: AstNodeMeta<'src>, 16 | /// The class's visibility. 17 | pub vis: Visibility<'src>, 18 | /// The class's name. 19 | pub name: Identifier<'src>, 20 | /// Generic types that this class declares. 
21 | pub generic_type_arguments: Vec<GenericTypeArg<'src>>, 22 | /// Generic constants that this class declares. 23 | pub generic_const_arguments: Vec<GenericConstArg<'src>>, 24 | /// The fields of the class. 25 | pub fields: Vec<ClassField<'src>>, 26 | } 27 | 28 | /// A class field declaration in a class declaration. 29 | #[derive(Debug)] 30 | pub struct ClassField<'src> { 31 | /// AST Node metadata. 32 | pub meta: AstNodeMeta<'src>, 33 | /// Visibility of this class field. 34 | pub vis: Visibility<'src>, 35 | /// Is this class field mutable by default or is it set-once. 36 | /// This is based on whether `const` is specified before the field name. 37 | pub mutable: bool, 38 | /// The type of the field. 39 | pub ty: TypeInstantiation<'src>, 40 | } 41 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/declaration/enum.rs: -------------------------------------------------------------------------------- 1 | //! Enum declarations in wright source code. 2 | 3 | use crate::parser::ast::{ 4 | declaration::visibility::Visibility, expression::Expression, identifier::Identifier, 5 | metadata::AstNodeMeta, types::TypeInstantiation, 6 | }; 7 | 8 | /// An enumeration in source code. 9 | #[derive(Debug)] 10 | pub struct EnumDeclaration<'src> { 11 | /// The metadata for this AST node. 12 | pub meta: AstNodeMeta<'src>, 13 | /// The visibility of the enum. 14 | pub vis: Visibility<'src>, 15 | /// The name of the enum. 16 | pub name: Identifier<'src>, 17 | /// The parent type or enumeration that this enumeration is a strict subset of. 18 | pub parent: TypeInstantiation<'src>, 19 | } 20 | 21 | /// A variant of an enum in an enum declaration. 22 | #[derive(Debug)] 23 | pub struct EnumVariant<'src> { 24 | /// The metadata for this AST node. 25 | pub meta: AstNodeMeta<'src>, 26 | /// The name of this variant of the enum. 27 | pub name: Identifier<'src>, 28 | /// The value that this variant represents. 29 | pub value: Expression<'src>, 30 | } 31 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/declaration/function.rs: -------------------------------------------------------------------------------- 1 | //! Structures representing function declarations in wright source code. 2 | 3 | use crate::parser::ast::{ 4 | declaration::generics::{GenericConstArg, GenericTypeArg}, 5 | declaration::visibility::Visibility, 6 | expression::block::Block, 7 | identifier::Identifier, 8 | metadata::AstNodeMeta, 9 | types::TypeInstantiation, 10 | }; 11 | 12 | use super::where_clause::WhereClause; 13 | 14 | /// A function declaration in source code. 15 | #[derive(Debug)] 16 | pub struct FunctionDeclaration<'src> { 17 | /// The metadata about this AST node. 18 | pub meta: AstNodeMeta<'src>, 19 | /// The visibility of this function declaration. 20 | pub vis: Visibility<'src>, 21 | /// Is the function marked as `dyn` (not to be used in compile-time expressions). 22 | pub is_dynamic: bool, 23 | /// The name of the function. 24 | pub name: Identifier<'src>, 25 | /// Any generic type arguments that the function uses. 26 | pub generic_type_args: Vec<GenericTypeArg<'src>>, 27 | /// Any generic constant arguments that the function uses. 28 | pub generic_constant_args: Vec<GenericConstArg<'src>>, 29 | /// Arguments declared for this function. 30 | pub args: Vec<FunctionArg<'src>>, 31 | /// The return type declared for this function. 32 | pub return_type: Option<TypeInstantiation<'src>>, 33 | /// Optional clause to define bounds on the generics declared in this function. 34 | pub where_clause: Option<WhereClause<'src>>, 35 | /// The function body. 
36 | pub body: Block<'src>, 37 | } 38 | 39 | /// A function argument in wright source code. 40 | #[derive(Debug)] 41 | pub struct FunctionArg<'src> { 42 | /// Metadata about this AST node. 43 | pub meta: AstNodeMeta<'src>, 44 | /// The name of this argument. 45 | pub name: Identifier<'src>, 46 | /// The type of this argument. 47 | pub ty: TypeInstantiation<'src>, 48 | } 49 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/declaration/generics.rs: -------------------------------------------------------------------------------- 1 | //! AST Node structures relating to generics. 2 | 3 | use crate::parser::ast::{identifier::Identifier, metadata::AstNodeMeta, types::TypeInstantiation}; 4 | 5 | /// A generic type argument in a type/class/etc declaration. 6 | #[derive(Debug)] 7 | pub struct GenericTypeArg<'src> { 8 | /// The node metadata for this generic argument. 9 | pub meta: AstNodeMeta<'src>, 10 | /// The identifier for the generic type. 11 | pub name: Identifier<'src>, 12 | } 13 | 14 | /// A generic const argument in a type/class/etc declaration. 15 | #[derive(Debug)] 16 | pub struct GenericConstArg<'src> { 17 | /// The metadata associated with this node. 18 | pub meta: AstNodeMeta<'src>, 19 | /// The identifier for this generic constant. 20 | pub name: Identifier<'src>, 21 | /// The type expected in the generic instantiation. 22 | pub ty: TypeInstantiation<'src>, 23 | } 24 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/declaration/import.rs: -------------------------------------------------------------------------------- 1 | //! Import declaration. 2 | 3 | use crate::parser::ast::{identifier::Identifier, metadata::AstNodeMeta, path::Path}; 4 | 5 | use super::visibility::Visibility; 6 | 7 | /// The three different flavors that an import can be. 8 | #[derive(Debug)] 9 | pub enum ImportOptions<'src> { 10 | /// The import is aliased to a new name in the current file/scope. 11 | Aliased(Identifier<'src>), 12 | /// The import ends with `::*` indicating that all items in the imported path are available in scope. 13 | Glob, 14 | /// The import is neither aliased nor globbed. 15 | None, 16 | } 17 | 18 | /// An import in source code. These can be aliased using the `as <identifier>;` suffix. 19 | /// Glob imports ending with `::*;` are supported without aliasing. 20 | #[derive(Debug)] 21 | pub struct ImportDeclaration<'src> { 22 | /// The metadata for this node. 23 | pub meta: AstNodeMeta<'src>, 24 | /// The visibility of the import, used for re-exporting. 25 | pub vis: Visibility<'src>, 26 | /// The path being imported. This could theoretically be a single identifier. 27 | pub path: Path<'src>, 28 | /// Any changes to the import (aliasing or glob import). 29 | pub opts: ImportOptions<'src>, 30 | } 31 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/declaration/module.rs: -------------------------------------------------------------------------------- 1 | //! Module declaration. 2 | 3 | use crate::parser::ast::{ 4 | declaration::visibility::Visibility, identifier::Identifier, metadata::AstNodeMeta, 5 | }; 6 | 7 | /// A module declaration in Wright source. 8 | /// 9 | /// These are always in the form `[vis] mod <name>;`. 10 | #[derive(Debug)] 11 | pub struct ModuldeDeclaration<'src> { 12 | /// The metadata about this node. 13 | pub meta: AstNodeMeta<'src>, 14 | /// The visibility of this module. 
15 | pub vis: Visibility<'src>, 16 | /// The name of this module (which will be searched for in the file system at the time of module resolution). 17 | pub name: Identifier<'src>, 18 | } 19 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/declaration/type.rs: -------------------------------------------------------------------------------- 1 | //! Type alias declarations in wright source code. 2 | 3 | use crate::parser::ast::{ 4 | declaration::generics::{GenericConstArg, GenericTypeArg}, 5 | declaration::visibility::Visibility, 6 | identifier::Identifier, 7 | metadata::AstNodeMeta, 8 | types::TypeInstantiation, 9 | }; 10 | 11 | use super::where_clause::WhereClause; 12 | 13 | /// A type alias in source code. 14 | #[derive(Debug)] 15 | pub struct TypeDeclaration<'src> { 16 | /// The metadata for this node. 17 | pub meta: AstNodeMeta<'src>, 18 | /// The type alias's visibility. 19 | pub vis: Visibility<'src>, 20 | /// The name of the type. 21 | pub name: Identifier<'src>, 22 | /// The generic type arguments that need to be passed to this type. 23 | pub generic_type_arguments: Vec<GenericTypeArg<'src>>, 24 | /// The generic constant arguments that need to be passed to this type. 25 | pub generic_const_arguments: Vec<GenericConstArg<'src>>, 26 | /// Optional clause to define bounds on the generics declared in this type alias. 27 | pub where_clause: Option<WhereClause<'src>>, 28 | /// The type being aliased to. This is optional, as traits may declare associated types. 29 | pub dest: Option<TypeInstantiation<'src>>, 30 | } 31 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/declaration/union.rs: -------------------------------------------------------------------------------- 1 | //! Union declarations in wright source code. 2 | 3 | use crate::parser::ast::{ 4 | declaration::generics::{GenericConstArg, GenericTypeArg}, 5 | declaration::visibility::Visibility, 6 | identifier::Identifier, 7 | metadata::AstNodeMeta, 8 | types::TypeInstantiation, 9 | }; 10 | 11 | use super::where_clause::WhereClause; 12 | 13 | /// A union declaration in source code. 14 | #[derive(Debug)] 15 | pub struct UnionDeclaration<'src> { 16 | /// The metadata for this node. 17 | pub meta: AstNodeMeta<'src>, 18 | /// The visibility of the union. 19 | pub vis: Visibility<'src>, 20 | /// The name of the union. 21 | pub name: Identifier<'src>, 22 | /// Generic types that this union declares. 23 | pub generic_type_arguments: Vec<GenericTypeArg<'src>>, 24 | /// Generic constants that this union declares. 25 | pub generic_const_arguments: Vec<GenericConstArg<'src>>, 26 | /// Optional clause to define bounds on the generic types declared in this union. 27 | pub where_clause: Option<WhereClause<'src>>, 28 | /// The variants available to this union. 29 | pub variants: Vec<UnionVariant<'src>>, 30 | } 31 | 32 | /// A variant of a union declaration in source code. 33 | #[derive(Debug)] 34 | pub struct UnionVariant<'src> { 35 | /// The metadata for this node. 36 | pub meta: AstNodeMeta<'src>, 37 | /// The name of this variant of the union. 38 | pub name: Identifier<'src>, 39 | /// The type of this variant of the union. 40 | pub ty: TypeInstantiation<'src>, 41 | } 42 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/declaration/visibility.rs: -------------------------------------------------------------------------------- 1 | //! A node in the AST representing the visibility of a declaration. 
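//!
//! An illustrative mapping (added for clarity; the surface keywords shown are assumptions,
//! since this file does not spell them out):
//!
//! ```text
//! public  -> VisibilityTy::Public
//! package -> VisibilityTy::Package
//! (none)  -> VisibilityTy::Private  (the default)
//! ```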
2 | 3 | use crate::parser::ast::metadata::AstNodeMeta; 4 | use derive_more::Display; 5 | 6 | /// The possible visibilities of a declaration in Wright. 7 | #[derive(Clone, Copy, PartialEq, Eq, Debug, Hash, Display)] 8 | pub enum VisibilityTy { 9 | /// Externally public. 10 | Public, 11 | /// Package private. 12 | Package, 13 | /// Module/file private. This is the default. 14 | Private, 15 | } 16 | 17 | /// A visibility modifier in wright source. 18 | #[derive(Clone, Debug)] 19 | pub struct Visibility<'src> { 20 | /// Node metadata. 21 | pub meta: AstNodeMeta<'src>, 22 | /// Which visibility is represented. 23 | pub variant: VisibilityTy, 24 | } 25 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/declaration/where_clause.rs: -------------------------------------------------------------------------------- 1 | //! Structural representation of `where` clauses in wright. 2 | 3 | use crate::parser::ast::{metadata::AstNodeMeta, types::TypeInstantiation}; 4 | 5 | /// A where clause in wright source code. 6 | #[derive(Debug)] 7 | pub struct WhereClause<'src> { 8 | /// The metadata for this AST node. 9 | pub meta: AstNodeMeta<'src>, 10 | /// The type bounds defined in this where clause. 11 | pub bounds: Vec<TypeBound<'src>>, 12 | } 13 | 14 | /// A bound on a type defined in a where clause. 15 | #[derive(Debug)] 16 | pub struct TypeBound<'src> { 17 | /// The metadata for this AST node. 18 | pub meta: AstNodeMeta<'src>, 19 | /// The type being bound. 20 | pub lhs_ty: TypeInstantiation<'src>, 21 | /// The trait being required of it. 22 | pub requirement: TypeInstantiation<'src>, 23 | } 24 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/expression.rs: -------------------------------------------------------------------------------- 1 | //! Structures used for representing expressions in wright source code. 2 | 3 | use self::primary::Primary; 4 | 5 | pub mod block; 6 | pub mod literal; 7 | pub mod parentheses; 8 | pub mod primary; 9 | 10 | /// Enumeration of all the different kinds of expression in wright. 11 | #[derive(Debug)] 12 | pub enum Expression<'src> { 13 | /// A literal in source code. 14 | Primary(Primary<'src>), 15 | // Block(block::Block<'src>), 16 | } 17 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/expression/block.rs: -------------------------------------------------------------------------------- 1 | //! Representation of block expressions in wright source code. 2 | 3 | use crate::parser::ast::{expression::Expression, metadata::AstNodeMeta}; 4 | 5 | /// A block in wright source code. 6 | #[derive(Debug)] 7 | pub struct Block<'src> { 8 | /// The AST node metadata. 9 | pub meta: AstNodeMeta<'src>, 10 | /// The body of this block as an expression. 11 | pub body: Box<Expression<'src>>, 12 | } 13 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/expression/literal.rs: -------------------------------------------------------------------------------- 1 | //! Representation for literal expressions in wright source code. 
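//!
//! A sketch of the combinator flow in this module (an illustration, not part of the original
//! file): `Literal::parse` builds one boxed parser out of the child literal parsers and
//! returns the first success:
//!
//! ```text
//! Literal::parse = first_sucessful([
//!     IntegerLiteral::parse -> Literal::Integer,
//!     BooleanLiteral::parse -> Literal::Boolean,
//! ])
//! ```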
2 | 3 | use crate::parser::{ 4 | state::ParserState, 5 | util::{ 6 | discard_error::discard_errors, erase::erase, first_successful::first_sucessful, 7 | map::map_node_type, BoxedParserFn, NodeParserOption, NodeParserResult, 8 | }, 9 | }; 10 | 11 | use self::{ 12 | boolean::BooleanLiteral, 13 | integer::IntegerLiteral, 14 | }; 15 | 16 | pub mod boolean; 17 | pub mod integer; 18 | pub mod string; 19 | pub mod character; 20 | pub(self) mod escapes; 21 | 22 | #[derive(Debug)] 23 | pub enum Literal<'src> { 24 | /// An integer literal in source code. 25 | Integer(IntegerLiteral<'src>), 26 | /// A boolean literal in source code. 27 | Boolean(BooleanLiteral<'src>), 28 | } 29 | 30 | /// Convenience function for converting a child parser to one that is erased and generates 31 | /// [`Literal`]s in [`NodeParserOption`]s. 32 | fn convert_to_literal_parser<'src, PF, LC, N>( 33 | parser_function: PF, 34 | literal_conversion: LC, 35 | ) -> BoxedParserFn<'src, NodeParserOption<Literal<'src>>> 36 | where 37 | PF: (Fn(&mut ParserState<'src>) -> NodeParserResult<N>) + 'src, 38 | LC: (Fn(N) -> Literal<'src>) + 'src, 39 | { 40 | erase(discard_errors(map_node_type(parser_function, literal_conversion))) 41 | } 42 | 43 | impl<'src> Literal<'src> { 44 | /// Parse a literal value in source code. 45 | pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserOption<Self> { 46 | // Make a parser that finds the first successful literal parse. 47 | let parser = first_sucessful(vec![ 48 | convert_to_literal_parser(IntegerLiteral::parse, Literal::Integer), 49 | convert_to_literal_parser(BooleanLiteral::parse, Literal::Boolean), 50 | ]); 51 | 52 | // Call that parser. 53 | (parser)(parser_state) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/expression/literal/boolean.rs: -------------------------------------------------------------------------------- 1 | //! Boolean literal representation and parsing in Wright source. 2 | 3 | use crate::parser::{ 4 | ast::metadata::AstNodeMeta, 5 | error::{ParserError, ParserErrorVariant}, 6 | lexer::{ 7 | tokens::{Token, TokenTy}, 8 | IndexedToken, 9 | }, 10 | state::ParserState, 11 | util::NodeParserResult, 12 | }; 13 | 14 | /// A boolean literal (true or false) in Wright source code. 15 | #[derive(Debug)] 16 | pub struct BooleanLiteral<'src> { 17 | /// The AST Node Metadata. 18 | pub meta: AstNodeMeta<'src>, 19 | /// The value of this literal. 20 | pub value: bool, 21 | } 22 | 23 | impl<'src> BooleanLiteral<'src> { 24 | /// Attempt to parse a boolean literal from the lexer held by the parser state. 25 | /// Do not mutate parser state if a viable [`TokenTy`] is not available from the lexer. 26 | pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserResult<Self> { 27 | // Try to parse a `true` token and a `false` token. 28 | for (token_ty, value) in [(TokenTy::True, true), (TokenTy::False, false)] { 29 | // Try to take the appropriate token from the parser state. 30 | if let Some(IndexedToken { 31 | index, 32 | token: Token { length, .. }, 33 | }) = parser_state.next_token_if_ty_eq(token_ty) 34 | { 35 | // On success, return the popped token's appropriate AST node. 36 | return Ok(BooleanLiteral { 37 | meta: parser_state.make_ast_node_meta(index, length), 38 | value, 39 | }); 40 | } 41 | } 42 | 43 | // If neither parse succeeds, return an error. 
44 | Err(ParserError { 45 | byte_range: parser_state.peek_byte_range(), 46 | ty: ParserErrorVariant::Expected("boolean literal"), 47 | }) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/expression/literal/character.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vcfxb/wright-lang/35d28bb47428ee3bac7d4552b2d0bf98e1efd499/wright/src/parser/old/ast/expression/literal/character.rs -------------------------------------------------------------------------------- /wright/src/parser/old/ast/expression/literal/integer.rs: -------------------------------------------------------------------------------- 1 | //! Integer literal representation and parsing in wright source. 2 | 3 | use crate::parser::{ 4 | ast::metadata::AstNodeMeta, 5 | error::{ParserError, ParserErrorVariant}, 6 | lexer::{ 7 | tokens::{Token, TokenTy}, 8 | IndexedToken, 9 | }, 10 | state::ParserState, 11 | util::NodeParserResult, 12 | }; 13 | use num::{BigUint, Num}; 14 | use std::cmp; 15 | 16 | /// An integer in Wright source code. 17 | #[derive(Debug)] 18 | pub struct IntegerLiteral<'src> { 19 | /// Metadata about this literal in source code. 20 | pub meta: AstNodeMeta<'src>, 21 | /// The value represented in source code. 22 | pub value: BigUint, 23 | } 24 | 25 | impl<'src> IntegerLiteral<'src> { 26 | /// Parse a literal integer from source code. 27 | /// Do not mutate parser state if there is not a [`TokenTy::IntegerLit`] next. 28 | pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserResult<Self> { 29 | // Read and destructure an integer literal token from the lexer. 30 | let IndexedToken { 31 | index, 32 | token: Token { length, .. }, 33 | } = parser_state 34 | // All integer literals should be of this token type. 35 | .next_token_if_ty_eq(TokenTy::IntegerLit) 36 | // Error out if the next token is not an integer literal. 37 | .ok_or_else(|| ParserError { 38 | byte_range: parser_state.peek_byte_range(), 39 | ty: ParserErrorVariant::Expected("integer literal"), 40 | })?; 41 | 42 | // Get the matching source of this token. 43 | let matching_source = &parser_state.source[index..index + length]; 44 | // Check for a radix prefix in (at most) the first two characters. 45 | let prefix = &matching_source[..cmp::min(2, matching_source.len())]; 46 | 47 | // Get a radix off the prefix. 48 | let radix = match prefix { 49 | "0x" | "0X" => 16, 50 | "0b" | "0B" => 2, 51 | "0o" => 8, 52 | _ => 10, 53 | }; 54 | 55 | // Strip the prefix from the string to get the body of it to parse. 56 | let body = if radix != 10 { 57 | &matching_source[2..] 58 | } else { 59 | matching_source 60 | }; 61 | 62 | // Parse it. 63 | let value = BigUint::from_str_radix(body, radix) 64 | // Panic here as the lexer should check for this. 65 | .expect("lexer checks integer literal format"); 66 | 67 | // Return ok. 68 | Ok(IntegerLiteral { 69 | meta: parser_state.make_ast_node_meta(index, length), 70 | value, 71 | }) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/expression/literal/string.rs: -------------------------------------------------------------------------------- 1 | //! AST node representation and parsing implementation for string literals. 
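//!
//! Illustrative note (an addition, not in the original file): a literal containing no escape
//! sequences can borrow directly from the source text, while one containing escapes must own
//! its unescaped value:
//!
//! ```text
//! "abc"   -> StringLiteralValue::WithoutEscapes("abc")
//! "a\nb"  -> StringLiteralValue::WithEscapes(Rc::from("a\nb"))
//! ```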
2 | 3 | use std::rc::Rc; 4 | 5 | use crate::parser::{ast::{metadata::AstNodeMeta, expression::literal::escapes::unescape}, state::ParserState, util::NodeParserResult, lexer::{tokens::{TokenTy, Token}, IndexedToken}, error::{ParserError, ParserErrorVariant}}; 6 | 7 | /// The value of a string literal in source code. 8 | #[derive(Debug, Clone)] 9 | pub enum StringLiteralValue<'src> { 10 | /// A string literal in source code without any escapes can be represented directly 11 | /// using a reference into the source code. This will refer to the string literal without the 12 | /// opening and closing quotation marks. 13 | WithoutEscapes(&'src str), 14 | 15 | /// A string literal in source code with escapes must be represented using an owned string, as 16 | /// we have to do some processing to resolve all the escapes into the actual unescaped unicode string. 17 | /// We store this in an [`Rc`] to make cloning less expensive, as we will not need to mutate this string 18 | /// while it's in the AST. 19 | WithEscapes(Rc<str>) 20 | } 21 | 22 | impl<'src> StringLiteralValue<'src> { 23 | pub fn as_str(&self) -> &str { 24 | match self { 25 | StringLiteralValue::WithoutEscapes(s) => s, 26 | StringLiteralValue::WithEscapes(rc) => rc.as_ref(), 27 | } 28 | } 29 | } 30 | 31 | /// A string literal in source code. 32 | #[derive(Debug)] 33 | pub struct StringLit<'src> { 34 | /// The metadata about this node. 35 | pub meta: AstNodeMeta<'src>, 36 | /// A reference counted owned string representing the parsed value. 37 | pub value: StringLiteralValue<'src>, 38 | /// Format strings are denoted using '`' instead of '"'. Treat these similarly to string literals. 39 | pub is_format_string: bool, 40 | } 41 | 42 | impl<'src> StringLit<'src> { 43 | /// Parse a string literal from source code. If there is not a [`TokenTy::StringLit`] 44 | /// available from the parser state's lexer, then this will not mutate the parser state. 45 | pub fn parse(parser_state: &mut ParserState<'src>) -> NodeParserResult<Self> { 46 | // Peek the type of the next token or error out if there is not one. 47 | let peeked_token_ty = parser_state 48 | .peek_token_ty() 49 | // Dereferencing map here to prevent complaining about ref after mut borrow. 50 | .map(|token_ty: &TokenTy| *token_ty) 51 | // If there is not a next token, error out. 52 | .ok_or(ParserError { byte_range: parser_state.peek_byte_range(), ty: ParserErrorVariant::Expected("string literal") })?; 53 | 54 | // Match on the next token type available from the lexer. 55 | match peeked_token_ty { 56 | // Unterminated string literals produce an error. 57 | TokenTy::StringLit { is_terminated: false, .. } => Err(parser_state.peek_byte_range_into_error(ParserErrorVariant::UnterminatedStringLiteral)), 58 | 59 | // Terminated string literals produce a value. 60 | TokenTy::StringLit { is_format, .. } => { 61 | // Peek the important parts of the token. 62 | let IndexedToken { index, token: Token { length, .. } } = *parser_state.peek_token().unwrap(); 63 | // Get the associated part of source code, making an immutable reference into the parser state. 64 | let full_matching_source: &str = &parser_state.source[index..index+length]; 65 | // Get a reference to the body of the string literal itself (without the quotes or backticks for format 66 | // strings). 67 | let string_lit_body: &str = &full_matching_source[1..(full_matching_source.len()-1)]; 68 | 69 | // Try to unescape the string literal. 
70 | match unescape(string_lit_body) { 71 | Ok(str_lit_value) => {}, 72 | Err(str_lit_errors) => {}, 73 | } 74 | 75 | 76 | 77 | unimplemented!() 78 | } 79 | 80 | // All other token types produce an error. 81 | _ => Err(parser_state.peek_byte_range_into_error(ParserErrorVariant::Expected("string literal"))), 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/expression/parentheses.rs: -------------------------------------------------------------------------------- 1 | //! An expression in parentheses in Wright source code. 2 | 3 | use super::Expression; 4 | use crate::parser::{ast::metadata::AstNodeMeta, state::ParserState, util::NodeParserResult}; 5 | 6 | #[derive(Debug)] 7 | pub struct ParenthesesExpression<'src> { 8 | /// The AST node metadata. 9 | pub meta: AstNodeMeta<'src>, 10 | /// The body of this block as an expression. 11 | pub body: Box<Expression<'src>>, 12 | } 13 | 14 | /// Parse an expression in parentheses from source code. 15 | pub fn parse_parentheses_expr<'src>( 16 | _parser_state: &mut ParserState<'src>, 17 | ) -> NodeParserResult<ParenthesesExpression<'src>> { 18 | unimplemented!() 19 | } 20 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/expression/primary.rs: -------------------------------------------------------------------------------- 1 | //! Primary expressions in Wright source code. 2 | 3 | use super::{literal::Literal, parentheses::ParenthesesExpression}; 4 | use crate::parser::ast::path::Path; 5 | 6 | /// A primary expression is a special type of low-level expression that can appear in places where other expressions 7 | /// (such as blocks or conditionals) are not allowed. 8 | #[derive(Debug)] 9 | pub enum Primary<'src> { 10 | /// A literal in source code. 11 | Literal(Literal<'src>), 12 | /// A path to an item/symbol/constant value. 13 | /// 14 | /// This includes identifiers as single element paths. 15 | Path(Path<'src>), 16 | /// An expression in parentheses. 17 | Parentheses(ParenthesesExpression<'src>), 18 | } 19 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/identifier.rs: -------------------------------------------------------------------------------- 1 | //! Identifiers in wright source code. 2 | 3 | use super::metadata::AstNodeMeta; 4 | use crate::parser::{ 5 | error::{ParserError, ParserErrorVariant}, 6 | lexer::{ 7 | tokens::{Token, TokenTy}, 8 | IndexedToken, 9 | }, 10 | state::ParserState, 11 | util::NodeParserResult, 12 | }; 13 | 14 | /// An identifier in the source code being parsed. 15 | #[derive(Debug, Clone, Copy)] 16 | pub struct Identifier<'src> { 17 | /// An identifier is just a string in source code, so we use a single metadata here 18 | /// and pass on the identifier from the matching source. 19 | pub inner: AstNodeMeta<'src>, 20 | } 21 | 22 | impl<'src> Identifier<'src> { 23 | /// Get the matching source for this identifier. 24 | pub fn matching_source(&self) -> &'src str { 25 | self.inner.matching_source 26 | } 27 | } 28 | 29 | /// Parse an identifier in source code. 30 | pub fn parse_identifier<'src>( 31 | parser_state: &mut ParserState<'src>, 32 | ) -> NodeParserResult<Identifier<'src>> { 33 | // Conditionally get an identifier token from the lexer. 34 | let IndexedToken { 35 | index, 36 | token: Token { length, .. }, 37 | } = parser_state 38 | // Require the token to be an identifier token. 39 | .next_token_if_ty_eq(TokenTy::Identifier) 40 | // Error out if there is not an identifier token. 
41 | .ok_or_else(|| ParserError { 42 | byte_range: parser_state.peek_byte_range(), 43 | ty: ParserErrorVariant::Expected("identifier"), 44 | })?; 45 | 46 | // Turn the token into an AST node and return OK. 47 | Ok(Identifier { 48 | inner: parser_state.make_ast_node_meta(index, length), 49 | }) 50 | } 51 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/metadata.rs: -------------------------------------------------------------------------------- 1 | //! Metadata used to track the source code that produces nodes in the AST. 2 | 3 | use crate::filemap::{FileId, FileMap}; 4 | use codespan_reporting::files::{Files, Location}; 5 | 6 | /// The metadata used for determining where in the source code the given node is and what source was parsed to produce 7 | /// it. 8 | #[derive(Debug, Clone, Copy)] 9 | pub struct AstNodeMeta<'src> { 10 | /// Reference to the file map that this node was parsed on. 11 | pub file_map: &'src FileMap, 12 | /// The file id for the file this node is in. 13 | pub file_id: FileId, 14 | /// The byte index of the matching string in the source file. 15 | /// 16 | /// This may be equal to the index of the next token if the matching source for an AST node is empty, which is 17 | /// the case in some rare circumstances (i.e. default visibility omitted). 18 | pub index: usize, 19 | /// The matching source code for this node. This carries the byte length in source in its metadata. 20 | pub matching_source: &'src str, 21 | } 22 | 23 | impl<'src> AstNodeMeta<'src> { 24 | /// Get the starting location of this AST node. 25 | pub fn start(&self) -> Location { 26 | self.file_map 27 | .location(self.file_id, self.index) 28 | .expect("AST Node metadata is valid") 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/path.rs: -------------------------------------------------------------------------------- 1 | //! A fully qualified symbol path in wright source code. 2 | //! 3 | //! Path items are separated using `::` similar to rust. 4 | 5 | use super::{ 6 | identifier::{parse_identifier, Identifier}, 7 | metadata::AstNodeMeta, 8 | }; 9 | use crate::parser::{ 10 | error::ParserErrorVariant, 11 | lexer::tokens::TokenTy, 12 | state::ParserState, 13 | util::{ignore::ignore_whitespace_and_comments, NodeParserOption, NodeParserResult}, 14 | }; 15 | 16 | /// A double-colon separated path to a module, type, or function in Wright source code. 17 | /// 18 | /// Note that this can be only a single identifier in length, signaling a path/identifier that's in current scope. 19 | #[derive(Debug)] 20 | pub struct Path<'src> { 21 | /// Node metadata. 22 | pub meta: AstNodeMeta<'src>, 23 | /// The first part of the path, read left to right. 24 | pub head: Identifier<'src>, 25 | /// The rest of the path. 26 | pub tail: Option<Box<Path<'src>>>, 27 | } 28 | 29 | /// Parse a path (`head::tail`) in source code. 30 | pub fn parse_path<'src>(parser_state: &mut ParserState<'src>) -> NodeParserResult<Path<'src>> { 31 | // Get the initial index to make metadata at the end. 32 | let initial_index = parser_state.index(); 33 | 34 | // Parse the head of the path and destructure the parser success. 35 | let head = parse_identifier(parser_state) 36 | // Replace the error with a missing path error. 37 | .map_err(|mut parser_error| { 38 | // Replace the parser error text. 
39 | parser_error.ty = ParserErrorVariant::Expected( 40 | "fully qualified symbol reference (path) or identifier", 41 | ); 42 | parser_error 43 | })?; 44 | 45 | // Parse the tail of the path. Map through Box::new to create the necessary heap allocation. 46 | let tail = parse_path_tail(parser_state).map(Box::new); 47 | // Make the metadata for the produced AST node. 48 | let meta = parser_state.make_ast_node_meta(initial_index, parser_state.index() - initial_index); 49 | // Return Ok. 50 | Ok(Path { meta, head, tail }) 51 | } 52 | 53 | /// Parse the tail of a path, ignoring any whitespace encountered and producing an [`Option`] with a [`Path`]. 54 | /// 55 | /// This will update the parser state's cursor incrementally, avoiding leaving it partially between two tokens in 56 | /// the tail or past the whitespace at the end of the tail. 57 | fn parse_path_tail<'src>(parser_state: &mut ParserState<'src>) -> NodeParserOption<Path<'src>> { 58 | // Get the initial index of the parser. 59 | let initial_index = parser_state.index(); 60 | // Make a clone of the parser state to parse path parts incrementally on. 61 | let mut scoped_state = parser_state.clone(); 62 | // Allow ignored whitespace/comments between parts of the path. 63 | // This will turn into None and return early if we peek a multi-line unterminated comment. 64 | ignore_whitespace_and_comments(&mut scoped_state).ok()?; 65 | // Parse the double colon. 66 | // Returns early if this returns None and the next token is not a double colon. 67 | scoped_state.next_token_if_ty_eq(TokenTy::ColonColon)?; 68 | // Allow ignored whitespace/comments after the double colon. 69 | // This will turn into None and error out if we peek a multi-line unterminated comment. 70 | ignore_whitespace_and_comments(&mut scoped_state).ok()?; 71 | // Parse the head of the tail. If this errors, return None and do not update parser state. 72 | let head = parse_identifier(&mut scoped_state).ok()?; 73 | // Update the parser state after parsing the head so that the parent function does not re-parse it. 74 | *parser_state = scoped_state.clone(); 75 | // Parse the rest of the tail. If this returns None we have reached the end of the path. 76 | // Map through Box::new to create a heap allocation and prevent infinite stack nesting. 77 | let tail = parse_path_tail(&mut scoped_state).map(Box::new); 78 | // Update the parser state if there was a parsed tail. If there was not, do not update the parser state as 79 | // it may have greedily consumed various whitespace, comments, and double colons. 80 | if tail.is_some() { 81 | *parser_state = scoped_state; 82 | } 83 | 84 | // Make AST node metadata using the initial index and the current index. 85 | let meta = parser_state.make_ast_node_meta(initial_index, parser_state.index() - initial_index); 86 | // Return the parsed tail combined into a path. 87 | Some(Path { meta, head, tail }) 88 | } 89 | 90 | #[cfg(test)] 91 | mod test_path { 92 | use crate::{ 93 | filemap::{FileMap, FileName}, 94 | parser::state::ParserState, 95 | }; 96 | 97 | use super::parse_path; 98 | 99 | /// Test the simple case path. 
100 | #[test] 101 | fn test_simple_path() { 102 | let source = "test::path"; 103 | 104 | let mut file_map = FileMap::new(); 105 | let file_id = file_map.add(FileName::Test("test input"), source.to_owned()); 106 | let mut parser_state = ParserState::new(&file_map, file_id); 107 | let path = parse_path(&mut parser_state).expect("parses successfully"); 108 | 109 | assert_eq!(path.head.matching_source(), "test"); 110 | assert!(path.tail.is_some()); 111 | assert_eq!(path.tail.unwrap().head.matching_source(), "path"); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/statement.rs: -------------------------------------------------------------------------------- 1 | //! Statements in Wright source code. 2 | 3 | use self::bind::Bind; 4 | 5 | pub mod bind; 6 | 7 | pub enum Statement<'src> { 8 | /// A variable or constant binding. 9 | Bind(Bind<'src>), 10 | } 11 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/statement/bind.rs: -------------------------------------------------------------------------------- 1 | //! Statements that bind a value to a symbol, possibly with a type, in the context of a scope. 2 | 3 | use crate::parser::ast::metadata::AstNodeMeta; 4 | 5 | /// Bind statement. 6 | pub struct Bind<'src> { 7 | pub meta: AstNodeMeta<'src>, 8 | } 9 | -------------------------------------------------------------------------------- /wright/src/parser/old/ast/types.rs: -------------------------------------------------------------------------------- 1 | //! AST node representations to do with type instantiations in wright source code. 2 | 3 | use super::{expression::Expression, metadata::AstNodeMeta, path::Path}; 4 | 5 | /// A use of a type in source code. 6 | #[derive(Debug)] 7 | pub struct TypeInstantiation<'src> { 8 | /// The metadata for this node. 9 | pub meta: AstNodeMeta<'src>, 10 | /// The type's name, possibly at the end of a path to resolve it. 11 | /// This path will usually probably be one identifier long. 12 | pub typename: Path<'src>, 13 | /// Any types used as generic arguments to make this a concrete type. 14 | pub generic_type_arguments: Vec<TypeInstantiation<'src>>, 15 | /// Any generic constants used to construct this type, in order. 16 | pub generic_const_arguments: Vec<Expression<'src>>, 17 | /// Optional constraint that modifies this type. 18 | pub constrain_clause: Option<Expression<'src>>, 19 | } 20 | -------------------------------------------------------------------------------- /wright/src/parser/old/error.rs: -------------------------------------------------------------------------------- 1 | //! Parser error handling. 2 | 3 | use std::ops::Range; 4 | 5 | /// An error that can occur during parsing. 6 | #[derive(Debug)] 7 | pub struct ParserError { 8 | /// The byte index range of the offending span in the file being parsed. 9 | pub byte_range: Range<usize>, 10 | /// The type of error. 11 | pub ty: ParserErrorVariant, 12 | } 13 | 14 | /// Different types of errors that can be generated during parsing. 15 | #[derive(Debug, Clone, Copy)] 16 | pub enum ParserErrorVariant { 17 | /// Something was expected and wasn't there. 18 | Expected(&'static str), 19 | 20 | /// Encountered unterminated multi-line comment. 21 | UnterminatedMultilineComment, 22 | 23 | /// Encountered unterminated string literal. 
    UnterminatedStringLiteral,
}
--------------------------------------------------------------------------------
/wright/src/parser/old/state.rs:
--------------------------------------------------------------------------------
//! Parser state structure and implementation.

use super::{
    ast::metadata::AstNodeMeta,
    lexer::{
        tokens::{Token, TokenTy},
        IndexedLexer, IndexedToken,
    },
    error::{ParserError, ParserErrorVariant},
};
use crate::filemap::{FileId, FileMap};
use codespan_reporting::files::Files;

#[cfg(doc)]
use std::iter::Peekable;
use std::ops::Range;

/// The state of the [`Parser`] used to transform wright source code into the
/// appropriate series of [AST] (Abstract Syntax Tree) nodes.
///
/// [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
#[derive(Debug, Clone)]
pub struct ParserState<'src> {
    /// The file map that this parser's parent file is in.
    file_map: &'src FileMap,
    /// The file handle for the file being parsed.
    file_id: FileId,
    /// Reference to the source code we are parsing.
    pub source: &'src str,
    /// Underlying indexed lexer feeding tokens to this parser.
    lexer: IndexedLexer<'src>,
    /// Store up to one "peeked" token, similar to the [`Peekable`] iterator.
    /// See https://doc.rust-lang.org/std/iter/struct.Peekable.html#method.peek for design inspiration.
    peeked_token: Option<Option<IndexedToken>>,
}

impl<'src> ParserState<'src> {
    /// Construct a new parser for a given source file.
    ///
    /// # Panics:
    /// If the file ID is not in the file map.
    pub fn new(file_map: &'src FileMap, file_id: FileId) -> Self {
        // Get the source using the file map.
        let source = file_map
            .source(file_id)
            .expect("file id exists in file map");

        ParserState {
            file_map,
            file_id,
            source,
            lexer: IndexedLexer::new(source),
            peeked_token: None,
        }
    }

    /// Make a new [`AstNodeMeta`] object using this [`ParserState`]'s [`FileMap`] and [`FileId`].
    /// The byte index and byte length in source code are supplied as arguments, usually from the
    /// [`IndexedToken`] pulled from this [ParserState]'s internal [`IndexedLexer`].
    pub fn make_ast_node_meta(&self, index: usize, length: usize) -> AstNodeMeta<'src> {
        AstNodeMeta {
            file_map: self.file_map,
            file_id: self.file_id,
            index,
            matching_source: &self.source[index..index + length],
        }
    }

    /// Peek a token from the internal lexer.
    pub fn peek_token(&mut self) -> Option<&IndexedToken> {
        // Get a mutable reference to the internal iterator.
        let iter = &mut self.lexer;
        // Get the previously peeked token or a new one from the iterator.
        self.peeked_token
            .get_or_insert_with(|| iter.next())
            .as_ref()
    }

    /// Peek the type of the next token.
    pub fn peek_token_ty(&mut self) -> Option<&TokenTy> {
        self.peek_token()
            .map(|indexed_token| &indexed_token.token.variant)
    }

    /// Get the starting byte index of the next [`IndexedToken`] in source code.
    pub fn index(&self) -> usize {
        // Check to see if a token has been peeked already.
        match self.peeked_token.as_ref() {
            // If one has, return its index.
            Some(Some(IndexedToken { index, .. })) => *index,

            // Otherwise return the current lexer index.
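            // (This arm also covers a stored `Some(None)`, i.e. a peek that found the lexer already exhausted.)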
            _ => self.lexer.index,
        }
    }

    /// Get the next token from the internal lexer.
    pub fn next_token(&mut self) -> Option<IndexedToken> {
        match self.peeked_token.take() {
            Some(peeked_token) => peeked_token,
            None => self.lexer.next(),
        }
    }

    /// Get the next token if it satisfies a given predicate.
    pub fn next_token_if_ty_eq(&mut self, token_ty: TokenTy) -> Option<IndexedToken> {
        // Get the next token or consume a previously peeked token.
        match self.next_token() {
            // Token with matching variant field.
            Some(
                token @ IndexedToken {
                    token: Token { variant, .. },
                    ..
                },
            ) if variant == token_ty => Some(token),

            // Otherwise store the non-matching token back as the peeked token
            // (the peek slot was emptied by the call to next_token above).
            other => {
                // Sanity check.
                assert!(self.peeked_token.is_none());
                self.peeked_token = Some(other);
                None
            }
        }
    }

    /// Get the byte range of the next token. If there is no next token in the lexer, return a zero-length
    /// range of the current index.
    pub fn peek_byte_range(&mut self) -> Range<usize> {
        if let Some(IndexedToken {
            index,
            token: Token { length, .. },
        }) = self.peek_token()
        {
            *index..*index + *length
        } else {
            self.index()..self.index()
        }
    }

    /// Create a parser error by peeking the next byte range and combining it with the given variant.
    #[inline]
    pub(crate) fn peek_byte_range_into_error(&mut self, err_ty: ParserErrorVariant) -> ParserError {
        ParserError { byte_range: self.peek_byte_range(), ty: err_ty }
    }
}
--------------------------------------------------------------------------------
/wright/src/parser/old/util.rs:
--------------------------------------------------------------------------------
//! Parsing utility functions used throughout the parser to make the process of parsing easier.

use super::{error::ParserError, state::ParserState};

pub mod discard_error;
pub mod erase;
pub mod first_successful;
pub mod ignore;
pub mod map;

/// A [`Result`] returned from an AST node parser.
pub type NodeParserResult<N> = Result<N, ParserError>;

/// An [`Option`] returned from an AST node parser.
pub type NodeParserOption<N> = Option<N>;

/// Type alias used to appease the borrow/lifetime checker complaining about higher-kinded types.
pub type BoxedParserFn<'src, Output> = Box<dyn (Fn(&mut ParserState<'src>) -> Output) + 'src>;
--------------------------------------------------------------------------------
/wright/src/parser/old/util/discard_error.rs:
--------------------------------------------------------------------------------
//! Parser function combinator useful for discarding errors. This can transform a parser from returning a [`Result`]
//! to returning an [`Option`].

use super::{BoxedParserFn, NodeParserOption, NodeParserResult};
use crate::parser::state::ParserState;

/// Return a [Box]ed parser function that returns an [`Option`] rather than a [`Result`].
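///
/// A sketch of intended usage (`parse_identifier` is this module's identifier parser; not compiled as a doc-test):
///
/// ```ignore
/// let optional_parser = discard_errors(parse_identifier);
/// // Calling the boxed parser yields an Option instead of a Result.
/// let maybe_identifier = (optional_parser)(&mut parser_state);
/// ```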
pub fn discard_errors<'src, PF, O>(parser_function: PF) -> BoxedParserFn<'src, NodeParserOption<O>>
where
    PF: (Fn(&mut ParserState<'src>) -> NodeParserResult<O>) + 'src,
{
    Box::new(move |parser_state: &mut ParserState<'_>| ((parser_function)(parser_state)).ok())
}
--------------------------------------------------------------------------------
/wright/src/parser/old/util/erase.rs:
--------------------------------------------------------------------------------
//! Erase is a useful parser combinator that erases the concrete type of a parser function and turns it into a [BoxedParserFn].

use super::BoxedParserFn;
use crate::parser::state::ParserState;

/// Erase the concrete type of a parser function and put it in a [`BoxedParserFn`].
pub fn erase<'src, PF, O>(parser_function: PF) -> BoxedParserFn<'src, O>
where
    PF: (Fn(&mut ParserState<'src>) -> O) + 'src,
{
    Box::new(parser_function)
}
--------------------------------------------------------------------------------
/wright/src/parser/old/util/first_successful.rs:
--------------------------------------------------------------------------------
//! The [`FirstSuccessful`] parse combinator takes a series of parsers and applies them in order until one is
//! successful.

use super::{BoxedParserFn, NodeParserOption};
use crate::parser::state::ParserState;

/// Parser combinator that takes a list of [`BoxedParserFn`]s and runs them in order on the input
/// until one of them succeeds, returning that parser's output. If none succeed, [`None`] is returned.
pub fn first_sucessful<'src, N: 'src>(
    parser_functions: Vec<BoxedParserFn<'src, NodeParserOption<N>>>,
) -> BoxedParserFn<'src, NodeParserOption<N>> {
    Box::new(move |parser_state: &mut ParserState<'_>| {
        parser_functions
            .iter()
            .find_map(|parser_function| {
                // Call each parser function in order on the parser state until one returns Some.
                (parser_function)(parser_state)
            })
    })
}
--------------------------------------------------------------------------------
/wright/src/parser/old/util/ignore.rs:
--------------------------------------------------------------------------------
//! Utility features to ignore whitespace and comments in source code.

use super::NodeParserResult;
use crate::parser::{
    error::{ParserError, ParserErrorVariant},
    lexer::tokens::{CommentTy, TokenTy},
    state::ParserState,
};

/// Parse through any/all whitespace and comments from the lexer. Return an error if any unterminated
/// comment is encountered. Does not parse through doc-comments.
#[rustfmt::skip] // Do not rustfmt this.
pub fn ignore_whitespace_and_comments(parser_state: &mut ParserState) -> NodeParserResult<()> {
    // Use an infinite loop and only break out when we cannot parse either another whitespace or another comment.
    while let Some(peeked_token_ty) = parser_state.peek_token_ty() {
        match peeked_token_ty {
            // Any of the following can be safely ignored.
            | TokenTy::Whitespace
            | TokenTy::SingleLineComment { comment_type: CommentTy::Normal }
            | TokenTy::MultilineComment { comment_type: CommentTy::Normal, is_terminated: true }
            => {
                // Discard the next token.
                let _ = parser_state.next_token();
            }

            // Any unterminated multiline comment will cause an error.
            TokenTy::MultilineComment { is_terminated: false, .. } => {
                return Err(ParserError {
                    byte_range: parser_state.peek_byte_range(),
                    ty: ParserErrorVariant::UnterminatedMultilineComment,
                })
            }

            // Any token peeked that is not handled above is important -- return Ok
            // and let the caller handle it.
            _ => {
                return Ok(());
            }
        }
    }

    // If the lexer hits its end, return Ok.
    Ok(())
}
--------------------------------------------------------------------------------
/wright/src/parser/old/util/map.rs:
--------------------------------------------------------------------------------
//! Parser mapping utilities.

use super::{BoxedParserFn, NodeParserResult};
use crate::parser::state::ParserState;

/// Create a [Box]ed function (dyn [`Fn`]) that maps the output of a parser function through another function.
pub fn map<'src, PF, MF, O1, O2>(parser_function: PF, map_function: MF) -> BoxedParserFn<'src, O2>
where
    PF: (Fn(&mut ParserState<'src>) -> O1) + 'static,
    MF: (Fn(O1) -> O2) + 'static,
{
    Box::new(move |parser_state: &mut ParserState| (map_function)((parser_function)(parser_state)))
}

/// Map specifically the node produced by a parser function.
pub fn map_node_type<'src, PF, MF, N1, N2>(
    parser_function: PF,
    map_function: MF,
) -> BoxedParserFn<'src, NodeParserResult<N2>>
where
    PF: (Fn(&mut ParserState<'src>) -> NodeParserResult<N1>) + 'src,
    MF: (Fn(N1) -> N2) + 'src,
{
    Box::new(move |parser_state: &mut ParserState<'_>| {
        // Run the parser and get the result.
        let parser_result = (parser_function)(parser_state);

        // Map the node type.
        // Allow the redundant closure as it seems to bypass the need for the generic to impl Copy.
        #[allow(clippy::redundant_closure)]
        parser_result.map(|node| (map_function)(node))
    })
}
--------------------------------------------------------------------------------
/wright/src/parser/path.rs:
--------------------------------------------------------------------------------
//! Parsing implementation for [Path].

use std::sync::Arc;

use super::Parser;
use super::error::ParserError;
use super::error::ParserErrorKind;
use crate::ast::identifier::Identifier;
use crate::ast::path::Path;
use crate::lexer::token::TokenTy;
use crate::source_tracking::fragment::Fragment;

impl Path {
    /// Parse a [Path] from the given [Parser]. This is greedy (as much path as possible will be parsed).
    /// [Path]s of size 1 (just a single identifier) are accepted.
    pub fn parse(parser: &mut Parser) -> Result<Self, ParserError> {
        let head: Identifier = parse_head(parser)?;
        let mut tail = Vec::new();

        // Parse the tail.
        while let Some(ident) = parse_segment(parser) {
            tail.push(ident);
        }

        // Calculate the fragment containing the whole path.
        let last = tail.last().unwrap_or(&head);
        let matched_source_range = head.fragment.range.start..last.fragment.range.end;

        Ok(Path {
            // Head and tail should all have the same source ref since they came from the same parser.
            full_path: Fragment {
                source: Arc::clone(&head.fragment.source),
                range: matched_source_range,
            },
            head,
            tail,
        })
    }
}

/// Parse the first (and possibly only) [Identifier] in the [Path].
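///
/// For example, in `foo::bar::baz` the head is `foo`. On failure, the error kind is rewritten to
/// [ParserErrorKind::ExpectedPath] so the caller sees a path-level error rather than an identifier-level one.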
fn parse_head(parser: &mut Parser) -> Result<Identifier, ParserError> {
    Identifier::parse(parser).map_err(|mut err| {
        err.kind = ParserErrorKind::ExpectedPath;
        err
    })
}

/// Attempt to parse a segment of this path indivisibly (never just parse a separator without another [Identifier]
/// at the end of it).
fn parse_segment(parser: &mut Parser) -> Option<Identifier> {
    // The list of valid segment sequences we will accept is always the same.
    const VALID_SEGMENT_SEQUENCES: [&[TokenTy]; 4] = [
        &[
            TokenTy::Whitespace,
            TokenTy::ColonColon,
            TokenTy::Whitespace,
            TokenTy::Identifier,
        ],
        &[
            TokenTy::Whitespace,
            TokenTy::ColonColon,
            TokenTy::Identifier,
        ],
        &[
            TokenTy::ColonColon,
            TokenTy::Whitespace,
            TokenTy::Identifier,
        ],
        &[TokenTy::ColonColon, TokenTy::Identifier],
    ];

    for sep_token_sequence in VALID_SEGMENT_SEQUENCES {
        if parser.matches(sep_token_sequence) {
            parser.advance(sep_token_sequence.len() - 1);
            // SAFETY: We just matched that the parser's next tokens end with an identifier,
            // so this parse cannot fail.
            return Some(unsafe { Identifier::parse(parser).unwrap_unchecked() });
        }
    }

    // If none of the valid segment sequences match, return None.
    None
}

#[cfg(test)]
mod test_path {
    use crate::{
        ast::path::Path,
        lexer::Lexer,
        parser::Parser,
        source_tracking::{SourceMap, filename::FileName, source::Source},
    };

    /// Test several simple paths, with and without whitespace around the separator.
    #[test]
    fn test_ok_paths() {
        let map = SourceMap::new();
        let sources = &["test::path", "test :: path", "test ::path", "test:: path"];

        for source in sources {
            dbg!(source);
            let source_ref = map.add(Source::new_from_static_str(FileName::None, *source));
            let lexer = Lexer::new(source_ref);
            let mut parser = Parser::new(lexer);
            let path = Path::parse(&mut parser).unwrap();
            assert_eq!(path.head.fragment.as_str(), "test");
            assert_eq!(path.tail[0].fragment.as_str(), "path");
            assert_eq!(path.full_path.len(), source.len());
            assert_eq!(parser.lexer.bytes_remaining(), 0);
        }
    }

    #[test]
    fn test_not_paths() {
        let sources = &["", "0", "_"];

        for source in sources {
            let mut parser = Parser::new(Lexer::new_test(source));
            assert_eq!(Path::parse(&mut parser).unwrap_err().location.as_str(), *source);
        }
    }
}
--------------------------------------------------------------------------------
/wright/src/parser/ty.rs:
--------------------------------------------------------------------------------
//! Parser implementation for parsing types.

use crate::ast::ty::{AtomicTy, ReferenceTy, Type};

use super::{
    Parser,
    error::{ParserError, ParserErrorKind},
};

mod primitive;
mod reference;

impl Type {
    /// Parse a type signature in source code.
    pub fn parse(parser: &mut Parser) -> Result<Self, ParserError> {
        // Attempt to parse atomic types first -- they're the simplest. If we fail to parse, the parser doesn't advance.
        // Since they're all keywords, we don't have to worry at all about under-greedy parsing (yet).
        if let Ok(atomic) = AtomicTy::parse(parser) {
            return Ok(Type::Atomic(atomic));
        }

        let bytes_remaining = parser.bytes_remaining();

        match ReferenceTy::parse(parser) {
            Ok(reference_ty) => return Ok(Type::Reference(reference_ty)),

            Err(err) => {
                // If the parser was advanced in parsing the reference type, error out here.
                if bytes_remaining != parser.bytes_remaining() {
                    return Err(
                        err.with_help("encountered error while parsing reference type signature")
                    );
                }

                // If we didn't advance we can just ignore the error and try parsing other type signature
                // forms or fall through to the catch-all "expected type signature" error (since it means
                // we would not have seen an `@` to start a reference type signature).
            }
        }

        Err(ParserErrorKind::ExpectedTypeSignature.at(parser.peek_fragment_or_rest_cloned()))
    }
}
--------------------------------------------------------------------------------
/wright/src/parser/ty/primitive.rs:
--------------------------------------------------------------------------------
//! Parsing for primitive type signatures.

use crate::{
    ast::ty::{AtomicTy, AtomicTyVariant},
    lexer::token::TokenTy,
    parser::{
        Parser,
        error::{ParserError, ParserErrorKind},
    },
};

impl AtomicTy {
    /// Parse an atomic primitive type from source, or error with [ParserErrorKind::ExpectedAtomicTypeSignature]
    /// and no progress made on the given [Parser].
    #[rustfmt::skip]
    pub fn parse(parser: &mut Parser) -> Result<Self, ParserError> {

        // Local function reused by the match block below to shorten unwrapping a fragment and initializing the new
        // value once a match has been found.
        fn accept(variant: AtomicTyVariant, parser: &mut Parser) -> Result<AtomicTy, ParserError> {
            Ok(AtomicTy { variant, matching_source: parser.next_token().unwrap().fragment })
        }

        match parser.peek_variant() {
            Some(TokenTy::KwBool) => accept(AtomicTyVariant::Bool, parser),
            Some(TokenTy::KwChar) => accept(AtomicTyVariant::Char, parser),
            Some(TokenTy::KwU8  ) => accept(AtomicTyVariant::U8,  parser),
            Some(TokenTy::KwI8  ) => accept(AtomicTyVariant::I8,  parser),
            Some(TokenTy::KwU16 ) => accept(AtomicTyVariant::U16, parser),
            Some(TokenTy::KwI16 ) => accept(AtomicTyVariant::I16, parser),
            Some(TokenTy::KwU32 ) => accept(AtomicTyVariant::U32, parser),
            Some(TokenTy::KwI32 ) => accept(AtomicTyVariant::I32, parser),
            Some(TokenTy::KwF32 ) => accept(AtomicTyVariant::F32, parser),
            Some(TokenTy::KwU64 ) => accept(AtomicTyVariant::U64, parser),
            Some(TokenTy::KwI64 ) => accept(AtomicTyVariant::I64, parser),
            Some(TokenTy::KwF64 ) => accept(AtomicTyVariant::F64, parser),
            _ => Err(ParserErrorKind::ExpectedAtomicTypeSignature.at(parser.peek_fragment_or_rest_cloned())),
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::{
        ast::ty::{AtomicTy, AtomicTyVariant},
        lexer::Lexer,
        parser::Parser,
    };

    #[test]
    fn test_bool() {
        let mut parser = Parser::new(Lexer::new_test("bool"));
        let atomic_ty = AtomicTy::parse(&mut parser).unwrap();
        assert_eq!(atomic_ty.matching_source.as_str(), "bool");
        assert_eq!(atomic_ty.variant, AtomicTyVariant::Bool);
    }
}
--------------------------------------------------------------------------------
/wright/src/parser/ty/reference.rs:
--------------------------------------------------------------------------------
//! Referenced types. Types that are defined by users or in the standard library.

use crate::{
    ast::ty::{ReferenceTy, Type},
    lexer::token::TokenTy,
    parser::{
        Parser,
        error::{ParserError, ParserErrorKind},
        whitespace,
    },
    source_tracking::fragment::Fragment,
};

impl ReferenceTy {
    /// Attempt to parse a reference type signature, e.g. `@u64`.
    ///
    /// This will leave the parser unmodified and return an error if it doesn't match the `@` symbol; however,
    /// if it does match the `@` symbol it will advance the parser, and may then still return an error if the
    /// `@` symbol is not followed by a type signature.
    pub fn parse(parser: &mut Parser) -> Result<Self, ParserError> {
        let Some(at_symbol) = parser.next_if_is(TokenTy::At) else {
            return Err(ParserErrorKind::ExpectedReferenceTypeSignature
                .at(parser.peek_fragment_or_rest_cloned()));
        };

        whitespace::optional_whitespace(parser);

        let referenced_type = Type::parse(parser)?;

        Ok(ReferenceTy {
            matching_source: Fragment::cover(
                &at_symbol.fragment,
                referenced_type.matching_source(),
            ),
            target_ty: Box::new(referenced_type),
        })
    }
}

#[cfg(test)]
mod tests {
    use crate::{
        ast::ty::{AtomicTyVariant, ReferenceTy},
        lexer::Lexer,
        parser::Parser,
    };

    #[test]
    fn test_reference_to_atomic() {
        let mut parser = Parser::new(Lexer::new_test("@u64"));
        let result = ReferenceTy::parse(&mut parser).unwrap();

        assert_eq!(result.matching_source.as_str(), "@u64");
        assert_eq!(result.target_ty.downcast_primitive().unwrap().variant, AtomicTyVariant::U64);
    }

    #[test]
    fn test_reference_to_a_reference_to_atomic() {
        let mut parser = Parser::new(Lexer::new_test("@@u64"));
        let result = ReferenceTy::parse(&mut parser).unwrap();

        assert_eq!(result.matching_source.as_str(), "@@u64");
        assert!(result.target_ty.downcast_reference().is_some());
    }
}
--------------------------------------------------------------------------------
/wright/src/parser/whitespace.rs:
--------------------------------------------------------------------------------
//! Utilities for parsing through whitespace.

use super::{
    Parser,
    error::{ParserError, ParserErrorKind},
};
use crate::lexer::token::TokenTy;

/// Consume and ignore any [TokenTy::Whitespace] tokens at the front of the [Parser].
/// If there are none, do nothing.
pub fn optional_whitespace(parser: &mut Parser) {
    while parser.peek_variant() == Some(TokenTy::Whitespace) {
        parser.advance(1);
    }
}

/// Require whitespace from the [Parser]. Do not advance if the next [Token] is not whitespace.
///
/// [Token]: crate::lexer::token::Token
pub fn require_whitespace(parser: &mut Parser) -> Result<(), ParserError> {
    match parser.next_if_is(TokenTy::Whitespace) {
        Some(_) => {
            // Consume any additional whitespace tokens that may follow.
            optional_whitespace(parser);
            Ok(())
        }

        None => Err(ParserErrorKind::ExpectedWhitespace.at(parser.peek_fragment_or_rest_cloned())),
    }
}
--------------------------------------------------------------------------------
/wright/src/repl.rs:
--------------------------------------------------------------------------------
//! The Wright interactive REPL.

use crate::WRIGHT_VERSION;
use derive_more::Display;
use std::io::{self, BufRead, Write};

const HELP_MESSAGE: &str = "
Wright REPL Help:

Built-in commands:
- :?/:h/:help -- Print this help menu.
- :m/:mode -- Print the current mode.
- :e/:eval -- Switch to eval mode.
- :t/:token -- Switch to token mode.
- :a/:ast -- Switch to AST mode.
- :c/:clear -- Clear the terminal window.
- :v/:version -- Print the current Wright version information.
- :q/:quit/:exit -- Quit/Exit the REPL.

Modes:
- eval mode: Evaluate each line of input.
- token mode: Print the tokens generated for each line of input.
- AST mode: Print the AST tree/node generated for each line of input.
";

#[derive(Clone, Copy, PartialEq, Debug, Default, Display)]
enum ReplMode {
    /// Default REPL mode -- evaluates and prints results of input.
    #[default]
    Eval,

    /// Print the tokens passed to the repl.
    Tokens,

    /// Print the AST tree passed to the repl.
    Ast,
}

/// Start an interactive Wright repl.
pub fn start() -> anyhow::Result<()> {
    // Print version info.
    println!("Wright REPL interpreter (wright version {})", WRIGHT_VERSION);

    // Get a global lock on the standard input.
    let stdin = io::stdin();
    let mut input = stdin.lock();
    let stdout = io::stdout();
    let mut output = stdout.lock();

    // Track the line number of the input.
    let mut input_number = 0usize;

    // Set the repl mode.
    let mut repl_mode = ReplMode::Tokens;

    // Make a file map to track input.
    // let mut code_map = FileMap::new();

    // Loop until this returns/exits.
    loop {
        // Increment input number.
        input_number += 1;

        // Write prompt.
        write!(&mut output, "[{}]: >> ", input_number)?;
        output.flush()?;

        // Read line of input.
        let mut line = String::new();
        input.read_line(&mut line)?;

        // Handle certain builtin REPL commands.
        match line.trim() {
            ":?" | ":h" | ":help" => {
                writeln!(&mut output, "{}", HELP_MESSAGE)?;
                continue;
            }

            ":v" | ":version" => {
                writeln!(&mut output, "Wright programming language version {}", WRIGHT_VERSION)?;
                continue;
            }

            ":m" | ":mode" => {
                writeln!(&mut output, "{}", repl_mode)?;
                continue;
            }

            ":q" | ":exit" | ":quit" => return Ok(()),

            ":c" | ":clear" => {
                // https://stackoverflow.com/questions/34837011/how-to-clear-the-terminal-screen-in-rust-after-a-new-line-is-printed
                writeln!(&mut output, "{esc}[2J{esc}[1;1H", esc = 27 as char)?;
                continue;
            }

            ":e" | ":eval" => unimplemented!("Eval mode is not yet implemented."),

            ":t" | ":token" => {
                repl_mode = ReplMode::Tokens;
                writeln!(&mut output, "switched to token mode")?;
                continue;
            }

            ":a" | ":ast" => {
                repl_mode = ReplMode::Ast;
                writeln!(&mut output, "switched to AST mode")?;
                continue;
            }

            // Any other input is a no-op here and will get handled later.
            _ => {}
        }

        // If the line was actual input and not a command -- print the output prefix.
        write!(&mut output, "[{}]: << ", input_number)?;
        output.flush()?;

        unimplemented!("REPL needs to be re-worked a bit.");
    }
}
--------------------------------------------------------------------------------
/wright/src/source_tracking.rs:
--------------------------------------------------------------------------------
//! Types and traits for tracking source code fed to the wright compiler.

use self::source::Source;
use dashmap::DashMap;
use source::SourceId;
use std::sync::Arc;

pub mod filename;
pub mod fragment;
pub mod immutable_string;
pub mod source;

/// A reference to a [Source] in a [SourceMap].
pub type SourceRef = Arc<Source>;

/// Storage for [Source]s used and referenced in compiling a wright project.
///
/// [Clone]ing is cheap, since this uses an [Arc] internally.
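///
/// A sketch of typical usage (mirroring the test helpers elsewhere in this crate; not compiled as a doc-test):
///
/// ```ignore
/// let map = SourceMap::new();
/// let source_ref = map.add(Source::new_from_static_str(FileName::None, "hello"));
/// assert!(map.get(source_ref.id).is_some());
/// ```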
#[derive(Debug, Default, Clone)]
pub struct SourceMap {
    /// Internally, we use [DashMap] for a concurrent hashmap from [Source::id]s to their [Arc]'d [Source]s.
    ///
    /// Each source is wrapped in an [Arc] to make them all accessible without holding a reference to this map
    /// directly.
    inner: Arc<DashMap<SourceId, SourceRef>>,
}

impl SourceMap {
    /// Construct a new empty [SourceMap].
    pub fn new() -> Self {
        Default::default()
    }

    /// Add a [Source] to this [SourceMap] and get a [SourceRef] to it after it's added.
    pub fn add(&self, source: Source) -> SourceRef {
        // Put the source in an Arc.
        let source: SourceRef = Arc::new(source);
        // Insert the source into the internal map.
        self.inner.insert(source.id, Arc::clone(&source));
        // Return the now-Arc'd source.
        source
    }

    /// Get a reference to a [Source] stored in this [SourceMap] using its [Source::id].
    ///
    /// This is currently `O(1)` since [SourceMap] uses a [DashMap] internally.
    ///
    /// Returns [None] if the [Source] with the given [Source::id] is not in this [SourceMap].
    pub fn get(&self, id: SourceId) -> Option<SourceRef> {
        self.inner.get(&id).map(|source| Arc::clone(&source))
    }
}
--------------------------------------------------------------------------------
/wright/src/source_tracking/filename.rs:
--------------------------------------------------------------------------------
//! Structure and implementation relating to file names used throughout the wright compiler and tooling.

use derive_more::Display;
use std::path::PathBuf;

/// Used to represent different file names used throughout this crate.
#[derive(Debug, Display, Clone)]
pub enum FileName {
    /// A real file on the user's computer.
    #[display("{}", "_0.display()")]
    Real(PathBuf),
    /// A named test-case in this crate's source code.
    Test(&'static str),
    // /// The interactive Wright repl.
    // #[display(fmt = "REPL:{}", line_number)]
    // Repl { line_number: usize },
    /// An un-named test case in this crate's source code.
    #[display("")]
    None,
}
--------------------------------------------------------------------------------
/wright/src/source_tracking/immutable_string.rs:
--------------------------------------------------------------------------------
//! Structure and implementation relating to the representation of source files (as immutable strings) throughout
//! the Wright compiler and tooling.

use std::{str::CharIndices, sync::Arc};

#[cfg(feature = "file_memmap")]
use fs4::fs_std::FileExt;

#[cfg(feature = "file_memmap")]
use memmap2::Mmap;

#[cfg(feature = "file_memmap")]
use std::{fs::File, io};

/// An immutable string that either:
/// - References a source string in memory using a `'static` reference,
/// - Owns a source string in memory, or
/// - Owns a locked and memory mapped file from the disk.
///
/// This uses an [Arc] internally to make cloning cheap.
#[derive(Debug, Clone)]
pub struct ImmutableString {
    /// Wrap the internal enum representation. This is to avoid exposing the API for a user to construct an
    /// [ImmutableStringInner] without satisfying certain invariants.
    inner: Arc<ImmutableStringInner>,
}

impl ImmutableString {
    /// Wrap the inner representation in this type.
    #[inline]
    fn from_inner(inner: ImmutableStringInner) -> Self {
        ImmutableString {
            inner: Arc::new(inner),
        }
    }

    /// Create a new [ImmutableString] holding the given [File] (assumed to be locked with [fs4])
    /// and the [Mmap] mapping that file to memory.
    ///
    /// This function requires that the memory mapped by the given
    /// [Mmap] is valid UTF-8 using [std::str::from_utf8].
    #[cfg(feature = "file_memmap")]
    pub(super) fn new_locked_file(file: File, mem_map: Mmap) -> Self {
        Self::from_inner(ImmutableStringInner::LockedFile {
            locked_file: file,
            mem_map,
        })
    }

    /// Create a new [ImmutableString] that owns a string allocated on the heap.
    pub(super) fn new_owned(boxed_str: Box<str>) -> Self {
        Self::from_inner(ImmutableStringInner::Owned(boxed_str))
    }

    /// Create a new [ImmutableString] referencing a string directly.
    pub(super) fn new_static(str_ref: &'static str) -> Self {
        Self::from_inner(ImmutableStringInner::Static(str_ref))
    }

    /// Get a list of byte indices into this [ImmutableString] of the start of every line.
    pub fn line_starts(&self) -> impl Iterator<Item = usize> + use<'_> {
        // Make an iterator over this string's characters and their byte indices.
        let mut char_indices: CharIndices = self.as_ref().char_indices();
        // Track whether the previous character was a newline using a bool -- this starts as true, so that the first
        // character of a source is considered to be starting a new line.
        let mut last_was_newline: bool = true;

        // Create a custom iterator that flattens to give us indices immediately following '\n' characters.
        let iter = std::iter::from_fn(move || {
            // If there is no next char index, return None. There are no lines in an empty string.
            let (index, next) = char_indices.next()?;

            // Determine whether to list this character's index as starting a new line.
            let result = Some(last_was_newline.then_some(index));

            // Update the boolean based on the consumed character.
            last_was_newline = next == '\n';

            // Return the above result.
            result
        });

        iter.flatten()
    }

    /// Get this [ImmutableString] as a [str] reference.
    /// This just calls [AsRef::as_ref].
    pub fn as_str(&self) -> &str {
        self.as_ref()
    }

    /// Get the length of this [ImmutableString] in bytes.
    /// See [str::len].
    pub fn len(&self) -> usize {
        self.as_str().len()
    }

    /// Check if this [ImmutableString] is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

impl AsRef<str> for ImmutableString {
    fn as_ref(&self) -> &str {
        (*self.inner).as_ref()
    }
}

/// The internal enum representation of the immutable string.
#[derive(Debug)]
enum ImmutableStringInner {
    /// An immutable reference to an existing static string.
    Static(&'static str),

    /// An owned immutable string.
    Owned(Box<str>),

    /// A locked, memory mapped file from the disk.
    #[cfg(feature = "file_memmap")]
    LockedFile {
        /// The locked file that gets unlocked when this struct is dropped.
        locked_file: File,

        /// The memory mapped file.
        ///
        /// # Safety
        /// - Undefined behavior occurs if the file on disk is modified while memory mapped. Always lock the
        ///   file (in this crate's case, using [fs4]) before creating this [Mmap] for it.
        ///   See [Mmap] for more details.
        /// - This struct assumes that any memory-mapped files have their UTF-8 validity checked by the caller.
        ///   Specifically, the [ImmutableString::as_ref] method relies on [std::str::from_utf8_unchecked],
        ///   so if you do not ensure the [Mmap] is valid UTF-8, you will run into undefined behavior.
        mem_map: Mmap,
    },
}

/// Implement [Drop] to make sure that the files from disk get unlocked as they go out of scope/use.
#[cfg(feature = "file_memmap")]
impl Drop for ImmutableStringInner {
    fn drop(&mut self) {
        match self {
            // Unlock locked files.
            ImmutableStringInner::LockedFile { locked_file, .. } => {
                FileExt::unlock(locked_file)
                    // Log the error if there is one.
                    .map_err(|io_err: io::Error| eprintln!("{}", io_err))
                    // Discard the value of the result.
                    .ok();
            }

            // All other variants drop trivially.
            ImmutableStringInner::Owned(_) | ImmutableStringInner::Static(_) => {}
        }
    }
}

impl AsRef<str> for ImmutableStringInner {
    fn as_ref(&self) -> &str {
        match self {
            ImmutableStringInner::Static(str) => str,
            ImmutableStringInner::Owned(str) => str,

            #[cfg(feature = "file_memmap")]
            ImmutableStringInner::LockedFile { mem_map, .. } => {
                // Get a direct reference to the data that is in the memory map.
                let raw_data: &[u8] = mem_map.as_ref();
                // SAFETY: UTF-8 validity is checked when the file is added to the file map, or by the API consumer.
                unsafe { std::str::from_utf8_unchecked(raw_data) }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::ImmutableString;

    #[test]
    fn test_line_starts() {
        let v: Vec<usize> = ImmutableString::new_static("a\n\nb\nc")
            .line_starts()
            .collect();

        assert_eq!(v.as_slice(), &[0, 2, 3, 5]);
    }
}
--------------------------------------------------------------------------------
/wright/src/util.rs:
--------------------------------------------------------------------------------
//! Some utility functions/modules shared by various binaries for things like checking unicode support.

pub mod supports_unicode;
--------------------------------------------------------------------------------
/wright/src/util/supports_unicode.rs:
--------------------------------------------------------------------------------
//! Utility module that builds on top of the functionality of the [mod@supports_unicode] crate by adding a global
//! fallback static, and a function that always indicates lack of unicode support if the crate/feature is not enabled.

#[cfg(feature = "supports-unicode")]
use ::supports_unicode as supports_unicode_crate;

#[cfg(feature = "supports-unicode")]
use core::sync::atomic::AtomicBool;

/// Should all output force the use of ASCII characters only?
#[cfg(feature = "supports-unicode")]
pub static FORCE_ASCII: AtomicBool = AtomicBool::new(false);

/// Set the global [FORCE_ASCII] static.
#[cfg(feature = "supports-unicode")]
pub fn set_force_ascii(force_ascii: bool) {
    use core::sync::atomic::Ordering;

    FORCE_ASCII.store(force_ascii, Ordering::Release);
}

/// Should we be writing unicode out to the user's terminal?
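///
/// A sketch of the intended behavior (hypothetical usage; not compiled as a doc-test):
///
/// ```ignore
/// // With the "supports-unicode" feature enabled:
/// set_force_ascii(true);
/// assert!(!supports_unicode()); // forcing ASCII always wins
/// ```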
pub fn supports_unicode() -> bool {
    #[cfg(feature = "supports-unicode")]
    {
        use core::sync::atomic::Ordering;

        !FORCE_ASCII.load(Ordering::Acquire) && supports_unicode_crate::supports_unicode()
    }

    #[cfg(not(feature = "supports-unicode"))]
    {
        false
    }
}
--------------------------------------------------------------------------------
/wright/tests/lexer.rs:
--------------------------------------------------------------------------------
#![cfg(feature = "lexer")]

use std::sync::Arc;
use wright::{
    lexer::{Lexer, token::TokenTy},
    source_tracking::{filename::FileName, source::Source},
};

fn new_test_lexer(s: &'static str) -> Lexer {
    Lexer::new(Arc::new(Source::new_from_static_str(FileName::None, s)))
}

/// Test unterminated string literal.
#[test]
fn unterminated_string_literal() {
    let mut lexer = new_test_lexer(r#""this string is not closed"#);

    let token = lexer.next_token().unwrap();

    assert_eq!(token.variant, TokenTy::StringLiteral { terminated: false });
    assert_eq!(token.fragment.as_str(), lexer.remaining.source.source().as_str());
    assert_eq!(lexer.bytes_remaining(), 0);

    assert!(lexer.next_token().is_none());
}

/// Test string literal with escaped terminal.
#[test]
fn string_with_escape() {
    let mut lexer = new_test_lexer(r#""this string has an escaped terminator \" ""#);

    let token = lexer.next_token().unwrap();

    assert_eq!(token.variant, TokenTy::StringLiteral { terminated: true });
    assert_eq!(token.fragment.as_str(), lexer.remaining.source.source().as_str());
    assert_eq!(lexer.bytes_remaining(), 0);

    assert!(lexer.next_token().is_none());
}
--------------------------------------------------------------------------------
/wright/tests/parser.rs:
--------------------------------------------------------------------------------
#![cfg(all(feature = "reporting", feature = "parser"))]

use termcolor::Buffer;
use wright::{
    ast::identifier::Identifier,
    lexer::Lexer,
    parser::Parser,
    source_tracking::{SourceMap, SourceRef, filename::FileName, source::Source},
};

#[test]
fn test_parse_fail_identifier_to_diagnostic() {
    let map: SourceMap = SourceMap::new();
    let source_ref: SourceRef = map.add(Source::new_from_static_str(FileName::None, "12345"));
    let mut parser = Parser::new(Lexer::new(source_ref));
    let parse_error = Identifier::parse(&mut parser).unwrap_err();
    let mut buffer = Buffer::no_color();

    parse_error
        .as_diagnostic()
        .write(&map, &mut buffer, &Default::default())
        .unwrap();

    assert_eq!(
        std::str::from_utf8(buffer.as_slice()).unwrap(),
        "\
error: expected identifier
  ┌─ :1:1
  │
1 │ 12345
  │ ^^^^^\n\n"
    );
}
--------------------------------------------------------------------------------