├── .cargo └── config ├── .dockerignore ├── .github └── workflows │ ├── pages.yml │ ├── release.yml │ └── test.yml ├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── Cargo.lock ├── Cargo.toml ├── Dockerfile ├── crates ├── r0codegen │ ├── Cargo.toml │ └── src │ │ ├── code.rs │ │ ├── err.rs │ │ ├── generator │ │ ├── mod.rs │ │ └── util.rs │ │ ├── lib.rs │ │ ├── scope.rs │ │ └── ty.rs ├── r0vm │ ├── .gitignore │ ├── .vscode │ │ └── settings.json │ ├── Cargo.lock │ ├── Cargo.toml │ ├── readme.md │ └── src │ │ ├── error.rs │ │ ├── lib.rs │ │ ├── opcodes.rs │ │ ├── s0 │ │ ├── io.rs │ │ └── mod.rs │ │ ├── tests │ │ ├── mod.rs │ │ └── ser.rs │ │ ├── util.rs │ │ └── vm │ │ ├── mem.rs │ │ ├── mod.rs │ │ └── ops.rs └── syntax │ ├── .gitignore │ ├── Cargo.toml │ ├── readme.md │ └── src │ ├── ast.rs │ ├── lexer.rs │ ├── lib.rs │ ├── parser │ ├── err.rs │ └── mod.rs │ ├── scope.rs │ ├── span.rs │ ├── token.rs │ └── util.rs ├── docs ├── .gitignore ├── book.toml ├── src │ ├── SUMMARY.md │ ├── c0 │ │ ├── c0.md │ │ ├── expr.md │ │ ├── extended-c0.md │ │ ├── func.md │ │ ├── method_notes.md │ │ ├── notes.md │ │ ├── stdlib.md │ │ ├── stmt.md │ │ ├── token.md │ │ └── ty.md │ ├── compile-pipeline.md │ ├── ebnf.md │ ├── judge.md │ ├── navm │ │ ├── faq.md │ │ ├── index.md │ │ └── instruction.md │ ├── preface.md │ ├── ref-impl.md │ ├── reference.md │ ├── requirement.md │ ├── res │ │ ├── 200331-natrium.png │ │ ├── banner.png │ │ ├── base.png │ │ └── logo.png │ ├── scoring.md │ └── todo.md └── theme │ └── css │ └── additional.css ├── judge.toml ├── readme.md ├── src ├── bin │ └── r0vm.rs ├── lib.rs ├── main.rs ├── test.rs └── util.rs └── web ├── .gitignore ├── Cargo.toml ├── README.md ├── js ├── index.styl ├── index.tsx └── main.tsx ├── package.json ├── src └── lib.rs ├── static └── index.html ├── tests └── app.rs ├── tsconfig.json ├── webpack.config.js ├── webpack.prod.js └── yarn.lock /.cargo/config: 
-------------------------------------------------------------------------------- 1 | [target.x86_64-unknown-linux-musl] 2 | rustflags = [ 3 | "-C", 4 | "link-arg=-fuse-ld=lld", 5 | ] 6 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | /crates/r0codegen/target/ 3 | /crates/r0vm/target/ 4 | /crates/syntax/target/ 5 | /web/node_modules/ 6 | /web/pkg/ 7 | -------------------------------------------------------------------------------- /.github/workflows/pages.yml: -------------------------------------------------------------------------------- 1 | name: Deploy GitHub Pages 2 | on: 3 | push: 4 | branches: 5 | - master 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Setup mdBook 13 | uses: peaceiris/actions-mdbook@v1 14 | with: 15 | # mdbook-version: '0.4.1' 16 | mdbook-version: 'latest' 17 | 18 | - name: setup wasm-pack 19 | run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh 20 | 21 | - name: build c0 book 22 | run: mdbook build 23 | working-directory: ./docs 24 | 25 | - name: build c0 web 26 | run: | 27 | mkdir -p static/fonts 28 | wget https://github.com/be5invis/Iosevka/releases/download/v4.0.0-alpha.1/webfont-iosevka-4.0.0-alpha.1.zip -O iosevka.zip 29 | 7z x iosevka.zip -ostatic/fonts/ # -o sets the output directory; a bare path after the archive is a member filter, not a destination 30 | yarn && yarn build 31 | working-directory: ./web 32 | 33 | - name: combine build results 34 | run: | 35 | mkdir web-dist 36 | cp -r ./docs/book/* web-dist 37 | mkdir -p web-dist/playground 38 | cp -r ./web/dist/* web-dist/playground 39 | 40 | - name: Deploy 41 | uses: peaceiris/actions-gh-pages@v3 42 | with: 43 | github_token: ${{ secrets.GITHUB_TOKEN }} 44 | publish_dir: ./web-dist 45 | cname: c0.karenia.cc 46 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: 
-------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | 8 | jobs: 9 | build-judger: 10 | strategy: 11 | matrix: 12 | include: 13 | # - os: ubuntu-latest 14 | # artifact_name: rurikawa 15 | # asset_name: rurikawa-linux-amd64-gnu 16 | # # target_name: x86_64-unknown-linux-gnu 17 | - os: ubuntu-latest 18 | artifact_name: natrium 19 | artifact_name_2: navm 20 | asset_name: natrium-linux-amd64-musl 21 | asset_name_2: navm-linux-amd64-musl 22 | target_name: x86_64-unknown-linux-musl 23 | - os: windows-latest 24 | artifact_name: natrium.exe 25 | artifact_name_2: navm.exe 26 | asset_name: natrium-windows-amd64.exe 27 | asset_name_2: navm-windows-amd64.exe 28 | - os: macos-latest 29 | artifact_name: natrium 30 | artifact_name_2: navm 31 | asset_name: natrium-macos-amd64 32 | asset_name_2: navm-macos-amd64 33 | 34 | runs-on: ${{ matrix.os }} 35 | steps: 36 | - uses: actions/checkout@v2 37 | with: 38 | submodules: true 39 | 40 | - name: Build rust 41 | if: ${{ matrix.target_name == 0 }} 42 | run: | 43 | cargo fetch 44 | cargo build --frozen --release 45 | 46 | - name: install musl tools 47 | if: ${{ matrix.target_name == 'x86_64-unknown-linux-musl' }} 48 | run: sudo apt update && sudo apt install musl musl-dev musl-tools 49 | 50 | - name: Install rust target 51 | if: ${{ matrix.target_name != 0 }} 52 | continue-on-error: true 53 | run: rustup target add ${{ matrix.target_name }} 54 | 55 | - name: Build rust (targeted) 56 | if: ${{ matrix.target_name != 0 }} 57 | run: | 58 | cargo fetch 59 | cargo build --frozen --release --target ${{ matrix.target_name }} 60 | 61 | - uses: haya14busa/action-cond@v1 62 | id: condval 63 | with: 64 | cond: ${{ matrix.target_name != 0 }} 65 | if_true: target/${{matrix.target_name}}/release 66 | if_false: target/release 67 | 68 | - name: Upload binaries to release 69 | uses: svenstaro/upload-release-action@v1-release 70 | with: 71 | repo_token: ${{ 
secrets.GITHUB_TOKEN }} 72 | file: ${{ steps.condval.outputs.value }}/${{ matrix.artifact_name }} 73 | asset_name: ${{ matrix.asset_name }} 74 | tag: ${{ github.ref }} 75 | overwrite: true 76 | - name: Upload binaries to release 77 | uses: svenstaro/upload-release-action@v1-release 78 | with: 79 | repo_token: ${{ secrets.GITHUB_TOKEN }} 80 | file: ${{ steps.condval.outputs.value }}/${{ matrix.artifact_name_2 }} 81 | asset_name: ${{ matrix.asset_name_2 }} 82 | tag: ${{ github.ref }} 83 | overwrite: true 84 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: rust-toolchain 14 | uses: actions-rs/toolchain@v1.0.6 15 | with: 16 | toolchain: nightly 17 | default: true 18 | - name: Build 19 | run: cargo build --verbose --package r0vm 20 | - name: Run tests 21 | run: cargo test --verbose --package r0vm 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | *.c0 3 | *.o0 4 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "type": "lldb", 9 | "request": "launch", 10 | "name": "Debug unit tests in library 'r0codegen'", 11 | "cargo": { 12 | "args": [ 13 | "test", 14 | "--no-run", 15 | "--lib", 16 | "--package=r0codegen" 17 | ], 18 | "filter": { 19 | "name": "r0codegen", 20 | "kind": "lib" 21 | } 22 | }, 23 | "args": [], 
24 | "cwd": "${workspaceFolder}" 25 | }, 26 | { 27 | "type": "lldb", 28 | "request": "launch", 29 | "name": "Debug unit tests in library 'r0syntax'", 30 | "cargo": { 31 | "args": [ 32 | "test", 33 | "--no-run", 34 | "--lib", 35 | "--package=r0syntax" 36 | ], 37 | "filter": { 38 | "name": "r0syntax", 39 | "kind": "lib" 40 | } 41 | }, 42 | "args": [], 43 | "cwd": "${workspaceFolder}" 44 | }, 45 | { 46 | "type": "lldb", 47 | "request": "launch", 48 | "name": "Debug unit tests in library 'r0vm'", 49 | "cargo": { 50 | "args": [ 51 | "test", 52 | "--no-run", 53 | "--lib", 54 | "--package=r0vm" 55 | ], 56 | "filter": { 57 | "name": "r0vm", 58 | "kind": "lib" 59 | } 60 | }, 61 | "args": [], 62 | "cwd": "${workspaceFolder}" 63 | }, 64 | { 65 | "type": "lldb", 66 | "request": "launch", 67 | "name": "Debug executable 'natrium'", 68 | "cargo": { 69 | "args": [ 70 | "build", 71 | "--bin=natrium", 72 | "--package=natrium" 73 | ], 74 | "filter": { 75 | "name": "natrium", 76 | "kind": "bin" 77 | } 78 | }, 79 | "args": [], 80 | "cwd": "${workspaceFolder}" 81 | }, 82 | { 83 | "type": "lldb", 84 | "request": "launch", 85 | "name": "Debug unit tests in executable 'natrium'", 86 | "cargo": { 87 | "args": [ 88 | "test", 89 | "--no-run", 90 | "--bin=natrium", 91 | "--package=natrium" 92 | ], 93 | "filter": { 94 | "name": "natrium", 95 | "kind": "bin" 96 | } 97 | }, 98 | "args": [], 99 | "cwd": "${workspaceFolder}" 100 | } 101 | ] 102 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "Natrium" 4 | ] 5 | } -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Rynco Maekawa "] 3 | edition = "2018" 4 | name = "natrium" 5 | version = "0.1.0" 6 | 7 | [lib] 8 | crate-type = 
["cdylib", "rlib"] 9 | 10 | [[bin]] 11 | name = "navm" 12 | path = "src/bin/r0vm.rs" 13 | required-features = ["cli"] 14 | 15 | [[bin]] 16 | name = "natrium" 17 | path = "src/main.rs" 18 | required-features = ["cli"] 19 | 20 | [profile.release] 21 | lto = true 22 | 23 | [dependencies] 24 | bimap = "0.5.3" 25 | "clap" = {version = "3.0.0-beta.2", optional = true} 26 | "crossterm" = {version = "0.18", optional = true} 27 | itoa = "*" 28 | lexpr = "0.2.5" 29 | "line-span" = "0.1.2" 30 | "logos" = "0.11.4" 31 | "r0codegen" = {path = "crates/r0codegen"} 32 | "r0syntax" = {path = "crates/syntax"} 33 | "r0vm" = {path = "crates/r0vm", default-features = false} 34 | "rustyline" = {version = "7.0.0", optional = true} 35 | serde = "1.0" 36 | serde-lexpr = "0.1.1" 37 | shell-words = {version = "1.0.0", optional = true} 38 | tracing = "*" 39 | tracing-subscriber = "*" 40 | unicode-width = "0.1.8" 41 | # inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "llvm9-0", optional = true } 42 | 43 | [features] 44 | cli = ["clap", "crossterm", "r0vm/serde", "rustyline", "shell-words"] 45 | default = ["vm", "cli"] 46 | vm = ["r0vm/vm"] 47 | 48 | [workspace] 49 | members = ["crates/r0codegen", "crates/r0vm", "crates/syntax", "web"] 50 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rust:1.48-alpine 2 | RUN if [ -z "$CI" ]; then sed -i 's/dl-cdn.alpinelinux.org/mirrors.tuna.tsinghua.edu.cn/g' /etc/apk/repositories; fi 3 | RUN apk add --no-cache gcc libgcc build-base 4 | WORKDIR /app 5 | RUN if [ -z "$CI" ]; then \ 6 | mkdir -p ./.cargo && \ 7 | echo -e '[source.crates-io]\nreplace-with = "ustc"\n[source.ustc]\nregistry = "https://mirrors.tuna.tsinghua.edu.cn/git/crates.io-index.git"' > ./.cargo/config.toml;\ 8 | fi 9 | COPY crates ./crates 10 | COPY web ./web 11 | COPY Cargo.toml Cargo.lock ./ 12 | COPY src ./src 13 | RUN cargo fetch 
--locked 14 | RUN cargo build --release --locked --frozen 15 | -------------------------------------------------------------------------------- /crates/r0codegen/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Rynco Maekawa "] 3 | edition = "2018" 4 | name = "r0codegen" 5 | version = "0.1.0" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | "bit-set" = "0.5.2" 11 | indexmap = "1.6.0" 12 | once_cell = "1.4.1" 13 | r0syntax = {path = "../syntax"} 14 | r0vm = {path = "../r0vm", default_features = false} 15 | smol_str = "0.1.17" 16 | -------------------------------------------------------------------------------- /crates/r0codegen/src/code.rs: -------------------------------------------------------------------------------- 1 | use r0vm::opcodes::Op; 2 | 3 | #[derive(Debug, Copy, Clone)] 4 | pub enum JumpInst { 5 | Undefined, 6 | Unreachable, 7 | Return, 8 | Jump(usize), 9 | JumpIf(usize, usize), 10 | } 11 | 12 | #[derive(Debug, Clone)] 13 | pub struct BasicBlock { 14 | pub code: Vec, 15 | pub jump: JumpInst, 16 | } 17 | 18 | impl BasicBlock { 19 | pub fn new() -> BasicBlock { 20 | BasicBlock { 21 | code: vec![], 22 | jump: JumpInst::Undefined, 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /crates/r0codegen/src/err.rs: -------------------------------------------------------------------------------- 1 | use r0syntax::span::Span; 2 | 3 | #[derive(Debug, Clone)] 4 | pub struct CompileError { 5 | pub kind: CompileErrorKind, 6 | pub span: Option, 7 | } 8 | 9 | #[allow(non_snake_case)] 10 | pub fn CompileError(kind: CompileErrorKind, span: Option) -> CompileError { 11 | CompileError { kind, span } 12 | } 13 | 14 | #[derive(Debug, Clone)] 15 | pub enum CompileErrorKind { 16 | UnknownType(String), 17 | NoSuchSymbol(String), 18 | DuplicateSymbol(String), 19 | 
VoidTypeVariable, 20 | TypeMismatch { 21 | expected: String, 22 | got: Option, 23 | }, 24 | NotLValue, 25 | InvalidCalculation(String), 26 | FuncParamSizeMismatch(usize, usize), 27 | AssignToConst, 28 | NoBreakContext, 29 | NoContinueContext, 30 | NotAllRoutesReturn, 31 | } 32 | 33 | pub trait WithSpan { 34 | fn with_span(self, span: Span) -> Self; 35 | } 36 | 37 | impl WithSpan for CompileError { 38 | fn with_span(mut self, span: Span) -> CompileError { 39 | self.span = Some(span); 40 | self 41 | } 42 | } 43 | 44 | impl WithSpan for Result { 45 | fn with_span(self, span: Span) -> Result { 46 | self.map_err(|e| e.with_span(span)) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /crates/r0codegen/src/generator/util.rs: -------------------------------------------------------------------------------- 1 | use bit_set::BitSet; 2 | use r0syntax::span::Span; 3 | 4 | use crate::{ 5 | code::BasicBlock, 6 | err::{CompileError, CompileErrorKind}, 7 | }; 8 | 9 | use super::CompileResult; 10 | 11 | /// Cycle Finding state variable 12 | #[derive(Debug)] 13 | pub struct BBArranger<'st> { 14 | bb: &'st [BasicBlock], 15 | path: BitSet, 16 | vis: BitSet, 17 | in_degree: Vec, 18 | arr: Vec, 19 | } 20 | 21 | impl<'st> BBArranger<'st> { 22 | pub fn new(bb: &'st [BasicBlock]) -> BBArranger<'st> { 23 | BBArranger { 24 | bb, 25 | path: BitSet::new(), 26 | vis: BitSet::new(), 27 | in_degree: vec![0; bb.len()], 28 | arr: vec![], 29 | } 30 | } 31 | 32 | pub fn construct_arrangement(&mut self, start: usize) -> CompileResult<()> { 33 | self.vis(start); 34 | self.arr(start) 35 | } 36 | 37 | pub fn vis(&mut self, id: usize) { 38 | if self.path.contains(id) { 39 | // cycle does not count 40 | return; 41 | } 42 | self.in_degree[id] += 1; 43 | if self.vis.contains(id) { 44 | // visited node 45 | return; 46 | } 47 | self.vis.insert(id); 48 | self.path.insert(id); 49 | match self.bb[id].jump { 50 | crate::code::JumpInst::Jump(bb1) => { 51 | 
self.vis(bb1); 52 | } 53 | crate::code::JumpInst::JumpIf(bb1, bb2) => { 54 | self.vis(bb1); 55 | self.vis(bb2); 56 | } 57 | _ => {} 58 | } 59 | self.path.remove(id); 60 | } 61 | 62 | pub fn arr(&mut self, id: usize) -> CompileResult<()> { 63 | if self.path.contains(id) { 64 | // cycle does not count 65 | return Ok(()); 66 | } 67 | self.in_degree[id] = self.in_degree[id] 68 | .checked_sub(1) 69 | .unwrap_or_else(|| panic!("id: {}, in_degrees: {:?}", id, &self.in_degree)); 70 | if self.in_degree[id] != 0 { 71 | return Ok(()); 72 | } 73 | 74 | self.arr.push(id); 75 | self.path.insert(id); 76 | 77 | match self.bb[id].jump { 78 | crate::code::JumpInst::Jump(bb1) => { 79 | self.arr(bb1)?; 80 | } 81 | crate::code::JumpInst::JumpIf(bb1, bb2) => { 82 | self.arr(bb1)?; 83 | self.arr(bb2)?; 84 | } 85 | crate::code::JumpInst::Return => {} 86 | crate::code::JumpInst::Unreachable => panic!( 87 | "Unreachable basic block {} being visited; block map:\n {:#?}", 88 | id, self.bb 89 | ), 90 | crate::code::JumpInst::Undefined => { 91 | return Err(CompileError(CompileErrorKind::NotAllRoutesReturn, None)) 92 | } 93 | } 94 | self.path.remove(id); 95 | Ok(()) 96 | } 97 | 98 | pub fn arrange(self) -> Vec { 99 | self.arr 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /crates/r0codegen/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod code; 2 | pub mod err; 3 | pub mod generator; 4 | pub mod scope; 5 | pub mod ty; 6 | 7 | pub trait Codegen {} 8 | -------------------------------------------------------------------------------- /crates/r0codegen/src/scope.rs: -------------------------------------------------------------------------------- 1 | use std::{cell::RefCell, sync::atomic::AtomicU64}; 2 | 3 | use indexmap::IndexMap; 4 | use smol_str::SmolStr; 5 | 6 | use crate::ty::Ty; 7 | 8 | #[derive(Debug)] 9 | pub struct SymbolIdGenerator { 10 | next_id: u64, 11 | } 12 | 13 | impl 
SymbolIdGenerator { 14 | pub fn new() -> SymbolIdGenerator { 15 | SymbolIdGenerator { next_id: 0 } 16 | } 17 | 18 | pub fn next(&mut self) -> u64 { 19 | let id = self.next_id; 20 | self.next_id += 1; 21 | id 22 | } 23 | } 24 | 25 | #[derive(Debug)] 26 | pub struct Scope<'p> { 27 | symbol_gen: &'p RefCell, 28 | pub parent: Option<&'p Scope<'p>>, 29 | pub vars: IndexMap, 30 | } 31 | 32 | #[allow(clippy::new_without_default)] 33 | impl<'p> Scope<'p> { 34 | pub fn new_with_parent(parent: &'p Scope<'p>) -> Scope<'p> { 35 | Scope { 36 | symbol_gen: parent.symbol_gen, 37 | parent: Some(parent), 38 | vars: IndexMap::new(), 39 | } 40 | } 41 | 42 | pub fn new(symbol_gen: &'p RefCell) -> Scope<'p> { 43 | Scope { 44 | symbol_gen, 45 | parent: None, 46 | vars: IndexMap::new(), 47 | } 48 | } 49 | 50 | pub fn find_in_self(&self, ident: &str) -> Option<&Symbol> { 51 | self.vars.get(ident) 52 | } 53 | 54 | pub fn find<'s>(&'s self, ident: &str) -> Option<&'s Symbol> { 55 | let self_res = self.find_in_self(ident); 56 | 57 | if self_res.is_none() { 58 | if let Some(p) = self.parent { 59 | return p.find(ident); 60 | } 61 | } 62 | self_res 63 | } 64 | 65 | pub fn is_root_scope(&self) -> bool { 66 | self.parent.is_none() 67 | } 68 | 69 | pub fn find_is_global<'s>(&'s self, ident: &str) -> Option<(&'s Symbol, bool)> { 70 | let self_res = self.find_in_self(ident); 71 | 72 | if self_res.is_none() { 73 | if let Some(p) = self.parent { 74 | return p.find_is_global(ident); 75 | } 76 | } 77 | 78 | self_res.map(|x| (x, self.is_root_scope())) 79 | } 80 | 81 | pub fn insert(&mut self, ident: SmolStr, mut symbol: Symbol) -> Option { 82 | let entry = self.vars.entry(ident); 83 | match entry { 84 | indexmap::map::Entry::Occupied(_) => None, 85 | indexmap::map::Entry::Vacant(v) => { 86 | let id = self.symbol_gen.borrow_mut().next(); 87 | symbol.id = id; 88 | v.insert(symbol); 89 | Some(id) 90 | } 91 | } 92 | } 93 | 94 | pub fn get_new_id(&self) -> u64 { 95 | self.symbol_gen.borrow_mut().next() 96 | } 
97 | } 98 | 99 | #[derive(Debug, Clone)] 100 | pub struct Symbol { 101 | pub id: u64, 102 | pub ty: Ty, 103 | pub is_const: bool, 104 | } 105 | 106 | impl Symbol { 107 | pub fn new(ty: Ty, is_const: bool) -> Symbol { 108 | Symbol { 109 | ty, 110 | is_const, 111 | id: 0, 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /crates/r0codegen/src/ty.rs: -------------------------------------------------------------------------------- 1 | use r0syntax::util::P; 2 | 3 | #[derive(Debug, Clone, Eq, PartialEq)] 4 | pub enum Ty { 5 | Int, 6 | Double, 7 | Bool, 8 | Addr, 9 | Func(FuncTy), 10 | Void, 11 | } 12 | 13 | #[derive(Debug, Clone, Eq, PartialEq)] 14 | pub struct FuncTy { 15 | pub params: Vec>, 16 | pub ret: P, 17 | } 18 | 19 | impl Ty { 20 | pub fn size(&self) -> usize { 21 | match self { 22 | Ty::Int | Ty::Double | Ty::Addr => 8, 23 | Ty::Bool => 1, 24 | Ty::Func(_) => 0, 25 | Ty::Void => 0, 26 | } 27 | } 28 | 29 | pub fn size_slot(&self) -> usize { 30 | match self { 31 | Ty::Int | Ty::Double | Ty::Bool | Ty::Addr => 1, 32 | Ty::Func(_) => 0, 33 | Ty::Void => 0, 34 | } 35 | } 36 | 37 | pub fn get_func(&self) -> Option<&FuncTy> { 38 | match self { 39 | Ty::Func(f) => Some(f), 40 | _ => None, 41 | } 42 | } 43 | } 44 | 45 | impl std::fmt::Display for Ty { 46 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 47 | match self { 48 | Ty::Int => write!(f, "int"), 49 | Ty::Double => write!(f, "double"), 50 | Ty::Bool => write!(f, "bool"), 51 | Ty::Addr => write!(f, "addr"), 52 | Ty::Func(ty) => { 53 | write!(f, "Fn(")?; 54 | let mut param_iter = ty.params.iter(); 55 | 56 | if let Some(r) = param_iter.next() { 57 | write!(f, "{}", r)?; 58 | } 59 | for r in param_iter { 60 | write!(f, ", {}", r)?; 61 | } 62 | 63 | write!(f, ") -> {}", ty.ret) 64 | } 65 | Ty::Void => write!(f, "void"), 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- 
/crates/r0vm/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /crates/r0vm/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "rust-analyzer.cargo.features": [ 3 | "llvm9", 4 | "test" 5 | ] 6 | } -------------------------------------------------------------------------------- /crates/r0vm/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "aho-corasick" 5 | version = "0.7.10" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | checksum = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" 8 | dependencies = [ 9 | "memchr", 10 | ] 11 | 12 | [[package]] 13 | name = "backtrace" 14 | version = "0.3.46" 15 | source = "registry+https://github.com/rust-lang/crates.io-index" 16 | checksum = "b1e692897359247cc6bb902933361652380af0f1b7651ae5c5013407f30e109e" 17 | dependencies = [ 18 | "backtrace-sys", 19 | "cfg-if", 20 | "libc", 21 | "rustc-demangle", 22 | ] 23 | 24 | [[package]] 25 | name = "backtrace-sys" 26 | version = "0.1.35" 27 | source = "registry+https://github.com/rust-lang/crates.io-index" 28 | checksum = "7de8aba10a69c8e8d7622c5710229485ec32e9d55fdad160ea559c086fdcd118" 29 | dependencies = [ 30 | "cc", 31 | "libc", 32 | ] 33 | 34 | [[package]] 35 | name = "bitflags" 36 | version = "1.2.1" 37 | source = "registry+https://github.com/rust-lang/crates.io-index" 38 | checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" 39 | 40 | [[package]] 41 | name = "cc" 42 | version = "1.0.50" 43 | source = "registry+https://github.com/rust-lang/crates.io-index" 44 | checksum = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd" 45 | 46 | 
[[package]] 47 | name = "cfg-if" 48 | version = "0.1.10" 49 | source = "registry+https://github.com/rust-lang/crates.io-index" 50 | checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" 51 | 52 | [[package]] 53 | name = "cloudabi" 54 | version = "0.0.3" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" 57 | dependencies = [ 58 | "bitflags", 59 | ] 60 | 61 | [[package]] 62 | name = "either" 63 | version = "1.5.3" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" 66 | 67 | [[package]] 68 | name = "failure" 69 | version = "0.1.7" 70 | source = "registry+https://github.com/rust-lang/crates.io-index" 71 | checksum = "b8529c2421efa3066a5cbd8063d2244603824daccb6936b079010bb2aa89464b" 72 | dependencies = [ 73 | "backtrace", 74 | "failure_derive", 75 | ] 76 | 77 | [[package]] 78 | name = "failure_derive" 79 | version = "0.1.7" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | checksum = "030a733c8287d6213886dd487564ff5c8f6aae10278b3588ed177f9d18f8d231" 82 | dependencies = [ 83 | "proc-macro2 1.0.10", 84 | "quote 1.0.3", 85 | "syn 1.0.17", 86 | "synstructure", 87 | ] 88 | 89 | [[package]] 90 | name = "inkwell" 91 | version = "0.1.0" 92 | source = "git+https://github.com/TheDan64/inkwell?branch=llvm9-0#5a7c94b2b5e4ebabc94ecd6dc8d29d8ed7309e16" 93 | dependencies = [ 94 | "either", 95 | "inkwell_internals", 96 | "libc", 97 | "llvm-sys", 98 | "once_cell", 99 | "parking_lot", 100 | "regex", 101 | ] 102 | 103 | [[package]] 104 | name = "inkwell_internals" 105 | version = "0.1.0" 106 | source = "git+https://github.com/TheDan64/inkwell?branch=llvm9-0#5a7c94b2b5e4ebabc94ecd6dc8d29d8ed7309e16" 107 | dependencies = [ 108 | "proc-macro2 0.4.30", 109 | "quote 0.6.13", 110 | "syn 0.15.44", 111 | ] 112 | 113 | [[package]] 114 | name = 
"lazy_static" 115 | version = "1.4.0" 116 | source = "registry+https://github.com/rust-lang/crates.io-index" 117 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 118 | 119 | [[package]] 120 | name = "libc" 121 | version = "0.2.68" 122 | source = "registry+https://github.com/rust-lang/crates.io-index" 123 | checksum = "dea0c0405123bba743ee3f91f49b1c7cfb684eef0da0a50110f758ccf24cdff0" 124 | 125 | [[package]] 126 | name = "llvm-sys" 127 | version = "90.0.0" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "191f83195c348af813546379e057429b13f5f2009896d45c52a1235ede6d2b84" 130 | dependencies = [ 131 | "cc", 132 | "lazy_static", 133 | "libc", 134 | "regex", 135 | "semver", 136 | ] 137 | 138 | [[package]] 139 | name = "lock_api" 140 | version = "0.3.4" 141 | source = "registry+https://github.com/rust-lang/crates.io-index" 142 | checksum = "c4da24a77a3d8a6d4862d95f72e6fdb9c09a643ecdb402d754004a557f2bec75" 143 | dependencies = [ 144 | "scopeguard", 145 | ] 146 | 147 | [[package]] 148 | name = "memchr" 149 | version = "2.3.3" 150 | source = "registry+https://github.com/rust-lang/crates.io-index" 151 | checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" 152 | 153 | [[package]] 154 | name = "once_cell" 155 | version = "1.3.1" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | checksum = "b1c601810575c99596d4afc46f78a678c80105117c379eb3650cf99b8a21ce5b" 158 | 159 | [[package]] 160 | name = "parking_lot" 161 | version = "0.10.2" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "d3a704eb390aafdc107b0e392f56a82b668e3a71366993b5340f5833fd62505e" 164 | dependencies = [ 165 | "lock_api", 166 | "parking_lot_core", 167 | ] 168 | 169 | [[package]] 170 | name = "parking_lot_core" 171 | version = "0.7.1" 172 | source = "registry+https://github.com/rust-lang/crates.io-index" 173 | checksum = 
"0e136c1904604defe99ce5fd71a28d473fa60a12255d511aa78a9ddf11237aeb" 174 | dependencies = [ 175 | "cfg-if", 176 | "cloudabi", 177 | "libc", 178 | "redox_syscall", 179 | "smallvec", 180 | "winapi", 181 | ] 182 | 183 | [[package]] 184 | name = "proc-macro2" 185 | version = "0.4.30" 186 | source = "registry+https://github.com/rust-lang/crates.io-index" 187 | checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" 188 | dependencies = [ 189 | "unicode-xid 0.1.0", 190 | ] 191 | 192 | [[package]] 193 | name = "proc-macro2" 194 | version = "1.0.10" 195 | source = "registry+https://github.com/rust-lang/crates.io-index" 196 | checksum = "df246d292ff63439fea9bc8c0a270bed0e390d5ebd4db4ba15aba81111b5abe3" 197 | dependencies = [ 198 | "unicode-xid 0.2.0", 199 | ] 200 | 201 | [[package]] 202 | name = "quote" 203 | version = "0.6.13" 204 | source = "registry+https://github.com/rust-lang/crates.io-index" 205 | checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" 206 | dependencies = [ 207 | "proc-macro2 0.4.30", 208 | ] 209 | 210 | [[package]] 211 | name = "quote" 212 | version = "1.0.3" 213 | source = "registry+https://github.com/rust-lang/crates.io-index" 214 | checksum = "2bdc6c187c65bca4260c9011c9e3132efe4909da44726bad24cf7572ae338d7f" 215 | dependencies = [ 216 | "proc-macro2 1.0.10", 217 | ] 218 | 219 | [[package]] 220 | name = "r0vm" 221 | version = "0.1.0" 222 | dependencies = [ 223 | "failure", 224 | "inkwell", 225 | "static_assertions", 226 | "text_io", 227 | ] 228 | 229 | [[package]] 230 | name = "redox_syscall" 231 | version = "0.1.56" 232 | source = "registry+https://github.com/rust-lang/crates.io-index" 233 | checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" 234 | 235 | [[package]] 236 | name = "regex" 237 | version = "1.3.7" 238 | source = "registry+https://github.com/rust-lang/crates.io-index" 239 | checksum = "a6020f034922e3194c711b82a627453881bc4682166cabb07134a10c26ba7692" 240 | 
dependencies = [ 241 | "aho-corasick", 242 | "memchr", 243 | "regex-syntax", 244 | "thread_local", 245 | ] 246 | 247 | [[package]] 248 | name = "regex-syntax" 249 | version = "0.6.17" 250 | source = "registry+https://github.com/rust-lang/crates.io-index" 251 | checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae" 252 | 253 | [[package]] 254 | name = "rustc-demangle" 255 | version = "0.1.16" 256 | source = "registry+https://github.com/rust-lang/crates.io-index" 257 | checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783" 258 | 259 | [[package]] 260 | name = "scopeguard" 261 | version = "1.1.0" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 264 | 265 | [[package]] 266 | name = "semver" 267 | version = "0.9.0" 268 | source = "registry+https://github.com/rust-lang/crates.io-index" 269 | checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" 270 | dependencies = [ 271 | "semver-parser", 272 | ] 273 | 274 | [[package]] 275 | name = "semver-parser" 276 | version = "0.7.0" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" 279 | 280 | [[package]] 281 | name = "smallvec" 282 | version = "1.3.0" 283 | source = "registry+https://github.com/rust-lang/crates.io-index" 284 | checksum = "05720e22615919e4734f6a99ceae50d00226c3c5aca406e102ebc33298214e0a" 285 | 286 | [[package]] 287 | name = "static_assertions" 288 | version = "1.1.0" 289 | source = "registry+https://github.com/rust-lang/crates.io-index" 290 | checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" 291 | 292 | [[package]] 293 | name = "syn" 294 | version = "0.15.44" 295 | source = "registry+https://github.com/rust-lang/crates.io-index" 296 | checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" 
297 | dependencies = [ 298 | "proc-macro2 0.4.30", 299 | "quote 0.6.13", 300 | "unicode-xid 0.1.0", 301 | ] 302 | 303 | [[package]] 304 | name = "syn" 305 | version = "1.0.17" 306 | source = "registry+https://github.com/rust-lang/crates.io-index" 307 | checksum = "0df0eb663f387145cab623dea85b09c2c5b4b0aef44e945d928e682fce71bb03" 308 | dependencies = [ 309 | "proc-macro2 1.0.10", 310 | "quote 1.0.3", 311 | "unicode-xid 0.2.0", 312 | ] 313 | 314 | [[package]] 315 | name = "synstructure" 316 | version = "0.12.3" 317 | source = "registry+https://github.com/rust-lang/crates.io-index" 318 | checksum = "67656ea1dc1b41b1451851562ea232ec2e5a80242139f7e679ceccfb5d61f545" 319 | dependencies = [ 320 | "proc-macro2 1.0.10", 321 | "quote 1.0.3", 322 | "syn 1.0.17", 323 | "unicode-xid 0.2.0", 324 | ] 325 | 326 | [[package]] 327 | name = "text_io" 328 | version = "0.1.8" 329 | source = "registry+https://github.com/rust-lang/crates.io-index" 330 | checksum = "6cb170b4f47dc48835fbc56259c12d8963e542b05a24be2e3a1f5a6c320fd2d4" 331 | 332 | [[package]] 333 | name = "thread_local" 334 | version = "1.0.1" 335 | source = "registry+https://github.com/rust-lang/crates.io-index" 336 | checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" 337 | dependencies = [ 338 | "lazy_static", 339 | ] 340 | 341 | [[package]] 342 | name = "unicode-xid" 343 | version = "0.1.0" 344 | source = "registry+https://github.com/rust-lang/crates.io-index" 345 | checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" 346 | 347 | [[package]] 348 | name = "unicode-xid" 349 | version = "0.2.0" 350 | source = "registry+https://github.com/rust-lang/crates.io-index" 351 | checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" 352 | 353 | [[package]] 354 | name = "winapi" 355 | version = "0.3.8" 356 | source = "registry+https://github.com/rust-lang/crates.io-index" 357 | checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" 358 | 
dependencies = [ 359 | "winapi-i686-pc-windows-gnu", 360 | "winapi-x86_64-pc-windows-gnu", 361 | ] 362 | 363 | [[package]] 364 | name = "winapi-i686-pc-windows-gnu" 365 | version = "0.4.0" 366 | source = "registry+https://github.com/rust-lang/crates.io-index" 367 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 368 | 369 | [[package]] 370 | name = "winapi-x86_64-pc-windows-gnu" 371 | version = "0.4.0" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 374 | -------------------------------------------------------------------------------- /crates/r0vm/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Rynco Maekawa "] 3 | edition = "2018" 4 | name = "r0vm" 5 | version = "0.1.0" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | [dependencies] 9 | "failure" = "^0.1.7" 10 | "serde" = {version = "1.0", optional = true, features = ["derive"]} 11 | smol_str = "0.1.17" 12 | "static_assertions" = "1.1.0" 13 | "text_io" = "0.1.8" 14 | "tracing" = "0.1.22" 15 | 16 | [dev-dependencies] 17 | "ntest" = "0.7.1" 18 | 19 | [features] 20 | default = ["vm", "serde"] 21 | vm = [] 22 | 23 | [package.metadata] 24 | test-timeout = 10 25 | -------------------------------------------------------------------------------- /crates/r0vm/readme.md: -------------------------------------------------------------------------------- 1 | # r0vm 2 | 3 | 这个库包含了 r0 使用的虚拟机。虚拟机计划提供 C 风格的接口。 4 | -------------------------------------------------------------------------------- /crates/r0vm/src/error.rs: -------------------------------------------------------------------------------- 1 | use failure::Fail; 2 | use std::fmt::Display; 3 | 4 | use crate::s0::S0; 5 | 6 | pub type Result = std::result::Result; 7 | 8 | #[derive(Fail, Debug)] 9 | pub enum 
Error { 10 | #[fail(display = "Invalid instruction: {}", _0)] 11 | InvalidInstruction(InvalidInstructionCtx), 12 | 13 | #[fail(display = "Stack overflow")] 14 | StackOverflow, 15 | 16 | #[fail(display = "Stack underflow")] 17 | StackUnderflow, 18 | 19 | #[fail(display = "Invalid local variable index {}", _0)] 20 | InvalidLocalIndex(u32), 21 | 22 | #[fail(display = "Invalid local variable index {}", _0)] 23 | InvalidArgIndex(u32), 24 | 25 | #[fail(display = "Invalid global variable index {}", _0)] 26 | InvalidGlobalIndex(u32), 27 | 28 | #[fail( 29 | display = "Invalid function name index in global variable for function {} :{}", 30 | _0, _1 31 | )] 32 | InvalidFunctionNameIndex(usize, u32), 33 | 34 | #[fail(display = "Invalid address 0x{:016x}", _0)] 35 | InvalidAddress(u64), 36 | 37 | #[fail(display = "Invalid stack offset {} (bp + {})", _0, _1)] 38 | InvalidStackOffset(u64, i64), 39 | 40 | #[fail(display = "Invalid function ID {}", _0)] 41 | InvalidFnId(u32), 42 | 43 | #[fail(display = "Unknown function (name index: {})", _0)] 44 | UnknownFunction(u32), 45 | 46 | #[fail(display = "Unknown function name: {}", _0)] 47 | UnknownFunctionName(String), 48 | 49 | #[fail(display = "Invalid instruction offset {}", _0)] 50 | InvalidInstructionOffset(usize), 51 | 52 | #[fail(display = "Dividing by zero")] 53 | DivZero, 54 | 55 | #[fail(display = "Arithmetic error")] 56 | ArithmeticErr, 57 | 58 | #[fail(display = "Allocated 0 size of memory")] 59 | AllocZero, 60 | 61 | #[fail(display = "Deallocating memory that is not allocated")] 62 | InvalidDeallocation, 63 | 64 | #[fail(display = "Out of memory")] 65 | OutOfMemory, 66 | 67 | #[fail(display = "Unaligned memory access of address 0x{:016x}", _0)] 68 | UnalignedAccess(u64), 69 | 70 | #[fail(display = "Control reaches end of function #{} without returning", _0)] 71 | ControlReachesEnd(usize), 72 | 73 | #[fail(display = "Unable to find entry point")] 74 | NoEntryPoint, 75 | 76 | #[fail(display = "Parse error")] 77 | ParseError, 
78 | 79 | #[fail(display = "IO error: {}", _0)] 80 | IoError(std::io::Error), 81 | 82 | #[fail(display = "Allocation Layout error: {}", _0)] 83 | AllocLayoutError(std::alloc::LayoutErr), 84 | 85 | #[fail(display = "Halt")] 86 | Halt, 87 | } 88 | 89 | // impl Error { 90 | // pub fn format_with_ctx(&self, f: &mut std::fmt::Formatter, s0: &S0) -> std::fmt::Result { 91 | // self.fmt(f) 92 | // } 93 | // } 94 | 95 | // pub struct ErrorCtx<'e>{ 96 | // error:'e, 97 | // } 98 | 99 | #[derive(Debug)] 100 | pub struct InvalidInstructionCtx { 101 | /// Instruction opcode 102 | pub inst: u8, 103 | /// Function id 104 | pub fn_id: u32, 105 | /// Instruction offset 106 | pub inst_off: u64, 107 | } 108 | 109 | impl Display for InvalidInstructionCtx { 110 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 111 | write!( 112 | f, 113 | "0x{:02x} at fn #{}:{}", 114 | self.inst, self.fn_id, self.inst_off 115 | ) 116 | } 117 | } 118 | 119 | impl From<std::io::Error> for Error { 120 | fn from(x: std::io::Error) -> Self { 121 | Error::IoError(x) 122 | } 123 | } 124 | 125 | impl From<std::alloc::LayoutErr> for Error { 126 | fn from(x: std::alloc::LayoutErr) -> Self { 127 | Error::AllocLayoutError(x) 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /crates/r0vm/src/lib.rs: -------------------------------------------------------------------------------- 1 | // #![feature(map_first_last)] 2 | #![allow(clippy::transmute_int_to_float)] 3 | 4 | pub mod error; 5 | pub mod opcodes; 6 | pub mod s0; 7 | #[cfg(test)] 8 | mod tests; 9 | mod util; 10 | #[cfg(feature = "vm")] 11 | pub mod vm; 12 | 13 | #[macro_export] 14 | /// Create an in-memory representation for s0 binary 15 | macro_rules! 
s0_bin { 16 | ( 17 | $( 18 | // TODO: global variable declaration 19 | const $const_val:expr; 20 | )* 21 | $( 22 | // TODO: global variable declaration 23 | let $val:expr; 24 | )* 25 | $( 26 | fn $name:ident $loc_slots:literal $param:literal -> $ret:literal { 27 | $($inst:expr $(,)?)* 28 | } 29 | )+ 30 | ) => {{ 31 | use $crate::opcodes::Op::*; 32 | use $crate::util::IntoBytes; 33 | let mut globals = vec![]; 34 | 35 | $({ 36 | let bytes = $const_val.into_bytes(); 37 | let glob = GlobalValue { 38 | is_const: true, 39 | bytes 40 | }; 41 | globals.push(glob); 42 | })* 43 | $({ 44 | let bytes = $val.into_bytes(); 45 | let glob = GlobalValue { 46 | is_const: false, 47 | bytes 48 | }; 49 | globals.push(glob); 50 | })* 51 | 52 | let mut fns = vec![]; 53 | $({ 54 | let name = stringify!($name); 55 | let bytes = name.into_bytes(); 56 | let glob = GlobalValue{ is_const:true, bytes }; 57 | let name_idx = globals.len(); 58 | globals.push(glob); 59 | 60 | let loc_slots = $loc_slots; 61 | let inst = vec![$($inst),*]; 62 | let func = FnDef{ 63 | name: name_idx as u32, 64 | loc_slots, 65 | param_slots: $param, 66 | ret_slots: $ret, 67 | ins: inst, 68 | }; 69 | fns.push(func); 70 | })+ 71 | let s0 = S0{ 72 | globals, 73 | functions: fns, 74 | }; 75 | s0 76 | }}; 77 | } 78 | -------------------------------------------------------------------------------- /crates/r0vm/src/opcodes.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "serde")] 2 | use serde::{Deserialize, Serialize}; 3 | 4 | #[derive(Debug, PartialEq, Eq, Clone, Copy)] 5 | #[repr(u8, C)] 6 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 7 | #[cfg_attr(feature = "serde", serde(rename_all = "lowercase"))] 8 | pub enum Op { 9 | Nop, 10 | Push(u64), 11 | Pop, 12 | PopN(u32), 13 | Dup, 14 | LocA(u32), 15 | ArgA(u32), 16 | GlobA(u32), 17 | Load8, 18 | Load16, 19 | Load32, 20 | Load64, 21 | Store8, 22 | Store16, 23 | Store32, 24 | Store64, 25 | Alloc, 26 | 
Free, 27 | StackAlloc(u32), 28 | AddI, 29 | SubI, 30 | MulI, 31 | DivI, 32 | AddF, 33 | SubF, 34 | MulF, 35 | DivF, 36 | DivU, 37 | Shl, 38 | Shr, 39 | And, 40 | Or, 41 | Xor, 42 | Not, 43 | CmpI, 44 | CmpU, 45 | CmpF, 46 | NegI, 47 | NegF, 48 | IToF, 49 | FToI, 50 | ShrL, 51 | SetLt, 52 | SetGt, 53 | BrA(u64), 54 | Br(i32), 55 | BrFalse(i32), 56 | BrTrue(i32), 57 | Call(u32), 58 | CallName(u32), 59 | Ret, 60 | ScanI, 61 | ScanC, 62 | ScanF, 63 | PrintI, 64 | PrintC, 65 | PrintF, 66 | PrintS, 67 | PrintLn, 68 | Panic, 69 | } 70 | 71 | impl Op { 72 | pub fn code(&self) -> u8 { 73 | use Op::*; 74 | match self { 75 | Nop => 0x00, 76 | Push(..) => 0x01, 77 | Pop => 0x02, 78 | PopN(..) => 0x03, 79 | Dup => 0x04, 80 | LocA(..) => 0x0a, 81 | ArgA(..) => 0x0b, 82 | GlobA(..) => 0x0c, 83 | Load8 => 0x10, 84 | Load16 => 0x11, 85 | Load32 => 0x12, 86 | Load64 => 0x13, 87 | Store8 => 0x14, 88 | Store16 => 0x15, 89 | Store32 => 0x16, 90 | Store64 => 0x17, 91 | Alloc => 0x18, 92 | Free => 0x19, 93 | StackAlloc(..) => 0x1a, 94 | AddI => 0x20, 95 | SubI => 0x21, 96 | MulI => 0x22, 97 | DivI => 0x23, 98 | AddF => 0x24, 99 | SubF => 0x25, 100 | MulF => 0x26, 101 | DivF => 0x27, 102 | DivU => 0x28, 103 | Shl => 0x29, 104 | Shr => 0x2a, 105 | And => 0x2b, 106 | Or => 0x2c, 107 | Xor => 0x2d, 108 | Not => 0x2e, 109 | CmpI => 0x30, 110 | CmpU => 0x31, 111 | CmpF => 0x32, 112 | NegI => 0x34, 113 | NegF => 0x35, 114 | IToF => 0x36, 115 | FToI => 0x37, 116 | ShrL => 0x38, 117 | SetLt => 0x39, 118 | SetGt => 0x3a, 119 | BrA(..) => 0x40, 120 | Br(..) => 0x41, 121 | BrFalse(..) => 0x42, 122 | BrTrue(..) => 0x43, 123 | Call(..) => 0x48, 124 | Ret => 0x49, 125 | CallName(..) 
=> 0x4a, 126 | ScanI => 0x50, 127 | ScanC => 0x51, 128 | ScanF => 0x52, 129 | PrintI => 0x54, 130 | PrintC => 0x55, 131 | PrintF => 0x56, 132 | PrintS => 0x57, 133 | PrintLn => 0x58, 134 | Panic => 0xfe, 135 | } 136 | } 137 | 138 | pub fn param_size(code: u8) -> usize { 139 | match code { 140 | 0x01 | 0x40 => 8, 141 | 0x03 | 0x0a | 0x0b | 0x0c | 0x1a | 0x41 | 0x42 | 0x43 | 0x48 | 0x4a => 4, 142 | _ => 0, 143 | } 144 | } 145 | 146 | pub fn from_code(code: u8, param: u64) -> Option { 147 | use Op::*; 148 | match code { 149 | 0x00 => Nop.into(), 150 | 0x01 => Push(param).into(), 151 | 0x02 => Pop.into(), 152 | 0x03 => PopN(param as u32).into(), 153 | 0x04 => Dup.into(), 154 | 0x0a => LocA(param as u32).into(), 155 | 0x0b => ArgA(param as u32).into(), 156 | 0x0c => GlobA(param as u32).into(), 157 | 0x10 => Load8.into(), 158 | 0x11 => Load16.into(), 159 | 0x12 => Load32.into(), 160 | 0x13 => Load64.into(), 161 | 0x14 => Store8.into(), 162 | 0x15 => Store16.into(), 163 | 0x16 => Store32.into(), 164 | 0x17 => Store64.into(), 165 | 0x18 => Alloc.into(), 166 | 0x19 => Free.into(), 167 | 0x1a => StackAlloc(param as u32).into(), 168 | 0x20 => AddI.into(), 169 | 0x21 => SubI.into(), 170 | 0x22 => MulI.into(), 171 | 0x23 => DivI.into(), 172 | 0x24 => AddF.into(), 173 | 0x25 => SubF.into(), 174 | 0x26 => MulF.into(), 175 | 0x27 => DivF.into(), 176 | 0x28 => DivU.into(), 177 | 0x29 => Shl.into(), 178 | 0x2a => Shr.into(), 179 | 0x2b => And.into(), 180 | 0x2c => Or.into(), 181 | 0x2d => Xor.into(), 182 | 0x2e => Not.into(), 183 | 0x30 => CmpI.into(), 184 | 0x31 => CmpU.into(), 185 | 0x32 => CmpF.into(), 186 | 0x34 => NegI.into(), 187 | 0x35 => NegF.into(), 188 | 0x36 => IToF.into(), 189 | 0x37 => FToI.into(), 190 | 0x38 => ShrL.into(), 191 | 0x39 => SetLt.into(), 192 | 0x3a => SetGt.into(), 193 | 0x40 => BrA(param as u64).into(), 194 | 0x41 => Br(param as i64 as i32).into(), 195 | 0x42 => BrFalse(param as i64 as i32).into(), 196 | 0x43 => BrTrue(param as i64 as i32).into(), 197 | 
0x48 => Call(param as u32).into(), 198 | 0x49 => Ret.into(), 199 | 0x4a => CallName(param as u32).into(), 200 | 0x50 => ScanI.into(), 201 | 0x51 => ScanC.into(), 202 | 0x52 => ScanF.into(), 203 | 0x54 => PrintI.into(), 204 | 0x55 => PrintC.into(), 205 | 0x56 => PrintF.into(), 206 | 0x57 => PrintS.into(), 207 | 0x58 => PrintLn.into(), 208 | 0xfe => Panic.into(), 209 | _ => None, 210 | } 211 | } 212 | 213 | pub fn code_param(&self) -> u64 { 214 | use Op::*; 215 | match *self { 216 | Push(x) => x, 217 | PopN(x) => x as u64, 218 | LocA(x) => x as u64, 219 | ArgA(x) => x as u64, 220 | GlobA(x) => x as u64, 221 | StackAlloc(x) => x as u64, 222 | BrA(x) => x, 223 | Br(x) => x as i64 as u64, 224 | BrFalse(x) => x as i64 as u64, 225 | BrTrue(x) => x as i64 as u64, 226 | Call(x) => x as u64, 227 | CallName(x) => x as u64, 228 | _ => 0u64, 229 | } 230 | } 231 | } 232 | -------------------------------------------------------------------------------- /crates/r0vm/src/s0/io.rs: -------------------------------------------------------------------------------- 1 | //! Module for reading and writing s0 values 2 | use super::*; 3 | use tracing::*; 4 | // use nom::*; 5 | use std::io::{Read, Write}; 6 | 7 | /// Read and write from binary source 8 | pub trait WriteBinary: Sized { 9 | fn read_binary(r: &mut dyn Read) -> std::io::Result>; 10 | fn write_binary(&self, w: &mut dyn Write) -> std::io::Result<()>; 11 | } 12 | 13 | impl S0 { 14 | pub const MAGIC_NUMBER: u32 = 0x72303b3e; 15 | pub const VERSION: u32 = 1; 16 | } 17 | 18 | macro_rules! unwrap { 19 | ($e:expr) => { 20 | match $e { 21 | Some(x) => x, 22 | None => return Ok(None), 23 | } 24 | }; 25 | } 26 | 27 | macro_rules! read { 28 | ($ty:ty,$read:expr) => { 29 | match <$ty>::read_binary($read)? 
{ 30 | Some(x) => x, 31 | None => return Ok(None), 32 | } 33 | }; 34 | } 35 | 36 | impl WriteBinary for Vec 37 | where 38 | T: WriteBinary, 39 | { 40 | #[instrument(name = "vec/read", skip(r), err)] 41 | fn read_binary(r: &mut dyn Read) -> std::io::Result> { 42 | let size = read!(u32, r) as usize; 43 | debug!("vec size: {}", size); 44 | let mut vec = Vec::with_capacity(size); 45 | for _ in 0..size { 46 | let t = read!(T, r); 47 | vec.push(t); 48 | } 49 | Ok(Some(vec)) 50 | } 51 | 52 | fn write_binary(&self, w: &mut dyn Write) -> std::io::Result<()> { 53 | (self.len() as u32).write_binary(w)?; 54 | for item in self { 55 | item.write_binary(w)?; 56 | } 57 | Ok(()) 58 | } 59 | } 60 | 61 | impl WriteBinary for u8 { 62 | #[inline] 63 | fn read_binary(r: &mut dyn Read) -> std::io::Result> { 64 | let mut buf = [0u8; 1]; 65 | r.read_exact(&mut buf)?; 66 | Ok(Some(buf[0])) 67 | } 68 | 69 | #[inline] 70 | fn write_binary(&self, w: &mut dyn Write) -> std::io::Result<()> { 71 | w.write_all(&[*self]) 72 | } 73 | } 74 | 75 | impl WriteBinary for u32 { 76 | #[inline] 77 | fn read_binary(r: &mut dyn Read) -> std::io::Result> { 78 | let mut buf = [0u8; 4]; 79 | r.read_exact(&mut buf)?; 80 | Ok(Some(u32::from_be_bytes(buf))) 81 | } 82 | 83 | #[inline] 84 | fn write_binary(&self, w: &mut dyn Write) -> std::io::Result<()> { 85 | w.write_all(&self.to_be_bytes()) 86 | } 87 | } 88 | 89 | impl WriteBinary for u64 { 90 | #[inline] 91 | fn read_binary(r: &mut dyn Read) -> std::io::Result> { 92 | let mut buf = [0u8; 8]; 93 | r.read_exact(&mut buf)?; 94 | Ok(Some(u64::from_be_bytes(buf))) 95 | } 96 | 97 | #[inline] 98 | fn write_binary(&self, w: &mut dyn Write) -> std::io::Result<()> { 99 | w.write_all(&self.to_be_bytes()) 100 | } 101 | } 102 | 103 | impl WriteBinary for Op { 104 | #[instrument(name = "op/read", skip(r), err)] 105 | fn read_binary(r: &mut dyn Read) -> std::io::Result> { 106 | let opcode = unwrap!(u8::read_binary(r)?); 107 | let param_length = Op::param_size(opcode); 108 | let 
op = match param_length { 109 | 0 => Op::from_code(opcode, 0), 110 | 4 => Op::from_code(opcode, read!(u32, r) as u64), 111 | 8 => Op::from_code(opcode, read!(u64, r)), 112 | _ => unreachable!(), 113 | }; 114 | debug!("Op: {:?}", op); 115 | Ok(op) 116 | } 117 | 118 | fn write_binary(&self, w: &mut dyn Write) -> std::io::Result<()> { 119 | let opcode = self.code(); 120 | let param = self.code_param(); 121 | let param_len = Op::param_size(opcode); 122 | 123 | w.write_all(&[opcode])?; 124 | match param_len { 125 | 0 => (), 126 | 4 => { 127 | let x = param as u32; 128 | x.write_binary(w)? 129 | } 130 | 8 => param.write_binary(w)?, 131 | _ => unreachable!(), 132 | } 133 | Ok(()) 134 | } 135 | } 136 | 137 | impl WriteBinary for FnDef { 138 | #[instrument(name = "fn/read", skip(r), err)] 139 | fn read_binary(r: &mut dyn Read) -> std::io::Result> { 140 | let name = read!(u32, r); 141 | debug!("name: {}", name); 142 | let ret_slots = read!(u32, r); 143 | debug!("ret_slots: {}", ret_slots); 144 | let param_slots = read!(u32, r); 145 | debug!("param_slots: {}", param_slots); 146 | let loc_slots = read!(u32, r); 147 | debug!("loc_slots: {}", loc_slots); 148 | let ins = read!(Vec, r); 149 | Ok(Some(FnDef { 150 | name, 151 | ret_slots, 152 | param_slots, 153 | loc_slots, 154 | ins, 155 | })) 156 | } 157 | 158 | fn write_binary(&self, w: &mut dyn Write) -> std::io::Result<()> { 159 | self.name.write_binary(w)?; 160 | self.ret_slots.write_binary(w)?; 161 | self.param_slots.write_binary(w)?; 162 | self.loc_slots.write_binary(w)?; 163 | self.ins.write_binary(w) 164 | } 165 | } 166 | 167 | impl WriteBinary for GlobalValue { 168 | #[instrument(name = "global/read", skip(r), err)] 169 | fn read_binary(r: &mut dyn Read) -> std::io::Result> { 170 | let is_const = read!(u8, r); 171 | let payload = read!(Vec, r); 172 | Ok(Some(GlobalValue { 173 | is_const: is_const != 0, 174 | bytes: payload, 175 | })) 176 | } 177 | fn write_binary(&self, w: &mut dyn Write) -> std::io::Result<()> { 178 | 
(self.is_const as u8).write_binary(w)?; 179 | self.bytes.write_binary(w) 180 | } 181 | } 182 | 183 | impl WriteBinary for S0 { 184 | #[instrument(name = "o0/read", skip(r), err)] 185 | fn read_binary(r: &mut dyn Read) -> std::io::Result> { 186 | let magic_number = read!(u32, r); 187 | debug!("Magic {:08x}", magic_number); 188 | let version = read!(u32, r); 189 | debug!("Version {:08x}", version); 190 | if magic_number != S0::MAGIC_NUMBER || version != S0::VERSION { 191 | return Ok(None); 192 | } 193 | let global_values = read!(Vec, r); 194 | let fn_defs = read!(Vec, r); 195 | Ok(Some(S0 { 196 | globals: global_values, 197 | functions: fn_defs, 198 | })) 199 | } 200 | 201 | fn write_binary(&self, w: &mut dyn Write) -> std::io::Result<()> { 202 | S0::MAGIC_NUMBER.write_binary(w)?; 203 | S0::VERSION.write_binary(w)?; 204 | self.globals.write_binary(w)?; 205 | self.functions.write_binary(w) 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /crates/r0vm/src/s0/mod.rs: -------------------------------------------------------------------------------- 1 | // #[cfg(parse)] 2 | pub mod io; 3 | 4 | use crate::opcodes::Op; 5 | #[cfg(feature = "serde")] 6 | use serde::{Deserialize, Serialize}; 7 | use std::{collections::HashMap, fmt::Display}; 8 | 9 | /// S0 Assembly for use in R0VM 10 | #[derive(Debug, PartialEq, Eq, Clone)] 11 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 12 | #[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))] 13 | pub struct S0 { 14 | pub globals: Vec, 15 | pub functions: Vec, 16 | } 17 | 18 | impl Display for S0 { 19 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 20 | for global in &self.globals { 21 | writeln!(f, "{}", global)?; 22 | } 23 | writeln!(f)?; 24 | for func in &self.functions { 25 | writeln!(f, "{}", func)?; 26 | } 27 | Ok(()) 28 | } 29 | } 30 | 31 | /// Global variable or constant, described by bytes, addressed by ID 32 | #[derive(Debug, 
PartialEq, Eq, Clone)] 33 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 34 | #[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))] 35 | pub struct GlobalValue { 36 | pub is_const: bool, 37 | pub bytes: Vec, 38 | } 39 | 40 | impl Display for GlobalValue { 41 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 42 | if self.is_const { 43 | write!(f, "const:")?; 44 | } else { 45 | write!(f, "static:")?; 46 | } 47 | for byte in &self.bytes { 48 | write!(f, " {:X}", byte)?; 49 | } 50 | if let Ok(s) = String::from_utf8(self.bytes.clone()) { 51 | write!(f, " (`{}`)", s.escape_default())?; 52 | } 53 | writeln!(f) 54 | } 55 | } 56 | 57 | /// Function definition 58 | #[derive(Debug, PartialEq, Eq, Clone)] 59 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 60 | #[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))] 61 | pub struct FnDef { 62 | pub name: u32, 63 | pub ret_slots: u32, 64 | pub param_slots: u32, 65 | pub loc_slots: u32, 66 | pub ins: Vec, 67 | } 68 | 69 | impl Display for FnDef { 70 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 71 | writeln!( 72 | f, 73 | "fn [{}] {} {} -> {} {{", 74 | self.name, self.loc_slots, self.param_slots, self.ret_slots 75 | )?; 76 | for (idx, op) in self.ins.iter().enumerate() { 77 | writeln!(f, "{:5}: {:?}", idx, op)?; 78 | } 79 | writeln!(f, "}}") 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /crates/r0vm/src/tests/mod.rs: -------------------------------------------------------------------------------- 1 | mod ser; 2 | 3 | use super::*; 4 | use crate::error::*; 5 | use crate::opcodes::*; 6 | use crate::s0::*; 7 | use crate::vm::ops::{reinterpret_t, reinterpret_u}; 8 | use crate::vm::*; 9 | use ntest::timeout; 10 | 11 | #[test] 12 | pub fn base_test() { 13 | let s0 = s0_bin!( 14 | fn _start 0 0 -> 0 { 15 | Push(1), 16 | Push(2), 17 | AddI, 18 | IToF, 19 | Push(unsafe { 
std::mem::transmute(0.4f64) }), 20 | MulF, 21 | } 22 | ); 23 | let stdin = std::io::empty(); 24 | let stdout = std::io::sink(); 25 | let mut vm = R0Vm::new(&s0, Box::new(stdin), Box::new(stdout)).unwrap(); 26 | for _ in 0..3 { 27 | vm.step().unwrap(); 28 | } 29 | let stack = vm.stack(); 30 | assert_eq!(stack[3..], vec![3u64][..]); 31 | for _ in 0..3 { 32 | vm.step().unwrap(); 33 | } 34 | let stack = vm.stack(); 35 | assert!((unsafe { std::mem::transmute::<_, f64>(stack[3]) } - 1.2f64).abs() < 1e-10); 36 | } 37 | 38 | #[test] 39 | pub fn panic_test() { 40 | let s0 = s0_bin! ( 41 | fn _start 0 0 -> 0 { 42 | Panic 43 | } 44 | ); 45 | let stdin = std::io::empty(); 46 | let stdout = std::io::sink(); 47 | let mut vm = R0Vm::new(&s0, Box::new(stdin), Box::new(stdout)).unwrap(); 48 | let e = vm.run_to_end().unwrap_err(); 49 | assert!(matches!(e, Error::Halt)) 50 | } 51 | 52 | #[test] 53 | pub fn call_test() { 54 | let s0 = s0_bin! ( 55 | fn _start 0 0 -> 0 { 56 | StackAlloc(1), 57 | Push(1), 58 | Push(2), 59 | Call(1), 60 | } 61 | fn main 1 2 -> 1 { 62 | ArgA(0) 63 | ArgA(1) 64 | Load64 65 | ArgA(2) 66 | Load64 67 | AddI 68 | Store64 69 | Ret 70 | } 71 | ); 72 | let stdin = std::io::empty(); 73 | let stdout = std::io::sink(); 74 | let mut vm = R0Vm::new(&s0, Box::new(stdin), Box::new(stdout)).unwrap(); 75 | match vm.run_to_end() { 76 | Ok(_) => {} 77 | Err(e) => panic!("{}, stack:\n{}", e, vm.debug_stack()), 78 | }; 79 | assert_eq!( 80 | vm.stack()[3..], 81 | vec![3u64][..], 82 | "stack:\n{}", 83 | vm.debug_stack() 84 | ) 85 | } 86 | 87 | #[test] 88 | pub fn simple_local_var_test() { 89 | let s0 = s0_bin! 
( 90 | fn _start 1 0 -> 0 { 91 | // store 1 92 | LocA(0) 93 | Push(1) 94 | Store32 95 | 96 | // store 2 97 | LocA(0) 98 | Push(4) 99 | AddI 100 | Push(2) 101 | Store16 102 | 103 | // store 3 104 | LocA(0) 105 | Push(6) 106 | AddI 107 | Push(3) 108 | Store8 109 | 110 | // load 1 111 | LocA(0) 112 | Load32 113 | 114 | LocA(0) 115 | Push(4) 116 | AddI 117 | Load16 118 | 119 | LocA(0) 120 | Push(6) 121 | AddI 122 | Load8 123 | } 124 | ); 125 | let stdin = std::io::empty(); 126 | let stdout = std::io::sink(); 127 | let mut vm = R0Vm::new(&s0, Box::new(stdin), Box::new(stdout)).unwrap(); 128 | vm.run_to_end().unwrap(); 129 | assert_eq!( 130 | vm.stack()[3], 131 | 0x00_03_0002_00000001, 132 | "stack:\n{}", 133 | vm.debug_stack() 134 | ); 135 | assert_eq!( 136 | vm.stack()[4..], 137 | vec![1u64, 2, 3][..], 138 | "stack:\n{}", 139 | vm.debug_stack() 140 | ); 141 | } 142 | 143 | #[test] 144 | pub fn simple_alloc_test() { 145 | let s0 = s0_bin! ( 146 | fn _start 0 0 -> 0 { 147 | Push(8), 148 | Alloc, 149 | Dup, 150 | Push(0x10008086), 151 | Store64, 152 | Load64 153 | } 154 | ); 155 | let stdin = std::io::empty(); 156 | let stdout = std::io::sink(); 157 | let mut vm = R0Vm::new(&s0, Box::new(stdin), Box::new(stdout)).unwrap(); 158 | vm.run_to_end().unwrap(); 159 | assert_eq!( 160 | vm.stack()[3..], 161 | vec![0x10008086u64][..], 162 | "stack:\n{}", 163 | vm.debug_stack() 164 | ) 165 | } 166 | 167 | #[test] 168 | pub fn simple_branch_test() { 169 | let s0 = s0_bin! 
( 170 | fn _start 0 0 -> 0 { 171 | Push(0) 172 | Push(1) 173 | CmpI 174 | BrFalse(2) 175 | Br(2) 176 | Push(3) 177 | Br(1) 178 | Push(5) 179 | } 180 | ); 181 | let stdin = std::io::empty(); 182 | let stdout = std::io::sink(); 183 | let mut vm = R0Vm::new(&s0, Box::new(stdin), Box::new(stdout)).unwrap(); 184 | vm.run_to_end().unwrap(); 185 | 186 | assert_eq!( 187 | vm.stack()[3..], 188 | vec![5u64][..], 189 | "stack:\n{}", 190 | vm.debug_stack() 191 | ) 192 | } 193 | 194 | #[test] 195 | pub fn simple_branch_test_2() { 196 | let s0 = s0_bin! ( 197 | fn _start 0 0 -> 0 { 198 | Push(0) 199 | Push(1) 200 | CmpI 201 | SetGt 202 | BrFalse(3) 203 | Br(2) 204 | Push(3) 205 | Br(1) 206 | Push(5) 207 | } 208 | ); 209 | let stdin = std::io::empty(); 210 | let stdout = std::io::sink(); 211 | let mut vm = R0Vm::new(&s0, Box::new(stdin), Box::new(stdout)).unwrap(); 212 | vm.run_to_end().unwrap(); 213 | 214 | assert_eq!( 215 | vm.stack()[3..], 216 | vec![5u64][..], 217 | "stack:\n{}", 218 | vm.debug_stack() 219 | ) 220 | } 221 | 222 | #[test] 223 | pub fn simple_stdin_test() { 224 | let s0 = s0_bin! ( 225 | fn _start 0 0 -> 0 { 226 | ScanC 227 | ScanF 228 | ScanI 229 | } 230 | ); 231 | let stdin = std::io::Cursor::new("A3.1415926e3 1234"); 232 | let stdout = std::io::sink(); 233 | let mut vm = R0Vm::new(&s0, Box::new(stdin), Box::new(stdout)).unwrap(); 234 | vm.run_to_end().unwrap(); 235 | assert_eq!(vm.stack()[3], b'A' as u64); 236 | assert!((reinterpret_u::(vm.stack()[4]) - 3.1415926e3f64).abs() < 1e-10); 237 | assert_eq!(vm.stack()[5], 1234u64, "stack:\n{}", vm.debug_stack()); 238 | } 239 | 240 | #[test] 241 | pub fn simple_global_test() { 242 | let s0 = s0_bin! 
( 243 | const 0x1234u64; 244 | let 0x5678u64; 245 | fn _start 0 0 -> 0 { 246 | GlobA(0) 247 | Load64 248 | GlobA(1) 249 | Load64 250 | } 251 | ); 252 | let stdin = std::io::empty(); 253 | let stdout = std::io::sink(); 254 | let mut vm = R0Vm::new(&s0, Box::new(stdin), Box::new(stdout)).unwrap(); 255 | vm.run_to_end().unwrap(); 256 | assert_eq!( 257 | vm.stack()[3..], 258 | vec![0x1234u64, 0x5678u64][..], 259 | "stack:\n{}", 260 | vm.debug_stack() 261 | ) 262 | } 263 | 264 | #[test] 265 | pub fn stacktrace_test() { 266 | let s0 = s0_bin! ( 267 | fn _start 0 0 -> 0 { 268 | Call(1) 269 | } 270 | fn main 0 0 -> 0 { 271 | StackAlloc(1) 272 | Push(1) 273 | Push(2) 274 | Call(2) 275 | Ret 276 | } 277 | fn test 1 2 -> 1 { 278 | ArgA(0) 279 | ArgA(1) 280 | Load64 281 | ArgA(2) 282 | Load64 283 | AddI 284 | Store64 285 | Ret 286 | } 287 | ); 288 | let stdin = std::io::empty(); 289 | let stdout = std::io::sink(); 290 | let mut vm = R0Vm::new(&s0, Box::new(stdin), Box::new(stdout)).unwrap(); 291 | for _ in 0..9 { 292 | vm.step().unwrap(); 293 | } 294 | let (stacktrace, corrupted) = vm.stack_trace(); 295 | assert!(!corrupted, "The stack should not be corrupted"); 296 | let expected = vec![ 297 | StackInfo { 298 | fn_name: Some("test".into()), 299 | fn_id: 2, 300 | inst: 4, 301 | }, 302 | StackInfo { 303 | fn_name: Some("main".into()), 304 | fn_id: 1, 305 | inst: 4, 306 | }, 307 | StackInfo { 308 | fn_name: Some("_start".into()), 309 | fn_id: 0, 310 | inst: 1, 311 | }, 312 | ]; 313 | assert_eq!(stacktrace, expected); 314 | } 315 | -------------------------------------------------------------------------------- /crates/r0vm/src/tests/ser.rs: -------------------------------------------------------------------------------- 1 | use crate::s0::io::*; 2 | use crate::s0::*; 3 | 4 | use crate::opcodes::Op::*; 5 | #[test] 6 | fn test_ser() { 7 | let src: Vec = vec![ 8 | 0x72, 0x30, 0x3b, 0x3e, // magic 9 | 0x00, 0x00, 0x00, 0x01, // version 10 | 0x00, 0x00, 0x00, 0x02, // global.len 11 | 
0x00, // global.1.is_const 12 | 0x00, 0x00, 0x00, 0x03, // global.1.len 13 | 0x00, 0x01, 0x02, // global.1.payload 14 | 0x01, // global.2.is_const 15 | 0x00, 0x00, 0x00, 0x06, // global.2.len 16 | b'_', b's', b't', b'a', b'r', b't', // global.2.payload 17 | 0x00, 0x00, 0x00, 0x01, // fns.len 18 | 0x00, 0x00, 0x00, 0x01, // fns.1.name 19 | 0x00, 0x00, 0x00, 0x00, // fns.1.ret_slots 20 | 0x00, 0x00, 0x00, 0x00, // fns.1.param_slots 21 | 0x00, 0x00, 0x00, 0x00, // fns.1.loc_slots 22 | 0x00, 0x00, 0x00, 0x04, // fns.1.ins.len 23 | // fns.1.ins: 24 | 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // Push(1) 25 | 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, // Push(2) 26 | 0x20, // AddI 27 | 0x34, // NegI 28 | // finish 29 | ]; 30 | let s0 = crate::s0_bin!( 31 | let vec![0x00,0x01,0x02]; 32 | fn _start 0 0 -> 0 { 33 | Push(1) 34 | Push(2) 35 | AddI 36 | NegI 37 | } 38 | ); 39 | let des = S0::read_binary(&mut &src[..]).unwrap().unwrap(); 40 | assert_eq!(des, s0); 41 | let mut ser = vec![]; 42 | des.write_binary(&mut ser).unwrap(); 43 | assert_eq!(ser, src); 44 | } 45 | -------------------------------------------------------------------------------- /crates/r0vm/src/util.rs: -------------------------------------------------------------------------------- 1 | pub trait IntoBytes { 2 | fn into_bytes(&self) -> Vec; 3 | } 4 | 5 | macro_rules! 
impl_into_bytes_num { 6 | ($ty:ty) => { 7 | impl IntoBytes for $ty { 8 | fn into_bytes(&self) -> Vec { 9 | self.to_ne_bytes().to_vec() 10 | } 11 | } 12 | }; 13 | } 14 | 15 | impl_into_bytes_num!(u8); 16 | impl_into_bytes_num!(u16); 17 | impl_into_bytes_num!(u32); 18 | impl_into_bytes_num!(u64); 19 | impl_into_bytes_num!(u128); 20 | impl_into_bytes_num!(i8); 21 | impl_into_bytes_num!(i16); 22 | impl_into_bytes_num!(i32); 23 | impl_into_bytes_num!(i64); 24 | impl_into_bytes_num!(i128); 25 | impl_into_bytes_num!(f32); 26 | impl_into_bytes_num!(f64); 27 | 28 | impl IntoBytes for [u8] { 29 | fn into_bytes(&self) -> Vec { 30 | self.to_vec() 31 | } 32 | } 33 | 34 | impl IntoBytes for str { 35 | fn into_bytes(&self) -> Vec { 36 | self.as_bytes().to_vec() 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /crates/r0vm/src/vm/mem.rs: -------------------------------------------------------------------------------- 1 | //! Memory-related implementations for R0VM 2 | use super::*; 3 | use static_assertions as sa; 4 | use std::alloc::Layout; 5 | 6 | /// A piece of managed memory owned by the virtual machine 7 | pub struct ManagedMemory { 8 | ptr: *mut u8, 9 | layout: Layout, 10 | is_const: bool, 11 | } 12 | 13 | #[allow(clippy::len_without_is_empty)] 14 | impl ManagedMemory { 15 | /// Allocate a piece of managed memory using global allocator 16 | pub fn alloc(layout: Layout) -> Result { 17 | if layout.size() == 0 { 18 | return Err(Error::AllocZero); 19 | } 20 | let mem = unsafe { std::alloc::alloc_zeroed(layout) }; 21 | if mem.is_null() { 22 | return Err(Error::OutOfMemory); 23 | } 24 | Ok(ManagedMemory { 25 | ptr: mem, 26 | layout, 27 | is_const: false, 28 | }) 29 | } 30 | 31 | /// Allocate a piece of managed memory using global allocator 32 | pub fn from_slice(slice: &[u8]) -> Result { 33 | if slice.len() == 0 { 34 | return Err(Error::AllocZero); 35 | } 36 | let layout = Layout::from_size_align(slice.len(), 8)?; 37 | let mem = 
unsafe { std::alloc::alloc_zeroed(layout) }; 38 | if mem.is_null() { 39 | return Err(Error::OutOfMemory); 40 | } 41 | 42 | // copy slice content to memory 43 | unsafe { 44 | slice.as_ptr().copy_to_nonoverlapping(mem, slice.len()); 45 | } 46 | 47 | Ok(ManagedMemory { 48 | ptr: mem, 49 | layout, 50 | is_const: false, 51 | }) 52 | } 53 | 54 | /// Construct a piece of managed memory using raw pointer and layout 55 | pub unsafe fn new(ptr: *mut u8, layout: Layout, is_const: bool) -> ManagedMemory { 56 | ManagedMemory { 57 | ptr, 58 | layout, 59 | is_const, 60 | } 61 | } 62 | 63 | /// Length of the memory 64 | pub fn len(&self) -> usize { 65 | self.layout.size() 66 | } 67 | 68 | /// Get the memory as slice 69 | pub unsafe fn get_slice(&self) -> &[u8] { 70 | std::slice::from_raw_parts(self.ptr, self.layout.size()) 71 | } 72 | 73 | /// Get the memory as mutable slice 74 | pub unsafe fn get_slice_mut(&mut self) -> &mut [u8] { 75 | std::slice::from_raw_parts_mut(self.ptr, self.layout.size()) 76 | } 77 | 78 | /// Get the memory as raw pointer 79 | pub fn get_ptr(&self) -> *mut u8 { 80 | self.ptr 81 | } 82 | } 83 | 84 | impl Drop for ManagedMemory { 85 | fn drop(&mut self) { 86 | unsafe { std::alloc::dealloc(self.ptr, self.layout) } 87 | } 88 | } 89 | 90 | pub fn stack_idx_to_vm_addr(idx: usize) -> u64 { 91 | R0Vm::STACK_START + (idx as u64) * 8 92 | } 93 | 94 | pub fn vm_addr_to_stack_idx(addr: u64) -> (usize, usize) { 95 | let off = addr - R0Vm::STACK_START; 96 | let idx = off / 8; 97 | let off = off % 8; 98 | (idx as usize, off as usize) 99 | } 100 | 101 | #[inline] 102 | pub fn round_up_to_multiple(x: u64, mult: u64) -> u64 { 103 | match x % mult { 0 => x, rem => x + (mult - rem) } // an already-aligned `x` is returned unchanged 104 | } 105 | 106 | impl<'src> R0Vm<'src> { 107 | pub const HEAP_START: u64 = 0x00000001_00000000; 108 | pub const STACK_START: u64 = 0xffffffff_00000000; 109 | // pub const STACK_END: u64 = 0xffffffff_00000000; 110 | 111 | // * Heap stuff --> 112 | 113 | /// Find the piece of heap memory by address. 
114 | /// Returns the managed memory instance and the index offset from start. 115 | pub fn get_heap_mem_managed_ref(&self, addr: u64) -> Result<(&ManagedMemory, usize)> { 116 | let range = self 117 | .heap 118 | .range((std::ops::Bound::Unbounded, std::ops::Bound::Included(addr))); 119 | // Get the last memory chunk that is less or equal than address 120 | let (start_addr, mem) = range.last().ok_or(Error::InvalidAddress(addr))?; 121 | let addr_offset = addr - start_addr; 122 | if addr_offset > mem.len() as u64 { 123 | Err(Error::InvalidAddress(addr)) 124 | } else { 125 | Ok((mem, addr_offset as usize)) 126 | } 127 | } 128 | 129 | fn get_heap_mem_ptr(&self, addr: u64) -> Result<*mut u8> { 130 | assert!(addr < R0Vm::STACK_START); 131 | 132 | let alignment_of_t = std::mem::align_of::(); 133 | if addr % alignment_of_t as u64 != 0 { 134 | return Err(Error::UnalignedAccess(addr)); 135 | } 136 | 137 | let (slice, offset) = self.get_heap_mem_managed_ref(addr)?; 138 | let sizeof_t = std::mem::size_of::(); 139 | 140 | // Check remaining space is enough 141 | if sizeof_t + offset > slice.len() { 142 | return Err(Error::InvalidAddress(addr)); 143 | } 144 | 145 | let t_ptr = unsafe { slice.get_ptr().add(offset) }; 146 | Ok(t_ptr) 147 | } 148 | 149 | /// Assuming `mem` is heap memory, get the reference of this memory as `&T`. 150 | /// 151 | /// # Safety 152 | /// 153 | /// addr must be a valid heap memory pointer 154 | pub unsafe fn heap_mem_ref(&self, addr: u64) -> Result<&T> { 155 | let t_ptr = self.get_heap_mem_ptr::(addr)?; 156 | let t_ptr = t_ptr as *mut T; 157 | Ok(&*t_ptr) 158 | } 159 | 160 | /// Assuming `mem` is heap memory, get the reference of this memory as `&mut T`. 
161 | /// 162 | /// # Safety 163 | /// 164 | /// addr must be a valid heap memory pointer 165 | pub unsafe fn heap_mem_mut(&self, addr: u64) -> Result<&mut T> { 166 | let t_ptr = self.get_heap_mem_ptr::(addr)?; 167 | let t_ptr = t_ptr as *mut T; 168 | Ok(&mut *t_ptr) 169 | } 170 | 171 | /// Assuming `mem` is heap memory, get the reference of this memory as `&T`. 172 | /// 173 | /// # Safety 174 | /// 175 | /// addr must be a valid heap memory pointer 176 | pub unsafe fn heap_mem_get(&self, addr: u64) -> Result 177 | where 178 | T: Copy, 179 | { 180 | let t_ptr = self.get_heap_mem_ptr::(addr)?; 181 | let t_ptr = t_ptr as *mut T; 182 | Ok(*t_ptr) 183 | } 184 | 185 | /// Assuming `mem` is heap memory, get the reference of this memory as `&mut T`. 186 | /// 187 | /// # Safety 188 | /// 189 | /// addr must be a valid heap memory pointer 190 | pub unsafe fn heap_mem_set(&self, addr: u64, val: T) -> Result<()> { 191 | let t_ptr = self.get_heap_mem_ptr::(addr)?; 192 | let t_ptr = t_ptr as *mut T; 193 | *t_ptr = val; 194 | Ok(()) 195 | } 196 | 197 | /// Allocate a piece of memory of length `len` onto heap. Returns address. 198 | pub fn alloc_heap(&mut self, len: usize, alignment: usize) -> Result { 199 | let mem = unsafe { ManagedMemory::alloc(Layout::from_size_align(len, alignment)?)? }; 200 | let mem_addr = self 201 | .heap 202 | .iter() 203 | .next_back() 204 | .map(|(k, v)| round_up_to_multiple(*k + v.len() as u64, alignment as u64)) 205 | .unwrap_or(R0Vm::HEAP_START); 206 | self.heap.insert(mem_addr, mem); 207 | Ok(mem_addr) 208 | } 209 | 210 | /// Free a piece of memory specified by `addr`. Will return an error if 211 | /// memory is not the very same address as the allocator returns. 212 | pub fn free_heap(&mut self, addr: u64) -> Result<()> { 213 | let mem = self.heap.remove(&addr).ok_or(Error::InvalidDeallocation)?; 214 | drop(mem); 215 | Ok(()) 216 | } 217 | 218 | // * Stack stuff --> 219 | 220 | /// Get the stack memory specified by `addr`. 
221 | /// 222 | /// Note that this function allows reading past `bp`. 223 | pub fn get_stack_mem(&self, addr: u64) -> Result 224 | where 225 | T: Copy, 226 | { 227 | let alignof_t = std::mem::align_of::(); 228 | if addr % alignof_t as u64 != 0 { 229 | return Err(Error::UnalignedAccess(addr)); 230 | } 231 | let raw_off = (addr - R0Vm::STACK_START) as usize; 232 | if raw_off > self.max_stack_size * std::mem::size_of::() { 233 | return Err(Error::InvalidAddress(addr)); 234 | } 235 | 236 | unsafe { 237 | let raw_ptr = (self.stack as *mut u8).add(raw_off) as *mut T; 238 | let val = raw_ptr.read(); 239 | 240 | Ok(val) 241 | } 242 | } 243 | 244 | #[inline] 245 | pub fn stack_slot_get(&self, p: usize) -> Result { 246 | if p > self.max_stack_size { 247 | return Err(Error::InvalidStackOffset(p as u64, 0)); 248 | } 249 | unsafe { 250 | let ptr = self.stack.add(p); 251 | let val = *ptr; 252 | Ok(val) 253 | } 254 | } 255 | 256 | pub fn set_stack_mem(&mut self, addr: u64, set_val: T) -> Result<()> 257 | where 258 | T: Copy + Into, 259 | { 260 | let alignof_t = std::mem::align_of::(); 261 | if addr % alignof_t as u64 != 0 { 262 | return Err(Error::UnalignedAccess(addr)); 263 | } 264 | let raw_off = (addr - R0Vm::STACK_START) as usize; 265 | if raw_off > self.max_stack_size * std::mem::size_of::() { 266 | return Err(Error::InvalidAddress(addr)); 267 | } 268 | 269 | unsafe { 270 | let raw_ptr = (self.stack as *mut u8).add(raw_off) as *mut T; 271 | raw_ptr.write(set_val); 272 | 273 | Ok(()) 274 | } 275 | } 276 | 277 | #[inline] 278 | pub fn stack_slot_set(&self, p: usize, val: Slot) -> Result<()> { 279 | if p > self.max_stack_size { 280 | return Err(Error::InvalidStackOffset(p as u64, 0)); 281 | } 282 | unsafe { 283 | let ptr = self.stack.add(p); 284 | *ptr = val; 285 | Ok(()) 286 | } 287 | } 288 | 289 | #[inline] 290 | pub fn stack_push(&mut self, val: Slot) -> Result<()> { 291 | self.stack_slot_set(self.sp, val)?; 292 | self.sp += 1; 293 | Ok(()) 294 | } 295 | 296 | #[inline] 297 
| pub fn stack_pop(&mut self) -> Result { 298 | self.sp -= 1; 299 | self.stack_slot_get(self.sp) 300 | } 301 | 302 | #[inline] 303 | pub fn stack_top(&mut self) -> Result { 304 | if self.sp == 0 { 305 | return Err(Error::StackUnderflow); 306 | } 307 | self.stack_slot_get(self.sp - 1) 308 | } 309 | 310 | #[inline] 311 | pub fn stack_truncate_by(&mut self, size: usize) -> Result<()> { 312 | if size > self.sp { 313 | return Err(Error::StackUnderflow); 314 | } 315 | self.sp -= size; 316 | Ok(()) 317 | } 318 | 319 | // * Misc stuff --> 320 | 321 | /// Access an immutable reference of a piece of memory at `addr` 322 | pub fn access_mem_get(&self, addr: u64) -> Result 323 | where 324 | T: Copy, 325 | { 326 | if addr < R0Vm::STACK_START { 327 | // Heap vars 328 | unsafe { self.heap_mem_get::(addr) } 329 | } else { 330 | // Stack vars 331 | self.get_stack_mem(addr) 332 | } 333 | } 334 | 335 | /// Access a mutable reference of a piece of memory at `addr` 336 | pub fn access_mem_set(&mut self, addr: u64, val: T) -> Result<()> 337 | where 338 | T: Copy + Into, 339 | { 340 | if addr < R0Vm::STACK_START { 341 | // Heap vars 342 | unsafe { self.heap_mem_set::(addr, val) } 343 | } else { 344 | // Stack vars 345 | self.set_stack_mem(addr, val) 346 | } 347 | } 348 | } 349 | -------------------------------------------------------------------------------- /crates/r0vm/src/vm/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod mem; 2 | pub mod ops; 3 | 4 | use crate::error::*; 5 | use crate::{opcodes::Op, s0::*}; 6 | use mem::*; 7 | use ops::*; 8 | use smol_str::SmolStr; 9 | use std::{ 10 | borrow::Cow, 11 | collections::{BTreeMap, HashMap}, 12 | io::Write, 13 | io::{Bytes, Read}, 14 | }; 15 | 16 | pub const MAX_STACK_SIZE: usize = 131072; 17 | 18 | pub type Slot = u64; 19 | pub type Addr = u64; 20 | 21 | /// An interpreter running S0 code. 
22 | pub struct R0Vm<'src> { 23 | /// Source file 24 | src: &'src S0, 25 | max_stack_size: usize, 26 | 27 | /// Global variable index 28 | global_idx: HashMap, 29 | /// Global variable index 30 | function_idx: HashMap, 31 | 32 | /// Memory heap 33 | heap: BTreeMap, 34 | /// Memory stack 35 | stack: *mut u64, 36 | 37 | /// Function Pointer 38 | fn_info: &'src FnDef, 39 | /// Function ID 40 | fn_id: usize, 41 | /// Instruction Pointer 42 | ip: usize, 43 | /// Stack Pointer 44 | sp: usize, 45 | /// Base Pointer 46 | bp: usize, 47 | 48 | /// Standard Input Stream 49 | stdin: Bytes>, 50 | /// Standard Output Stream 51 | stdout: Box, 52 | } 53 | 54 | impl<'src> R0Vm<'src> { 55 | pub fn new(src: &'src S0, stdin: Box, stdout: Box) -> Result> { 56 | let start = src.functions.get(0).ok_or(Error::NoEntryPoint)?; 57 | let stack = unsafe { 58 | std::alloc::alloc_zeroed(std::alloc::Layout::array::(MAX_STACK_SIZE).unwrap()) 59 | as *mut u64 60 | }; 61 | 62 | unsafe { 63 | // push sentinel values 64 | let usize_max = usize::max_value() as u64; 65 | stack.add(0).write(usize_max); 66 | stack.add(1).write(usize_max); 67 | stack.add(2).write(usize_max); 68 | } 69 | 70 | let bp = 0usize; 71 | let sp = (start.loc_slots + 3) as usize; 72 | let (globals, global_idx) = Self::index_globals(&src.globals[..])?; 73 | let function_idx = Self::index_functions(src)?; 74 | Ok(R0Vm { 75 | src, 76 | max_stack_size: MAX_STACK_SIZE, 77 | global_idx, 78 | function_idx, 79 | heap: globals, 80 | stack, 81 | fn_info: start, 82 | fn_id: 0, 83 | ip: 0, 84 | bp, 85 | sp, 86 | stdin: stdin.bytes(), 87 | stdout, 88 | }) 89 | } 90 | 91 | fn index_globals( 92 | globals: &[GlobalValue], 93 | ) -> Result<(BTreeMap, HashMap)> { 94 | let mut curr_max_addr = 0u64; 95 | 96 | let mut globals_map = BTreeMap::new(); 97 | let mut idx = HashMap::new(); 98 | 99 | for val in globals.into_iter().enumerate() { 100 | let (i, x) = val; 101 | let x: &GlobalValue = x; 102 | let len = x.bytes.len(); 103 | let managed = 
ManagedMemory::from_slice(&x.bytes[..])?; 104 | 105 | let mem_addr = round_up_to_multiple(curr_max_addr + len as u64, 8); 106 | curr_max_addr = mem_addr; 107 | if mem_addr >= R0Vm::HEAP_START { 108 | return Err(Error::OutOfMemory); 109 | } 110 | 111 | globals_map.insert(mem_addr, managed); 112 | idx.insert(i as u32, mem_addr); 113 | } 114 | Ok((globals_map, idx)) 115 | } 116 | 117 | fn index_functions(asm: &S0) -> Result> { 118 | let mut res = HashMap::new(); 119 | for (idx, f) in asm.functions.iter().enumerate() { 120 | let name = asm 121 | .globals 122 | .get(f.name as usize) 123 | .ok_or(Error::InvalidFunctionNameIndex(idx, f.name))?; 124 | let name = String::from_utf8_lossy(&name.bytes); 125 | let name = SmolStr::new(&name); 126 | res.insert(name, idx as u32); 127 | } 128 | Ok(res) 129 | } 130 | 131 | #[inline] 132 | pub fn step(&mut self) -> Result { 133 | let op = self.get_next_instruction()?; 134 | self.exec_instruction(op)?; 135 | Ok(op) 136 | } 137 | 138 | /// Drive virtual machine to end, and abort when any error occurs. 139 | pub fn run_to_end(&mut self) -> Result<()> { 140 | loop { 141 | match self.step() { 142 | Ok(_) => (), 143 | Err(Error::ControlReachesEnd(0)) => break Ok(()), 144 | Err(e) => break Err(e), 145 | } 146 | } 147 | } 148 | 149 | /// Drive virtual machine to end with an inspecting function to break when returning 150 | /// false, and abort when any error occurs. 
151 | pub fn run_to_end_inspect(&mut self, mut inspect: F) -> Result<()> 152 | where 153 | F: FnMut(&Self) -> bool, 154 | { 155 | loop { 156 | let res = self.step(); 157 | if !inspect(self) { 158 | break Ok(()); 159 | } 160 | match res { 161 | Ok(_) => (), 162 | Err(Error::ControlReachesEnd(0)) => break Ok(()), 163 | Err(e) => return Err(e), 164 | } 165 | } 166 | } 167 | 168 | pub fn is_at_end(&self) -> bool { 169 | self.fn_id == 0 && self.ip == self.fn_info.ins.len() 170 | } 171 | 172 | #[inline] 173 | fn get_next_instruction(&mut self) -> Result { 174 | let op = *self 175 | .fn_info 176 | .ins 177 | .get(self.ip) 178 | .ok_or(Error::ControlReachesEnd(self.fn_id))?; 179 | self.ip += 1; 180 | Ok(op) 181 | } 182 | 183 | pub fn fn_info(&self) -> &FnDef { 184 | self.fn_info 185 | } 186 | 187 | pub fn fn_id(&self) -> usize { 188 | self.fn_id 189 | } 190 | 191 | pub fn ip(&self) -> usize { 192 | self.ip 193 | } 194 | 195 | pub fn sp(&self) -> usize { 196 | self.sp 197 | } 198 | 199 | pub fn bp(&self) -> usize { 200 | self.bp 201 | } 202 | 203 | pub(crate) fn check_stack_overflow(&self, push_cnt: usize) -> Result<()> { 204 | if self.bp + push_cnt < self.max_stack_size { 205 | Ok(()) 206 | } else { 207 | Err(Error::StackOverflow) 208 | } 209 | } 210 | 211 | pub fn get_fn_by_id(&self, id: u32) -> Result<&'src FnDef> { 212 | self.src 213 | .functions 214 | .get(id as usize) 215 | .ok_or(Error::InvalidFnId(id)) 216 | } 217 | 218 | pub fn get_fn_by_name(&self, name: &str) -> Result { 219 | self.function_idx 220 | .get(name) 221 | .copied() 222 | .ok_or_else(|| Error::UnknownFunctionName(name.to_owned())) 223 | } 224 | 225 | pub fn get_global_by_id(&self, id: u32) -> Result<&'src GlobalValue> { 226 | self.src 227 | .globals 228 | .get(id as usize) 229 | .ok_or(Error::InvalidFnId(id)) 230 | } 231 | 232 | pub fn get_fn_name_by_id(&self, id: u32) -> Result { 233 | let func = self.get_fn_by_id(id)?; 234 | 
Ok(String::from_utf8_lossy(&self.get_global_by_id(func.name)?.bytes).into_owned()) 235 | } 236 | 237 | pub fn exec_instruction(&mut self, op: Op) -> Result<()> { 238 | use Op::*; 239 | match op { 240 | Nop => Ok(()), 241 | Push(x) => self.push(x), 242 | Pop => self.pop().map(|_| ()), 243 | PopN(n) => self.pop_n(n), 244 | Dup => self.dup(), 245 | LocA(n) => self.loc_a(n), 246 | ArgA(n) => self.arg_a(n), 247 | GlobA(n) => self.glob_a(n), 248 | Load8 => self.load8(), 249 | Load16 => self.load16(), 250 | Load32 => self.load32(), 251 | Load64 => self.load64(), 252 | Store8 => self.store8(), 253 | Store16 => self.store16(), 254 | Store32 => self.store32(), 255 | Store64 => self.store64(), 256 | Alloc => self.alloc(), 257 | Free => self.free(), 258 | StackAlloc(n) => self.stack_alloc(n), 259 | AddI => self.add_i(), 260 | SubI => self.sub_i(), 261 | MulI => self.mul_i(), 262 | DivI => self.div_i(), 263 | AddF => self.add_f(), 264 | SubF => self.sub_f(), 265 | MulF => self.mul_f(), 266 | DivF => self.div_f(), 267 | DivU => self.div_u(), 268 | Shl => self.shl(), 269 | Shr => self.shr(), 270 | And => self.and(), 271 | Or => self.or(), 272 | Xor => self.xor(), 273 | Not => self.not(), 274 | CmpI => self.cmp_i(), 275 | CmpU => self.cmp_u(), 276 | CmpF => self.cmp_f(), 277 | NegI => self.neg_i(), 278 | NegF => self.neg_f(), 279 | IToF => self.itof(), 280 | FToI => self.ftoi(), 281 | ShrL => self.shr_l(), 282 | SetLt => self.set_lt(), 283 | SetGt => self.set_gt(), 284 | BrA(addr) => self.br_a(addr), 285 | Br(off) => self.br(off), 286 | BrFalse(off) => self.bz(off), 287 | BrTrue(off) => self.bnz(off), 288 | Call(id) => self.call(id), 289 | Ret => self.ret(), 290 | CallName(id) => self.call_by_name(id), 291 | ScanI => self.scan_i(), 292 | ScanC => self.scan_c(), 293 | ScanF => self.scan_f(), 294 | PrintI => self.print_i(), 295 | PrintC => self.print_c(), 296 | PrintF => self.print_f(), 297 | PrintS => self.print_s(), 298 | PrintLn => self.print_ln(), 299 | Panic => self.halt(), 300 
| } 301 | } 302 | 303 | /// All information from current runtime stack. Usually being called 304 | /// during panic, halt, stack overflow or debug. Returns stacks and whether 305 | /// the stack is corrupted 306 | pub fn stack_trace(&self) -> (Vec, bool) { 307 | let mut infos = Vec::new(); 308 | let cur_stack = match self.cur_stack_info() { 309 | Ok(i) => i, 310 | Err(_) => { 311 | return (Vec::new(), true); 312 | } 313 | }; 314 | infos.push(cur_stack); 315 | 316 | let mut bp = self.bp; 317 | let mut corrupted = false; 318 | while bp != usize::max_value() { 319 | let (info, bp_) = match self.stack_info(bp) { 320 | Ok(info) => info, 321 | Err(_) => { 322 | corrupted = true; 323 | break; 324 | } 325 | }; 326 | if info.fn_id == usize::max_value() as u64 { 327 | // Stack bottom sentinel item 328 | break; 329 | } 330 | bp = bp_; 331 | infos.push(info); 332 | } 333 | (infos, corrupted) 334 | } 335 | 336 | /// Return the information of current running function 337 | pub fn cur_stack_info(&self) -> Result { 338 | Ok(StackInfo { 339 | fn_id: self.fn_id as u64, 340 | inst: self.ip as u64, 341 | fn_name: self 342 | .src 343 | .globals 344 | .get(self.fn_info.name as usize) 345 | .map(|val| String::from_utf8_lossy(&val.bytes[..]).into()), 346 | }) 347 | } 348 | 349 | /// Returns information of the stack function at `bp` and the base pointer of the 350 | /// caller of this function. 
351 | pub fn stack_info(&self, bp: usize) -> Result<(StackInfo, usize)> { 352 | let prev_bp = self.stack_slot_get(bp)?; 353 | let ip = self.stack_slot_get(bp + 1)?; 354 | let fn_id = self.stack_slot_get(bp + 2)?; 355 | let fn_name = self.src.functions.get(fn_id as usize).and_then(|f| { 356 | self.src 357 | .globals 358 | .get(f.name as usize) 359 | .map(|val| String::from_utf8_lossy(&val.bytes[..]).into()) 360 | }); 361 | Ok(( 362 | StackInfo { 363 | fn_name, 364 | fn_id, 365 | inst: ip, 366 | }, 367 | prev_bp as usize, 368 | )) 369 | } 370 | 371 | pub fn debug_stack(&self) -> StackDebugger { 372 | StackDebugger::new(self.sp, self.bp, self.fn_info, self.stack().into()) 373 | } 374 | 375 | pub fn debug_frame(&self, frame: usize) -> Result { 376 | let (sp, bp, fn_id) = 377 | (0..frame).try_fold((self.sp, self.bp, self.fn_id as u64), |(_sp, bp, _), _| { 378 | let (info, nbp) = self.stack_info(bp)?; 379 | Ok::<_, Error>((bp, nbp, info.fn_id)) 380 | })?; 381 | let fn_info = self.get_fn_by_id(fn_id as u32)?; 382 | Ok(StackDebugger::new(sp, bp, fn_info, self.stack().into())) 383 | } 384 | 385 | pub fn stack(&self) -> &[Slot] { 386 | unsafe { std::slice::from_raw_parts(self.stack, self.sp) } 387 | } 388 | 389 | #[inline] 390 | fn total_loc(&self) -> usize { 391 | let total_loc = self.fn_info.loc_slots + self.fn_info.param_slots + self.fn_info.ret_slots; 392 | total_loc as usize 393 | } 394 | } 395 | 396 | impl<'s> Drop for R0Vm<'s> { 397 | fn drop(&mut self) { 398 | unsafe { 399 | std::alloc::dealloc( 400 | self.stack as *mut u8, 401 | std::alloc::Layout::array::(MAX_STACK_SIZE).unwrap(), 402 | ); 403 | } 404 | } 405 | } 406 | 407 | #[derive(Debug, PartialEq, Eq)] 408 | pub struct StackInfo { 409 | pub fn_name: Option, 410 | pub fn_id: u64, 411 | pub inst: u64, 412 | } 413 | 414 | impl std::fmt::Display for StackInfo { 415 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 416 | let name = self.fn_name.as_deref().unwrap_or("Unnamed function"); 417 | 
write!(f, "{} (id={}) +{}", name, self.fn_id, self.inst) 418 | } 419 | } 420 | 421 | pub struct StackDebugger<'s, 'stack> { 422 | sp: usize, 423 | bp: usize, 424 | fn_info: &'s FnDef, 425 | stack: Cow<'stack, [Slot]>, 426 | stacktrace: bool, 427 | bounds: bool, 428 | } 429 | 430 | impl<'s, 'stack> StackDebugger<'s, 'stack> { 431 | pub fn new( 432 | sp: usize, 433 | bp: usize, 434 | fn_info: &'s FnDef, 435 | stack: Cow<'stack, [Slot]>, 436 | ) -> StackDebugger<'s, 'stack> { 437 | StackDebugger { 438 | sp, 439 | bp, 440 | fn_info, 441 | stack, 442 | stacktrace: true, 443 | bounds: true, 444 | } 445 | } 446 | 447 | pub fn snapshot_stack(&mut self) { 448 | self.stack = self.stack.to_owned() 449 | } 450 | 451 | pub fn bounds(mut self, op: bool) -> Self { 452 | self.bounds = op; 453 | self 454 | } 455 | } 456 | 457 | impl<'s, 'stack> std::fmt::Display for StackDebugger<'s, 'stack> { 458 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 459 | let sp = self.sp; 460 | let bp = self.bp; 461 | 462 | let ret_slots = self.fn_info.ret_slots as usize; 463 | let param_slots = self.fn_info.param_slots as usize; 464 | let loc_slots = self.fn_info.loc_slots as usize; 465 | 466 | let upper_bound = std::cmp::min(sp + 5, self.stack.len()); 467 | let lower_bound = bp.saturating_sub((param_slots + ret_slots) as usize); 468 | 469 | let loc_start = bp + 3; 470 | let loc_end = loc_start + loc_slots; 471 | let ret_end = bp - param_slots; 472 | 473 | writeln!(f, "Stack:")?; 474 | for i in (lower_bound..upper_bound).rev() { 475 | write!(f, "{:5} | {:#018x} |", i, self.stack.get(i).unwrap())?; 476 | if i == sp { 477 | write!(f, " <- sp")?; 478 | } 479 | if i == bp { 480 | write!(f, " <- bp")?; 481 | } 482 | writeln!(f)?; 483 | 484 | if self.bounds { 485 | if i == sp { 486 | writeln!(f, "------v {:18} -", "expression")?; 487 | } 488 | if i == loc_end { 489 | writeln!(f, "------v {:18} -", "local variable")?; 490 | } 491 | if i == loc_start { 492 | writeln!(f, "------v {:18} 
-", "compiler info")?; 493 | } 494 | if i == bp { 495 | writeln!(f, "------v {:18} -", "params")?; 496 | } 497 | if i == ret_end { 498 | writeln!(f, "------v {:18} -", "return value")?; 499 | } 500 | } 501 | } 502 | 503 | Ok(()) 504 | } 505 | } 506 | 507 | impl<'s> std::fmt::Debug for StackDebugger<'s, '_> { 508 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 509 | (self as &dyn std::fmt::Display).fmt(f) 510 | } 511 | } 512 | -------------------------------------------------------------------------------- /crates/syntax/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /crates/syntax/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Rynco Maekawa "] 3 | edition = "2018" 4 | name = "r0syntax" 5 | version = "0.1.0" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | [dependencies] 9 | indexmap = "1.6.0" 10 | logos = "0.11.4" 11 | ryu = "1.0" 12 | serde = {version = "1.0", optional = true, features = ["derive", "rc"]} 13 | smol_str = "0.1.17" 14 | unescape = "0.1.0" 15 | 16 | [features] 17 | serde_impl = ["serde", "smol_str/serde"] 18 | -------------------------------------------------------------------------------- /crates/syntax/readme.md: -------------------------------------------------------------------------------- 1 | # r0syntax 2 | 3 | 这个库负责对 r0 源代码进行词法和语法分析。 4 | 5 | 本库的编写大量的参考了 [`rustc` 的开发指南][rustc]。 6 | 7 | [rustc]: https://rustc-dev-guide.rust-lang.org/ 8 | -------------------------------------------------------------------------------- /crates/syntax/src/ast.rs: -------------------------------------------------------------------------------- 1 | //! This crate lists the common AST items inside R0. 2 | //! 3 | //! 
For the pointer type, see `crate::util::{P, Mut}` 4 | 5 | use crate::{span::Span, util::P}; 6 | #[cfg(feature = "serde_impl")] 7 | use serde::{Deserialize, Serialize}; 8 | use smol_str::SmolStr; 9 | 10 | #[derive(Debug, Clone)] 11 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 12 | pub struct Program { 13 | pub decls: Vec, 14 | pub funcs: Vec, 15 | } 16 | 17 | pub trait AstNode { 18 | fn span(&self) -> Span; 19 | } 20 | 21 | #[derive(Debug, Clone)] 22 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 23 | pub struct FuncStmt { 24 | pub span: Span, 25 | pub name: Ident, 26 | pub params: Vec, 27 | pub ret_ty: TyDef, 28 | pub body: BlockStmt, 29 | } 30 | 31 | #[derive(Debug, Clone)] 32 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 33 | pub struct FuncParam { 34 | pub is_const: bool, 35 | pub name: Ident, 36 | pub ty: TyDef, 37 | } 38 | 39 | #[derive(Debug, Clone)] 40 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 41 | pub enum Stmt { 42 | Block(BlockStmt), 43 | While(WhileStmt), 44 | If(IfStmt), 45 | Expr(Expr), 46 | Decl(DeclStmt), 47 | Return(ReturnStmt), 48 | Break(Span), 49 | Continue(Span), 50 | Empty(Span), 51 | } 52 | 53 | impl Stmt { 54 | pub fn span(&self) -> Span { 55 | match self { 56 | Stmt::Block(i) => i.span, 57 | Stmt::While(i) => i.span, 58 | Stmt::If(i) => i.span, 59 | Stmt::Expr(i) => i.span(), 60 | Stmt::Decl(i) => i.span, 61 | Stmt::Return(i) => i.span, 62 | Stmt::Break(s) => *s, 63 | Stmt::Continue(s) => *s, 64 | Stmt::Empty(s) => *s, 65 | } 66 | } 67 | } 68 | 69 | #[derive(Debug, Clone)] 70 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 71 | pub struct DeclStmt { 72 | pub is_const: bool, 73 | pub name: Ident, 74 | pub ty: TyDef, 75 | pub val: Option>, 76 | pub span: Span, 77 | } 78 | 79 | #[derive(Debug, Clone)] 80 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 81 | pub struct ReturnStmt { 82 | pub val: Option>, 
83 | pub span: Span, 84 | } 85 | 86 | #[derive(Debug, Clone)] 87 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 88 | pub struct TyDef { 89 | pub span: Span, 90 | pub name: SmolStr, 91 | pub params: Option>, 92 | } 93 | 94 | #[derive(Debug, Clone)] 95 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 96 | pub struct BlockStmt { 97 | pub span: Span, 98 | pub stmts: Vec, 99 | } 100 | 101 | #[derive(Debug, Clone)] 102 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 103 | pub struct WhileStmt { 104 | pub span: Span, 105 | pub cond: P, 106 | pub body: P, 107 | } 108 | 109 | #[derive(Debug, Clone)] 110 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 111 | pub struct IfStmt { 112 | pub span: Span, 113 | pub cond: P, 114 | pub if_block: P, 115 | pub else_block: IfElseBlock, 116 | } 117 | 118 | #[derive(Debug, Clone)] 119 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 120 | pub enum IfElseBlock { 121 | None, 122 | If(P), 123 | Block(P), 124 | } 125 | 126 | #[derive(Debug, Clone)] 127 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 128 | pub enum Expr { 129 | Ident(Ident), 130 | Assign(AssignExpr), 131 | As(AsExpr), 132 | Literal(LiteralExpr), 133 | Unary(UnaryExpr), 134 | Binary(BinaryExpr), 135 | Call(CallExpr), 136 | } 137 | 138 | impl Expr { 139 | pub fn span(&self) -> Span { 140 | match self { 141 | Expr::Ident(x) => x.span, 142 | Expr::Assign(x) => x.span, 143 | Expr::As(x) => x.span, 144 | Expr::Literal(x) => x.span, 145 | Expr::Unary(x) => x.span, 146 | Expr::Binary(x) => x.span, 147 | Expr::Call(x) => x.span, 148 | } 149 | } 150 | } 151 | 152 | #[derive(Debug, Clone)] 153 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 154 | pub struct LiteralExpr { 155 | pub span: Span, 156 | pub kind: LiteralKind, 157 | } 158 | 159 | #[derive(Debug, Clone)] 160 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 
161 | pub enum LiteralKind { 162 | Integer(u64), 163 | Float(f64), 164 | String(String), 165 | Char(char), 166 | } 167 | 168 | #[derive(Debug, Clone)] 169 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 170 | pub struct UnaryExpr { 171 | pub span: Span, 172 | pub op: UnaryOp, 173 | pub expr: P, 174 | } 175 | 176 | #[derive(Debug, Clone)] 177 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 178 | pub struct AssignExpr { 179 | pub span: Span, 180 | pub allow_assign_const: bool, 181 | pub lhs: P, 182 | pub rhs: P, 183 | } 184 | 185 | #[derive(Debug, Clone)] 186 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 187 | pub struct AsExpr { 188 | pub span: Span, 189 | pub val: P, 190 | pub ty: TyDef, 191 | } 192 | 193 | #[derive(Debug, Clone)] 194 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 195 | pub struct BinaryExpr { 196 | pub span: Span, 197 | pub op: BinaryOp, 198 | pub lhs: P, 199 | pub rhs: P, 200 | } 201 | 202 | #[derive(Debug, Clone)] 203 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 204 | pub struct CallExpr { 205 | pub span: Span, 206 | pub func: Ident, 207 | pub params: Vec, 208 | } 209 | 210 | #[derive(Debug, Copy, Clone)] 211 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 212 | pub enum UnaryOp { 213 | Neg, 214 | Pos, 215 | } 216 | 217 | #[derive(Debug, Copy, Clone)] 218 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 219 | pub enum BinaryOp { 220 | Add, 221 | Sub, 222 | Mul, 223 | Div, 224 | Gt, 225 | Lt, 226 | Ge, 227 | Le, 228 | Eq, 229 | Neq, 230 | } 231 | 232 | #[derive(Debug, Clone)] 233 | #[cfg_attr(feature = "serde_impl", derive(Serialize, Deserialize))] 234 | pub struct Ident { 235 | pub span: Span, 236 | pub name: SmolStr, 237 | } 238 | -------------------------------------------------------------------------------- /crates/syntax/src/lexer.rs: 
-------------------------------------------------------------------------------- 1 | use logos::Logos; 2 | 3 | use crate::{prelude::Span, token::Token}; 4 | 5 | pub type Lexer<'src> = logos::Lexer<'src, Token>; 6 | 7 | pub fn lexer(s: &str) -> Lexer { 8 | Token::lexer(s) 9 | } 10 | 11 | pub fn spanned_lexer<'s>(s: &'s str) -> impl Iterator + 's { 12 | Token::lexer(s) 13 | .spanned() 14 | .map(|(t, s)| (t, crate::prelude::Span::new_idx(s.start, s.end))) 15 | } 16 | -------------------------------------------------------------------------------- /crates/syntax/src/lib.rs: -------------------------------------------------------------------------------- 1 | /// A span representing a section of source file 2 | pub mod span; 3 | 4 | /// Utilities 5 | pub mod util; 6 | 7 | /// Lexer for r0 tokens 8 | pub mod lexer; 9 | /// Models of r0 tokens 10 | pub mod token; 11 | 12 | /// Models of the abstract syntax tree. 13 | pub mod ast; 14 | /// Parser for r0 programs 15 | pub mod parser; 16 | 17 | pub use lexer::Lexer; 18 | pub use token::Token; 19 | 20 | mod prelude { 21 | pub use crate::span::Span; 22 | pub use crate::util::{Mut, MutWeak, P}; 23 | } 24 | -------------------------------------------------------------------------------- /crates/syntax/src/parser/err.rs: -------------------------------------------------------------------------------- 1 | use crate::{prelude::Span, Token}; 2 | 3 | #[derive(Debug)] 4 | pub struct ParseError { 5 | pub kind: ParseErrorKind, 6 | pub span: Option, 7 | } 8 | 9 | impl ParseError { 10 | pub fn new_span(kind: ParseErrorKind, span: Span) -> Self { 11 | Self { 12 | kind, 13 | span: Some(span), 14 | } 15 | } 16 | 17 | pub fn new_none(kind: ParseErrorKind) -> Self { 18 | Self { kind, span: None } 19 | } 20 | 21 | pub fn new(kind: ParseErrorKind, span: Option) -> Self { 22 | Self { kind, span } 23 | } 24 | } 25 | 26 | #[derive(Debug)] 27 | pub enum ParseErrorKind { 28 | ExpectToken(Token), 29 | ExpectedPattern(String), 30 | UnexpectedEof, 31 | 
Dummy, 32 | } 33 | -------------------------------------------------------------------------------- /crates/syntax/src/scope.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /crates/syntax/src/span.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Debug, ops::Index}; 2 | 3 | /// A Span is the information of a piece of source code inside a file. 4 | /// 5 | /// `Span`s are only meaningful when indexing the file it is originated from. 6 | #[derive(Clone, Copy, Eq, PartialEq)] 7 | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 8 | pub struct Span { 9 | /// The start index (in bytes or other meaningful item index) 10 | /// in the file of this span 11 | pub idx: usize, 12 | 13 | /// The length of the span 14 | pub len: usize, 15 | } 16 | 17 | pub const DUMMY_SPAN: Span = Span { 18 | // ctx: usize::max_value(), 19 | idx: 0, 20 | len: 0, 21 | }; 22 | 23 | impl Span { 24 | pub fn start(&self) -> usize { 25 | self.idx 26 | } 27 | 28 | pub fn end(&self) -> usize { 29 | self.idx + self.len 30 | } 31 | 32 | pub fn new(idx: usize, len: usize) -> Span { 33 | Span { idx, len } 34 | } 35 | 36 | pub fn new_idx(lo: usize, hi: usize) -> Span { 37 | let (lo, hi) = if lo > hi { (hi, lo) } else { (lo, hi) }; 38 | let len = hi - lo; 39 | Span { idx: lo, len } 40 | } 41 | 42 | pub const fn eof() -> Span { 43 | Span { 44 | idx: usize::max_value(), 45 | len: 0, 46 | } 47 | } 48 | } 49 | 50 | impl std::ops::Add for Span { 51 | type Output = Span; 52 | 53 | fn add(self, rhs: Self) -> Self::Output { 54 | let start = std::cmp::min(self.start(), rhs.start()); 55 | let end = std::cmp::max(self.end(), rhs.end()); 56 | Span::new_idx(start, end) 57 | } 58 | } 59 | 60 | impl std::ops::AddAssign for Span { 61 | fn add_assign(&mut self, rhs: Self) { 62 | *self = *self + rhs 63 | } 64 | } 65 | 66 | 
impl Debug for Span { 67 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 68 | write!(f, "[{}, {})", self.idx, self.idx + self.len) 69 | } 70 | } 71 | 72 | impl Default for Span { 73 | fn default() -> Self { 74 | DUMMY_SPAN 75 | } 76 | } 77 | 78 | impl Index for Vec { 79 | type Output = [T]; 80 | fn index(&self, index: Span) -> &Self::Output { 81 | &self[index.idx..(index.idx + index.len)] 82 | } 83 | } 84 | 85 | impl From for Span { 86 | fn from(s: logos::Span) -> Self { 87 | Span::new_idx(s.start, s.end) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /crates/syntax/src/token.rs: -------------------------------------------------------------------------------- 1 | use logos::{Lexer, Logos}; 2 | use smol_str::SmolStr; 3 | 4 | fn parse_string_literal(i: &mut Lexer) -> Option { 5 | unescape::unescape(&i.slice()[1..i.slice().len() - 1]) 6 | // Some(i.slice().into()) 7 | } 8 | 9 | fn parse_char_literal(i: &mut Lexer) -> Option { 10 | unescape::unescape(&i.slice()[1..i.slice().len() - 1]).and_then(|x| x.chars().next()) 11 | } 12 | 13 | #[derive(Debug, Clone, Logos)] 14 | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 15 | pub enum Token { 16 | #[token("fn")] 17 | FnKw, 18 | #[token("let")] 19 | LetKw, 20 | #[token("const")] 21 | ConstKw, 22 | #[token("as")] 23 | AsKw, 24 | #[token("while")] 25 | WhileKw, 26 | #[token("if")] 27 | IfKw, 28 | #[token("else")] 29 | ElseKw, 30 | #[token("return")] 31 | ReturnKw, 32 | #[token("break")] 33 | BreakKw, 34 | #[token("continue")] 35 | ContinueKw, 36 | 37 | #[regex(r"\d+", |lex| lex.slice().parse())] 38 | UIntLiteral(u64), 39 | #[regex(r"\d+\.\d+([eE][+-]?\d+)?", |lex| lex.slice().parse())] 40 | FloatLiteral(f64), 41 | #[regex(r#"'([^\\']|\\[rnt\\/"'])'"#, parse_char_literal)] 42 | CharLiteral(char), 43 | #[regex(r#""([^\\"]|\\([rnt\\/"']))*""#, parse_string_literal)] 44 | StringLiteral(String), 45 | 
#[regex(r"[_a-zA-Z][_a-zA-Z0-9]*", |lex| SmolStr::new(lex.slice()))] 46 | Ident(SmolStr), 47 | 48 | #[token(r"+")] 49 | Plus, 50 | #[token(r"-")] 51 | Minus, 52 | #[token(r"*")] 53 | Mul, 54 | #[token(r"/")] 55 | Div, 56 | #[token(r"=")] 57 | Assign, 58 | #[token(r"==")] 59 | Eq, 60 | #[token(r"!=")] 61 | Neq, 62 | #[token(r"<")] 63 | Lt, 64 | #[token(r">")] 65 | Gt, 66 | #[token(r"<=")] 67 | Le, 68 | #[token(r">=")] 69 | Ge, 70 | #[token("(")] 71 | LParen, 72 | #[token(")")] 73 | RParen, 74 | #[token("{")] 75 | LBrace, 76 | #[token("}")] 77 | RBrace, 78 | #[token(r"->")] 79 | Arrow, 80 | #[token(r",")] 81 | Comma, 82 | #[token(r":")] 83 | Colon, 84 | #[token(r";")] 85 | Semicolon, 86 | 87 | // Empty stuff 88 | #[regex(r"\s+", logos::skip, priority = 1)] 89 | Whitespace, 90 | #[regex(r"//.*\n", logos::skip)] 91 | Comment, 92 | 93 | // Error token 94 | #[error] 95 | Error, 96 | } 97 | 98 | impl Token { 99 | pub fn get_ident(&self) -> Option<&str> { 100 | match self { 101 | Token::Ident(i) => Some(&i), 102 | _ => None, 103 | } 104 | } 105 | 106 | pub fn get_ident_owned(self) -> Option { 107 | match self { 108 | Token::Ident(i) => Some(i), 109 | _ => None, 110 | } 111 | } 112 | 113 | pub fn get_uint(&self) -> Option { 114 | match self { 115 | Token::UIntLiteral(i) => Some(*i), 116 | Token::CharLiteral(c) => Some(*c as u64), 117 | _ => None, 118 | } 119 | } 120 | 121 | pub fn get_float(&self) -> Option { 122 | match self { 123 | Token::FloatLiteral(i) => Some(*i), 124 | _ => None, 125 | } 126 | } 127 | 128 | pub fn get_string(&self) -> Option<&str> { 129 | match self { 130 | Token::StringLiteral(i) => Some(&i), 131 | _ => None, 132 | } 133 | } 134 | 135 | pub fn get_string_owned(self) -> Option { 136 | match self { 137 | Token::StringLiteral(i) => Some(i), 138 | _ => None, 139 | } 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /crates/syntax/src/util.rs: 
-------------------------------------------------------------------------------- 1 | use std::{cell::RefCell, fmt::Debug, ops::Deref, rc::Rc}; 2 | 3 | pub type P = Rc; 4 | 5 | #[allow(non_snake_case)] 6 | pub fn P(val: T) -> P { 7 | P::new(val) 8 | } 9 | 10 | pub struct Mut(Rc>); 11 | 12 | pub struct MutWeak(std::rc::Weak>); 13 | 14 | impl Mut { 15 | pub fn new(val: T) -> Mut { 16 | Mut(Rc::new(RefCell::new(val))) 17 | } 18 | 19 | pub fn weak(&self) -> MutWeak { 20 | MutWeak(Rc::downgrade(&self.0)) 21 | } 22 | 23 | pub fn take_inner(this: Self) -> Result> { 24 | Rc::try_unwrap(this.0).map(|x| x.into_inner()).map_err(Mut) 25 | } 26 | } 27 | 28 | impl Deref for Mut { 29 | type Target = Rc>; 30 | 31 | fn deref(&self) -> &Self::Target { 32 | &self.0 33 | } 34 | } 35 | 36 | impl Deref for MutWeak { 37 | type Target = std::rc::Weak>; 38 | 39 | fn deref(&self) -> &Self::Target { 40 | &self.0 41 | } 42 | } 43 | 44 | impl Clone for Mut { 45 | fn clone(&self) -> Self { 46 | Mut(self.0.clone()) 47 | } 48 | } 49 | 50 | impl Debug for Mut 51 | where 52 | T: Debug, 53 | { 54 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 55 | self.0.borrow().fmt(f) 56 | } 57 | } 58 | 59 | impl std::fmt::Display for Mut 60 | where 61 | T: std::fmt::Display, 62 | { 63 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 64 | std::fmt::Display::fmt(&*self.0.borrow(), f) 65 | } 66 | } 67 | 68 | impl Debug for MutWeak 69 | where 70 | T: Debug, 71 | { 72 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 73 | self.0.fmt(f) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | book 2 | -------------------------------------------------------------------------------- /docs/book.toml: -------------------------------------------------------------------------------- 1 | [book] 2 | authors = ["Rynco 
Maekawa"] 3 | language = "en" 4 | multilingual = false 5 | src = "src" 6 | title = "C0 指导书" 7 | 8 | [output.html] 9 | additional-css = ["./theme/css/additional.css"] 10 | default-theme = "rust" 11 | mathjax-support = true 12 | preferred-dark-theme = "ayu" 13 | theme = "./theme" 14 | 15 | [output.html.fold] 16 | enable = true 17 | level = 2 18 | 19 | [output.html.playpen] 20 | enabled = false 21 | 22 | [output.html.playground] 23 | copy-js = false 24 | enabled = false 25 | -------------------------------------------------------------------------------- /docs/src/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | [前言](./preface.md) 4 | 5 | - [实验安排及要求](./requirement.md) 6 | - [编译过程概述](./compile-pipeline.md) 7 | - [语法表示说明](./ebnf.md) 8 | - [c0 语言标准与指导书](./c0/c0.md) 9 | - [单词](./c0/token.md) 10 | - [类型系统](./c0/ty.md) 11 | - [表达式](./c0/expr.md) 12 | - [语句](./c0/stmt.md) 13 | - [函数、全局变量与程序](./c0/func.md) 14 | - [标准库](./c0/stdlib.md) 15 | - [扩展 C0(加分项)](./c0/extended-c0.md) 16 | - [实现指导](./c0/method_notes.md) 17 | 18 | - [navm 虚拟机说明](./navm/index.md) 19 | - [navm 虚拟机指令集](./navm/instruction.md) 20 | - [一些实现指导](./navm/faq.md) 21 | - [评测说明](./judge.md) 22 | 23 | [参考实现](./ref-impl.md) 24 | 25 | [参考资料](./reference.md) 26 | 27 | -------------------------------------------------------------------------------- /docs/src/c0/c0.md: -------------------------------------------------------------------------------- 1 | # c0 语法说明 2 | 3 | c0 是一个用于编译原理课程的微型语言。c0 提供的功能类似于 C,但是为了减少编译器实现的压力(减少前瞻和/或回溯),在语言风格上大量参考了 Rust 的设计。请注意,这个语言**并不是**对 Rust 语言的简化。 4 | 5 | ## 语法定义 6 | 7 | 以下是 c0 语言的全部语法定义,**包括所有扩展语法**。 8 | 9 | ``` 10 | // # 单词 11 | 12 | // ## 关键字 13 | FN_KW -> 'fn' 14 | LET_KW -> 'let' 15 | CONST_KW -> 'const' 16 | AS_KW -> 'as' 17 | WHILE_KW -> 'while' 18 | IF_KW -> 'if' 19 | ELSE_KW -> 'else' 20 | RETURN_KW -> 'return' 21 | BREAK_KW -> 'break' 22 | CONTINUE_KW -> 'continue' 23 | 24 | // ## 字面量 25 | digit -> [0-9] 26 | 
UINT_LITERAL -> digit+ 27 | DOUBLE_LITERAL -> digit+ '.' digit+ ([eE] [+-]? digit+)? 28 | 29 | escape_sequence -> '\' [\\"'nrt] 30 | string_regular_char -> [^"\\] 31 | STRING_LITERAL -> '"' (string_regular_char | escape_sequence)* '"' 32 | 33 | char_regular_char -> [^'\\] 34 | CHAR_LITERAL -> '\'' (char_regular_char | escape_sequence) '\'' 35 | 36 | // ## 标识符 37 | IDENT -> [_a-zA-Z] [_a-zA-Z0-9]* 38 | 39 | // ## 符号 40 | PLUS -> '+' 41 | MINUS -> '-' 42 | MUL -> '*' 43 | DIV -> '/' 44 | ASSIGN -> '=' 45 | EQ -> '==' 46 | NEQ -> '!=' 47 | LT -> '<' 48 | GT -> '>' 49 | LE -> '<=' 50 | GE -> '>=' 51 | L_PAREN -> '(' 52 | R_PAREN -> ')' 53 | L_BRACE -> '{' 54 | R_BRACE -> '}' 55 | ARROW -> '->' 56 | COMMA -> ',' 57 | COLON -> ':' 58 | SEMICOLON -> ';' 59 | 60 | // ## 注释 61 | COMMENT -> '//' regex(.*) '\n' 62 | 63 | // # 表达式 64 | expr -> 65 | operator_expr 66 | | negate_expr 67 | | assign_expr 68 | | as_expr 69 | | call_expr 70 | | literal_expr 71 | | ident_expr 72 | | group_expr 73 | 74 | binary_operator -> '+' | '-' | '*' | '/' | '==' | '!=' | '<' | '>' | '<=' | '>=' 75 | operator_expr -> expr binary_operator expr 76 | 77 | negate_expr -> '-' expr 78 | 79 | assign_expr -> l_expr '=' expr 80 | 81 | as_expr -> expr 'as' ty 82 | 83 | call_param_list -> expr (',' expr)* 84 | call_expr -> IDENT '(' call_param_list? ')' 85 | 86 | literal_expr -> UINT_LITERAL | DOUBLE_LITERAL | STRING_LITERAL | CHAR_LITERAL 87 | 88 | ident_expr -> IDENT 89 | 90 | group_expr -> '(' expr ')' 91 | 92 | // ## 左值表达式 93 | l_expr -> IDENT 94 | 95 | // ## 类型 96 | ty -> IDENT 97 | 98 | // # 语句 99 | stmt -> 100 | expr_stmt 101 | | decl_stmt 102 | | if_stmt 103 | | while_stmt 104 | | break_stmt 105 | | continue_stmt 106 | | return_stmt 107 | | block_stmt 108 | | empty_stmt 109 | 110 | expr_stmt -> expr ';' 111 | 112 | let_decl_stmt -> 'let' IDENT ':' ty ('=' expr)?
';' 113 | const_decl_stmt -> 'const' IDENT ':' ty '=' expr ';' 114 | decl_stmt -> let_decl_stmt | const_decl_stmt 115 | 116 | if_stmt -> 'if' expr block_stmt ('else' 'if' expr block_stmt)* ('else' block_stmt)? 117 | 118 | while_stmt -> 'while' expr block_stmt 119 | 120 | break_stmt -> 'break' ';' 121 | 122 | continue_stmt -> 'continue' ';' 123 | 124 | return_stmt -> 'return' expr? ';' 125 | 126 | block_stmt -> '{' stmt* '}' 127 | 128 | empty_stmt -> ';' 129 | 130 | // # 函数 131 | function_param -> 'const'? IDENT ':' ty 132 | function_param_list -> function_param (',' function_param)* 133 | function -> 'fn' IDENT '(' function_param_list? ')' '->' ty block_stmt 134 | 135 | // # 程序 136 | item -> function | decl_stmt 137 | program -> item* 138 | ``` 139 | 140 | 其中,表达式中运算符的优先级从高到低为: 141 | 142 | | 运算符 | 结合性 | 143 | | --------------------------- | ------ | 144 | | 括号表达式 | - | 145 | | 函数调用 | - | 146 | | 前置 `-` | - | 147 | | `as` | - | 148 | | `*` `/` | 左到右 | 149 | | `+` `-` | 左到右 | 150 | | `>` `<` `>=` `<=` `==` `!=` | 左到右 | 151 | | `=` | 右到左 | 152 | 153 | ## 语法参考 154 | 155 | 以下是一些符合语法规范的程序。 156 | 157 | ```rust,ignore 158 | fn fib(x: int) -> int { 159 | if x<=1 { 160 | return 1; 161 | } 162 | let result: int = fib(x - 1); 163 | result = result + fib(x - 2); 164 | return result; 165 | } 166 | 167 | fn main() -> int { 168 | let i: int = 0; 169 | let j: int; 170 | j = getint(); 171 | while i < j { 172 | putint(i); 173 | putchar(32); 174 | putint(fib(i)); 175 | putln(); 176 | i = i + 1; 177 | } 178 | return 0; 179 | } 180 | ``` 181 | -------------------------------------------------------------------------------- /docs/src/c0/expr.md: -------------------------------------------------------------------------------- 1 | # 表达式 2 | 3 | ``` 4 | expr -> 5 | operator_expr 6 | | negate_expr 7 | | assign_expr 8 | | as_expr 9 | | call_expr 10 | | literal_expr 11 | | ident_expr 12 | | group_expr 13 | ``` 14 | 15 | 表达式是代码中运算的最小单位。在语法解析的时候,一个表达式会被展开成一棵树,称作表达式树。 16 | 17 | > 提示:对于 运算符表达式 
`operator_expr`、取反表达式 `negate_expr` 和类型转换表达式 `as_expr` 可以使用局部的算符优先文法进行分析。 18 | 19 | 表达式中运算符的优先级从高到低为: 20 | 21 | | 运算符 | 结合性 | 22 | | --------------------------- | ------ | 23 | | 括号表达式 | - | 24 | | 函数调用 | - | 25 | | 前置 `-` | - | 26 | | `as` | - | 27 | | `*` `/` | 左到右 | 28 | | `+` `-` | 左到右 | 29 | | `>` `<` `>=` `<=` `==` `!=` | 左到右 | 30 | | `=` | 右到左 | 31 | 32 | ## 运算符表达式 33 | 34 | ``` 35 | binary_operator -> '+' | '-' | '*' | '/' | '==' | '!=' | '<' | '>' | '<=' | '>=' 36 | operator_expr -> expr binary_operator expr 37 | ``` 38 | 39 | 运算符表达式是中间由一个运算符分隔、两边是子表达式的表达式。r0 一共有 10 种双目运算符。它们分别是: 40 | 41 | - 算数运算符 `+` `-` `*` `/` 42 | - 比较运算符 `>` `<` `>=` `<=` `==` `!=` 43 | 44 | 每个运算符的两侧必须是相同类型的数据。各运算符含义如下: 45 | 46 | | 运算符 | 含义 | 参数类型 | 结果类型 | 结合性 | 47 | | ------ | -------------------------- | -------- | ---------- | ------ | 48 | | `+` | 将左右两侧相加 | 数值 | 与参数相同 | 左到右 | 49 | | `-` | 左侧减去右侧 | 数值 | 与参数相同 | 左到右 | 50 | | `*` | 将左右两侧相乘 | 数值 | 与参数相同 | 左到右 | 51 | | `/` | 左侧除以右侧 | 数值 | 与参数相同 | 左到右 | 52 | | `>` | 如果左侧大于右侧则为真 | 数值 | 布尔\* | 左到右 | 53 | | `<` | 如果左侧小于右侧则为真 | 数值 | 布尔\* | 左到右 | 54 | | `>=` | 如果左侧大于等于右侧则为真 | 数值 | 布尔\* | 左到右 | 55 | | `<=` | 如果左侧小于等于右侧则为真 | 数值 | 布尔\* | 左到右 | 56 | | `==` | 如果左侧等于右侧则为真 | 数值 | 布尔\* | 左到右 | 57 | | `!=` | 如果左侧不等于右侧则为真 | 数值 | 布尔\* | 左到右 | 58 | 59 | ### \* 关于布尔类型 60 | 61 | 布尔类型的表达式只能出现在 `if` 和 `while` 语句的条件表达式中。因此,我们不强制规定布尔类型的值的表现形式。所有非 0 值都会被视为 true,0 会被视为 false。 62 | 63 | ## 取反表达式 64 | 65 | ``` 66 | negate_expr -> '-' expr 67 | ``` 68 | 69 | 取反表达式是在表达式前添加负号组成的表达式。取反表达式的语义是将表达式转换成它的相反数。 70 | 71 | ## 赋值表达式 72 | 73 | ``` 74 | l_expr -> IDENT 75 | assign_expr -> l_expr '=' expr 76 | ``` 77 | 78 | 赋值表达式是由 _左值表达式_、_等号 `=`_、_表达式_ 组成的表达式。赋值表达式的值类型永远是 `void`(即不能被使用)。 79 | 80 | 左值表达式是一个局部或全局的变量名。 81 | 82 | 赋值表达式的语义是将右侧表达式的计算结果赋给左侧表示的值。 83 | 84 | ## 类型转换表达式 85 | 86 | ``` 87 | as_expr -> expr 'as' ty 88 | ``` 89 | 90 | 类型转换表达式是由 _表达式_、_关键字 `as`_、_类型_ 组成的表达式。类型转换表达式的语义是将左侧表达式表示的值转换成右侧类型表示的值。 91 | 92 | 在 c0 实验中只会涉及到整数 `int` 和浮点数 `double` 之间的互相转换。 93 | 94 | ## 
函数调用表达式 95 | 96 | ``` 97 | call_param_list -> expr (',' expr)* 98 | call_expr -> IDENT '(' call_param_list? ')' 99 | ``` 100 | 101 | 函数调用表达式是由 _函数名_ 和 _调用参数列表_ 组成的表达式。函数调用表达式的语义是使用给出的参数调用函数名代表的函数。函数必须在调用前声明过(也就是说不存在先出现调用后出现声明的函数)。 102 | 103 | ### 特殊情况 104 | 105 | 标准库中的函数在调用前不需要声明,见 [标准库文档](stdlib.md)。 106 | 107 | ## 字面量表达式 108 | 109 | ``` 110 | literal_expr -> UINT_LITERAL | DOUBLE_LITERAL | STRING_LITERAL 111 | 112 | digit -> [0-9] 113 | UINT_LITERAL -> digit+ 114 | DOUBLE_LITERAL -> digit+ '.' digit+ ([eE] [+-]? digit+)? 115 | 116 | escape_sequence -> '\' [\\"'nrt] 117 | string_regular_char -> [^"\\] 118 | STRING_LITERAL -> '"' (string_regular_char | escape_sequence)* '"' 119 | ``` 120 | 121 | 字面量表达式可以是一个无符号整数、浮点数或者字符串的字面量。_整数_ 和 _浮点数字面量_ 的语义就是用对应类型表示的字面量的值(64 位);_字符串字面量_ 只会在 `putstr` 调用中出现,语义是对应的全局常量的编号。 122 | 123 | ## 标识符表达式 124 | 125 | ``` 126 | ident_expr -> IDENT 127 | ``` 128 | 129 | 标识符表达式是由标识符组成的表达式。其语义是标识符对应的局部或全局变量。标识符表达式的类型与标识符的类型相同。 130 | 131 | ## 括号表达式 132 | 133 | ``` 134 | group_expr -> '(' expr ')' 135 | ``` 136 | 137 | 括号表达式内部的表达式的值将被优先计算。 138 | -------------------------------------------------------------------------------- /docs/src/c0/extended-c0.md: -------------------------------------------------------------------------------- 1 | # 扩展 C0 2 | 3 | 这里列出了实现之后可以获得加分的扩展 C0 特性。 4 | 5 | 加分的单位尚未确定,目前的加分数量都是相对值。 6 | 7 | 扩展 c0 包括: 8 | 9 | - 注释 10 | - 字符字面量 11 | - 类型转换 & 浮点数 12 | - 作用域嵌套 13 | - 变量声明增强 14 | - break/continue 15 | - 返回路径检查 16 | 17 | ## 注释 18 | 19 | 加分:5pt 20 | 21 | ``` 22 | COMMENT -> '//' regex(.*) '\n' 23 | ``` 24 | 25 | C0 的注释是从 `//` 开始到这行结束(遇到第一个 `\n`)为止的字符序列。注释不应当被词法分析输出。 26 | 27 | ## 字符字面量 28 | 29 | 加分:5pt 30 | 31 | ``` 32 | char_regular_char -> [^'\\\n\r] 33 | CHAR_LITERAL -> '\'' (char_regular_char | escape_sequence) '\'' 34 | literal_expr -> UINT_LITERAL | DOUBLE_LITERAL | STRING_LITERAL | CHAR_LITERAL 35 | ``` 36 | 37 | 字符字面量是由单引号 `'` 包裹的单个字符或转义序列。其中单个字符可以是 ASCII 中除了单引号 `'`、反斜线 `\\`、空白符 `\r`(CR)、`\n`(LF)、`\t`(Tab) 
以外的任何字符。转义序列可以是 `\'`、`\"`、`\\`、`\n`、`\t`、`\r`,含义与 C 中的对应序列相同。 38 | 39 | _字符字面量_ 的语义是被包裹的字符的 ASCII 编码无符号扩展到 64 位的整数值,类型是 `int`。 40 | 41 | ## 类型转换 & 浮点数类型 42 | 43 | 加分:25pt 44 | 45 | ### 类型转换 46 | 47 | ``` 48 | AS_KW -> 'as' 49 | as_expr -> expr 'as' ty 50 | expr -> .. | as_expr 51 | ``` 52 | 53 | 显式类型转换通过 `as` 表达式实现。语言中没有隐式类型转换。 54 | 55 | `表达式 as 类型` 表示将 `表达式` 的计算结果转换为 `类型` 所表示的类型的数据。`as` 表达式的左侧数据类型和右侧类型都不能是 `void`。 56 | 57 | 允许的类型转换包括: 58 | 59 | - 类型 T 转换到它自己 60 | - 浮点数 `double` 和整数 `int` 之间互相转换 61 | 62 | ### 浮点数类型 63 | 64 | ``` 65 | DOUBLE_LITERAL -> digit+ '.' digit+ ([eE] [+-]? digit+)? 66 | // ^~~~~~~~~~~~~~~~~ ^~~~~~~~~~~~ 67 | // number exponent 68 | ``` 69 | 70 | 浮点数类型 `double` 是遵循 IEEE 754 标准的 64 位浮点数(在其它语言中经常称作 `double`、`float64` 或 `f64`)。 71 | 72 | 带指数部分(`exponent`)的浮点数字面量的值是 `number * (10 ^ exponent)`,如 `1.0E6`、`1.0e+6` 均代表 `1000000`、`2.1e-2` 代表 `0.021`。 73 | 74 | 浮点数和整数之间不能进行运算。浮点数之间进行四则运算的结果仍为浮点数。 75 | 76 | ## 作用域嵌套 77 | 78 | 加分:10pt 79 | 80 | 简而言之,在任何一个代码块中都可以声明变量。 81 | 82 | 要求: 83 | 84 | - 每个代码块(`block_stmt`)都是一级作用域。 85 | - 每级作用域内部的变量声明不能重复。 86 | - 作用域内声明的变量可以覆盖上一级作用域中的变量。 87 | - 每个作用域内定义的变量在作用域结束后即失效 88 | 89 | 比如,下面的函数中变量 `x`(1)、`counter`(2)、循环内的 `x`(3) 可以被访问的区域如竖线左侧所示: 90 | 91 | ```rust,ignore 92 | 1 | fn fib_iter(x: int) -> int { // (1) 93 | | | let last_val: int = 1; 94 | | | let cur_val: int = 1; 95 | 2 | | let counter: int = x - 2; // (2) 96 | | | | while counter > 0 { 97 | 3 | | let x: int = cur_val + last_val; // (3) 98 | | | | last_val = cur_val; 99 | | | | cur_val = x; 100 | - | | } 101 | | | | return cur_val; 102 | - - | } 103 | ``` 104 | 105 | ## 变量声明增强 106 | 107 | 加分:5pt 108 | 109 | 在每一级作用域中,你不仅可以在作用域顶端声明变量,也能在作用域中间声明变量。在作用域中间声明的变量同样遵循上一条的生命周期。在全局作用域中,变量声明和函数声明也可以混合。 110 | 111 | ## `break` 和 `continue` 112 | 113 | 加分:10pt 114 | 115 | ``` 116 | BREAK_KW -> 'break' 117 | CONTINUE_KW -> 'continue' 118 | 119 | break_stmt -> 'break' ';' 120 | 121 | continue_stmt -> 'continue' ';' 122 | ``` 123 | 124 | - `break` 和 `continue`
必须在循环体内使用,在其他地方使用是编译错误。 125 | - `break` 代表跳出循环体,控制转移到循环外的下一条语句。 126 | - `continue` 代表跳过本次循环体的代码,控制转移到循环体的最后一条语句。 127 | 128 | > 提示:进入循环之前记录一下跳转的目标位置 129 | 130 | ## 函数返回路径检查 131 | 132 | 加分:10pt 133 | 134 | 你需要对每一个函数的所有控制流进行检查,保证如果函数有返回值,那么所有可能的控制流(这里认为一个条件语句的所有分支都能访问到)都能导向 `return` 语句。比如,以下的函数不能通过编译: 135 | 136 | ```rust,ignore 137 | fn foo(i: int) -> int { 138 | if i == 0 { 139 | return 1; 140 | } else { 141 | putint(0); 142 | } 143 | // 这个分支没有返回 144 | } 145 | ``` 146 | 147 | 这个也不行: 148 | 149 | ```rust,ignore 150 | fn bar() -> int { 151 | let i: int; 152 | i = getint(); 153 | while i > 0 { 154 | i = i - 1; 155 | if i <= 0 { 156 | return i; 157 | } 158 | } 159 | // 这个分支没有返回 160 | } 161 | ``` 162 | 163 | 这个可以,因为在到达函数结尾之前两个分支都返回了: 164 | 165 | ```rust,ignore 166 | fn baz(i: int) -> int { 167 | if i == 0 { 168 | return 1; 169 | } else { 170 | return 0; 171 | } 172 | // 没有分支可以到达这里 173 | } 174 | ``` 175 | 176 | > 提示:用基本块表示函数就能看得一清二楚了。 177 | 178 | > UB: 我们不会考察对于无限循环的控制流检查。你可以选择报错,也可以选择无视。 179 | -------------------------------------------------------------------------------- /docs/src/c0/func.md: -------------------------------------------------------------------------------- 1 | # 函数和全局变量 2 | 3 | ## 函数 4 | 5 | ``` 6 | function_param -> 'const'? IDENT ':' ty 7 | function_param_list -> function_param (',' function_param)* 8 | function -> 'fn' IDENT '(' function_param_list? 
')' '->' ty block_stmt 9 | // ^~~~ ^~~~~~~~~~~~~~~~~~~~ ^~ ^~~~~~~~~~ 10 | // | | | | 11 | // function_name param_list return_type function_body 12 | ``` 13 | 14 | 与 miniplc0 不同,c0 中存在函数。 15 | 16 | c0 中一个函数的定义由 _函数名_、_参数列表_、_返回类型_ 和 _函数体_ 组成。 17 | 18 | 函数有以下语义约束: 19 | 20 | - 函数的名称 `function_name` 不能重复,也不能和全局变量重复。 21 | - 函数的参数声明 `param_list` 与 含有初始化表达式的变量声明 有相同的语义约束。 22 | - 函数体、函数的参数声明 在同一个作用域(函数作用域)中,是全局作用域的子作用域。 23 | 24 | 另外再提醒一下,返回值类型 `return_type` 即使为 `void` 也不能省略。 25 | 26 | 函数体的组成单位是语句,见 [语句页面](stmt.md)。 27 | 28 | ## 全局变量 29 | 30 | 全局变量的声明与局部变量相同,都是使用 [声明语句](stmt.md#声明语句) 进行声明。全局变量的定义方式和约束与局部变量相同。全局变量所在作用域是全局,因此有可能被函数内定义的局部变量覆盖。 31 | 32 | ## 程序结构 33 | 34 | ``` 35 | program -> decl_stmt* function* 36 | ``` 37 | 38 | 一个 c0 的程序中可以存在多个 _变量声明_,后接多个 _函数声明_。 39 | 40 | 语义约束: 41 | 42 | - 一个合法的 c0 程序必须存在一个名为 `main` 的函数作为程序入口,否则应视为编译错误; 43 | - 一个函数或变量只能在它的定义中及之后的位置被引用,换句话说就是不存在先使用后定义的情况。 44 | 45 | > 注:扩展 c0 中允许变量声明和函数声明混搭,但仍要遵循以上规定。 46 | -------------------------------------------------------------------------------- /docs/src/c0/method_notes.md: -------------------------------------------------------------------------------- 1 | # 实现方法指导 2 | 3 | 实现一个 parser 有很多种方法,这里会提供一些关于代码实现(而不是理论)的方法指导。 4 | 5 | 对于没有讲到的内容,可以参考 [去年的指导书](https://github.com/BUAA-SE-Compiling/c0-handbook#33-%E5%AE%9E%E7%8E%B0%E6%8C%87%E5%BC%95) 6 | 7 | ## 一些通用的部分 8 | 9 | ### 类型定义 10 | 11 | 对于词法、语法分析时用到的类型,因为类型确定且已知,可以使用继承实现。在支持和类型 (sum type) 的语言里也可以用和类型实现。这样做可以显著降低判断 token 或者语法树节点类型时的工作量,因为可以直接判断变量本身的类型,甚至直接进行模式匹配。比如: 12 | 13 | ```csharp 14 | /* 词法分析器 */ 15 | 16 | class Token {} 17 | 18 | class NumberToken : Token { 19 | public double value; 20 | } 21 | 22 | // ... 
23 | 24 | /* 语法分析器 */ 25 | 26 | class Expr {} 27 | 28 | class Literal : Expr {} 29 | 30 | class IntegerLiteral : Literal { 31 | public long value; 32 | } 33 | 34 | class StringLiteral : Literal { 35 | public string value; 36 | } 37 | 38 | class BinaryExpr : Expr { 39 | public Operator op; 40 | public Expr lhs; 41 | public Expr rhs; 42 | } 43 | 44 | // ... 45 | ``` 46 | 47 | 或者在支持的语言里使用带标签的联合类型: 48 | 49 | ```rust,ignore 50 | enum Expr { 51 | Literal(LiteralExpr), 52 | Binary(BinaryExpr), 53 | // ... 54 | } 55 | 56 | enum LiteralExpr { 57 | Integer(i64), 58 | String(String), 59 | // ... 60 | } 61 | 62 | struct BinaryExpr { 63 | pub op: Operator, 64 | pub lhs: Ptr, 65 | pub rhs: Ptr, 66 | } 67 | 68 | // ... 69 | ``` 70 | 71 | ### 迭代器 72 | 73 | 迭代器(Iterator)是对一系列值的抽象,比如说一列输入的字符或者解析完的 token。使用迭代器可以有效地将输入数据和对数据的获取操作解耦,方便在不同时候使用不同方式输入数据,以及进行测试。常见高级语言都有对于迭代器的抽象,包括: 74 | 75 | - Java: `java.util.Iterator` 76 | - C#: `System.Collections.Generic.IEnumerator` 77 | - C++: `std::iterator::iterator_traits` 78 | - C++20: concept `std::ranges::input_iterator` 79 | - Python: 实现 `__next__` 的类型 80 | - JavaScript: 实现 `Symbol.iterator` 的类型 81 | 82 | 由于在解析时常常要回溯,使用的迭代器可以提供一些额外的方法,比如 `peek()` 用于查看下一个值但不移动迭代器,或者 `unread(value)` 用于将已经读出的值放回迭代器。 83 | 84 | ## 词法分析 85 | 86 | 词法分析这个主题比较简单,基本上就是对于每个 token 使用自动机(或者退化成普通的逻辑分支)进行解析。token 的组成一般比较简单,可以在分析时参考正则表达式的状态来设计自动机或逻辑分支。 87 | 88 | 当然,也有一些库允许你直接用正则表达式定义 token 来进行自动分析。好耶。 89 | 90 | > 不要学助教[用逻辑分支模拟自动机][bad_lexing](逃 91 | 92 | [bad_lexing]: https://github.com/01010101lzy/chigusa/blob/0a08176f4318542c1bb96114ac3f0df56ac9510d/src/c0/lexer.rs#L392-L511 93 | 94 | ## 语法分析 95 | 96 | ### 普通的递归下降分析法 97 | 98 | 递归下降是一个很简单、很直观的分析法,也是大多数人实现语法分析的首选方法。在实现递归下降分析器的时候,有一些可以降低编码难度的方法。 99 | 100 | #### 使用迭代器和辅助函数 101 | 102 | 看 miniplc0 java 版本基本上就够了(逃) 103 | 104 | #### 解析器组合子 (Parser Combinator) 105 | 106 | 助教没有试过这么写,如果你用 Haskell 来写的话或许可以试试 `parsec` 这个库。 107 | 108 | ### 使用 LL/LR 解析器生成器 109 | 110 | 自动生成解析器代码总感觉有点作弊的意思,不过用了就用了吧(笑)。如果你确定要用的话,记得选一个好用的,比如 [ANTLR][]。 111 | 
112 | [antlr]: https://www.antlr.org 113 | -------------------------------------------------------------------------------- /docs/src/c0/notes.md: -------------------------------------------------------------------------------- 1 | # 设计笔记与讨论 2 | 3 | > 这里存放着设计 2020 软院编译原理所使用的语言的时候所考虑的一些东西。 4 | > 5 | 6 | > Rynco:我认为,实验的目标应当是让学生尽可能多的了解一个真实的编译器是如何运行的。因此,我们需要尽可能消减不必要的内容,比如复杂的指令集、寄存器分配、过于繁琐的语法等等。剩下来的应该是一个只包含核心内容的现代语言。 7 | 8 | ## 语法 9 | 10 | > 邵老师:最好不要自创一门新的语言或者改编自小众语言。 11 | > 12 | > Rynco:我个人是倾向于创建一门新的语言的。 13 | 14 | 考点:词法、语法分析 15 | 16 | > 按照 hambaka 的意思,c0 的语法可以进一步简化,降低实现难度。 17 | 18 | Hambaka 建议的语法修改包括: 19 | 20 | - 去除隐式类型转换,所有类型转换必须显式声明 21 | - 只保留 while 和/或 for 作为唯一/二可用的循环 22 | - 去除 switch 语句 23 | 24 | Rynco 正在考虑的语法修改包括: 25 | 26 | - 类型后置 27 | - 规范 bool 类型 28 | 29 | > Rynco: 30 | > 31 | > 我计划的是一个长得有点 Rust(只借鉴了关键字和部分语法,因为解析器写起来容易)的 C,不知道其他人怎么看 32 | > 33 | > 我希望这么做的原因是:如果使用类型后置的话,就可以把类型关键字解析完全放在语义解析里面做了。从而不需要在 parse 的时候维护一个符号表来确定哪些标识符是类型、哪些是变量,或者在解析到类型转换时回溯了(见 [附录1][ref1] )。 34 | 35 | [ref1]: #附录1:C 风格的语法在解析时的回溯问题与解决方案 36 | 37 | ```rust,ignore 38 | let global_var: double = -123456.789e10; 39 | 40 | fn add(a: int, b: int) -> int { 41 | return a + b; 42 | } 43 | 44 | fn main() -> void { 45 | let a: int = 3; 46 | let b: int = 4; 47 | let c: double = add(a, b) as double; 48 | if global_var > 0 { 49 | print("Hello"); 50 | } else { 51 | print(c); 52 | } 53 | } 54 | ``` 55 | 56 | > Rynco: 自动类型推断应该不会有了……吧?有人要当进阶内容做我不反对。 57 | > 58 | 59 | ### 暂定的语法内容 60 | 61 | > Rynco: 这里决定的内容会在实现示例编译器的时候再次确认,保证实现编译器的工作量不会过大。如果实现示例编译器的时候发现什么地方难度偏大的话还会再砍。 62 | 63 | #### 字面量 64 | 65 | 字面量包括以下内容: 66 | 67 | - 整数 68 | - 浮点数 69 | - 字符 70 | - 字符串 71 | - 布尔值 72 | 73 | #### 运算 74 | 75 | 算术运算: 76 | 77 | - 相反数 78 | - 加 79 | - 减 80 | - 乘 81 | - 除 82 | 83 | 比较运算: 84 | 85 | - 大于 86 | - 小于 87 | - 大于等于 88 | - 小于等于 89 | - 等于 90 | - 不等于 91 | 92 | 赋值运算: 93 | 94 | - 赋值 95 | 96 | 进阶版本可以支持以下运算(虚拟机已支持): 97 | 98 | - 布尔非 99 | - 布尔与 100 | - 布尔或 101 | - 按位与 102 | - 按位或 103 | - 按位异或 104 | - 左移 105 | - 算术右移 106 | - 逻辑右移 107 | 
108 | #### 变量声明与赋值 109 | 110 | 变量声明使用 `let` 或 `const` 关键字声明,语法见 附录1。 111 | 112 | 赋值表达式使用等号 `=` 作为运算符,运算结果类型为空值(`void`)。 113 | 114 | > 比如 `(a = b) == c` 是合法的表达式,但是类型检查会出错。赋值作为表达式而不是语句的原因是赋值语句和表达式的 FIRST 集会相交。 115 | 116 | 要求实现变量作用域。 117 | 118 | #### 条件表达式和循环 119 | 120 | 一种条件表达式:`if-elseif-else` 表达式。 121 | 122 | 一种循环:`while` 循环。 123 | 124 | #### 函数 125 | 126 | 函数使用 `fn` 关键字声明,语法见 附录1。 127 | 128 | 不要求实现前向引用。 129 | 130 | 131 | ## 类型系统 132 | 133 | 考点:类型转换 134 | 135 | 虚拟机的设计默认使用 64 位整数和浮点数。 136 | 137 | 会有整数和浮点数(没法用同一种类型假装能转换的) 138 | 139 | 虚拟机的设计支持数组、结构体和堆内存分配,这三者可以作为进阶内容选做。 140 | 141 | ### 暂定的类型系统 142 | 143 | 必做 144 | 145 | - `int` (`i64`) 146 | - `double` (`f64`) 147 | - `void` (`unit` / `()`) 148 | 149 | 进阶(待砍刀) 150 | 151 | - `bool` 152 | (存储上等同于 `u8`,`false == 0u8`, `true == 255u8`) 153 | - `u8`/`u16`/`u32`/`u64` 154 | - `i8`/`i16`/`i32`/`i64` 155 | - struct 156 | - array (`[T; N]`) 157 | - pointer (`&T`) 158 | - 自动类型推断(省去 `let variable` 后面的类型) 159 | 160 | ## 虚拟机 161 | 162 | 考点:代码生成 163 | 164 | 编译目标是 r0vm 虚拟机,是栈式虚拟机。编译到 JVM / Dotnet CLR / x86 等目标如果想做的话可以选做,需要提前跟助教声明。 165 | 166 | 虚拟机设计已经基本确定,见相关文档。 167 | 168 | ## 附录 169 | 170 | ### 附录1:C 风格的语法在解析时的回溯问题与解决方案 171 | 172 | 考虑以下 C 风格的变量声明语句的语法规则(`int`、`double` 等类型名称不是关键字): 173 | 174 | ``` 175 | # 标识符 176 | ident -> "int" | "x" 177 | # 运算符 178 | op -> "+" | "-" 179 | 180 | # 类型,在词法分析时被分析为标识符 181 | ty -> ident 182 | # 表达式 183 | expr -> "0" | "1" | ident | expr op expr 184 | 185 | # 变量声明语句 186 | decl_stmt -> ty ident "=" expr ";" 187 | # 表达式语句 188 | expr_stmt -> expr ";" 189 | 190 | # 语句 191 | stmt -> decl_stmt | expr_stmt 192 | ``` 193 | 194 | 显然,FIRST(`decl_stmt`) ∩ FIRST(`expr_stmt`) == { `ident` }。鉴于大部分同学在实现的时候都会考虑采用递归下降分析法(这门课的大作业里不会真的有人去手写/生成 LL/LR 吧?),这个 FIRST 集的重合会造成大量的回溯。 195 | 196 | 类似的还有显式类型转换: 197 | 198 | ``` 199 | ident -> "int" | "x" 200 | ty -> ident 201 | 202 | # 括号表达式 203 | paren_expr -> "(" expr ")" 204 | # C 风格的显式类型转换 205 | cast_expr -> "(" ty ")" expr 206 | 207 | expr -> ident | cast_expr | paren_expr 208 | 
``` 209 | 210 | 如果没有在语法分析的时候就建立符号表,在分析代码 `(int)(x)` 时甚至在读完 `(int)` 这三个 token 之后都不能确定解析的表达式到底是类型转换还是括号。为了解决这个问题,要么需要预读不确定数量的 token (在类型声明可能大于 1 个 token 时),要么遇到括号就要准备回溯,两者在代码上实现难度都偏大。 211 | 212 | 因此,我建议在设计语法的时候就考虑这类问题,避免大量的 FIRST 集重合现象,降低递归下降语法分析器的实现难度。具体方案如下: 213 | 214 | > 在以下的语法规则中,`ty` 代表类型,`ident` 代表标识符,`expr` 代表表达式,`block` 代表语法块。 215 | > 216 | 217 | 一,将变量和函数声明中的类型后置,使用关键字开始此类语句,避免与 `expr` 的 FIRST 集重叠。此语法与多种现代语言(如 TypeScript、Kotlin、Go、Rust)相类似。 218 | 219 | > 九个六:`const` 可以考虑砍掉(待定) 220 | 221 | ``` 222 | # 修改前: 223 | 224 | decl_stmt -> "const"? ty ident ("=" expr)? ";" 225 | # int myVariable = 123 + 456; 226 | 227 | function_param -> ty ident ("," ty ident)* 228 | function -> ty ident "(" function_param? ")" block 229 | # int add(int a, int b) { ... } 230 | 231 | # 修改后: 232 | 233 | decl_stmt -> ("let" | "const") ident ":" ty ("=" expr)? ";" 234 | # let myVariable: int = 123 + 456; 235 | 236 | function_param -> ident ":" ty ("," ident ":" ty)* 237 | function -> "fn" ident "(" function_param? ")" "->" ty block 238 | # fn add(a: int, b: int) -> int { ... 
} 239 | ``` 240 | 241 | 二,将显式类型转换的语法从括号变为使用 `as` 做运算符的二元表达式。此语法与多种现代语言(如 TypeScript、Kotlin、Rust、C#)相类似。 242 | 243 | ``` 244 | # 修改前: 245 | 246 | cast_expr -> "(" ty ")" expr 247 | # (int)42.0 248 | # (double)(a + b) 249 | 250 | # 修改后: 251 | 252 | cast_expr -> expr "as" ty 253 | # 42.0 as int 254 | # (a + b) as double 255 | ``` 256 | 257 | 258 | ### 附录2:九个六先生的例程 259 | 260 | ```rust,ignore 261 | let a1, a2, a3, a4, a5: int; 262 | 263 | fn me(x: int) -> int { 264 | return x; 265 | } 266 | 267 | fn add(x: double, y: double) -> int { 268 | let xx: int = x as int; 269 | let yy: int = y as int; 270 | return xx + yy 271 | } 272 | 273 | fn give(idx: int) -> int { 274 | if idx == 1 { 275 | return a1; 276 | } 277 | else if idx == 2 { 278 | return a2; 279 | } 280 | else if idx == 3 { 281 | return a3; 282 | } 283 | else if idx == 4 { 284 | return a4; 285 | } 286 | else if idx == 5 { 287 | return a5; 288 | } 289 | else { 290 | return 114514.0 as int 291 | } 292 | } 293 | 294 | fn set(idx: int, val: int) -> void { 295 | if idx == 1 { 296 | a1 = val; 297 | } 298 | else if idx == 2 { 299 | a2 = val; 300 | } 301 | else if idx == 3 { 302 | a3 = val; 303 | } 304 | else if idx == 4 { 305 | a4 = val; 306 | } 307 | else if idx == 5 { 308 | a5 = val; 309 | } 310 | } 311 | 312 | fn main() -> void { 313 | let a, b, c, t: int; 314 | let five: int = 5; 315 | a = getint(); 316 | b = getint(); 317 | set(1, a); 318 | set(2, b); 319 | t = 3; 320 | while t <= five { 321 | c = add(a as double, b as double); 322 | b = a; 323 | a = c; 324 | set(t, c); 325 | t = t + 1; 326 | } 327 | print(give(me(five))); 328 | ``` 329 | -------------------------------------------------------------------------------- /docs/src/c0/stdlib.md: -------------------------------------------------------------------------------- 1 | # 标准库 2 | 3 | 由于 c0 语言本身比较简单,为了实现输入输出的功能,我们规定了 8 个不需要声明就可以调用的函数,它们的分别是: 4 | 5 | ```rust,ignore 6 | /// 读入一个有符号整数 7 | fn getint() -> int; 8 | 9 | /// 读入一个浮点数 10 | fn getdouble() -> double; 11 | 12 | 
/// 读入一个字符 13 | fn getchar() -> int; 14 | 15 | /// 输出一个整数 16 | fn putint(int) -> void; 17 | 18 | /// 输出一个浮点数 19 | fn putdouble(double) -> void; 20 | 21 | /// 输出一个字符 22 | fn putchar(int) -> void; 23 | 24 | /// 将编号为这个整数的全局常量看作字符串输出 25 | fn putstr(int) -> void; 26 | 27 | /// 输出一个换行 28 | fn putln() -> void; 29 | ``` 30 | 31 | 在实现时,这些函数既可以编译成使用虚拟机中的 `callname` 指令调用,也可以编译成相应的虚拟机指令(`scan.i`, `print.i` 等),在虚拟机实现上两者是等价的。**请注意,直接使用指令的时候,是不用预留返回值空间的。** 32 | -------------------------------------------------------------------------------- /docs/src/c0/stmt.md: -------------------------------------------------------------------------------- 1 | # 语句 2 | 3 | ``` 4 | stmt -> 5 | expr_stmt 6 | | decl_stmt 7 | | if_stmt 8 | | while_stmt 9 | | return_stmt 10 | | block_stmt 11 | | empty_stmt 12 | ``` 13 | 14 | 语句是函数的最小组成部分。 15 | 16 | ## 表达式语句 17 | 18 | ``` 19 | expr_stmt -> expr ';' 20 | ``` 21 | 22 | 表达式语句由 _表达式_ 后接分号组成。表达式如果有值,值将会被丢弃。 23 | 24 | ## 声明语句 25 | 26 | ``` 27 | let_decl_stmt -> 'let' IDENT ':' ty ('=' expr)? 
';' 28 | const_decl_stmt -> 'const' IDENT ':' ty '=' expr ';' 29 | decl_stmt -> let_decl_stmt | const_decl_stmt 30 | ``` 31 | 32 | 声明语句由 `let`(声明变量)或 `const`(声明常量)接 _标识符_、_类型_ 和可选的 _初始化表达式_ 组成。其中,常量声明语句必须有初始化表达式,而变量声明语句可以没有。 33 | 34 | 一个声明语句会在当前作用域中创建一个给定类型和标识符的变量或常量。声明语句有以下语义约束: 35 | 36 | - 在同一作用域内,一个标识符只能由一个变量或常量使用。 37 | - 变量或常量的类型不能为 `void`。 38 | - 如果存在初始化表达式,其类型应当与变量声明时的类型相同。 39 | - 常量只能被读取,不能被修改。 40 | 41 | 出现违反约束的声明语句是编译期错误。 42 | 43 | > UB: 没有初始化的变量的值未定义。我们不规定对于使用未初始化变量的行为的处理方式,你可以选择忽略、提供默认值或者报错。 44 | 45 | > UB: 我们不考虑局部变量和全局函数重名的情况。局部变量和全局变量重名的时候应当覆盖全局变量定义。 46 | 47 | 以下是一些可以通过编译的变量声明的例子: 48 | 49 | ```rust,ignore 50 | let i: int; 51 | let j: int = 1; 52 | const k: double = 1.20; 53 | ``` 54 | 55 | 以下是一些不能通过编译的变量声明的例子: 56 | 57 | ```rust,ignore 58 | // 没有类型 59 | let l = 1; 60 | // 没有初始化 61 | const m: int; 62 | // 类型不匹配 63 | let n: double = 3; 64 | // 常量不能被修改 65 | const p: double = 3.0; 66 | p = 3.1415; 67 | ``` 68 | 69 | ## 控制流语句 70 | 71 | 基础 C0 中有三种控制流语句,分别是 `if`、`while` 和 `return` 语句。 72 | 73 | > 对于 `if` 和 `while` 的条件,如果求值结果是 `int` 类型,则所有非零值均视为 `true`。 74 | 75 | ### `if` 语句 76 | 77 | ``` 78 | if_stmt -> 'if' expr block_stmt ('else' (block_stmt | if_stmt))? 
79 | // ^~~~ ^~~~~~~~~~ ^~~~~~~~~~~~~~~~~~~~~~ 80 | // | if_block else_block 81 | // condition 82 | ``` 83 | 84 | `if` 语句代表一组可选执行的语句。 85 | 86 | `if` 语句的执行流程是: 87 | 88 | - 求 `condition` 的值 89 | - 如果值为 `true`,则执行 `if_block` 90 | - 否则,如果存在 `else_block`,执行 `else_block` 91 | - 否则,执行下一条语句 92 | 93 | 请注意,**if 语句的条件表达式可以没有括号**,且 **条件执行的语句都必须是代码块**。 94 | 95 | 以下是一些合法的 if 语句: 96 | 97 | ```rust,ignore 98 | if x > 0 { 99 | x = x + 1; 100 | } 101 | 102 | if y < 0 { 103 | z = -1; 104 | } else if y > 0 { 105 | z = 1; 106 | } else { 107 | z = 0; 108 | } 109 | ``` 110 | 111 | 以下是一些不合法的 if 语句: 112 | 113 | ```rust,ignore 114 | // 必须是代码块 115 | if x > 0 116 | x = x + 1; 117 | ``` 118 | 119 | ### `while` 语句 120 | 121 | ``` 122 | while_stmt -> 'while' expr block_stmt 123 | // ^~~~ ^~~~~~~~~~while_block 124 | // condition 125 | ``` 126 | 127 | while 语句代表一组可以重复执行的语句。 128 | 129 | while 语句的执行流程是: 130 | 131 | - 求值 `condition` 132 | - 如果为 `true` 133 | - 执行 `while_block` 134 | - 回到开头重新求值 135 | - 如果为 `false` 则执行之后的代码 136 | 137 | ### `return` 语句 138 | 139 | ``` 140 | return_stmt -> 'return' expr? 
';' 141 | ``` 142 | 143 | 使用 `return` 语句从一个函数中返回。return 语句可以携带一个表达式作为返回值。 144 | 145 | return 语句有以下的语义约束: 146 | 147 | - 如果函数声明的返回值是 `void`,return 语句不能携带返回值;否则,return 语句必须携带返回值 148 | - 返回值表达式的类型必须与函数声明的返回值类型相同 149 | - 当执行到返回值类型是 `void` 的函数的末尾时,应视作存在一个 return 语句进行返回 150 | 151 | > UB: 在基础 C0 中不会出现部分分支没有返回值的情况,所以没有返回语句的分支的返回值是未定义的。在扩展 C0 中你必须检查每个分支都能够正常返回。 152 | 153 | ## 代码块 154 | 155 | ``` 156 | block_stmt -> '{' stmt* '}' 157 | ``` 158 | 159 | 一个代码块可以包含零条或多条语句。执行代码块的效果是顺序执行这些语句。 160 | 161 | 在基础 c0 中,一个代码块中的声明语句只能在其他类型的语句之前出现。 162 | 163 | 在扩展 c0(作用域嵌套)中,一个代码块是其所在作用域的子作用域。在扩展 c0(变量声明增强)中,一个代码块的任何地方均可声明变量。 164 | 165 | ## 空语句 166 | 167 | ``` 168 | empty_stmt -> ';' 169 | ``` 170 | 171 | 空语句没有任何作用,只是一个分号而已。 172 | -------------------------------------------------------------------------------- /docs/src/c0/token.md: -------------------------------------------------------------------------------- 1 | # 单词 (Token) 2 | 3 | 单词是词法分析的结果。 4 | 5 | ## 关键字 6 | 7 | ``` 8 | FN_KW -> 'fn' 9 | LET_KW -> 'let' 10 | CONST_KW -> 'const' 11 | AS_KW -> 'as' 12 | WHILE_KW -> 'while' 13 | IF_KW -> 'if' 14 | ELSE_KW -> 'else' 15 | RETURN_KW -> 'return' 16 | 17 | // 这两个是扩展 c0 的 18 | BREAK_KW -> 'break' 19 | CONTINUE_KW -> 'continue' 20 | ``` 21 | 22 | c0 有 8 个关键字。扩展 c0 增加了 2 个关键字。 23 | 24 | ## 字面量 25 | 26 | ``` 27 | digit -> [0-9] 28 | UINT_LITERAL -> digit+ 29 | 30 | escape_sequence -> '\' [\\"'nrt] 31 | string_regular_char -> [^"\\] 32 | STRING_LITERAL -> '"' (string_regular_char | escape_sequence)* '"' 33 | 34 | // 扩展 c0 35 | DOUBLE_LITERAL -> digit+ '.' digit+ ([eE] [+-]? digit+)? 
36 | 37 | char_regular_char -> [^'\\] 38 | CHAR_LITERAL -> '\'' (char_regular_char | escape_sequence) '\'' 39 | ``` 40 | 41 | 基础 c0 有两种字面量,分别是 _无符号整数_ 和 _字符串常量_。扩展 c0 增加了 _浮点数常量_ 和 _字符常量_。 42 | 43 | 语义约束: 44 | 45 | - 字符串字面量中的字符可以是 ASCII 中除了双引号 `"`、反斜线 `\\`、空白符 `\r` `\n` `\t` 以外的任何字符。转义序列可以是 `\'`、`\"`、`\\`、`\n`、`\t`、`\r`,含义与 C 中的对应序列相同。 46 | 47 | > UB: 对于无符号整数和浮点数常量超出相应数据类型表示范围的情况我们不做规定。你可以选择报错也可以选择无视。 48 | 49 | ## 标识符 50 | 51 | ``` 52 | IDENT -> [_a-zA-Z] [_a-zA-Z0-9]* 53 | ``` 54 | 55 | c0 的标识符由下划线或字母开头,后面可以接零或多个下划线、字母或数字。标识符不能和关键字重复。 56 | 57 | ## 运算符 58 | 59 | ``` 60 | PLUS -> '+' 61 | MINUS -> '-' 62 | MUL -> '*' 63 | DIV -> '/' 64 | ASSIGN -> '=' 65 | EQ -> '==' 66 | NEQ -> '!=' 67 | LT -> '<' 68 | GT -> '>' 69 | LE -> '<=' 70 | GE -> '>=' 71 | L_PAREN -> '(' 72 | R_PAREN -> ')' 73 | L_BRACE -> '{' 74 | R_BRACE -> '}' 75 | ARROW -> '->' 76 | COMMA -> ',' 77 | COLON -> ':' 78 | SEMICOLON -> ';' 79 | ``` 80 | 81 | ## 注释 82 | 83 | 注释是扩展 c0 内容,见 [扩展 c0](extended-c0.md#注释) 84 | 85 | ``` 86 | COMMENT -> '//' regex(.*) '\n' 87 | ``` 88 | -------------------------------------------------------------------------------- /docs/src/c0/ty.md: -------------------------------------------------------------------------------- 1 | # 类型系统 2 | 3 | ## 基础类型 4 | 5 | c0 有一个十分简单的类型系统。在基础 C0 中你会用到的类型有两种: 6 | 7 | - 64 位有符号整数 `int` 8 | - 空类型 `void` 9 | 10 | 扩展 C0 增加了一种类型: 11 | 12 | - 64 位 IEEE-754 浮点数 `double` 13 | 14 | ## 类型表示 15 | 16 | ``` 17 | ty -> IDENT 18 | ``` 19 | 20 | 在 C0 中,用到类型的地方使用一个标识符表示。这个标识符的所有可能值就是上面列出的基础类型。填入其他值的情况应被视为编译错误。 21 | 22 | ## 关于布尔类型 23 | 24 | 比较运算符的运行结果是布尔类型。在 c0 中,我们并没有规定布尔类型的实际表示方式。在 navm 虚拟机中,所有非 0 的布尔值都被视为 `true`,而 0 被视为 `false`。 25 | -------------------------------------------------------------------------------- /docs/src/compile-pipeline.md: -------------------------------------------------------------------------------- 1 | # 编译过程概述 2 | 3 | 这次我们使用的编译目标是 C0,一种简化了 C 语言语义和编译过程、同时魔改了语法的玩具编程语言。 4 | 5 | ## 编译过程 6 | 7 | 整个 C0 工具链分为两个部分,分别是你接下来需要写的 **编译器** 
和我们提供的 **虚拟机**。整个编译流程如下: 8 | 9 | - 编译器(你要写的) 10 | - 读入 c0 源代码 11 | - 输出 o0 二进制代码 12 | - 虚拟机 13 | - 读入 o0 二进制代码 14 | - 解释执行代码 15 | - 输出运行结果 16 | 17 | 你的编译器至少应当能通过某种**命令行接口**读入一个 c0 文件,并将编译出的 o0 文件输出到另一个文件中。接口的实际特征我们不做规定,需要你自己定义并写在评测配置中,具体见 [评测要求](./judge.md)。 18 | 19 | o0 二进制代码的定义见 [这里](./navm/index.md#二进制格式)。 20 | 21 | 我们不限制你编写编译器使用的语言,且允许复用 miniplc0 实验中的代码。 22 | 23 | 如果你决定编译到其他目标(自己设计的指令集、LLVM IR、Java Bytecode、物理 CPU 指令集等),请联系助教进行单独手动评测。由于需要单独评测,你不太可能因此获得额外的加分。 24 | 25 | ## 编译目标 26 | 27 | 我们编译的目标是本课程自行设计的 `navm` 虚拟机使用的 `o0` 代码。有关 `navm` 虚拟机的设计、结构参见 [对 navm 虚拟机的介绍](./navm/index.md)。 28 | 29 | 比如如果你编译这个文件: 30 | 31 | ```rust,ignore 32 | fn foo(i: int) -> int { 33 | return -i; 34 | } 35 | 36 | fn main() -> void { 37 | putint(foo(-123456)); 38 | } 39 | ``` 40 | 41 | 你得到的结果应该类似于这个(`o0` 格式没有规定相应的文字形式,以下是官方参考实现 `natrium` 和 `navm` 使用的输出格式): 42 | 43 | ``` 44 | static: 66 6F 6F (`foo`) 45 | 46 | static: 70 75 74 69 6E 74 (`putint`) 47 | 48 | static: 6D 61 69 6E (`main`) 49 | 50 | static: 5F 73 74 61 72 74 (`_start`) 51 | 52 | 53 | fn [3] 0 0 -> 0 { 54 | 0: StackAlloc(0) 55 | 1: Call(2) 56 | } 57 | 58 | fn [0] 0 1 -> 1 { 59 | 0: ArgA(0) 60 | 1: ArgA(1) 61 | 2: Load64 62 | 3: NegI 63 | 4: Store64 64 | 5: Ret 65 | } 66 | 67 | fn [2] 0 0 -> 0 { 68 | 0: StackAlloc(0) 69 | 1: StackAlloc(1) 70 | 2: Push(123456) 71 | 3: NegI 72 | 4: Call(1) 73 | 5: CallName(1) 74 | 6: Ret 75 | } 76 | ``` 77 | -------------------------------------------------------------------------------- /docs/src/ebnf.md: -------------------------------------------------------------------------------- 1 | # 语法表示说明 2 | 3 | 本文介绍的是一种 EBNF 的变体,用于描述字符写成的编程语言语法。这个变体将每一个非终结符的产生式都表示成了一个正则表达式,以此来使得语言定义写起来更加精炼。 4 | 5 | 本变体的格式大量参考了 [Rust 的语法描述语言](https://doc.rust-lang.org/reference/notation.html)。 6 | 7 | ## 非终结符 8 | 9 | 非终结符由一段由字母、数字或下划线组成的字符串表示,例如 `expr`, `if_stmt`。其中,字母全大写的字符串,如 `IDENT`,表示这个非终结符是一个单词(token)。 10 | 11 | ## 终结符 12 | 13 | 终结符包括字符串、字符范围和正则表达式。其中: 14 | 15 | - 字符串由双引号 `"` 或单引号 `'` 包括,表示等同于内容的字符序列,如 
`"while"`、`"y"`、`"+="`。 16 | 17 | - 字符范围由方括号包括,内部填写包括的字符或字符范围,表示符合要求的任一字符。其中,用短横线 `-` 连接的两个字符表示字符编码中介于两个字符值之间(含端点)的字符。如 `[abcde]`(等同于 `[a-e]`)、`[_0-9a-zA-Z]`。 18 | 19 | 以 `^` 开头的字符范围表示不在范围内的任一字符,如 `[^abc]`、`[^A-Z]`。 20 | 21 | - 正则表达式由 `regex(` `)` 包括,内部是正则表达式,如 `regex(\w+)`、`regex(\w+://(\w+\.)+\.com)` 22 | 23 | 字符范围和字符串遵循 C 风格的转义序列,即使用反斜线 `\\` 后跟随字符组成。如果某个转义序列没有含义,则表示反斜线后的字符本身。 24 | 25 | ## 产生式 26 | 27 | 产生式左侧是非终结符,右侧是一个由终结符和非终结符组成的正则表达式,中间以箭头 `->` 连接。一个产生式占一行和之后缩进的所有行,如: 28 | 29 | ``` 30 | sign -> [+-] 31 | fractional_part -> "." dec_number 32 | my_expression -> 33 | this_is_a_very_long_keyword this_is_a_very_long_expression ";" 34 | ``` 35 | 36 | 当一个非终结符有多个产生式时,右侧的不同产生式用竖线 `|` 分隔,表示“或”的关系。当产生式过长时,也可另起一行缩进书写。如: 37 | 38 | ``` 39 | my_keyword -> "fn" | "class" 40 | binary_operator -> 41 | "+" | "-" | "*" | "/" 42 | | "=" | ">" | "<" | ">=" | "<=" | "==" | "!=" 43 | ``` 44 | 45 | 和正则表达式相同,可以省略的表达式用问号 `?` 修饰,如 `"public"? "class" identifier`; 46 | 47 | 可以重复一次或多次的表达式用加号 `+` 修饰,如 `[0-9a-f]+`; 48 | 49 | 可以重复零次或多次的表达式用星号 `*` 修饰,如 `[1-9] [0-9]*`; 50 | 51 | 可以重复指定次数次的表达式后面用大括号括起来数字修饰,其中: 52 | 53 | - `{m}` 表示指定重复 `m` 次; 54 | - `{m,n}` 表示重复 `m` 到 `n` 次; 55 | - `{m,}` 表示重复 `m` 次及以上; 56 | - `{,n}` 表示重复 0 到 `n` 次; 57 | 58 | 将一系列符号用小括号 `(` `)` 包括起来表示分组,分组内的符号作为一个整体看待,如 `(item ",")+`。 59 | -------------------------------------------------------------------------------- /docs/src/judge.md: -------------------------------------------------------------------------------- 1 | # 提交与评测说明 2 | 3 | C0 大作业的提交方式是自动评测,评测的方式之后会写在这里。 4 | 5 | ## 程序要求 6 | 7 | 你提交的程序应当至少能通过命令行参数接收一个 c0 代码文件的输入,并输出一个 o0 二进制文件。 8 | 9 | 如果编译过程中出现了错误(语法、语义、编译过程错误等),你的程序应当**以非 0 的返回值退出**。否则如果一切正常,你的程序应当以返回值 0 退出。 10 | 11 | ## 提交要求 12 | 13 | 见 。 14 | -------------------------------------------------------------------------------- /docs/src/navm/faq.md: -------------------------------------------------------------------------------- 1 | # 应该比较常见的问题 2 | 3 | ## 条件跳转 4 | 5 | 如果需要实现条件跳转,请使用以下指令的组合(`T` 代表 `u`、`f` 或 `i`;在 
**符合条件** 时跳转): 6 | 7 | - 等于:`cmp.T`, `br.false` 8 | - 不等于:`cmp.T`, `br.true` 9 | - 大于:`cmp.T`, `set.gt`, `br.true` 10 | - 小于:`cmp.T`, `set.lt`, `br.true` 11 | - 大于等于:`cmp.T`, `set.lt`, `br.false` 12 | - 小于等于:`cmp.T`, `set.gt`, `br.false` 13 | 14 | ## 局部变量和参数的存取 15 | 16 | 在 navm 中,局部变量和参数是分开存储的。其中,参数和返回值(`arg`)存储在一起,从栈底方向开始顺序编号。局部变量(`loc`)存储在另一个位置,也从栈底开始顺序编号。比如: 17 | 18 | ``` 19 | | d | ↑ loc.1 20 | | c | 局部变量 loc.0 21 | |==============| 22 | | 1 | ↑ 23 | | %ip | 24 | | %bp | 虚拟机数据 25 | |==============| 26 | | b | ↑ arg.2 27 | | a | 参数 arg.1 28 | | _ret | 返回值 arg.0 29 | | ... | 30 | ``` 31 | 32 | 此时执行 `loca 1` 获得的就是变量 `d` 的地址,执行 `arga 0` 获得的就是返回值的地址。 33 | 34 | 获取到地址之后,就可以执行存取操作了。我们用的基本都是 64 位数据类型,所以使用 `load.64` 和 `store.64` 指令就可以了。 35 | 36 | ``` 37 | # 加载局部变量 1 38 | loca 1 39 | load.64 40 | 41 | # 存储 0 到参数 0 42 | arga 0 43 | push 0 44 | store.64 45 | 46 | # 将局部变量 1 拷贝到局部变量 0 47 | loca 0 48 | loca 1 49 | load.64 50 | store.64 51 | ``` 52 | -------------------------------------------------------------------------------- /docs/src/navm/index.md: -------------------------------------------------------------------------------- 1 | # navm 虚拟机标准 2 | 3 | 本次大作业的编译目标是 Natrium 虚拟机 (navm) 的汇编 (o0)。其设计参考了 JVM、DotNet CLR 和上学期的 c0 虚拟机。 4 | 5 | ## 虚拟机简介 6 | 7 | navm 是一个 [栈式虚拟机][stack_machine] —— 简单来说就是,它的寄存器是一个栈。除了少数内存访问指令以外,navm 的大部分指令都只操作位于栈顶的数据。堆栈式计算机的指令与 [逆波兰表示法(后缀表示法)][reverse_polish_notation] 表示的表达式(或者说后序遍历的表达式树)有简单的对应关系。 8 | 9 | navm 有 64 位有符号整数、无符号整数、浮点数三种数据类型。详见 [数据类型](#数据类型) 节。 10 | 11 | navm 使用 64 位无符号整数表示地址,具体实现不需要关心。 12 | 13 | navm 使用自制的指令集,共有 50+ 个指令,详见 [指令集说明](./instruction.md)。 14 | 15 | [stack_machine]: https://en.wikipedia.org/wiki/Stack_machine 16 | [reverse_polish_notation]: https://en.wikipedia.org/wiki/Reverse_Polish_notation 17 | 18 | ## 内存空间 19 | 20 | navm 的内存空间以 8 位(1 字节)为单位寻址。8、16、32、64 位的数据类型分别以其大小为单位在内存中对齐。当读取或写入操作未对齐时,会产生 `UnalignedAccess` 错误。 21 | 22 | navm 的栈空间以 8 字节为一个 slot,压栈、弹栈以及各种运算操作均以 slot 为单位进行。默认情况下,栈的大小是 1 MiB (1048576 字节),即 131072 个 
slot。栈空时弹栈和栈满时压栈分别会产生 `StackUnderflow` 和 `StackOverflow` 错误。 23 | 24 | ## 数据类型 25 | 26 | navm 在运算中支持三种基本数据类型,分别是 64 位无符号整数 `u64`、64 位有符号整数 `i64`、64 位浮点数 `f64`。长度更短的整数可以使用 `u64` 和 `i64` 模拟。 27 | 28 | `u64` 和 `i64` 都是 64 位整数,使用[二进制补码][2s_complement]形式表示。两种类型在多数整数运算中不做区分,仅在 `cmp.T`(比较指令,见下)等两种运算结果有差别的地方有所区分。在运算溢出时,两种类型均采用环绕 (wrap-around) 方式处理结果。`u64` 同时也可以表示虚拟机中的内存地址。 29 | 30 | `f64` 是符合 [IEEE 754][ieee754] 规定的[双精度浮点数][double]。 31 | 32 | [2s_complement]: https://en.wikipedia.org/wiki/Two%27s_complement 33 | [ieee754]: https://en.wikipedia.org/wiki/IEEE_754 34 | [double]: https://en.wikipedia.org/wiki/Double-precision_floating-point_format 35 | 36 | ## 二进制格式 37 | 38 | o0 是 navm 所使用的二进制程序文件格式,其作用和内容类似 Java 的 `.class` 文件或者 DotNet 的 `.dll` 文件。 39 | 40 | > 注意:之前这里写错了,所有的 `u16` 都应为 `u32`。 41 | 42 | 下面的结构体表示了 o0 的二进制文件结构(也就是说你输出的时候应该按顺序输出下面这些结构体的各个字段的内容,中间不加空隙)。其中,`uXX` 表示 XX 位无符号整数。所有涉及到的多字节整数都是大端序,即高位字节在前、低位字节在后。 43 | 44 | ```rust,ignore 45 | /// 整个 o0 二进制文件 46 | struct o0 { 47 | /// 魔数 48 | magic: u32 = 0x72303b3e, 49 | /// 版本号,定为 1 50 | version: u32 = 0x00000001, 51 | /// 全局变量表 52 | globals: Array<GlobalDef>, 53 | /// 函数列表 54 | functions: Array<FunctionDef>, 55 | } 56 | 57 | /// 类型为 T 的通用数组的定义 58 | struct Array<T> { 59 | /// 数组的长度 60 | count: u32, 61 | /// 数组所有元素的无间隔排列 62 | items: T[], 63 | } 64 | 65 | /// 单个全局变量 66 | struct GlobalDef { 67 | /// 是否为常量?非零值视为真 68 | is_const: u8, 69 | /// 按字节顺序排列的变量值 70 | value: Array<u8>, 71 | } 72 | 73 | /// 函数 74 | struct FunctionDef { 75 | /// 函数名称在全局变量中的位置 76 | name: u32, 77 | /// 返回值占据的 slot 数 78 | return_slots: u32, 79 | /// 参数占据的 slot 数 80 | param_slots: u32, 81 | /// 局部变量占据的 slot 数 82 | loc_slots: u32, 83 | /// 函数体 84 | body: Array<Instruction>, 85 | } 86 | 87 | /// 指令,可以是以下三个选择之一 88 | union Instruction { 89 | /// 无参数的指令,占 1 字节 90 | variant NoParam { 91 | opcode: u8 92 | }, 93 | /// 有 4 字节参数的指令,占 5 字节 94 | variant u32Param { 95 | opcode: u8, 96 | param: u32, 97 | } 98 | /// 有 8 字节参数的指令,占 9 字节 99 | variant u64Param { 100 | opcode: u8, 101 | param: u64 102 | } 103 | } 104 | 
``` 105 | 106 | 下面是一个合法的 o0 文件的例子(**每一字节以十六进制或字符常量展示**,`//` 后的是注释): 107 | 108 | ``` 109 | // start 110 | 72 30 3b 3e // magic 111 | 00 00 00 01 // version 112 | 113 | 00 00 00 02 // globals.count 114 | 115 | // globals[0] 116 | 00 // globals[0].is_const 117 | 00 00 00 08 // globals[0].value.count 118 | 00 00 00 00 00 00 00 00 // globals[0].value.items 119 | 120 | // globals[1] 121 | 01 // globals[1].is_const 122 | 00 00 00 06 // globals[1].value.count 123 | '_' 's' 't' 'a' 'r' 't' // globals[1].value.items 124 | 125 | 00 00 00 01 // functions.count 126 | 127 | // functions[0] 128 | 00 00 00 01 // functions[0].name 129 | 00 00 00 00 // functions[0].ret_slots 130 | 00 00 00 00 // functions[0].param_slots 131 | 00 00 00 00 // functions[0].loc_slots 132 | 00 00 00 04 // functions[0].body.count 133 | // functions[0].body.items 134 | 01 00 00 00 00 00 00 00 01 // Push(1) 135 | 01 00 00 00 00 00 00 00 02 // Push(2) 136 | 20 // AddI 137 | 34 // NegI 138 | // finish 139 | ``` 140 | 141 | ## 栈帧结构 142 | 143 | > 这里描述的是 **这个** navm 实现中使用的栈帧结构。 144 | 145 | ``` 146 | | ... | 147 | | | <- 栈顶 %sp 148 | | 表达式栈 ... | 149 | | 表达式栈 | 150 | | 局部变量 ... | 151 | | 局部变量 | 152 | | 虚拟机参数... | 153 | | 虚拟机参数 | <- 被调用者栈底 %bp 154 | |===============|=== 155 | | 调用参数 ... | v 156 | | 调用参数 | | 157 | | 返回值 | | 158 | | 中间结果 | 调用者栈 159 | | ... | ^ 160 | |===============|=== 161 | ``` 162 | 163 | 其中,调用参数和返回值由调用者压栈,调用参数在函数返回后由被调用者清理。 164 | 165 | ### 虚拟机参数 166 | 167 | 虚拟机会在调用参数和局部变量之间插入一系列的虚拟机参数以辅助虚拟机运行,目前本虚拟机存储的参数格式为(从栈顶到栈底): 168 | 169 | ``` 170 | | ... | 171 | | 局部变量 | 172 | |=================| 173 | | 调用者函数 ID | 174 | | 调用者 %ip | 175 | | 调用者 %bp | 176 | |=================| 177 | | 参数 | 178 | | ... | 179 | ``` 180 | 181 | ### 函数调用时栈帧变化示例 182 | 183 | 假设现有一个函数 `test`,有 1 slot 的返回值、2 slot 的参数和 2 slot 的局部变量。 184 | 185 | ```rust,ignore 186 | /// 可以看成是这样的一个函数 187 | fn test(a: int, b: int) -> int { 188 | let c: int = ...; 189 | let d: int = ...; 190 | ... 
191 | return ...; 192 | } 193 | ``` 194 | 195 | 现在,它被编号为 1 的函数 `main` 调用。在调用前,调用者应压入 1 slot 的返回值预留空间、2 slot 的参数(顺序压栈),再通过调用指令调用这个函数。调用前的栈应该长这样: 196 | 197 | ``` 198 | | - | 199 | |============|<- 栈顶 200 | | b | ↑ 201 | | a | 参数 202 | | _ret | 返回值 203 | | ... | ...表达式栈 204 | ``` 205 | 206 | 在执行 `call` 指令后,栈中的变量(**局部变量空间将由虚拟机自动创建**)以及对应的偏移量如下: 207 | 208 | ``` 209 | | - | <- 栈顶(表达式栈) 210 | | d | ↑ loc.1 211 | | c | 局部变量 loc.0 212 | |==============| 213 | | 1 | ↑ 214 | | %ip | 215 | | %bp | 虚拟机数据 216 | |==============| 217 | | b | ↑ arg.2 218 | | a | 参数 arg.1 219 | | _ret | 返回值 arg.0 220 | | ... | 221 | ``` 222 | 223 | 在函数调用返回后,栈如下: 224 | 225 | ``` 226 | | - | 227 | | // d | 228 | | // c | 229 | | // 1 | 230 | | // %ip | 231 | | // %bp | ↑ 232 | | // b | | 233 | | // a | 以上内容被弹栈 234 | |============| <- 栈顶 235 | | _ret | 返回值 236 | | ... | 237 | ``` 238 | 239 | ## 程序入口 240 | 241 | navm 总是会最先运行函数列表里编号为 0 的(也就是整个列表中第一个)函数,按照惯例这个函数的名称为 `_start`。`_start` 函数没有任何参数,也不返回任何值,这两项的参数会被忽略。`_start` 函数不能有返回指令。 242 | 243 | 一般来说,程序会在 `_start` 中设置全局变量的值,以及进行其他的准备工作。在准备工作完成之后,`_start` 函数应当调用 `main` 函数开始正式的程序运行。如果需要,`_start` 函数也可以在 `main` 函数返回之后进行清理工作。`_start` 函数不需要返回。 244 | 245 | 一个示例的 `_start` 函数如下: 246 | 247 | ``` 248 | fn _start 0 0 -> 0 { 249 | // 设置全局变量 1 的值为 1 + 1; 250 | globa 1 251 | push 1 252 | push 1 253 | add.i 254 | store.64 255 | // 调用 main 256 | call 4 257 | // 没有返回语句 258 | } 259 | ``` 260 | 261 | ## 关于全局变量 262 | 263 | 在 navm 中,每个全局变量都是多个字节组成的数组。全局变量的编号是它在全局变量表中的序号(0 开始)。 264 | 265 | ### 用来存储数字 266 | 267 | 使用全局变量存储数字的初始化操作建议在 `_start` 函数中进行,这样不用考虑字节顺序问题。如果你直接给全局变量赋初始值的话,请使用小端序存储(低位字节在前,高位字节在后)。 268 | 269 | ### 用来存储字符串 270 | 271 | 使用全局变量存储字符串时,直接将初始值设置为以 ASCII 存储的字符串内容(类似于 memcpy)即可。存储的字符串不需要以 `\0` 结尾。 272 | -------------------------------------------------------------------------------- /docs/src/navm/instruction.md: -------------------------------------------------------------------------------- 1 | # 指令集 2 | 3 | navm 的指令使用 8 位(1 字节)无符号整数标识,后面跟随可变长度的操作数。操作数类型为 `u64` `i64` 
时,长度为 64 位(8 字节),类型为 `u32` `i32` 时,长度为 32 位(4 字节)。 4 | 5 | > 勘误:之前 `cmp.u` 和 `cmp.f` 的指令写反了 6 | 7 | 下表展示了 navm 的所有指令。其中弹栈和压栈的格式为:`栈变化范围[:变量]`,数字按照栈底到栈顶编号。 8 | 9 | | 指令 | 指令名 | 操作数 | 弹栈 | 压栈 | 介绍 | 10 | | ---- | ------------ | -------- | ------------- | ------------ | -------------------------------------------- | 11 | | 0x00 | `nop` | - | - | - | 空指令 | 12 | | 0x01 | `push` | num:u64 | - | 1:num | 将 num 压栈 | 13 | | 0x02 | `pop` | - | 1 | | 弹栈 1 个 slot | 14 | | 0x03 | `popn` | num:u32 | 1-num | - | 弹栈 num 个 slot | 15 | | 0x04 | `dup` | - | 1:num | 1:num, 2:num | 复制栈顶 slot | 16 | | 0x0a | `loca` | off:u32 | - | 1:addr | 加载 off 个 slot 处局部变量的地址 | 17 | | 0x0b | `arga` | off:u32 | - | 1:addr | 加载 off 个 slot 处参数/返回值的地址 | 18 | | 0x0c | `globa` | n:u32 | - | 1:addr | 加载第 n 个全局变量/常量的地址 | 19 | | 0x10 | `load.8` | - | 1:addr | 1:val | 从 addr 加载 8 位 value 压栈 | 20 | | 0x11 | `load.16` | - | 1:addr | 1:val | 从 addr 加载 16 位 value 压栈 | 21 | | 0x12 | `load.32` | - | 1:addr | 1:val | 从 addr 加载 32 位 value 压栈 | 22 | | 0x13 | `load.64` | - | 1:addr | 1:val | 从 addr 加载 64 位 value 压栈 | 23 | | 0x14 | `store.8` | - | 1:addr, 2:val | - | 把 val 截断到 8 位存入 addr | 24 | | 0x15 | `store.16` | - | 1:addr, 2:val | - | 把 val 截断到 16 位存入 addr | 25 | | 0x16 | `store.32` | - | 1:addr, 2:val | - | 把 val 截断到 32 位存入 addr | 26 | | 0x17 | `store.64` | - | 1:addr, 2:val | - | 把 val 存入 addr | 27 | | 0x18 | `alloc` | - | 1:size | 1:addr | 在堆上分配 size 字节的内存 | 28 | | 0x19 | `free` | - | 1:addr | - | 释放 addr 指向的内存块 | 29 | | 0x1a | `stackalloc` | size:u32 | - | - | 在当前栈顶分配 size 个 slot,初始化为 0 | 30 | | 0x20 | `add.i` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs + rhs,参数为整数 | 31 | | 0x21 | `sub.i` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs - rhs,参数为整数 | 32 | | 0x22 | `mul.i` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs * rhs,参数为整数 | 33 | | 0x23 | `div.i` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs / rhs,参数为有符号整数 | 34 | | 0x24 | `add.f` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs + rhs,参数为浮点数 | 35 | | 0x25 | `sub.f` | - | 1:lhs, 
2:rhs | 1:res | 计算 res = lhs - rhs,参数为浮点数 | 36 | | 0x26 | `mul.f` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs * rhs,参数为浮点数 | 37 | | 0x27 | `div.f` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs / rhs,参数为浮点数 | 38 | | 0x28 | `div.u` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs / rhs,参数为无符号整数 | 39 | | 0x29 | `shl` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs << rhs | 40 | | 0x2a | `shr` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs >> rhs (算术右移) | 41 | | 0x2b | `and` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs & rhs | 42 | | 0x2c | `or` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs | rhs | 43 | | 0x2d | `xor` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs ^ rhs | 44 | | 0x2e | `not` | - | 1:lhs | 1:res | 计算 res = !lhs | 45 | | 0x30 | `cmp.i` | - | 1:lhs, 2:rhs | 1:res | 比较有符号整数 lhs 和 rhs 大小 | 46 | | 0x31 | `cmp.u` | - | 1:lhs, 2:rhs | 1:res | 比较无符号整数 lhs 和 rhs 大小 | 47 | | 0x32 | `cmp.f` | - | 1:lhs, 2:rhs | 1:res | 比较浮点数 lhs 和 rhs 大小 | 48 | | 0x34 | `neg.i` | - | 1:lhs | 1:res | 对 lhs 取反 | 49 | | 0x35 | `neg.f` | - | 1:lhs | 1:res | 对 lhs 取反 | 50 | | 0x36 | `itof` | - | 1:lhs | 1:res | 把 lhs 从整数转换成浮点数 | 51 | | 0x37 | `ftoi` | - | 1:lhs | 1:res | 把 lhs 从浮点数转换成整数 | 52 | | 0x38 | `shrl` | - | 1:lhs, 2:rhs | 1:res | 计算 res = lhs >>> rhs (逻辑右移) | 53 | | 0x39 | `set.lt` | - | 1:lhs | 1:res | 如果 lhs < 0 则推入 1,否则 0 | 54 | | 0x3a | `set.gt` | - | 1:lhs | 1:res | 如果 lhs > 0 则推入 1,否则 0 | 55 | | 0x41 | `br` | off:i32 | | | 无条件跳转偏移 `off` | 56 | | 0x42 | `br.false` | off:i32 | 1:test | | 如果 `test` 是 0 则跳转偏移 `off` | 57 | | 0x43 | `br.true` | off:i32 | 1:test | | 如果 `test` 非 0 则跳转偏移 `off` | 58 | | 0x48 | `call` | id:u32 | | 见栈帧介绍 | 调用编号为 id 的函数 | 59 | | 0x49 | `ret` | - | 见栈帧介绍 | | 从当前函数返回 | 60 | | 0x4a | `callname` | id:u32 | | 见栈帧介绍 | 调用名称与编号为 id 的全局变量内容相同的函数 | 61 | | 0x50 | `scan.i` | - | - | 1:n | 从标准输入读入一个整数 n | 62 | | 0x51 | `scan.c` | - | - | 1:c | 从标准输入读入一个字符 c | 63 | | 0x52 | `scan.f` | - | - | 1:f | 从标准输入读入一个浮点数 f | 64 | | 0x54 | `print.i` | - | 1:x | - | 向标准输出写入一个有符号整数 x | 65 | | 0x55 | `print.c` | - 
| 1:c | - | 向标准输出写入字符 c | 66 | | 0x56 | `print.f` | - | 1:f | - | 向标准输出写入浮点数 f | 67 | | 0x57 | `print.s` | - | 1:i | - | 向标准输出写入全局变量 i 代表的字符串 | 68 | | 0x58 | `println` | - | - | - | 向标准输出写入一个换行 | 69 | | 0xfe | `panic` | | | | 恐慌(强行退出) | 70 | 71 | 72 | 73 | ### `cmp.T` 指令 74 | 75 | 指令会在 `lhs < rhs` 时压入 `-1`, `lhs > rhs` 时压入 `1`, `lhs == rhs` 时压入 `0`。浮点数无法比较时压入 `0`。 76 | 77 | ### `load.8/16/32/64` 指令 78 | 79 | 指令会从 `addr` 处取 `T` 长度的数据压入栈中。如果 `addr` 不是 `T` 的倍数,将会产生 `UnalignedAccess` 错误。如果 `T` 小于 64,多余的数位将会被补成 0。 80 | 81 | ### `store.8/16/32/64` 指令 82 | 83 | 指令会将 `T` 长度的数据弹栈并存入 `addr` 地址处。如果 `addr` 不是 `T` 的倍数,将会产生 `UnalignedAccess` 错误。如果 `T` 小于 64,数据将被截断至 `T` 长度。 84 | 85 | ### `br` 系列分支指令 86 | 87 | 指令会将(指向下一条指令的)当前指令指针 `ip` 与 `offset` 相加得出新的指令指针的值。比如如果 `br 3` 是一个函数的第 5 条指令,那么此时 `ip` = 6,指令执行之后 `ip` = 6 + 3 = 9。`br.true`, `br.false` 指令的执行规则相同。 88 | 89 | ### 90 | -------------------------------------------------------------------------------- /docs/src/preface.md: -------------------------------------------------------------------------------- 1 | # 前言 2 | 3 | 本指导书为 C0 指导书,阅读本指导书前,请确保你已经顺利完成了 mini 实验并且对编译器的结构和编译过程有了基本认识。 4 | 5 | 这次实验要实现的编译器,根据你选择的编程语言、模型不同,代码量可能会在 2000--5000 行之间。作为参考,使用 Rust 编写的参考实现的代码量约为 3500 行。 6 | 7 | 本指导书仍然还是 Beta 版本。如果你在书中发现了(包括但不限于)以下问题,欢迎积极联系助教,或者提 Issue/PR 修正,可能会有加分哦 ouo: 8 | 9 | - 难以理解的表述 10 | - 逻辑/知识错误 11 | - 代码错误 12 | - 前后矛盾 13 | - 代码不对应/过时 14 | - 任何可以优化的部分 15 | 16 | 以上,祝各位同学编译愉快! 
17 | 18 | —— 你们的魔鬼助教(笑) 19 | -------------------------------------------------------------------------------- /docs/src/ref-impl.md: -------------------------------------------------------------------------------- 1 | # 参考实现 2 | 3 | [这个仓库][natrium] 中保存了一个实现了全部扩展 C0 的编译器参考实现,以及相关的工具链。你可以在 Release 中下载相关程序自己测试(),也可以在 使用浏览器版本测试。 4 | 5 | [natrium]: https://github.com/BUAA-SE-Compiling/natrium 6 | -------------------------------------------------------------------------------- /docs/src/reference.md: -------------------------------------------------------------------------------- 1 | # 参考资料 2 | -------------------------------------------------------------------------------- /docs/src/requirement.md: -------------------------------------------------------------------------------- 1 | # C0 编译实验安排及要求 2 | 3 | ## 安排 4 | 5 | **时间:** 2020-11-13(第 10 周) -- 2021-01-03(第 17 周)(作业提交截止到考期开始前) 6 | 7 | **提交方式:** 于 在线评测,详见 [提交与评测说明](./judge.md)。 8 | 9 | ## 要求 10 | 11 | 1. 以个人为单位进行开发,不得多人合作完成; 12 | 2. 利用实验课和课余时间完成; 13 | 3. 编程语言、实现方式自定; 14 | 4. 输入语言为 [C0](./c0/c0.md),输出语言为 [虚拟机代码](./navm/index.md); 15 | 5. 存在选做部分([扩展 C0](./c0/extended-c0.md)),实现选做部分可以获得更高分数; 16 | 6. 
程序具体要求见本指导书其余内容。 17 | 18 | ## 成绩评定 19 | 20 | 学生进行在线评测,取提交截止前最后一次测试的结果,所有通过的测试点权重之和为学生最终成绩。 21 | 22 | ## 参考书 23 | 24 | 龙书、虎书、狼书等 25 | -------------------------------------------------------------------------------- /docs/src/res/200331-natrium.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BUAA-SE-Compiling/natrium/779e9b5963a4870b724e6ee8de0ba2d5d7d645e6/docs/src/res/200331-natrium.png -------------------------------------------------------------------------------- /docs/src/res/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BUAA-SE-Compiling/natrium/779e9b5963a4870b724e6ee8de0ba2d5d7d645e6/docs/src/res/banner.png -------------------------------------------------------------------------------- /docs/src/res/base.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BUAA-SE-Compiling/natrium/779e9b5963a4870b724e6ee8de0ba2d5d7d645e6/docs/src/res/base.png -------------------------------------------------------------------------------- /docs/src/res/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BUAA-SE-Compiling/natrium/779e9b5963a4870b724e6ee8de0ba2d5d7d645e6/docs/src/res/logo.png -------------------------------------------------------------------------------- /docs/src/scoring.md: -------------------------------------------------------------------------------- 1 | # 评分标准 2 | 3 | ## 目前可选的实现点 4 | 5 | - 注释: 5 6 | - 类型系统 7 | - char(只保留字面量): 5 8 | - double: 25 9 | - 布尔运算: 10 10 | - 嵌套作用域 11 | - 初级(只能在作用域顶部声明变量): 10 12 | - 高级(作用域内任意位置均可声明变量): 5 13 | - 控制流 14 | - break/continue: 10 15 | - 检测所有分支都要有返回值: 10 16 | -------------------------------------------------------------------------------- /docs/src/todo.md: 
-------------------------------------------------------------------------------- 1 | # TODO 2 | 3 | 还没写完。 4 | -------------------------------------------------------------------------------- /docs/theme/css/additional.css: -------------------------------------------------------------------------------- 1 | body { 2 | line-height: 175%; 3 | font-family: Iosevka, Fira Sans, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 4 | 'Helvetica Neue', sans-serif; 5 | } 6 | 7 | code { 8 | line-height: 150%; 9 | /* The original file has !important here so don't blame me */ 10 | font-family: Iosevka, Inconsolata, Consolas, 'Roboto Mono', 'San Francisco Mono', 'Courier New', 11 | Courier, monospace !important; 12 | } 13 | -------------------------------------------------------------------------------- /judge.toml: -------------------------------------------------------------------------------- 1 | [jobs.c0] 2 | image = {source = "dockerfile", path = ".", tag = "rynco-c0"} 3 | run = ["target/release/natrium $input --emit o0 -o $asm"] 4 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Codename: Natrium | 呐元素(?) 2 | 3 | ![logo](docs/src/res/logo.png) 4 | 5 | 这个 repo 里面保存着有关 2018 级北航软件学院学生编译原理课的课程规划和代码。 6 | 7 | ## 目录结构 8 | 9 | - 根目录 - 示例编译器+虚拟机可执行文件(待定) 10 | - `crates/r0vm` - 虚拟机 & 二进制格式库 11 | - `crates/syntax` - 语法分析库 12 | - `crates/r0codegen` - 代码生成库 13 | - `docs/src` - 文档 14 | 15 | ## 进度 16 | 17 | | 部分 | 状态 | 进度 | 18 | | -------------------- | ---- | -------- | 19 | | 编译源语言 | 📘 | 好了 | 20 | | 虚拟机指令集 | 📕 | 基本确定 | 21 | | 虚拟机汇编二进制格式 | 🔥 | 文字? 
| 22 | | 虚拟机 | 📕 | 基本完成 | 23 | | 示例编译器 | 📘 | 好了 | 24 | | 实验设计 | 📝 | 准备中 | 25 | | 实验安排 | 📝 | 草案 | 26 | | 评测安排 | 📕 | 还行 | 27 | | 评测姬 | 📕 | 很有精神 | 28 | 29 | 30 | ## 后记 31 | 32 | 我赌你们不会 Rust(逃 33 | -------------------------------------------------------------------------------- /src/bin/r0vm.rs: -------------------------------------------------------------------------------- 1 | use clap::{AppSettings, Clap}; 2 | use clap::{FromArgMatches, IntoApp}; 3 | use crossterm::{style::Attribute, ExecutableCommand, QueueableCommand}; 4 | use natrium::util::pretty_print_error; 5 | use r0vm::{opcodes::Op, s0::io::WriteBinary, vm::R0Vm}; 6 | use r0vm::{s0::S0, vm}; 7 | use std::{io::stdout, path::PathBuf, str::FromStr}; 8 | 9 | pub fn main() { 10 | let opt = Opt::parse(); 11 | let sub = tracing_subscriber::FmtSubscriber::builder() 12 | .compact() 13 | .without_time() 14 | .with_max_level(opt.log) 15 | .finish(); 16 | tracing::subscriber::set_global_default(sub).unwrap(); 17 | 18 | let mut file = match std::fs::File::open(&opt.file) { 19 | Ok(f) => f, 20 | Err(e) => { 21 | eprintln!("Cannot open file {}: {}", opt.file.to_string_lossy(), e); 22 | return; 23 | } 24 | }; 25 | 26 | let s0 = match S0::read_binary(&mut file) { 27 | Ok(Some(s)) => s, 28 | Ok(None) => { 29 | eprintln!("File is not valid s0"); 30 | return; 31 | } 32 | Err(e) => { 33 | eprintln!("File is not valid s0: {}", e); 34 | return; 35 | } 36 | }; 37 | 38 | if opt.dump { 39 | println!("{}", s0); 40 | } else if opt.debug { 41 | debug_run(&s0) 42 | } else { 43 | run(&s0) 44 | } 45 | } 46 | 47 | fn run(s0: &S0) { 48 | let mut vm = create_vm_stdio(s0); 49 | match vm.run_to_end() { 50 | Ok(_) => {} 51 | Err(e) => { 52 | eprintln!("Runtime error: {}", e); 53 | eprintln!("{}", vm.debug_stack()); 54 | std::process::exit(1); 55 | } 56 | }; 57 | } 58 | 59 | fn create_vm_stdio(s0: &S0) -> R0Vm { 60 | let stdin = std::io::stdin(); 61 | let stdout = stdout(); 62 | match vm::R0Vm::new(s0, Box::new(stdin), Box::new(stdout)) { 63 | 
Ok(vm) => vm, 64 | Err(e) => { 65 | panic!("Failed to create VM: {}", e) 66 | } 67 | } 68 | } 69 | 70 | macro_rules! print_unwrap { 71 | ($calc:expr,$p:pat => $if_true:block) => { 72 | match $calc { 73 | $p => $if_true, 74 | Err(e) => println!("Error: {:?}", e), 75 | } 76 | }; 77 | } 78 | 79 | fn debug_run(s0: &S0) { 80 | let mut vm = create_vm_stdio(s0); 81 | let mut breakpoints = bimap::BiBTreeMap::::new(); 82 | 83 | let mut terminal = rustyline::Editor::<()>::with_config( 84 | rustyline::Config::builder().max_history_size(100).build(), 85 | ); 86 | loop { 87 | stdout() 88 | .queue(crossterm::style::SetForegroundColor( 89 | crossterm::style::Color::White, 90 | )) 91 | .unwrap() 92 | .execute(crossterm::style::SetAttribute(Attribute::Bold)) 93 | .unwrap(); 94 | 95 | let line = terminal.readline("navm |><> "); 96 | 97 | stdout() 98 | .queue(crossterm::style::SetForegroundColor( 99 | crossterm::style::Color::Reset, 100 | )) 101 | .unwrap() 102 | .execute(crossterm::style::SetAttribute(Attribute::Reset)) 103 | .unwrap(); 104 | 105 | match line { 106 | Ok(mut line) => { 107 | // If line is empty, repeat the last instruction 108 | if line.trim().is_empty() { 109 | match terminal.history().last().cloned() { 110 | Some(history_line) => line = history_line, 111 | None => { 112 | continue; 113 | } 114 | }; 115 | } else { 116 | terminal.add_history_entry(&line); 117 | } 118 | 119 | let words = shell_words::split(&line); 120 | match words { 121 | Ok(s) => { 122 | let mut app = DebuggerInst::into_app() 123 | .setting(AppSettings::NoBinaryName) 124 | .setting(AppSettings::InferSubcommands) 125 | .help_template("Commands:\r\n{subcommands}") 126 | .override_usage(" [args]"); 127 | let res = app 128 | .try_get_matches_from_mut(&s) 129 | .map(|x| DebuggerInst::from_arg_matches(&x)); 130 | match res { 131 | Ok(s) => match exec_opt(s, &mut vm, &mut breakpoints) { 132 | InstructionResult::Exit => { 133 | break; 134 | } 135 | InstructionResult::None => {} 136 | 
InstructionResult::Reset => { 137 | vm = create_vm_stdio(s0); 138 | } 139 | }, 140 | Err(e) => match e.kind { 141 | clap::ErrorKind::DisplayHelp => println!("{}", e), 142 | _ => println!("{}", e.to_string().lines().next().unwrap()), 143 | }, 144 | }; 145 | } 146 | Err(e) => println!("Invalid input: {}!", e), 147 | } 148 | } 149 | 150 | Err(rustyline::error::ReadlineError::Eof) 151 | | Err(rustyline::error::ReadlineError::Interrupted) => { 152 | println!("Interrupted."); 153 | break; 154 | } 155 | Err(e) => { 156 | println!("Error: {:?}", e); 157 | break; 158 | } 159 | } 160 | } 161 | } 162 | 163 | fn exec_opt( 164 | opt: DebuggerInst, 165 | vm: &mut r0vm::vm::R0Vm, 166 | breakpoints: &mut bimap::BiBTreeMap, 167 | ) -> InstructionResult { 168 | match opt { 169 | DebuggerInst::Run => print_unwrap! { 170 | vm.run_to_end_inspect(|vm| cur_breakpoint(vm, breakpoints).is_none()), 171 | Ok(_) => { 172 | if vm.is_at_end() { 173 | println!("Program exited without error"); 174 | return InstructionResult::Reset; 175 | } else { 176 | if let Some(b) = cur_breakpoint(vm, breakpoints){ 177 | println!("At breakpoint {}: {}", b, vm.cur_stack_info().unwrap()); 178 | } 179 | print_vm_next_instruction(vm, None); 180 | } 181 | } 182 | }, 183 | DebuggerInst::Step => print_unwrap! { 184 | vm.step(), 185 | Ok(executed_op) => { 186 | if vm.is_at_end() { 187 | println!("Program exited without error"); 188 | return InstructionResult::Reset; 189 | } else { 190 | print_vm_next_instruction(vm, Some(executed_op)); 191 | } 192 | } 193 | }, 194 | DebuggerInst::Finish => { 195 | let current_fn_bp = vm.bp(); 196 | print_unwrap! 
{ 197 | vm.run_to_end_inspect(|vm| vm.bp() >= current_fn_bp && cur_breakpoint(vm, breakpoints).is_none()), 198 | Ok(_) => { 199 | if let Some(b) = cur_breakpoint(vm, breakpoints){ 200 | println!("At breakpoint {}: {}", b, vm.cur_stack_info().unwrap()); 201 | } 202 | print_vm_next_instruction(vm, None); 203 | } 204 | } 205 | } 206 | DebuggerInst::Backtrace => { 207 | let (stacktrace, corrupted) = vm.stack_trace(); 208 | for (idx, frame) in stacktrace.into_iter().enumerate() { 209 | println!("{:4}: {}", idx, frame); 210 | } 211 | if corrupted { 212 | println!("The stack corrupted here"); 213 | } 214 | } 215 | DebuggerInst::Frame(inst) => { 216 | match vm.debug_frame(inst.position) { 217 | Ok(debugger) => println!("{}", debugger), 218 | Err(_) => println!("The stack is corrupted"), 219 | }; 220 | } 221 | DebuggerInst::Breakpoint(b) => { 222 | let pos = b.position; 223 | match vm.get_fn_by_name(&pos.function_name) { 224 | Ok(id) => { 225 | let def = vm.get_fn_by_id(id).unwrap(); 226 | if pos.offset < def.ins.len() { 227 | let max_breakpoint = breakpoints 228 | .left_values() 229 | .last() 230 | .cloned() 231 | .map(|x| x + 1) 232 | .unwrap_or(0); 233 | 234 | breakpoints.insert( 235 | max_breakpoint, 236 | Breakpoint { 237 | fn_id: id as u32, 238 | offset: pos.offset, 239 | }, 240 | ); 241 | 242 | println!("Added breakpoint {}", max_breakpoint); 243 | } else { 244 | println!("Error: offset is larger than function length"); 245 | } 246 | } 247 | Err(e) => println!("Error: {}", e), 248 | } 249 | } 250 | DebuggerInst::RemoveBreakpoint { id } => { 251 | if let Some(point) = breakpoints.get_by_left(&id) { 252 | let fn_name = vm.get_fn_name_by_id(point.fn_id).unwrap(); 253 | println!("Remove breakpoint #{} at {}:{}", id, fn_name, point.offset); 254 | } else { 255 | println!("No such breakpoint was found."); 256 | } 257 | } 258 | DebuggerInst::ListBreakpoint => { 259 | for (id, point) in breakpoints.iter() { 260 | let fn_name = vm.get_fn_name_by_id(point.fn_id).unwrap(); 261 | 
println!("#{}: {}:{}", id, fn_name, point.offset); 262 | } 263 | } 264 | DebuggerInst::Exit => return InstructionResult::Exit, 265 | DebuggerInst::Reset => { 266 | return InstructionResult::Reset; 267 | } 268 | } 269 | InstructionResult::None 270 | } 271 | 272 | fn print_vm_next_instruction(vm: &R0Vm, executed_op: Option) { 273 | let fn_info = vm.fn_info(); 274 | let ip = vm.ip(); 275 | if let Some(executed_op) = executed_op { 276 | println!(" | {:?}", executed_op); 277 | } 278 | if let Some(next_op) = fn_info.ins.get(ip) { 279 | println!("-> {:4} | {:?}", ip, next_op); 280 | } else { 281 | println!("-> {:4}| Function end", ip); 282 | } 283 | if let Ok(cur) = vm.cur_stack_info() { 284 | println!("at: {}", cur) 285 | } 286 | } 287 | 288 | #[inline] 289 | fn cur_breakpoint(vm: &R0Vm, breakpoints: &bimap::BiBTreeMap) -> Option { 290 | let fn_id = vm.fn_id() as u32; 291 | let ip = vm.ip(); 292 | breakpoints 293 | .get_by_right(&Breakpoint { fn_id, offset: ip }) 294 | .copied() 295 | } 296 | 297 | #[derive(Clap, Debug)] 298 | #[clap(name = "r0vm")] 299 | /// A virtual machine for r0 stuff 300 | struct Opt { 301 | /// The file to run 302 | pub file: PathBuf, 303 | 304 | /// Run in debugger mode 305 | #[clap(short, long)] 306 | pub debug: bool, 307 | 308 | /// Dump the assembly in human-readable format 309 | #[clap(long)] 310 | pub dump: bool, 311 | 312 | /// Set log level. 
Values: error, warn, info, debug, trace 313 | #[clap(long, default_value = "warn")] 314 | pub log: tracing::level_filters::LevelFilter, 315 | } 316 | 317 | #[derive(Clap, Debug)] 318 | struct FrameInst { 319 | /// The frame to show 320 | #[clap(default_value = "0")] 321 | pub position: usize, 322 | } 323 | 324 | #[derive(Clap, Debug)] 325 | struct BreakpointInst { 326 | /// Breakpoint position, in format `<function_name>[:<offset>]` 327 | pub position: BreakpointRef, 328 | } 329 | 330 | #[derive(Debug)] 331 | struct BreakpointRef { 332 | pub function_name: String, 333 | pub offset: usize, 334 | } 335 | 336 | #[derive(Debug, Ord, PartialOrd, Eq, PartialEq)] 337 | struct Breakpoint { 338 | pub fn_id: u32, 339 | pub offset: usize, 340 | } 341 | 342 | impl FromStr for BreakpointRef { 343 | type Err = String; 344 | 345 | fn from_str(s: &str) -> Result { 346 | let mut it = s.splitn(2, ':'); 347 | let name = it.next().to_owned(); 348 | let inst = it.next().map(|x| usize::from_str(x).ok()); 349 | match (name, inst) { 350 | (None, _) => { 351 | Err("No function name supplied. Expected: [:]".into()) 352 | } 353 | (_, Some(None)) => Err("Offset is not a number".into()), 354 | (Some(name), Some(Some(offset))) => Ok(BreakpointRef { 355 | function_name: name.into(), 356 | offset, 357 | }), 358 | (Some(name), None) => Ok(BreakpointRef { 359 | function_name: name.into(), 360 | offset: 0, 361 | }), 362 | } 363 | } 364 | } 365 | 366 | #[derive(Clap, Debug)] 367 | enum DebuggerInst { 368 | /// Run or continue the current execution to end. [alias: r, continue, c] 369 | #[clap(alias = "r", alias = "continue")] 370 | Run, 371 | 372 | /// Move one instruction forward. [alias: s, si, n] 373 | #[clap(alias = "s", alias = "n", alias = "si")] 374 | Step, 375 | 376 | /// Continue until function returns. [alias: f] 377 | #[clap(alias = "f")] 378 | Finish, 379 | 380 | /// Show call stack. 
[alias: where, stacktrace] 381 | #[clap(alias = "where", alias = "stacktrace")] 382 | Backtrace, 383 | 384 | /// Add breakpoint. [alias: b] 385 | #[clap(alias = "b")] 386 | Breakpoint(BreakpointInst), 387 | 388 | /// Remove breakpoint. [alias: rb] 389 | #[clap(alias = "rb")] 390 | RemoveBreakpoint { id: usize }, 391 | 392 | /// List all breakpoints. [alias: lb] 393 | #[clap(alias = "lb")] 394 | ListBreakpoint, 395 | 396 | /// Show function frame 397 | Frame(FrameInst), 398 | 399 | /// Reset execution to start 400 | Reset, 401 | 402 | /// Exit the debugger. [alias: q, quit] 403 | #[clap(alias = "quit")] 404 | Exit, 405 | } 406 | 407 | enum InstructionResult { 408 | Exit, 409 | Reset, 410 | None, 411 | } 412 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod test; 3 | pub mod util; 4 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::Clap; 2 | use logos::{Lexer, Logos}; 3 | use natrium::util::pretty_print_error; 4 | use r0syntax::{ast::Program, span::Span, token::Token}; 5 | use r0vm::s0::io::WriteBinary; 6 | use std::{ 7 | io::{Read, Write}, 8 | path::PathBuf, 9 | str::FromStr, 10 | }; 11 | 12 | fn main() { 13 | let opt = Opt::parse(); 14 | let input = std::fs::read_to_string(&opt.input).expect("Unable to read input file"); 15 | 16 | let output_file = get_output(&opt); 17 | let mut output = build_output(output_file, opt.interpret); 18 | 19 | let lexer = r0syntax::lexer::spanned_lexer(&input); 20 | if !opt.interpret && opt.emit == EmitTarget::Token { 21 | dump_lex(lexer, output); 22 | } 23 | 24 | let program = parser(lexer, &input); 25 | if !opt.interpret && opt.emit == EmitTarget::Ast { 26 | dump_ast(program, output); 27 | } 28 | 29 | let s0 = compile_s0(&program, &input); 30 | if 
!opt.interpret { 31 | if opt.emit == EmitTarget::O0 { 32 | s0.write_binary(&mut output) 33 | .expect("Failed to write to output"); 34 | } else { 35 | write!(output, "{}", s0).expect("Failed to write to output"); 36 | } 37 | } else { 38 | let stdin = std::io::stdin(); 39 | let stdout = std::io::stdout(); 40 | let mut vm = r0vm::vm::R0Vm::new(&s0, Box::new(stdin), Box::new(stdout)).unwrap(); 41 | 42 | match vm.run_to_end() { 43 | Ok(_) => {} 44 | Err(e) => { 45 | eprintln!("{}", &s0); 46 | eprintln!("{}", e); 47 | eprintln!("{}", vm.debug_stack()); 48 | } 49 | } 50 | } 51 | } 52 | 53 | /// Get real output path based on options. None means stdout. 54 | fn get_output(opt: &Opt) -> Option { 55 | match opt.output.as_deref() { 56 | Some("-") => None, 57 | Some(x) => PathBuf::from_str(x).ok(), 58 | None => { 59 | let filename = opt 60 | .input 61 | .file_stem() 62 | .and_then(|x| x.to_str()) 63 | .unwrap_or("a"); 64 | let ext = match opt.emit { 65 | EmitTarget::O0 => "o0", 66 | EmitTarget::Text => "s0", 67 | EmitTarget::Token => "tokenstream", 68 | EmitTarget::Ast => "ast", 69 | }; 70 | let out_file = format!("{}.{}", filename, ext); 71 | Some(out_file.into()) 72 | } 73 | } 74 | } 75 | 76 | fn build_output(output: Option, interpret: bool) -> Box { 77 | if interpret { 78 | return Box::new(std::io::stdout()); 79 | } 80 | if let Some(path) = output { 81 | let file = std::fs::File::create(path).expect("Failed to open file"); 82 | Box::new(file) 83 | } else { 84 | Box::new(std::io::stdout()) 85 | } 86 | } 87 | 88 | fn dump_lex(lexer: T, mut output: Box) -> ! 89 | where 90 | T: Iterator, 91 | { 92 | for (token, span) in lexer { 93 | writeln!(output, "{:?} at {:?}", token, span).expect("Failed to write"); 94 | } 95 | std::process::exit(0); 96 | } 97 | 98 | fn dump_ast(ast: Program, mut output: Box) -> ! 
{ 99 | writeln!(output, "{:?}", ast).expect("Failed to write to output"); 100 | std::process::exit(0); 101 | } 102 | 103 | fn parser(lexer: T, input: &str) -> Program 104 | where 105 | T: Iterator, 106 | { 107 | let mut p = r0syntax::parser::Parser::new(lexer); 108 | let r = p.parse(); 109 | 110 | match r { 111 | Ok(p) => p, 112 | Err(e) => { 113 | if let Some(span) = e.span { 114 | pretty_print_error( 115 | &mut std::io::stdout(), 116 | &input, 117 | &format!("{:?}", e.kind), 118 | span, 119 | ) 120 | .unwrap(); 121 | } else { 122 | println!("{:?}", e.kind); 123 | } 124 | std::process::exit(1); 125 | } 126 | } 127 | } 128 | 129 | fn compile_s0(program: &Program, input: &str) -> r0vm::s0::S0 { 130 | match r0codegen::generator::compile(program) { 131 | Ok(p) => p, 132 | Err(e) => { 133 | if let Some(span) = e.span { 134 | pretty_print_error( 135 | &mut std::io::stdout(), 136 | &input, 137 | &format!("{:?}", e.kind), 138 | span, 139 | ) 140 | .unwrap(); 141 | } else { 142 | println!("{:?}", e.kind); 143 | } 144 | std::process::exit(1); 145 | } 146 | } 147 | } 148 | 149 | #[derive(Clap, Debug)] 150 | struct Opt { 151 | /// Input file 152 | pub input: PathBuf, 153 | 154 | /// Emit target 155 | /// 156 | /// O0: binary object code; 157 | /// Text: text format code; 158 | /// Token: token stream; 159 | /// Ast: abstract syntax tree 160 | #[clap(long, default_value = "o0")] 161 | pub emit: EmitTarget, 162 | 163 | /// Output file. 
Defaults to `<input file stem>.o0|s0|tokenstream|ast` 164 | #[clap(long, short)] 165 | pub output: Option, 166 | 167 | /// Interpret the input file with virtual machine; alias: `--run` 168 | #[cfg(feature = "vm")] 169 | #[clap(short = 'i', long, alias = "run")] 170 | pub interpret: bool, 171 | } 172 | 173 | #[derive(Debug, Copy, Clone, Eq, PartialEq)] 174 | enum EmitTarget { 175 | O0, 176 | Text, 177 | Token, 178 | Ast, 179 | } 180 | 181 | impl FromStr for EmitTarget { 182 | type Err = String; 183 | 184 | fn from_str(s: &str) -> Result { 185 | Ok(match s.to_lowercase().as_str() { 186 | "o0" => EmitTarget::O0, 187 | "text" | "s0" => EmitTarget::Text, 188 | "token" | "lex" => EmitTarget::Token, 189 | "ast" | "parse" => EmitTarget::Ast, 190 | _ => return Err(format!("Expected one of: o0, text, token, ast; got: {}", s)), 191 | }) 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /src/test.rs: -------------------------------------------------------------------------------- 1 | use r0vm::s0::{io::WriteBinary, S0}; 2 | 3 | #[test] 4 | fn test_ser() { 5 | let input = r#" 6 | fn is_odd(x: int) -> int { 7 | return (x / 2 * 2) - x; 8 | } 9 | 10 | fn fastpow(base: int, exp: int) -> int { 11 | let res: int = 1; 12 | if exp < 0 { 13 | return 0; 14 | } 15 | while exp > 0 { 16 | if is_odd(exp) { 17 | res = res * base; 18 | } 19 | base = base * base; 20 | exp = exp / 2; 21 | } 22 | return res; 23 | } 24 | 25 | fn main() -> void { 26 | let base: int; 27 | let exp: int; 28 | let count: int; 29 | count = getint(); 30 | while count > 0 { 31 | base = getint(); 32 | exp = getint(); 33 | putint(fastpow(base,exp)); 34 | putln(); 35 | count = count - 1; 36 | } 37 | } 38 | "#; 39 | let lexer = r0syntax::lexer::spanned_lexer(&input); 40 | let program = r0syntax::parser::Parser::new(lexer).parse().unwrap(); 41 | let s0 = r0codegen::generator::compile(&program).unwrap(); 42 | 43 | let mut bin = vec![]; 44 | s0.write_binary(&mut bin).unwrap(); 45 | let s0_re = 
S0::read_binary(&mut &bin[..]).unwrap().unwrap(); 46 | assert_eq!(s0, s0_re); 47 | } 48 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | use r0syntax::span::Span; 2 | use std::io::Write; 3 | use unicode_width::UnicodeWidthStr; 4 | 5 | pub fn pretty_print_error( 6 | writer: &mut dyn Write, 7 | input: &str, 8 | error: &str, 9 | span: Span, 10 | ) -> Result<(), std::io::Error> { 11 | writeln!(writer, "{}", error)?; 12 | 13 | if span == Span::eof() { 14 | let line = input.lines().last().unwrap_or(""); 15 | writeln!(writer, "{}", line)?; 16 | writeln!(writer, "{:space_width$}^", space_width = line.width())?; 17 | 18 | Ok(()) 19 | } else { 20 | let start = line_span::find_line_range(input, span.start()); 21 | let end = line_span::find_line_range(input, span.end()); 22 | 23 | if let Some(line) = line_span::find_prev_line_range(input, span.start()) { 24 | writeln!(writer, "{}", &input[line])?; 25 | } 26 | if start == end { 27 | writeln!(writer, "{}", &input[start.clone()])?; 28 | writeln!( 29 | writer, 30 | "{:space_width$}{:^^line_width$}", 31 | "", 32 | "", 33 | space_width = input[start.start..span.start()].width(), 34 | line_width = input[span.start()..span.end()].width() 35 | )?; 36 | } else { 37 | let print_range = start.start..end.end; 38 | let input_range = input[print_range].lines().collect::>(); 39 | 40 | writeln!(writer, "{}", input_range[0])?; 41 | writeln!( 42 | writer, 43 | "{:space_width$}{:^^line_width$}", 44 | "", 45 | "", 46 | space_width = input[start.start..span.start()].width(), 47 | line_width = input[span.start()..start.end].width() 48 | )?; 49 | for i in 1..(input_range.len() - 1) { 50 | writeln!(writer, "{}", input_range[i])?; 51 | writeln!(writer, "{:^^len$}", "", len = input_range[i].width())?; 52 | } 53 | writeln!(writer, "{}", input_range[input_range.len() - 1])?; 54 | writeln!( 55 | writer, 56 | 
"{:^^line_width$}", 57 | "", 58 | line_width = input[end.start..(span.end())].width() 59 | )?; 60 | } 61 | if let Some(line) = line_span::find_next_line_range(input, span.end()) { 62 | writeln!(writer, "{}", &input[line])?; 63 | } 64 | Ok(()) 65 | } 66 | } 67 | 68 | -------------------------------------------------------------------------------- /web/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | /dist 3 | /target 4 | /pkg 5 | /wasm-pack.log 6 | /statc/fonts 7 | -------------------------------------------------------------------------------- /web/Cargo.toml: -------------------------------------------------------------------------------- 1 | # You must change these to your own details. 2 | [package] 3 | authors = ["You "] 4 | categories = ["wasm"] 5 | description = "My super awesome Rust, WebAssembly, and Webpack project!" 6 | edition = "2018" 7 | name = "natrium-web" 8 | readme = "README.md" 9 | version = "0.1.0" 10 | 11 | [lib] 12 | crate-type = ["cdylib"] 13 | 14 | [profile.release] 15 | # This makes the compiled code faster and smaller, but it makes compiling slower, 16 | # so it's only enabled in release mode. 17 | lto = true 18 | 19 | [features] 20 | default = ["wee_alloc"] 21 | 22 | [dependencies] 23 | bytes = "0.6" 24 | console_error_panic_hook = "0.1.5" 25 | js-sys = "0.3.22" 26 | "line-span" = "0.1.2" 27 | "logos" = "0.11.4" 28 | natrium = {path = "../", default-features = false} 29 | "r0codegen" = {path = "../crates/r0codegen"} 30 | "r0syntax" = {path = "../crates/syntax"} 31 | "r0vm" = {path = "../crates/r0vm"} 32 | wasm-bindgen = "0.2.68" 33 | wee_alloc = {version = "0.4.2", optional = true} 34 | 35 | # The `web-sys` crate allows you to interact with the various browser APIs, 36 | # like the DOM. 
37 | [dependencies.web-sys] 38 | features = ["console"] 39 | version = "0.3.22" 40 | 41 | # The `console_error_panic_hook` crate provides better debugging of panics by 42 | # logging them with `console.error`. This is great for development, but requires 43 | # all the `std::fmt` and `std::panicking` infrastructure, so it's only enabled 44 | # in debug mode. 45 | # [target."cfg(debug_assertions)".dependencies] 46 | 47 | # These crates are used for running unit tests. 48 | [dev-dependencies] 49 | futures = "0.1.27" 50 | wasm-bindgen-futures = "0.3.22" 51 | wasm-bindgen-test = "0.2.45" 52 | 53 | [package.metadata.wasm-pack.profile.release] 54 | wasm-opt = false 55 | -------------------------------------------------------------------------------- /web/README.md: -------------------------------------------------------------------------------- 1 | ## How to install 2 | 3 | ```sh 4 | npm install 5 | ``` 6 | 7 | ## How to run in debug mode 8 | 9 | ```sh 10 | # Builds the project and opens it in a new browser tab. Auto-reloads when the project changes. 11 | npm start 12 | ``` 13 | 14 | ## How to build in release mode 15 | 16 | ```sh 17 | # Builds the project and places it into the `dist` folder. 18 | npm run build 19 | ``` 20 | 21 | ## How to run unit tests 22 | 23 | ```sh 24 | # Runs tests in Firefox 25 | npm test -- --firefox 26 | 27 | # Runs tests in Chrome 28 | npm test -- --chrome 29 | 30 | # Runs tests in Safari 31 | npm test -- --safari 32 | ``` 33 | 34 | ## What does each file do? 35 | 36 | * `Cargo.toml` contains the standard Rust metadata. You put your Rust dependencies in here. You must change this file with your details (name, description, version, authors, categories) 37 | 38 | * `package.json` contains the standard npm metadata. You put your JavaScript dependencies in here. You must change this file with your details (author, name, version) 39 | 40 | * `webpack.config.js` contains the Webpack configuration. 
You shouldn't need to change this, unless you have very special needs. 41 | 42 | * The `js` folder contains your JavaScript code (`index.js` is used to hook everything into Webpack, you don't need to change it). 43 | 44 | * The `src` folder contains your Rust code. 45 | 46 | * The `static` folder contains any files that you want copied as-is into the final build. It contains an `index.html` file which loads the `index.js` file. 47 | 48 | * The `tests` folder contains your Rust unit tests. 49 | -------------------------------------------------------------------------------- /web/js/index.styl: -------------------------------------------------------------------------------- 1 | iosevka-font = 'Iosevka Extended', 'IBM Plex Mono', 'Roboto Mono', 'SF Mono', 'Consolas', monospace 2 | 3 | .editor-space { 4 | width: 60% 5 | } 6 | 7 | .result-space { 8 | width: 40% 9 | overflow: auto 10 | scroll-behavior: auto 11 | } 12 | 13 | .app { 14 | display: flex 15 | flex-direction: row 16 | align-self: stretch 17 | } 18 | 19 | .app, 20 | #app-root, 21 | html, 22 | body { 23 | padding: 0 24 | margin: 0 25 | height: 100% 26 | } 27 | 28 | html { 29 | overflow: hidden 30 | font-family: iosevka-font 31 | } 32 | 33 | .ace-editor, 34 | .ace_editor, 35 | .ace_placeholder, 36 | pre { 37 | font-family: iosevka-font 38 | } 39 | 40 | input { 41 | font: inherit 42 | } 43 | -------------------------------------------------------------------------------- /web/js/index.tsx: -------------------------------------------------------------------------------- 1 | import * as React from 'react' 2 | import './index.styl' 3 | import * as Natrium from '../pkg/index' 4 | import AceEditor from 'react-ace' 5 | 6 | export interface AppProps { 7 | natrium: typeof Natrium 8 | } 9 | 10 | export interface AppState { 11 | error?: string 12 | code: string 13 | compiledCode?: string 14 | output: string 15 | } 16 | 17 | export class App extends React.Component { 18 | natrium: typeof Natrium 19 | 20 | constructor(props: 
AppProps) { 21 | super(props) 22 | this.natrium = props.natrium 23 | this.state = { 24 | error: undefined, 25 | code: '', 26 | compiledCode: undefined, 27 | output: '', 28 | } 29 | } 30 | 31 | onCodeUpdate(code: string) { 32 | this.setState({ code: code }) 33 | } 34 | 35 | render() { 36 | return ( 37 |
38 |
39 | this.onCodeUpdate(code)}> 40 |
41 |
42 | 43 | 44 | {this.state.compiledCode &&
{this.state.compiledCode}
} 45 | {this.state.error &&
{this.state.error}
} 46 | {this.state.output &&
{this.state.output}
} 47 |
48 |
49 | ) 50 | } 51 | 52 | compile(code: string) { 53 | try { 54 | let compiledCode = this.natrium.compile(code) 55 | this.setState({ compiledCode: compiledCode, output: '', error: undefined }) 56 | } catch (e) { 57 | this.setState({ error: e, compiledCode: undefined }) 58 | } 59 | } 60 | 61 | run(code: string) { 62 | try { 63 | this.setState({ output: '', compiledCode: undefined, error: undefined }) 64 | this.natrium.run( 65 | code, 66 | () => '', 67 | (x: Uint8Array) => this.appendCode(x) 68 | ) 69 | console.log('finished') 70 | } catch (e) { 71 | this.setState({ error: e, compiledCode: undefined }) 72 | } 73 | } 74 | 75 | appendCode(x: Uint8Array) { 76 | let s = new TextDecoder('utf8').decode(x) 77 | console.log('out', s) 78 | this.setState((x) => ({ 79 | output: x.output + s, 80 | })) 81 | } 82 | } 83 | 84 | interface EditorProps { 85 | onCodeChange: (code: string) => void 86 | code: string 87 | } 88 | 89 | class Editor extends React.Component { 90 | constructor(props: EditorProps) { 91 | super(props) 92 | this.props = props 93 | } 94 | 95 | props: EditorProps = undefined 96 | 97 | updateCode(code: string) { 98 | this.props.onCodeChange(code) 99 | } 100 | 101 | render() { 102 | return ( 103 | this.updateCode(code)} 108 | width="100%" 109 | height="100%" 110 | fontSize="1rem" 111 | placeholder="// Your code here" 112 | editorProps={{ 113 | $blockScrolling: true, 114 | }} 115 | setOptions={{ 116 | wrap: true, 117 | displayIndentGuides: true, 118 | cursorStyle: 'smooth', 119 | }} 120 | > 121 | ) 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /web/js/main.tsx: -------------------------------------------------------------------------------- 1 | import { render } from 'react-dom' 2 | import * as React from 'react' 3 | import { App, AppProps } from './index.tsx' 4 | 5 | import('../pkg/index').then((mod) => 6 | render(, document.getElementById('app-root')) 7 | ) 8 | 
-------------------------------------------------------------------------------- /web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Rynco Maekawa ", 3 | "name": "natrium-web", 4 | "version": "0.1.0", 5 | "scripts": { 6 | "build": "rimraf dist pkg && webpack", 7 | "start": "rimraf dist pkg && webpack-dev-server --open -d", 8 | "test": "cargo test && wasm-pack test --headless" 9 | }, 10 | "devDependencies": { 11 | "@types/react": "^16.9.56", 12 | "@types/react-dom": "^16.9.9", 13 | "@wasm-tool/wasm-pack-plugin": "^1.1.0", 14 | "copy-webpack-plugin": "^5.0.3", 15 | "css-loader": "^5.0.1", 16 | "file-loader": "^6.2.0", 17 | "rimraf": "^3.0.0", 18 | "style-loader": "^2.0.0", 19 | "stylus": "^0.54.8", 20 | "stylus-loader": "^4.2.0", 21 | "ts-loader": "^8.0.10", 22 | "typescript": "^4.0.5", 23 | "wasm-loader": "^1.3.0", 24 | "webpack": "^4.42.0", 25 | "webpack-cli": "^3.3.3", 26 | "webpack-dev-server": "^3.7.1" 27 | }, 28 | "dependencies": { 29 | "ace-builds": "^1.4.12", 30 | "react": "^17.0.1", 31 | "react-ace": "^9.2.0", 32 | "react-dom": "^17.0.1" 33 | }, 34 | "sideEffects": [ 35 | "**/*.css", 36 | "**/*.styl", 37 | "js/main.tsx", 38 | "pkg/*" 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /web/src/lib.rs: -------------------------------------------------------------------------------- 1 | use bytes::Bytes; 2 | use natrium::util::pretty_print_error; 3 | use r0vm::s0::S0; 4 | use std::{fmt::Write as FmtWrite, io, io::Write}; 5 | use wasm_bindgen::prelude::*; 6 | use web_sys::console; 7 | 8 | #[cfg(feature = "wee_alloc")] 9 | #[global_allocator] 10 | static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT; 11 | 12 | // This is like the `main` function, except for JavaScript. 
13 | #[wasm_bindgen(start)] 14 | pub fn main_js() -> Result<(), JsValue> { 15 | #[cfg(debug_assertions)] 16 | console_error_panic_hook::set_once(); 17 | 18 | // Your code goes here! 19 | unsafe { console::log_1(&JsValue::from_str("Hello world!")) }; 20 | 21 | Ok(()) 22 | } 23 | 24 | fn compile_internal(input: &str) -> Result { 25 | let l = r0syntax::lexer::spanned_lexer(input); 26 | let mut p = r0syntax::parser::Parser::new(l); 27 | let r = p.parse(); 28 | let program = match r { 29 | Ok(p) => p, 30 | Err(e) => { 31 | let mut err = Vec::new(); 32 | if let Some(span) = e.span { 33 | pretty_print_error(&mut err, input, &format!("{:?}", e.kind), span) 34 | .map_err(|x| x.to_string())?; 35 | } else { 36 | writeln!(err, "{:?}", e.kind).map_err(|x| x.to_string())?; 37 | } 38 | return Err(unsafe { String::from_utf8_unchecked(err) }); 39 | } 40 | }; 41 | 42 | let s0 = match r0codegen::generator::compile(&program) { 43 | Ok(p) => p, 44 | Err(e) => { 45 | let mut err = Vec::new(); 46 | if let Some(span) = e.span { 47 | pretty_print_error(&mut err, input, &format!("{:?}", e.kind), span) 48 | .map_err(|x| x.to_string())?; 49 | } else { 50 | writeln!(err, "{:?}", e.kind).map_err(|x| x.to_string())?; 51 | } 52 | return Err(unsafe { String::from_utf8_unchecked(err) }); 53 | } 54 | }; 55 | 56 | Ok(s0) 57 | } 58 | 59 | #[wasm_bindgen] 60 | pub fn compile(input: &str) -> Result { 61 | compile_internal(input) 62 | .map_err(|x| JsValue::from_str(&x)) 63 | .map(|x| x.to_string()) 64 | } 65 | 66 | #[wasm_bindgen] 67 | pub fn run( 68 | input: &str, 69 | read_chunk: js_sys::Function, 70 | write_chunk: js_sys::Function, 71 | ) -> Result<(), JsValue> { 72 | let code = compile_internal(input).map_err(|x| JsValue::from_str(&x))?; 73 | let mut stdout = JsStdioAdaptor::new(read_chunk, write_chunk); 74 | let mut var_name = io::empty(); 75 | let mut vm = r0vm::vm::R0Vm::new(&code, Box::new(var_name), Box::new(stdout)).map_err(|x| { 76 | let mut s = String::new(); 77 | write!(s, "{:?}", 
x).unwrap(); 78 | s 79 | })?; 80 | vm.run_to_end().unwrap(); 81 | Ok(()) 82 | } 83 | 84 | struct JsStdioAdaptor { 85 | read_chunk: js_sys::Function, 86 | write_chunk: js_sys::Function, 87 | pending_read: Option, 88 | } 89 | 90 | impl JsStdioAdaptor { 91 | pub fn new(read_chunk: js_sys::Function, write_chunk: js_sys::Function) -> JsStdioAdaptor { 92 | JsStdioAdaptor { 93 | read_chunk, 94 | write_chunk, 95 | pending_read: None, 96 | } 97 | } 98 | } 99 | 100 | impl io::Read for JsStdioAdaptor { 101 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 102 | if let Some(mut data) = self.pending_read.take() { 103 | if data.len() > buf.len() { 104 | let remianing = data.split_off(buf.len()); 105 | self.pending_read = Some(remianing); 106 | buf.copy_from_slice(&data[..]); 107 | Ok(buf.len()) 108 | } else { 109 | let buf_sub = &mut buf[0..data.len()]; 110 | buf_sub.copy_from_slice(&data[..]); 111 | Ok(data.len()) 112 | } 113 | } else { 114 | let val: JsValue = self.read_chunk.call0(&JsValue::null()).unwrap(); 115 | if !val.is_string() { 116 | return Err(io::Error::new( 117 | io::ErrorKind::InvalidInput, 118 | "Value is not a string", 119 | )); 120 | } 121 | let val_str = val.as_string().unwrap(); 122 | let mut data = Bytes::from(val_str); 123 | if data.len() > buf.len() { 124 | let remianing = data.split_off(buf.len()); 125 | self.pending_read = Some(remianing); 126 | buf.copy_from_slice(&data[..]); 127 | Ok(buf.len()) 128 | } else { 129 | let buf_sub = &mut buf[0..data.len()]; 130 | buf_sub.copy_from_slice(&data[..]); 131 | Ok(data.len()) 132 | } 133 | } 134 | } 135 | } 136 | 137 | impl std::io::Write for JsStdioAdaptor { 138 | fn write(&mut self, buf: &[u8]) -> io::Result { 139 | let js_buf = js_sys::Uint8Array::from(buf); 140 | self.write_chunk.call1(&JsValue::null(), &js_buf).unwrap(); 141 | Ok(buf.len()) 142 | } 143 | 144 | fn flush(&mut self) -> io::Result<()> { 145 | Ok(()) 146 | } 147 | } 148 | 
-------------------------------------------------------------------------------- /web/static/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Natrium Playground 6 | 7 | 8 | 9 |
10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /web/tests/app.rs: -------------------------------------------------------------------------------- 1 | use wasm_bindgen_test::{wasm_bindgen_test_configure, wasm_bindgen_test}; 2 | use futures::prelude::*; 3 | use wasm_bindgen::JsValue; 4 | use wasm_bindgen_futures::JsFuture; 5 | 6 | wasm_bindgen_test_configure!(run_in_browser); 7 | 8 | 9 | // This runs a unit test in native Rust, so it can only use Rust APIs. 10 | #[test] 11 | fn rust_test() { 12 | assert_eq!(1, 1); 13 | } 14 | 15 | 16 | // This runs a unit test in the browser, so it can use browser APIs. 17 | #[wasm_bindgen_test] 18 | fn web_test() { 19 | assert_eq!(1, 1); 20 | } 21 | 22 | 23 | // This runs a unit test in the browser, and in addition it supports asynchronous Future APIs. 24 | #[wasm_bindgen_test(async)] 25 | fn async_test() -> impl Future { 26 | // Creates a JavaScript Promise which will asynchronously resolve with the value 42. 27 | let promise = js_sys::Promise::resolve(&JsValue::from(42)); 28 | 29 | // Converts that Promise into a Future. 30 | // The unit test will wait for the Future to resolve. 
31 | JsFuture::from(promise) 32 | .map(|x| { 33 | assert_eq!(x, 42); 34 | }) 35 | } 36 | -------------------------------------------------------------------------------- /web/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "outDir": "build/dist", 4 | "module": "esnext", 5 | "target": "es5", 6 | "lib": ["es6", "dom"], 7 | "sourceMap": true, 8 | "allowJs": true, 9 | "jsx": "react", 10 | "moduleResolution": "node", 11 | "allowSyntheticDefaultImports": true, 12 | "incremental": true, 13 | "sourceRoot": "js" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /web/webpack.config.js: -------------------------------------------------------------------------------- 1 | const path = require('path') 2 | const CopyPlugin = require('copy-webpack-plugin') 3 | const WasmPackPlugin = require('@wasm-tool/wasm-pack-plugin') 4 | const TerserPlugin = require('terser-webpack-plugin') 5 | const APP_DIR = path.resolve(__dirname, './src') 6 | const dist = path.resolve(__dirname, 'dist') 7 | 8 | module.exports = { 9 | mode: 'production', 10 | entry: { 11 | index: './js/main.tsx', 12 | }, 13 | output: { 14 | path: dist, 15 | filename: '[name].js', 16 | }, 17 | devServer: { 18 | contentBase: dist, 19 | }, 20 | module: { 21 | rules: [ 22 | { test: /.tsx?$/, use: 'ts-loader', exclude: /node_modules/ }, 23 | // { test: /.wasm$/, use: 'wasm-loader', exclude: /node_modules/ }, 24 | { 25 | test: /.css$/, 26 | include: APP_DIR, 27 | use: ['style-loader', 'css-loader'], 28 | }, 29 | { test: /.styl$/, use: ['style-loader', 'css-loader', 'stylus-loader'] }, 30 | { test: /.ttf$/, use: 'file-loader' }, 31 | ], 32 | }, 33 | plugins: [ 34 | new CopyPlugin([path.resolve(__dirname, 'static')]), 35 | 36 | new WasmPackPlugin({ 37 | crateDirectory: __dirname, 38 | }), 39 | ], 40 | optimization: { 41 | minimize: true, 42 | minimizer: [new TerserPlugin()], 43 | }, 44 | } 45 | 
-------------------------------------------------------------------------------- /web/webpack.prod.js: -------------------------------------------------------------------------------- 1 | const { merge } = require('webpack-merge') 2 | const common = require('./webpack.common.js') 3 | 4 | module.exports = merge(common, { 5 | mode: 'production', 6 | 7 | devtool: 'source-map', 8 | }) 9 | --------------------------------------------------------------------------------