├── .github └── workflows │ └── quickstart.yml ├── .gitignore ├── Cargo.toml ├── DEVELOPMENT.md ├── LICENSE ├── README.md ├── ROADMAP.md ├── _config.yml ├── benchmarks └── .gitkeep ├── build.rs ├── dc_codegen ├── Cargo.toml └── src │ └── lib.rs ├── dc_compiler ├── .gitignore ├── Cargo.toml ├── README.md └── src │ ├── lib.rs │ ├── lowerify │ ├── base_target.rs │ ├── classic_target.rs │ ├── code_object.rs │ ├── mod.rs │ └── wasm_target.rs │ ├── meanify │ ├── README.md │ ├── control_flow_graph.rs │ ├── meanify.rs │ ├── mod.rs │ └── variable_table.rs │ └── neat │ ├── builtin.rs │ ├── expression.rs │ ├── mod.rs │ ├── namespace.rs │ ├── program.rs │ ├── statements.rs │ ├── struct_function.rs │ ├── symbol_table.rs │ └── unit.rs ├── dc_hir ├── Cargo.toml ├── README.md └── src │ ├── expression.rs │ ├── function.rs │ ├── hir.rs │ ├── lib.rs │ ├── statement.rs │ ├── struct_def.rs │ └── types.rs ├── dc_ir_pretty ├── Cargo.toml ├── README.md └── src │ ├── hir_pretty │ └── mod.rs │ ├── lib.rs │ └── mir_pretty │ └── mod.rs ├── dc_lexer ├── Cargo.toml └── src │ ├── error.rs │ ├── lexer.rs │ ├── lib.rs │ ├── location.rs │ └── token.rs ├── dc_mir ├── Cargo.toml ├── README.md └── src │ ├── basic_block.rs │ ├── instruction.rs │ └── lib.rs ├── dc_parser ├── Cargo.toml ├── build.rs └── src │ ├── datum.lalrpop │ ├── lib.rs │ ├── parse_tree.rs │ └── parser.rs ├── docs ├── README.md ├── adr │ ├── 0001-newline-or-semicolon.md │ ├── 0002-package-format.md │ ├── 0003-standard-library-design.md │ ├── 0004-dependency-manager.md │ ├── 0005-types-system.md │ ├── 0006-empty-method.md │ ├── 0007-support-for-newline.md │ ├── 0008-constructor-design.md │ ├── 0009-compiler-design.md │ ├── 0010-target.md │ ├── 0011-does-syntax-close-to-llvm.md │ ├── 0012-builtin-functions.md │ ├── 0013-irbuilder.md │ ├── 0014-ir-design.md │ ├── 0015-mir.md │ ├── 0016-cli-target-improve.md │ ├── 0017-container-parser-syntax.md │ ├── 0018-use-semicolon-for-split-context.md │ ├── 0019-language-server.md │ ├── 
0020-low-code-design-inside.md │ ├── 0021-replace-tower-lsp.md │ ├── 0022-design-by-contract-design.md │ ├── README.md │ └── images │ │ └── flow.svg ├── book │ ├── .gitignore │ ├── book.toml │ └── src │ │ ├── SUMMARY.md │ │ └── chapter_1.md ├── compares │ ├── rust │ │ ├── README.md │ │ ├── main.hir │ │ ├── main.hir-tree │ │ ├── main.mir │ │ └── main.rs │ ├── solang.md │ └── solang │ │ └── resolve.md ├── design.md ├── examples │ ├── func-call.cj │ ├── hello-world.cj │ ├── if-condition.cj │ ├── multiple-import.cj │ └── struct.cj ├── langs │ ├── 111-assignment.md │ ├── 2-type-system.md │ └── zh-cn │ │ └── 0-design-principle.md └── llvm │ ├── README.md │ ├── hello.c │ ├── hello.ll │ └── hello.s ├── justfile ├── src ├── bin │ ├── dc.rs │ └── languageserver │ │ └── mod.rs └── lib.rs └── stdlib ├── fmt └── fmt.cj ├── io └── io.cj ├── net └── net.cj ├── os └── os.cj ├── reflect └── reflect.cj └── strings └── strings.cj /.github/workflows/quickstart.yml: -------------------------------------------------------------------------------- 1 | name: Datum Build 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | strategy: 8 | matrix: 9 | os: [macos-latest, ubuntu-latest] 10 | include: 11 | - os: macos-latest 12 | INSTALL_LLVM: brew install llvm 13 | ENV_VARS: LLVM_SYS_120_PREFIX="/usr/local/opt/llvm" 14 | - os: ubuntu-latest 15 | INSTALL_LLVM: wget https://apt.llvm.org/llvm.sh && chmod +x llvm.sh && sudo ./llvm.sh 12 16 | ENV_VARS: ~ 17 | runs-on: ${{ matrix.os }} 18 | steps: 19 | - uses: actions/checkout@v2 20 | - name: Checkout submodules 21 | shell: bash 22 | run: | 23 | auth_header="$(git config --local --get http.https://github.com/.extraheader)" 24 | git submodule sync --recursive 25 | git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1 26 | - name: Install LLVM 27 | run: ${{matrix.INSTALL_LLVM}} 28 | 29 | - name: Get rust nightly 30 | run: rustup install nightly 31 | 32 | - name: Set rust nightly as default for 
cargo 33 | run: rustup default nightly 34 | 35 | - name: Build Debug 36 | run: ${{matrix.ENV_VARS}} cargo build --verbose 37 | 38 | - name: Run tests 39 | run: ${{matrix.ENV_VARS}} cargo test --verbose --all 40 | 41 | - name: Build Release 42 | run: ${{matrix.ENV_VARS}} cargo build --verbose --release 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | 13 | #Added by cargo 14 | 15 | /target 16 | .idea 17 | *.ll 18 | *.wasm 19 | *.log 20 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datum" 3 | version = "0.1.0" 4 | authors = ["Phodal Huang "] 5 | edition = "2018" 6 | license = "MIT" 7 | readme = "README.md" 8 | repository = "https://github.com/datum-lang/datum" 9 | documentation = "https://github.com/datum-lang/datum" 10 | homepage = "https://github.com/datum-lang/datum" 11 | description = """ 12 | Datum is a language for describe other languages 13 | """ 14 | categories = ["text-processing", "command-line-interface", "development-tools", "parser-implementations", "parsing"] 15 | exclude = [ 16 | "benchmark/*", 17 | "fixtures/*", 18 | ".github/*", 19 | ".gitattributes", 20 | ".adr.json", 21 | ".cargo_vcs_info.json", 22 | ] 23 | 24 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 25 | [dependencies] 26 | clap = "2.33" 27 | 28 | # serial 29 | serde_json = "1.0" 30 | serde = 
"1.0" 31 | serde_derive = { version = "1.0" } 32 | 33 | # lsp 34 | tower-lsp = "0.13" 35 | lsp-types = "0.81" 36 | tokio = { version = "0.2", features = ["rt-core", "io-std"] } 37 | 38 | dc_parser = { path = "dc_parser" } 39 | dc_compiler = { path = "dc_compiler" } 40 | dc_codegen = { path = "dc_codegen" } 41 | 42 | [dev-dependencies] 43 | assert_cmd = "2.0.0" 44 | 45 | [workspace] 46 | members = [ 47 | "dc_compiler", 48 | "dc_lexer", 49 | "dc_parser", 50 | "dc_codegen", 51 | "dc_hir", 52 | "dc_mir", 53 | "dc_ir_pretty", # output IR for debug 54 | ] -------------------------------------------------------------------------------- /DEVELOPMENT.md: -------------------------------------------------------------------------------- 1 | ## Development 2 | 3 | - dc_lexer, aka lexer 4 | - dc_parser (outputs: AST) 5 | - dc_hir (define: CFG) 6 | - dc_mir (define: MIR) 7 | - compiler (outputs: LLVM IR) 8 | - neat. AST -> HIR 9 | - medium. HIR -> MIR (TBD) 10 | - lowerify. MIR -> LLVM IR 11 | - dc_codegen (process MIR -> LLVM IR) 12 | 13 | ## Process 14 | 15 | -----> parser ----> AST ------------compiler------------------> LLVM IR 16 | lexer -----> parser -----> hir -----> mir -----> codegen -----> LLVM IR 17 | lalrpop AST 18 | 19 | 20 | ## Setup LLVM 21 | 22 | ``` 23 | brew install llvm 24 | ``` 25 | 26 | ## Target Platform Support (Plan) 27 | 28 | ### Tier 1 29 | 30 | | target | std | host | notes | 31 | | --- | --- | --- | --- | 32 | | `i686-pc-windows-gnu` | ✓ | ✓ | 32-bit MinGW (Windows 7+) | 33 | | `i686-pc-windows-msvc` | ✓ | ✓ | 32-bit MSVC (Windows 7+) | 34 | | `i686-unknown-linux-gnu` | ✓ | ✓ | 32-bit Linux (kernel 2.6.32+, glibc 2.11+) | 35 | | `x86_64-apple-darwin` | ✓ | ✓ | 64-bit macOS (10.7+, Lion+) | 36 | | `x86_64-pc-windows-gnu` | ✓ | ✓ | 64-bit MinGW (Windows 7+) | 37 | | `x86_64-pc-windows-msvc` | ✓ | ✓ | 64-bit MSVC (Windows 7+) | 38 | | `x86_64-unknown-linux-gnu` | ✓ | ✓ | 64-bit Linux (kernel 2.6.32+, glibc 2.11+) | 39 | | `wasm32-unknown-unknown` | ✓ | | 
WebAssembly | 40 | 41 | ### Tier 2 42 | 43 | | target | std | host | notes | 44 | | --- | --- | --- | --- | 45 | | `wasm32-wasi` | ✓ | | WebAssembly with WASI | 46 | | `wasm32-unknown-emscripten` | ✓ | | WebAssembly via Emscripten | 47 | | `aarch64-unknown-linux-gnu` | ✓ | ✓ | ARM64 Linux (kernel 4.2, glibc 2.17+) | 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020~ Phodal Huang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Datum (aka Charj) 2 | 3 | [![Datum Build](https://github.com/datum-lang/datum/actions/workflows/quickstart.yml/badge.svg)](https://github.com/datum-lang/datum/actions/workflows/quickstart.yml) 4 | 5 | > An easy-to-maintain (read/write) language for transforming **from**/**to** other languages. 6 | 7 | A language of thinking in [https://github.com/phodal/cloud-dev](https://github.com/phodal/cloud-dev), designed for: 8 | 9 | - legacy system migration. 10 | - multiple-target compiled languages. (by LLVM) 11 | - quick pseudocode. 12 | - simple DSL design. (TBD) 13 | - domain model design for languages. 14 | - visualize architecture. 15 | 16 | ## Development 17 | 18 | See [DEVELOPMENT.md](DEVELOPMENT.md) 19 | 20 | ## Roadmap 21 | 22 | See [ROADMAP.md](ROADMAP.md) 23 | 24 | ## License 25 | 26 | The lexer is based on and inspired by [solang](https://github.com/hyperledger-labs/solang) & [RustPython](https://github.com/RustPython/RustPython) 27 | 28 | This code is distributed under the MIT license. See `LICENSE` in this directory. 29 | -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- 1 | # Roadmap 2 | 3 | ## Todo 4 | 5 | ### Main 6 | 7 | High priority: 8 | 9 | - [x] implement basic compiler logic 10 | - [x] hir convert (neat) 11 | - [x] mir convert (medium) 12 | - [x] to LLVM (codegen) 13 | - [ ] improve Charj syntax 14 | - [ ] design HIR 15 | - [ ] design MIR 16 | - [ ] charj is a MIR for high level 17 | - [ ] LLVM 11 18 | - [ ] waiting for inkwell 1100 19 | 20 | Medium priority: 21 | 22 | - [ ] multiple-target 23 | - [ ] WASM 24 | - [x] Desktop 25 | - [ ] build system 26 | - [ ] package manager 27 | - [ ] use Maven ? 
/// Runs `git describe --tags` and returns its trimmed standard output.
///
/// Returns `None` when the command cannot be spawned (for example `git` is
/// not installed), exits with a non-zero status (for example the repository
/// has no tags, or the sources were unpacked from a tarball), prints
/// non-UTF-8 output, or prints nothing at all.
fn git_describe() -> Option<String> {
    let output = Command::new("git")
        .args(&["describe", "--tags"])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    let described = String::from_utf8(output.stdout).ok()?;
    // `git describe` terminates its output with a newline; keeping it would
    // put a line break inside the `cargo:` directive emitted by `main`.
    let described = described.trim();
    if described.is_empty() {
        None
    } else {
        Some(described.to_string())
    }
}

/// Version string exported to the crate: the `git describe --tags` output,
/// or `"unknown"` when it cannot be determined.
fn git_version() -> String {
    git_describe().unwrap_or_else(|| "unknown".to_string())
}

/// Build-script entry point: exposes the version as the `GIT_HASH`
/// compile-time environment variable.
///
/// A missing `git` binary or an untagged checkout no longer aborts the
/// build; `GIT_HASH` falls back to `"unknown"` instead.
fn main() {
    println!("cargo:rustc-env=GIT_HASH={}", git_version());
}
"https://github.com/datum-lang/datum" 9 | documentation = "https://github.com/datum-lang/datum" 10 | homepage = "https://github.com/datum-lang/datum" 11 | description = """ 12 | Datum is a language for describe other languages 13 | """ 14 | categories = ["text-processing", "command-line-interface", "development-tools", "parser-implementations", "parsing"] 15 | exclude = [ 16 | "benchmark/*", 17 | "fixtures/*", 18 | ".github/*", 19 | ".gitattributes", 20 | ".adr.json", 21 | ".cargo_vcs_info.json", 22 | ] 23 | 24 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 25 | 26 | [dependencies] 27 | -------------------------------------------------------------------------------- /dc_codegen/src/lib.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dc_compiler/.gitignore: -------------------------------------------------------------------------------- 1 | *.ll 2 | *.cjc 3 | *.bc 4 | *.wasm 5 | *.html -------------------------------------------------------------------------------- /dc_compiler/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dc_compiler" 3 | version = "0.1.0" 4 | authors = ["Phodal Huang "] 5 | edition = "2018" 6 | license = "MIT" 7 | readme = "README.md" 8 | repository = "https://github.com/datum-lang/datum" 9 | documentation = "https://github.com/datum-lang/datum" 10 | homepage = "https://github.com/datum-lang/datum" 11 | description = """ 12 | Datum is a language for describe other languages 13 | """ 14 | categories = ["text-processing", "command-line-interface", "development-tools", "parser-implementations", "parsing"] 15 | exclude = [ 16 | "benchmark/*", 17 | "fixtures/*", 18 | ".github/*", 19 | ".gitattributes", 20 | ".adr.json", 21 | ".cargo_vcs_info.json", 22 | ] 23 | 24 | # See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html 25 | 26 | [dependencies] 27 | indexmap = "1.0" 28 | lazy_static = "1.4" 29 | # todo: update targets 30 | inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["target-x86", "target-arm", "target-webassembly", "llvm12-0"] } 31 | 32 | dc_lexer = { path = "../dc_lexer" } 33 | dc_hir = { path = "../dc_hir" } 34 | dc_mir = { path = "../dc_mir" } 35 | dc_parser = { path = "../dc_parser" } 36 | dc_codegen = { path = "../dc_codegen" } 37 | 38 | 39 | [dev-dependencies] 40 | wasmi = "0.6" 41 | -------------------------------------------------------------------------------- /dc_compiler/README.md: -------------------------------------------------------------------------------- 1 | # Charj compiler: Zao 2 | 3 | ## Workflow 4 | 5 | - Neat mod for transform AST -> HIR 6 | - Medium mod for transform HIR -> MIR (TBD) 7 | - Lowerify mod for transform MIR -> LLVM IR 8 | 9 | ## Target 10 | 11 | ### X86-X64 12 | 13 | ### WASM 14 | 15 | ### Bitcode 16 | 17 | Run bitcode 18 | 19 | ```rust 20 | lli main.cjc 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /dc_compiler/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub use lowerify::*; 2 | pub use meanify::*; 3 | pub use neat::*; 4 | 5 | pub mod lowerify; 6 | pub mod meanify; 7 | pub mod neat; 8 | 9 | pub fn parse_and_resolve(input: &str, filename: &str) -> Namespace { 10 | let mut namespace = Namespace::new(); 11 | namespace.files.push(filename.to_string()); 12 | 13 | program(input, filename, &mut namespace); 14 | namespace 15 | } 16 | 17 | pub fn process_string(input: &str, filename: &str) -> Namespace { 18 | let mut namespace = parse_and_resolve(input, filename); 19 | meanify(&mut namespace); 20 | namespace 21 | } 22 | 23 | #[cfg(test)] 24 | mod test { 25 | use crate::{codegen, process_string}; 26 | 27 | #[test] 28 | #[rustfmt::skip] 29 | fn 
should_support_local_function_call() { 30 | let mut ns = process_string(" 31 | default$say_hello() {println(\"hello, world\");println(5);} 32 | default$main() {say_hello();} 33 | ", "hello.cj"); 34 | assert_eq!("say_hello", ns.cfgs[0].name); 35 | assert_eq!("main", ns.cfgs[1].name); 36 | let _results = codegen(&mut ns, "jit"); 37 | } 38 | 39 | #[test] 40 | #[rustfmt::skip] 41 | fn should_support_local_function_call_utf8() { 42 | let mut ns = process_string(" 43 | default$你好() {println(\"你好,世界!\");} 44 | default$main() {你好();} 45 | ", "hello.cj"); 46 | assert_eq!("你好", ns.cfgs[0].name); 47 | assert_eq!("main", ns.cfgs[1].name); 48 | let _results = codegen(&mut ns, "jit"); 49 | } 50 | 51 | #[test] 52 | #[rustfmt::skip] 53 | fn should_run_function_after_main() { 54 | 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /dc_compiler/src/lowerify/base_target.rs: -------------------------------------------------------------------------------- 1 | use inkwell::types::BasicMetadataTypeEnum; 2 | use inkwell::values::FunctionValue; 3 | 4 | use dc_mir::instruction::ExprKind; 5 | 6 | use crate::code_object::CodeObject; 7 | use crate::ControlFlowGraph; 8 | 9 | pub trait BaseTarget<'a> { 10 | fn emit_function(&self, sb: &mut CodeObject, cfg: &ControlFlowGraph) { 11 | let function = self.create_llvm_function(sb, &cfg); 12 | self.emit_cfg(sb, function, cfg); 13 | } 14 | 15 | fn create_llvm_function<'func>( 16 | &self, 17 | co: &mut CodeObject<'func>, 18 | cfg: &ControlFlowGraph, 19 | ) -> FunctionValue<'func> { 20 | let ret_type = co.context.i32_type(); 21 | let args_types = std::iter::repeat(ret_type) 22 | .take(cfg.params.len()) 23 | .map(|f| f.into()) 24 | .collect::>(); 25 | let args_types = args_types.as_slice(); 26 | 27 | let fn_type = co.context.i32_type().fn_type(args_types, false); 28 | 29 | let func_decl = co.module.add_function(&cfg.name, fn_type, None); 30 | func_decl 31 | } 32 | 33 | fn emit_cfg(&self, sb: &mut 
CodeObject, function: FunctionValue, cfg: &ControlFlowGraph) { 34 | let bb = sb.context.append_basic_block(function, &cfg.name); 35 | sb.builder.position_at_end(bb); 36 | 37 | for instr in &cfg.blocks.instructions { 38 | match instr { 39 | ExprKind::Var { .. } => {} 40 | ExprKind::Call { value } => { 41 | sb.emit_call(value); 42 | } 43 | ExprKind::Print { value } => { 44 | sb.emit_print(&"", value); 45 | } 46 | } 47 | } 48 | 49 | sb.emit_void(); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /dc_compiler/src/lowerify/classic_target.rs: -------------------------------------------------------------------------------- 1 | use inkwell::context::Context; 2 | 3 | use crate::base_target::BaseTarget; 4 | use crate::lowerify::code_object::CodeObject; 5 | use crate::Namespace; 6 | 7 | pub struct ClassicTarget {} 8 | 9 | impl ClassicTarget { 10 | pub fn build<'a>( 11 | filename: &'a String, 12 | context: &'a Context, 13 | ns: &'a Namespace, 14 | ) -> CodeObject<'a> { 15 | let target = ClassicTarget {}; 16 | 17 | let mut structure = CodeObject::new(context, filename, ns, "x86_64"); 18 | // todo: call main after build others. 
19 | for cfg in &ns.cfgs { 20 | target.emit_function(&mut structure, &cfg); 21 | } 22 | 23 | structure 24 | } 25 | } 26 | 27 | impl<'a> BaseTarget<'a> for ClassicTarget {} 28 | -------------------------------------------------------------------------------- /dc_compiler/src/lowerify/code_object.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | 3 | use inkwell::builder::Builder; 4 | use inkwell::context::Context; 5 | use inkwell::module::{Linkage, Module}; 6 | use inkwell::targets::{CodeModel, FileType, RelocMode, TargetTriple}; 7 | use inkwell::values::PointerValue; 8 | use inkwell::{AddressSpace, OptimizationLevel}; 9 | 10 | use crate::Namespace; 11 | 12 | #[allow(dead_code)] 13 | #[derive(Debug)] 14 | pub struct CodeObject<'a> { 15 | pub name: &'a str, 16 | pub module: Module<'a>, 17 | pub context: &'a Context, 18 | pub(crate) builder: Builder<'a>, 19 | pub ns: &'a Namespace, 20 | } 21 | 22 | impl<'a> CodeObject<'a> { 23 | pub fn new(context: &'a Context, filename: &'a str, ns: &'a Namespace, target: &str) -> Self { 24 | let triple = TargetTriple::create(target); 25 | let module = context.create_module(filename); 26 | 27 | module.set_triple(&triple); 28 | module.set_source_file_name(filename); 29 | 30 | CodeObject { 31 | name: &filename, 32 | module, 33 | builder: context.create_builder(), 34 | context, 35 | ns, 36 | } 37 | } 38 | 39 | pub(crate) fn emit_call(&self, name: &str) { 40 | let opt = self.module.get_function(name); 41 | match opt { 42 | None => {} 43 | Some(fun) => { 44 | self.builder.build_call(fun, &[], name); 45 | } 46 | } 47 | } 48 | 49 | pub(crate) fn emit_print(&self, name: &&str, data: &str) { 50 | let printf; 51 | match self.module.get_function("puts") { 52 | None => { 53 | let i32_type = self.context.i32_type(); 54 | let str_type = self.context.i8_type().ptr_type(AddressSpace::Generic); 55 | let printf_type = i32_type.fn_type(&[str_type.into()], true); 56 | 57 | printf = self 58 | 
.module 59 | .add_function("puts", printf_type, Some(Linkage::External)); 60 | } 61 | Some(func) => { 62 | printf = func; 63 | } 64 | } 65 | 66 | let pointer_value = self.emit_global_string(name, data.as_ref(), false); 67 | self.builder.build_call(printf, &[pointer_value.into()], ""); 68 | } 69 | 70 | fn emit_global_string(&self, name: &str, data: &[u8], constant: bool) -> PointerValue<'a> { 71 | let ty = self.context.i8_type().array_type(data.len() as u32); 72 | 73 | let gv = self 74 | .module 75 | .add_global(ty, Some(AddressSpace::Generic), name); 76 | 77 | gv.set_linkage(Linkage::Internal); 78 | 79 | gv.set_initializer(&self.context.const_string(data, false)); 80 | 81 | if constant { 82 | gv.set_constant(true); 83 | gv.set_unnamed_addr(true); 84 | } 85 | 86 | self.builder.build_pointer_cast( 87 | gv.as_pointer_value(), 88 | self.context.i8_type().ptr_type(AddressSpace::Generic), 89 | name, 90 | ) 91 | } 92 | 93 | pub fn emit_void(&mut self) { 94 | self.builder 95 | .build_return(Some(&self.context.i32_type().const_zero())); 96 | } 97 | 98 | pub fn bitcode(&self, path: &Path) { 99 | self.module.write_bitcode_to_path(path); 100 | } 101 | 102 | pub fn run_jit(&self) -> i32 { 103 | self.module.get_function("main").unwrap().verify(true); 104 | 105 | let ee = self 106 | .module 107 | .create_jit_execution_engine(OptimizationLevel::None) 108 | .unwrap(); 109 | let maybe_fn = unsafe { 110 | // todo: thinking in return of main func 111 | ee.get_function:: i32>("main") 112 | }; 113 | 114 | let compiled_fn = match maybe_fn { 115 | Ok(f) => f, 116 | Err(err) => { 117 | panic!("{:?}", err); 118 | } 119 | }; 120 | 121 | unsafe { compiled_fn.call() } 122 | } 123 | 124 | pub fn dump_llvm(&self, path: &Path) -> Result<(), String> { 125 | if let Err(s) = self.module.print_to_file(path) { 126 | return Err(s.to_string()); 127 | } 128 | 129 | Ok(()) 130 | } 131 | 132 | pub fn code(&self) -> Result, String> { 133 | let target = inkwell::targets::Target::from_name("wasm32").unwrap(); 
134 | let target_machine = target 135 | .create_target_machine( 136 | &TargetTriple::create("wasm32-unknown-unknown-wasm"), 137 | "", 138 | "", 139 | OptimizationLevel::None, 140 | RelocMode::Default, 141 | CodeModel::Default, 142 | ) 143 | .unwrap(); 144 | 145 | match target_machine.write_to_memory_buffer(&self.module, FileType::Object) { 146 | Ok(out) => { 147 | let slice = out.as_slice(); 148 | return Ok(slice.to_vec()); 149 | } 150 | Err(s) => { 151 | return Err(s.to_string()); 152 | } 153 | } 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /dc_compiler/src/lowerify/mod.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | 3 | use inkwell::context::Context; 4 | 5 | use crate::lowerify::classic_target::ClassicTarget; 6 | use crate::lowerify::wasm_target::WasmTarget; 7 | use crate::Namespace; 8 | 9 | pub mod base_target; 10 | pub mod classic_target; 11 | pub mod code_object; 12 | pub mod wasm_target; 13 | 14 | lazy_static::lazy_static! 
{ 15 | static ref LLVM_INIT: () = { 16 | inkwell::targets::Target::initialize_webassembly(&Default::default()); 17 | }; 18 | } 19 | 20 | #[derive(PartialEq, Clone, Debug)] 21 | pub enum CodegenResult { 22 | Jit { exit_code: i32 }, 23 | Wasm { code: Vec }, 24 | LLVM { value: String }, 25 | BitCode, 26 | } 27 | 28 | #[derive(PartialEq, Clone, Copy, Debug)] 29 | pub enum CharjTarget { 30 | Generic, 31 | WASM, 32 | BitCode, 33 | } 34 | 35 | impl CharjTarget { 36 | #[allow(dead_code)] 37 | fn llvm_target_name(&self) -> &'static str { 38 | return match self { 39 | CharjTarget::Generic => "generic", 40 | CharjTarget::WASM => "wasm", 41 | CharjTarget::BitCode => "bitcode", 42 | }; 43 | } 44 | 45 | #[allow(dead_code)] 46 | fn llvm_target_triple(&self) -> &'static str { 47 | return match self { 48 | CharjTarget::Generic => "x86_64", 49 | CharjTarget::WASM => "wasm32-unknown-unknown-wasm", 50 | CharjTarget::BitCode => "", 51 | }; 52 | } 53 | } 54 | 55 | pub fn codegen(ns: &mut Namespace, target: &str) -> Vec { 56 | let mut results = vec![]; 57 | 58 | let filename = ns.files[0].clone(); 59 | let context = Context::create(); 60 | 61 | match target { 62 | "jit" => { 63 | let obj = ClassicTarget::build(&filename, &context, ns); 64 | let exit_code = obj.run_jit(); 65 | results.push(CodegenResult::Jit { exit_code }); 66 | } 67 | "wasm" => { 68 | lazy_static::initialize(&LLVM_INIT); 69 | let obj = WasmTarget::build(&filename, &context, ns); 70 | let code = obj.code().expect("compile should succeed"); 71 | results.push(CodegenResult::Wasm { code }); 72 | } 73 | "llvm" => { 74 | let obj = ClassicTarget::build(&filename, &context, ns); 75 | let name = format!("{}.ll", filename); 76 | match obj.dump_llvm(Path::new(&name)) { 77 | Ok(_) => { 78 | println!("dump llvm succeed: {:?}", name); 79 | } 80 | Err(_) => { 81 | panic!("dump llvm failed: {:?}", name); 82 | } 83 | } 84 | 85 | results.push(CodegenResult::LLVM { 86 | value: "".to_string(), 87 | }); 88 | } 89 | &_ => { 90 | let obj = 
ClassicTarget::build(&filename, &context, ns); 91 | let name = format!("{}.bc", &filename); 92 | obj.bitcode(Path::new(&name)); 93 | results.push(CodegenResult::BitCode); 94 | } 95 | } 96 | 97 | results 98 | } 99 | 100 | #[cfg(test)] 101 | mod test { 102 | #[test] 103 | #[rustfmt::skip] 104 | fn init_parser() {} 105 | } 106 | -------------------------------------------------------------------------------- /dc_compiler/src/lowerify/wasm_target.rs: -------------------------------------------------------------------------------- 1 | use crate::base_target::BaseTarget; 2 | use crate::code_object::CodeObject; 3 | use crate::Namespace; 4 | use inkwell::context::Context; 5 | 6 | pub struct WasmTarget {} 7 | 8 | impl WasmTarget { 9 | pub fn build<'a>( 10 | filename: &'a String, 11 | context: &'a Context, 12 | ns: &'a Namespace, 13 | ) -> CodeObject<'a> { 14 | let target = WasmTarget {}; 15 | 16 | let wasm_target = "wasm32-unknown-unknown-wasm"; 17 | let mut structure = CodeObject::new(context, filename, ns, wasm_target); 18 | for cfg in &ns.cfgs { 19 | target.emit_function(&mut structure, &cfg); 20 | } 21 | 22 | structure 23 | } 24 | } 25 | 26 | impl<'a> BaseTarget<'a> for WasmTarget {} 27 | -------------------------------------------------------------------------------- /dc_compiler/src/meanify/README.md: -------------------------------------------------------------------------------- 1 | # TBD -------------------------------------------------------------------------------- /dc_compiler/src/meanify/control_flow_graph.rs: -------------------------------------------------------------------------------- 1 | use dc_hir::Parameter; 2 | use dc_mir::basic_block::BasicBlock; 3 | use dc_mir::instruction::ExprKind; 4 | 5 | /// which is a [Control-flow graph](https://en.wikipedia.org/wiki/Control-flow_graph) 6 | #[derive(Clone, Debug)] 7 | pub struct ControlFlowGraph { 8 | pub name: String, 9 | pub blocks: BasicBlock, 10 | pub params: Vec, 11 | pub returns: Vec, 12 | } 13 | 14 | 
#[allow(dead_code)] 15 | impl ControlFlowGraph { 16 | pub fn new(name: String) -> Self { 17 | ControlFlowGraph { 18 | name, 19 | blocks: Default::default(), 20 | params: vec![], 21 | returns: vec![], 22 | } 23 | } 24 | 25 | pub fn placeholder() -> Self { 26 | ControlFlowGraph { 27 | name: "".to_string(), 28 | blocks: Default::default(), 29 | params: vec![], 30 | returns: vec![], 31 | } 32 | } 33 | 34 | pub fn emit(&mut self, instruction: ExprKind) { 35 | self.blocks.instructions.push(instruction); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /dc_compiler/src/meanify/meanify.rs: -------------------------------------------------------------------------------- 1 | use crate::{ControlFlowGraph, Namespace}; 2 | use dc_hir::{Builtin, Expression, Function, Statement}; 3 | use dc_mir::instruction::ExprKind; 4 | 5 | pub fn meanify(ns: &mut Namespace) { 6 | #[allow(unused_assignments)] 7 | let mut cfg_no = 0; 8 | let mut all_cfg = Vec::new(); 9 | 10 | cfg_no = ns.functions.len(); 11 | all_cfg.resize(cfg_no, ControlFlowGraph::placeholder()); 12 | 13 | let mut function_no = 0; 14 | for _cfg in all_cfg { 15 | function_cfg(function_no, ns); 16 | function_no = function_no + 1; 17 | } 18 | } 19 | 20 | pub fn function_cfg(function_no: usize, ns: &mut Namespace) { 21 | let func = &ns.functions[function_no]; 22 | 23 | let func_name = &func.name; 24 | let mut cfg = ControlFlowGraph::new(func_name.to_string()); 25 | cfg.params = func.params.clone(); 26 | cfg.returns = func.returns.clone(); 27 | 28 | for stmt in &func.body { 29 | statement_cfg(stmt, func, &mut cfg, ns) 30 | } 31 | 32 | ns.cfgs.push(cfg); 33 | } 34 | 35 | pub fn statement_cfg( 36 | stmt: &Statement, 37 | _func: &Function, 38 | cfg: &mut ControlFlowGraph, 39 | ns: &Namespace, 40 | ) { 41 | match stmt { 42 | Statement::VariableDecl { location: _ } => { 43 | // todo 44 | } 45 | Statement::Expression { 46 | location: _, 47 | expression: expr, 48 | } => { 49 | 
expression_cfg(expr, cfg, ns); 50 | } 51 | } 52 | } 53 | 54 | pub fn expression_cfg(expr: &Expression, cfg: &mut ControlFlowGraph, ns: &Namespace) -> Expression { 55 | match expr { 56 | Expression::Placeholder => Expression::Placeholder, 57 | Expression::StringLiteral { 58 | location: _, 59 | value: _, 60 | } => expr.clone(), 61 | Expression::NumberLiteral { 62 | location: _, 63 | ty: _, 64 | value: _, 65 | } => expr.clone(), 66 | Expression::BytesLiteral { .. } => Expression::Placeholder, 67 | Expression::InternalFunctionCall { 68 | location: _, 69 | function: fun, 70 | args: _, 71 | } => { 72 | match &**fun { 73 | Expression::Variable { 74 | location: _, 75 | ty: _, 76 | value, 77 | } => { 78 | cfg.emit(ExprKind::Call { 79 | value: value.to_string(), 80 | }); 81 | } 82 | _ => {} 83 | } 84 | Expression::Placeholder 85 | } 86 | Expression::Builtin { 87 | location: _, 88 | types: _, 89 | builtin, 90 | args, 91 | } => match builtin { 92 | Builtin::Assert => Expression::Placeholder, 93 | Builtin::Print => { 94 | let expr = expression_cfg(&args[0], cfg, ns); 95 | let mut val = "".to_string(); 96 | match expr { 97 | Expression::StringLiteral { location: _, value } => { 98 | val = value; 99 | } 100 | Expression::NumberLiteral { 101 | location: _, 102 | ty: _, 103 | value, 104 | } => { 105 | val = value.to_string(); 106 | } 107 | _ => {} 108 | } 109 | cfg.emit(ExprKind::Print { value: val }); 110 | Expression::Placeholder 111 | } 112 | }, 113 | Expression::Variable { 114 | location: _, 115 | ty: _, 116 | value: _, 117 | } => expr.clone(), 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /dc_compiler/src/meanify/mod.rs: -------------------------------------------------------------------------------- 1 | pub use control_flow_graph::*; 2 | pub use meanify::*; 3 | 4 | pub mod control_flow_graph; 5 | pub mod meanify; 6 | pub mod variable_table; 7 | 
-------------------------------------------------------------------------------- /dc_compiler/src/meanify/variable_table.rs: -------------------------------------------------------------------------------- 1 | pub struct VariableTable {} 2 | -------------------------------------------------------------------------------- /dc_compiler/src/neat/builtin.rs: -------------------------------------------------------------------------------- 1 | use crate::symbol_table::SymbolTable; 2 | use crate::{expression, Namespace}; 3 | use dc_hir::{Builtin, Expression, Type}; 4 | use dc_lexer::Location; 5 | 6 | #[derive(PartialEq, Clone, Debug)] 7 | pub struct Prototype { 8 | pub builtin: Builtin, 9 | pub namespace: Option<&'static str>, 10 | pub name: &'static str, 11 | pub args: &'static [Type], 12 | pub ret: &'static [Type], 13 | pub doc: &'static str, 14 | } 15 | 16 | // A list of all builtin functions 17 | static BUILTIN_FUNCTIONS: [Prototype; 3] = [ 18 | Prototype { 19 | builtin: Builtin::Print, 20 | namespace: None, 21 | name: "print", 22 | args: &[Type::String], 23 | ret: &[Type::Void], 24 | doc: "log string without new line", 25 | }, 26 | Prototype { 27 | builtin: Builtin::Print, 28 | namespace: None, 29 | name: "println", 30 | args: &[Type::String], 31 | ret: &[Type::Void], 32 | doc: "log string with line", 33 | }, 34 | Prototype { 35 | builtin: Builtin::Assert, 36 | namespace: None, 37 | name: "assert", 38 | args: &[Type::Bool], 39 | ret: &[Type::Void], 40 | doc: "abort execution if argument evaluates to false", 41 | }, 42 | ]; 43 | 44 | #[derive(Clone, PartialEq)] 45 | pub enum Symbol { 46 | Function(Vec), 47 | Variable(Location, usize, usize), 48 | Struct(Location, usize), 49 | Import(Location, usize), 50 | } 51 | 52 | pub fn is_builtin_call(namespace: Option<&str>, fname: &str) -> bool { 53 | BUILTIN_FUNCTIONS 54 | .iter() 55 | .any(|p| p.name == fname && p.namespace == namespace) 56 | } 57 | 58 | pub fn resolve_call( 59 | location: &Location, 60 | namespace: 
Option<&str>, 61 | ns: &mut Namespace, 62 | id: &str, 63 | args: &Vec, // args: &[Expression], 64 | symbol_table: &mut SymbolTable, 65 | ) -> Result { 66 | let matches = BUILTIN_FUNCTIONS 67 | .iter() 68 | .filter(|p| p.name == id && p.namespace == namespace) 69 | .collect::>(); 70 | 71 | let mut resolved_args = Vec::new(); 72 | for arg in args { 73 | let expr = expression::expression(&arg.expr, ns, symbol_table)?; 74 | resolved_args.push(expr); 75 | } 76 | 77 | for func in &matches { 78 | if func.args.len() != args.len() { 79 | continue; 80 | } 81 | let matches = true; 82 | 83 | if matches { 84 | return Ok(Expression::Builtin { 85 | location: location.to_owned(), 86 | types: func.ret.to_vec(), 87 | builtin: func.builtin.clone(), 88 | args: resolved_args, 89 | }); 90 | } 91 | } 92 | 93 | Err(()) 94 | } 95 | 96 | #[cfg(test)] 97 | mod tests { 98 | use crate::builtin::is_builtin_call; 99 | use dc_lexer::Location; 100 | use dc_parser::parse_tree::{Expression, ExpressionType}; 101 | 102 | #[test] 103 | fn should_identify_builtin_print() { 104 | let is_builtin = is_builtin_call(None, "print"); 105 | assert_eq!(true, is_builtin); 106 | 107 | let no_builtin = is_builtin_call(None, "printf"); 108 | assert_eq!(false, no_builtin); 109 | } 110 | 111 | #[test] 112 | fn should_resolved_call() { 113 | let expr = Expression { 114 | location: Location::new(0, 0), 115 | node: ExpressionType::String { 116 | value: "hello,world".to_string(), 117 | }, 118 | }; 119 | let mut exprs = vec![]; 120 | exprs.push(expr); 121 | // 122 | // let result = resolve_call("demo", None, "print", &exprs); 123 | // assert_eq!(true, result.is_ok()); 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /dc_compiler/src/neat/expression.rs: -------------------------------------------------------------------------------- 1 | use dc_hir::{Expression, Type}; 2 | use dc_parser::{Argument, ExpressionType}; 3 | 4 | use crate::builtin; 5 | use 
crate::neat::Namespace; 6 | use crate::symbol_table::SymbolTable; 7 | 8 | pub fn expression( 9 | expr: &dc_parser::Expression, 10 | ns: &mut Namespace, 11 | symbol_table: &mut SymbolTable, 12 | ) -> Result { 13 | match &expr.node { 14 | ExpressionType::Range { .. } => Ok(dc_hir::Expression::Placeholder), 15 | ExpressionType::BoolOp { .. } => Ok(dc_hir::Expression::Placeholder), 16 | ExpressionType::Binop { .. } => Ok(dc_hir::Expression::Placeholder), 17 | ExpressionType::Unop { .. } => Ok(dc_hir::Expression::Placeholder), 18 | ExpressionType::String { value } => Ok(dc_hir::Expression::StringLiteral { 19 | location: *&expr.location, 20 | value: value.to_string(), 21 | }), 22 | ExpressionType::Bool { .. } => Ok(dc_hir::Expression::Placeholder), 23 | ExpressionType::Number { value } => { 24 | let bits = value.bits(); 25 | let int_size = if bits < 7 { 8 } else { (bits + 7) & !7 } as u16; 26 | 27 | Ok(dc_hir::Expression::NumberLiteral { 28 | location: *&expr.location, 29 | ty: Type::Int(int_size), 30 | value: value.clone(), 31 | }) 32 | } 33 | ExpressionType::List { .. } => Ok(dc_hir::Expression::Placeholder), 34 | ExpressionType::Identifier { id: name } => Ok(dc_hir::Expression::Variable { 35 | location: *&expr.location, 36 | ty: Type::String, 37 | value: name.name.clone(), 38 | }), 39 | ExpressionType::Type { .. } => Ok(dc_hir::Expression::Placeholder), 40 | ExpressionType::MemberAccess { .. } => Ok(dc_hir::Expression::Placeholder), 41 | ExpressionType::Call { function, args } => { 42 | let result = function_call_expr(function, args, ns, symbol_table); 43 | return result; 44 | } 45 | ExpressionType::Compare { .. } => Ok(dc_hir::Expression::Placeholder), 46 | ExpressionType::PostUnop { .. 
} => Ok(dc_hir::Expression::Placeholder), 47 | ExpressionType::EmptyObject => Ok(dc_hir::Expression::Placeholder), 48 | } 49 | } 50 | 51 | fn function_call_expr( 52 | function: &Box, 53 | args: &Vec, 54 | ns: &mut Namespace, 55 | symtable: &mut SymbolTable, 56 | ) -> Result { 57 | // todo: match for MemberAccess 58 | function_call(function, args, ns, symtable) 59 | } 60 | 61 | fn function_call( 62 | var: &Box, 63 | args: &Vec, 64 | ns: &mut Namespace, 65 | symbol_table: &mut SymbolTable, 66 | ) -> Result { 67 | match &var.node { 68 | ExpressionType::Identifier { id } => { 69 | let is_builtin = builtin::is_builtin_call(None, &*id.name); 70 | 71 | if is_builtin { 72 | let result = 73 | builtin::resolve_call(&var.location, None, ns, &*id.name, args, symbol_table); 74 | return result; 75 | } 76 | 77 | let function = expression(var, ns, symbol_table)?; 78 | return Ok(Expression::InternalFunctionCall { 79 | location: var.location, 80 | args: vec![], 81 | function: Box::new(function), 82 | }); 83 | } 84 | _ => { 85 | println!("{:?}", &var.node); 86 | } 87 | } 88 | 89 | return Err(()); 90 | } 91 | -------------------------------------------------------------------------------- /dc_compiler/src/neat/mod.rs: -------------------------------------------------------------------------------- 1 | pub use expression::*; 2 | pub use namespace::*; 3 | pub use program::*; 4 | pub use statements::*; 5 | pub use struct_function::*; 6 | pub use symbol_table::*; 7 | pub use unit::*; 8 | 9 | pub mod builtin; 10 | pub mod expression; 11 | pub mod namespace; 12 | pub mod program; 13 | pub mod statements; 14 | pub mod struct_function; 15 | pub mod symbol_table; 16 | pub mod unit; 17 | -------------------------------------------------------------------------------- /dc_compiler/src/neat/namespace.rs: -------------------------------------------------------------------------------- 1 | use crate::ControlFlowGraph; 2 | use dc_hir::{Function, StructDecl}; 3 | use dc_parser::ExpressionType; 4 | 5 
| #[derive(Debug)] 6 | pub struct Namespace { 7 | // todo: add diagnostics 8 | pub files: Vec, 9 | pub structs: Vec, 10 | pub functions: Vec, 11 | pub cfgs: Vec, 12 | } 13 | 14 | impl Namespace { 15 | pub fn new() -> Self { 16 | Namespace { 17 | files: vec![], 18 | structs: vec![], 19 | functions: vec![], 20 | cfgs: vec![], 21 | } 22 | } 23 | 24 | pub fn resolve_type(&mut self, id: &dc_parser::Expression) { 25 | self.expr_to_type(&id); 26 | } 27 | 28 | pub fn expr_to_type<'a>(&mut self, expr: &'a dc_parser::Expression) { 29 | let expr = expr; 30 | match expr.node { 31 | ExpressionType::Call { .. } => {} 32 | _ => { 33 | println!("{:?}", expr.node); 34 | } 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /dc_compiler/src/neat/program.rs: -------------------------------------------------------------------------------- 1 | use crate::neat::unit::resolve_program; 2 | use crate::neat::Namespace; 3 | use dc_parser::parser::parse_program; 4 | 5 | pub fn program(input: &str, _filename: &str, namespace: &mut Namespace) { 6 | let parse_ast = parse_program(input); 7 | match parse_ast { 8 | Ok(unit) => { 9 | resolve_program(unit, namespace); 10 | } 11 | Err(_) => {} 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /dc_compiler/src/neat/statements.rs: -------------------------------------------------------------------------------- 1 | use crate::neat::expression::expression; 2 | use crate::neat::Namespace; 3 | use crate::symbol_table::SymbolTable; 4 | use dc_hir::Statement; 5 | use dc_parser::StructFuncDecl; 6 | 7 | pub fn resolve_function_body( 8 | func_def: &StructFuncDecl, 9 | namespace: &mut Namespace, 10 | function_no: usize, 11 | ) { 12 | let mut res = Vec::new(); 13 | let mut symbol_table = SymbolTable::new(); 14 | 15 | statement(&func_def.body, &mut res, namespace, &mut symbol_table); 16 | 17 | namespace.functions[function_no].body = res; 18 | } 19 | 20 | pub fn 
statement( 21 | body: &Vec, 22 | res: &mut Vec, 23 | namespace: &mut Namespace, 24 | symbol_table: &mut SymbolTable, 25 | ) { 26 | for stmt in body { 27 | match &stmt.node { 28 | dc_parser::StatementType::Break => {} 29 | dc_parser::StatementType::Continue => {} 30 | dc_parser::StatementType::If { .. } => {} 31 | dc_parser::StatementType::While { .. } => {} 32 | dc_parser::StatementType::For { .. } => {} 33 | dc_parser::StatementType::Loop => {} 34 | dc_parser::StatementType::Assign { .. } => {} 35 | dc_parser::StatementType::VariableDecl { .. } => {} 36 | dc_parser::StatementType::Return { .. } => {} 37 | dc_parser::StatementType::Expression { expr } => { 38 | let result = expression(&expr, namespace, symbol_table); 39 | match result { 40 | Ok(expression) => { 41 | res.push(Statement::Expression { 42 | location: stmt.location, 43 | expression, 44 | }); 45 | } 46 | Err(_) => {} 47 | } 48 | } 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /dc_compiler/src/neat/struct_function.rs: -------------------------------------------------------------------------------- 1 | use crate::neat::Namespace; 2 | use dc_hir::{Function, Parameter}; 3 | 4 | pub fn struct_function_decl( 5 | struct_func_def: &dc_parser::StructFuncDecl, 6 | namespace: &mut Namespace, 7 | ) -> bool { 8 | let success = true; 9 | 10 | let params = resolve_params(&struct_func_def.params, namespace); 11 | let (returns, _return_success) = resolve_returns(&struct_func_def.returns, namespace); 12 | 13 | let name = struct_func_def.name.name.to_owned(); 14 | 15 | let function = Function::new(name, params, returns); 16 | 17 | namespace.functions.push(function); 18 | 19 | success 20 | } 21 | 22 | pub fn resolve_returns( 23 | _returns: &Option, 24 | _namespace: &mut Namespace, 25 | ) -> (Vec, bool) { 26 | let resolved_returns = Vec::new(); 27 | let success = true; 28 | 29 | (resolved_returns, success) 30 | } 31 | 32 | pub fn resolve_params( 33 | parameters: 
&Vec<(dc_lexer::Loc, Option)>, 34 | namespace: &mut Namespace, 35 | ) -> Vec { 36 | let mut params = Vec::new(); 37 | for (loc, p) in parameters { 38 | let p = match p { 39 | Some(p) => p, 40 | None => { 41 | continue; 42 | } 43 | }; 44 | 45 | namespace.resolve_type(&p.ty); 46 | 47 | params.push(Parameter { 48 | location: *loc, 49 | name: p.get_name(), 50 | }) 51 | } 52 | return params; 53 | } 54 | -------------------------------------------------------------------------------- /dc_compiler/src/neat/symbol_table.rs: -------------------------------------------------------------------------------- 1 | use core::fmt; 2 | 3 | use indexmap::map::IndexMap; 4 | 5 | #[derive(Clone, Copy, PartialEq)] 6 | pub enum SymbolTableType { 7 | Module, 8 | // same to class symbol 9 | Struct, 10 | Function, 11 | Variable, 12 | BuiltinType, 13 | } 14 | 15 | impl fmt::Display for SymbolTableType { 16 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 17 | match self { 18 | SymbolTableType::Module => write!(f, "module"), 19 | SymbolTableType::Struct => write!(f, "struct"), 20 | SymbolTableType::Function => write!(f, "function"), 21 | SymbolTableType::Variable => write!(f, "variable"), 22 | SymbolTableType::BuiltinType => write!(f, "builtintype"), 23 | } 24 | } 25 | } 26 | 27 | #[derive(Clone)] 28 | pub struct SymbolTable { 29 | /// The name of this symbol table. Often the name of the class or function. 30 | pub name: String, 31 | 32 | /// The type of symbol table 33 | pub typ: SymbolTableType, 34 | 35 | /// The line number in the sourcecode where this symboltable begins. 36 | pub line_number: usize, 37 | 38 | // Return True if the block is a nested class or function 39 | pub is_nested: bool, 40 | 41 | /// A set of symbols present on this scope level. 42 | pub symbols: IndexMap, 43 | 44 | /// A list of subscopes in the order as found in the 45 | /// AST nodes. 
46 | pub sub_tables: Vec, 47 | } 48 | 49 | impl SymbolTable { 50 | pub fn new() -> Self { 51 | SymbolTable { 52 | name: "".to_string(), 53 | typ: SymbolTableType::Module, 54 | line_number: 0, 55 | is_nested: false, 56 | symbols: Default::default(), 57 | sub_tables: vec![], 58 | } 59 | } 60 | } 61 | 62 | /// Indicator for a single symbol what the scope of this symbol is. 63 | /// The scope can be unknown, which is unfortunate, but not impossible. 64 | #[derive(Debug, Clone)] 65 | pub enum SymbolScope { 66 | Global, 67 | Nonlocal, 68 | Local, 69 | Unknown, 70 | } 71 | 72 | /// A single symbol in a table. Has various properties such as the scope 73 | /// of the symbol, and also the various uses of the symbol. 74 | #[derive(Debug, Clone)] 75 | pub struct Symbol { 76 | pub name: String, 77 | // pub table: SymbolTableRef, 78 | pub scope: SymbolScope, 79 | } 80 | -------------------------------------------------------------------------------- /dc_compiler/src/neat/unit.rs: -------------------------------------------------------------------------------- 1 | use dc_parser::{Program, ProgramUnit, StructFuncDecl}; 2 | 3 | use crate::neat::struct_function::struct_function_decl; 4 | use crate::neat::{statements, Namespace}; 5 | 6 | pub fn resolve_program(program: Program, namespace: &mut Namespace) { 7 | // todo: make structs 8 | let _structs = program 9 | .0 10 | .iter() 11 | .filter_map(|part| { 12 | if let ProgramUnit::StructDecl(def) = part { 13 | Some(def) 14 | } else { 15 | None 16 | } 17 | }) 18 | .enumerate() 19 | .map(|(no, def)| (no, def.as_ref())) 20 | .collect::>(); 21 | 22 | // todo: resolve struct function 23 | let struct_funcs = program 24 | .0 25 | .iter() 26 | .filter_map(|part| { 27 | if let ProgramUnit::StructFuncDecl(def) = part { 28 | Some(def) 29 | } else { 30 | None 31 | } 32 | }) 33 | .enumerate() 34 | .map(|(no, def)| (no, def.as_ref())) 35 | .collect::>(); 36 | 37 | // todo: add import support 38 | for part in &program.0 { 39 | match part { 40 | 
ProgramUnit::ImportDecl(_) => {} 41 | _ => {} 42 | } 43 | } 44 | 45 | resolve_struct_functions(struct_funcs, namespace); 46 | } 47 | 48 | pub fn resolve_struct_functions( 49 | struct_funcs: Vec<(usize, &StructFuncDecl)>, 50 | namespace: &mut Namespace, 51 | ) -> bool { 52 | let mut _broken = false; 53 | let mut function_bodies = Vec::new(); 54 | 55 | for (index, func) in struct_funcs { 56 | struct_function_decl(func, namespace); 57 | if !(func.body.is_empty()) { 58 | function_bodies.push((index, func)); 59 | } else { 60 | // todo 61 | } 62 | } 63 | 64 | for (index, def) in function_bodies { 65 | statements::resolve_function_body(def, namespace, index); 66 | } 67 | 68 | _broken 69 | } 70 | -------------------------------------------------------------------------------- /dc_hir/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dc_hir" 3 | version = "0.1.0" 4 | authors = ["Phodal Huang "] 5 | edition = "2018" 6 | license = "MIT" 7 | readme = "README.md" 8 | repository = "https://github.com/datum-lang/datum" 9 | documentation = "https://github.com/datum-lang/datum" 10 | homepage = "https://github.com/datum-lang/datum" 11 | description = """ 12 | Datum is a language for describe other languages 13 | """ 14 | categories = ["text-processing", "command-line-interface", "development-tools", "parser-implementations", "parsing"] 15 | exclude = [ 16 | "benchmark/*", 17 | "fixtures/*", 18 | ".github/*", 19 | ".gitattributes", 20 | ".adr.json", 21 | ".cargo_vcs_info.json", 22 | ] 23 | 24 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 25 | 26 | [dependencies] 27 | num-bigint = "0.3" 28 | 29 | dc_lexer = { path = "../dc_lexer" } 30 | -------------------------------------------------------------------------------- /dc_hir/README.md: -------------------------------------------------------------------------------- 1 | # HIR 2 | 3 | ## Todo 4 | 5 | - 
-------------------------------------------------------------------------------- /dc_hir/src/expression.rs: -------------------------------------------------------------------------------- 1 | use dc_lexer::Location; 2 | use num_bigint::BigInt; 3 | 4 | use crate::Type; 5 | 6 | #[derive(PartialEq, Clone, Debug)] 7 | pub enum Builtin { 8 | Assert, 9 | Print, 10 | } 11 | 12 | #[derive(Clone, Debug)] 13 | pub enum Expression { 14 | Placeholder, 15 | Variable { 16 | location: Location, 17 | ty: Type, 18 | // change to symbol table 19 | value: String, 20 | }, 21 | StringLiteral { 22 | location: Location, 23 | value: String, 24 | }, 25 | NumberLiteral { 26 | location: Location, 27 | ty: Type, 28 | value: BigInt, 29 | }, 30 | BytesLiteral { 31 | location: Location, 32 | ty: Type, 33 | value: Vec, 34 | }, 35 | InternalFunctionCall { 36 | location: Location, 37 | function: Box, 38 | args: Vec, 39 | }, 40 | Builtin { 41 | location: Location, 42 | types: Vec, 43 | builtin: Builtin, 44 | args: Vec, 45 | }, 46 | } 47 | -------------------------------------------------------------------------------- /dc_hir/src/function.rs: -------------------------------------------------------------------------------- 1 | use crate::{Parameter, Statement}; 2 | 3 | #[derive(Clone, Debug)] 4 | pub struct Function { 5 | pub name: String, 6 | pub params: Vec, 7 | pub returns: Vec, 8 | pub body: Vec, 9 | } 10 | 11 | impl Function { 12 | pub fn new(name: String, params: Vec, returns: Vec) -> Self { 13 | Function { 14 | name, 15 | params, 16 | returns, 17 | body: Vec::new(), 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /dc_hir/src/hir.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug)] 2 | pub struct Expr<'hir> { 3 | pub kind: ExprKind<'hir>, 4 | } 5 | 6 | #[derive(Copy, Clone, PartialEq, Debug)] 7 | pub enum BinOpKind { 8 | /// The `+` operator (addition). 
9 | Add, 10 | /// The `-` operator (subtraction). 11 | Sub, 12 | /// The `*` operator (multiplication). 13 | Mul, 14 | /// The `/` operator (division). 15 | Div, 16 | /// The `%` operator (modulus). 17 | Rem, 18 | /// The `&&` operator (logical and). 19 | And, 20 | /// The `||` operator (logical or). 21 | Or, 22 | /// The `^` operator (bitwise xor). 23 | BitXor, 24 | /// The `&` operator (bitwise and). 25 | BitAnd, 26 | /// The `|` operator (bitwise or). 27 | BitOr, 28 | /// The `<<` operator (shift left). 29 | Shl, 30 | /// The `>>` operator (shift right). 31 | Shr, 32 | /// The `==` operator (equality). 33 | Eq, 34 | /// The `<` operator (less than). 35 | Lt, 36 | /// The `<=` operator (less than or equal to). 37 | Le, 38 | /// The `!=` operator (not equal to). 39 | Ne, 40 | /// The `>=` operator (greater than or equal to). 41 | Ge, 42 | /// The `>` operator (greater than). 43 | Gt, 44 | } 45 | 46 | #[derive(Debug)] 47 | pub enum ExprKind<'hir> { 48 | Call(&'hir Expr<'hir>, &'hir [Expr<'hir>]), 49 | MethodCall(), 50 | Continue(), 51 | Struct(), 52 | } 53 | -------------------------------------------------------------------------------- /dc_hir/src/lib.rs: -------------------------------------------------------------------------------- 1 | use dc_lexer::Loc; 2 | pub use expression::*; 3 | pub use function::*; 4 | pub use hir::*; 5 | pub use statement::*; 6 | pub use struct_def::*; 7 | pub use types::*; 8 | 9 | pub mod expression; 10 | pub mod function; 11 | pub mod hir; 12 | pub mod statement; 13 | pub mod struct_def; 14 | pub mod types; 15 | 16 | #[derive(Clone, Debug)] 17 | pub struct Parameter { 18 | pub location: Loc, 19 | pub name: String, 20 | } 21 | -------------------------------------------------------------------------------- /dc_hir/src/statement.rs: -------------------------------------------------------------------------------- 1 | use crate::Expression; 2 | use dc_lexer::Location; 3 | 4 | #[derive(Clone, Debug)] 5 | pub enum Statement { 6 | 
VariableDecl { 7 | location: Location, 8 | }, 9 | Expression { 10 | location: Location, 11 | expression: Expression, 12 | }, 13 | } 14 | -------------------------------------------------------------------------------- /dc_hir/src/struct_def.rs: -------------------------------------------------------------------------------- 1 | use crate::Function; 2 | 3 | #[derive(Clone, Debug)] 4 | pub struct Struct { 5 | pub name: String, 6 | } 7 | 8 | #[derive(Clone, Debug)] 9 | pub struct StructDecl { 10 | pub name: String, 11 | pub functions: Vec, 12 | } 13 | -------------------------------------------------------------------------------- /dc_hir/src/types.rs: -------------------------------------------------------------------------------- 1 | #[derive(PartialEq, Clone, Debug)] 2 | pub enum Type { 3 | Bool, 4 | Int(u16), 5 | Void, 6 | String, 7 | Bytes(u8), 8 | } 9 | -------------------------------------------------------------------------------- /dc_ir_pretty/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dc_ir_pretty" 3 | version = "0.1.0" 4 | authors = ["Phodal Huang "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | -------------------------------------------------------------------------------- /dc_ir_pretty/README.md: -------------------------------------------------------------------------------- 1 | # Charj Compiler IR Pretty 2 | 3 | Provides an output API for printing HIR, MIR & LIR. -------------------------------------------------------------------------------- /dc_ir_pretty/src/hir_pretty/mod.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dc_ir_pretty/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod hir_pretty; 2 | 
pub mod mir_pretty; 3 | -------------------------------------------------------------------------------- /dc_ir_pretty/src/mir_pretty/mod.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dc_lexer/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dc_lexer" 3 | version = "0.1.0" 4 | authors = ["Phodal Huang "] 5 | edition = "2018" 6 | license = "MIT" 7 | readme = "README.md" 8 | repository = "https://github.com/datum-lang/datum" 9 | documentation = "https://github.com/datum-lang/datum" 10 | homepage = "https://github.com/datum-lang/datum" 11 | description = """ 12 | Datum is a language for describe other languages 13 | """ 14 | categories = ["text-processing", "command-line-interface", "development-tools", "parser-implementations", "parsing"] 15 | exclude = [ 16 | "benchmark/*", 17 | "fixtures/*", 18 | ".github/*", 19 | ".gitattributes", 20 | ".adr.json", 21 | ".cargo_vcs_info.json", 22 | ] 23 | 24 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 25 | 26 | [dependencies] 27 | phf = { version = "0.8", features = ["macros"] } 28 | lalrpop-util = "0.19.0" 29 | unicode-xid = "0.2.0" 30 | -------------------------------------------------------------------------------- /dc_lexer/src/error.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use lalrpop_util::ParseError; 4 | 5 | use crate::location::Loc; 6 | use crate::token::Token; 7 | 8 | #[derive(Debug, Eq, Hash, PartialEq)] 9 | pub enum Level { 10 | Debug, 11 | Info, 12 | Warning, 13 | Error, 14 | } 15 | 16 | #[derive(Debug, Eq, Hash, PartialEq)] 17 | pub enum ErrorType { 18 | None, 19 | ParserError, 20 | SyntaxError, 21 | DeclarationError, 22 | TypeError, 23 | Warning, 24 | } 25 | 26 | #[derive(Debug, Eq, Hash, 
PartialEq)] 27 | pub struct Note { 28 | pub pos: Loc, 29 | pub message: String, 30 | } 31 | 32 | #[derive(Debug, PartialEq)] 33 | pub struct Diagnostic { 34 | pub level: Level, 35 | pub ty: ErrorType, 36 | pub pos: Option, 37 | pub message: String, 38 | pub notes: Vec, 39 | } 40 | 41 | impl Diagnostic { 42 | pub fn handle_error(error: ParseError) -> Diagnostic { 43 | match error { 44 | ParseError::InvalidToken { location } => { 45 | Diagnostic::parser_error(Loc(location, location), "invalid token".to_string()) 46 | } 47 | ParseError::UnrecognizedToken { 48 | token: (l, token, r), 49 | expected, 50 | } => Diagnostic::parser_error( 51 | Loc(l, r), 52 | format!( 53 | "unrecognised token `{}', expected {}", 54 | token, 55 | expected.join(", ") 56 | ), 57 | ), 58 | ParseError::User { error } => Diagnostic::parser_error(error.loc(), error.to_string()), 59 | ParseError::ExtraToken { token } => Diagnostic::parser_error( 60 | Loc(token.0, token.2), 61 | format!("extra token `{}' encountered", token.0), 62 | ), 63 | ParseError::UnrecognizedEOF { location, expected } => Diagnostic::parser_error( 64 | Loc(location, location), 65 | format!("unexpected end of file, expected {}", expected.join(", ")), 66 | ), 67 | } 68 | } 69 | 70 | pub fn error(pos: Loc, message: String) -> Self { 71 | Diagnostic { 72 | level: Level::Error, 73 | ty: ErrorType::SyntaxError, 74 | pos: Some(pos), 75 | message, 76 | notes: Vec::new(), 77 | } 78 | } 79 | 80 | pub fn parser_error(pos: Loc, message: String) -> Self { 81 | Diagnostic { 82 | level: Level::Error, 83 | ty: ErrorType::ParserError, 84 | pos: Some(pos), 85 | message, 86 | notes: Vec::new(), 87 | } 88 | } 89 | } 90 | 91 | #[derive(Debug, PartialEq)] 92 | pub enum LexicalError { 93 | EndOfFileInComment(usize, usize), 94 | EndOfFileInString(usize, usize), 95 | EndOfFileInHex(usize, usize), 96 | MissingNumber(usize, usize), 97 | InvalidCharacterInHexLiteral(usize, char), 98 | UnrecognisedToken(usize, usize, String), 99 | MissingExponent(usize, 
usize), 100 | ExpectedFrom(usize, usize, String), 101 | } 102 | 103 | impl fmt::Display for LexicalError { 104 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 105 | match self { 106 | LexicalError::EndOfFileInComment(_, _) => write!(f, "end of file found in comment"), 107 | LexicalError::EndOfFileInString(_, _) => { 108 | write!(f, "end of file found in string literal") 109 | } 110 | LexicalError::EndOfFileInHex(_, _) => { 111 | write!(f, "end of file found in hex literal string") 112 | } 113 | LexicalError::MissingNumber(_, _) => write!(f, "missing number"), 114 | LexicalError::InvalidCharacterInHexLiteral(_, ch) => { 115 | write!(f, "invalid character ‘{}’ in hex literal string", ch) 116 | } 117 | LexicalError::UnrecognisedToken(_, _, t) => write!(f, "unrecognised token ‘{}’", t), 118 | LexicalError::ExpectedFrom(_, _, t) => write!(f, "‘{}’ found where ‘from’ expected", t), 119 | LexicalError::MissingExponent(_, _) => write!(f, "missing number"), 120 | } 121 | } 122 | } 123 | 124 | impl LexicalError { 125 | pub fn loc(&self) -> Loc { 126 | match self { 127 | LexicalError::EndOfFileInComment(start, end) => Loc(*start, *end), 128 | LexicalError::EndOfFileInString(start, end) => Loc(*start, *end), 129 | LexicalError::EndOfFileInHex(start, end) => Loc(*start, *end), 130 | LexicalError::MissingNumber(start, end) => Loc(*start, *end), 131 | LexicalError::InvalidCharacterInHexLiteral(pos, _) => Loc(*pos, *pos), 132 | LexicalError::UnrecognisedToken(start, end, _) => Loc(*start, *end), 133 | LexicalError::ExpectedFrom(start, end, _) => Loc(*start, *end), 134 | LexicalError::MissingExponent(start, end) => Loc(*start, *end), 135 | } 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /dc_lexer/src/lexer.rs: -------------------------------------------------------------------------------- 1 | use std::iter::Peekable; 2 | use std::str::CharIndices; 3 | 4 | use phf::phf_map; 5 | use unicode_xid::UnicodeXID; 6 | 7 
| use crate::error::LexicalError; 8 | use crate::location::Location; 9 | use crate::token::{CommentType, Token}; 10 | 11 | #[allow(unused)] 12 | pub struct Lexer<'input> { 13 | input: &'input str, 14 | chars: Peekable>, 15 | last_tokens: [Option>; 2], 16 | char0: Option, 17 | location: Location, 18 | } 19 | 20 | static KEYWORDS: phf::Map<&'static str, Token> = phf_map! { 21 | // default 22 | "import" => Token::Import, 23 | "package" => Token::Package, 24 | "pkg" => Token::Package, 25 | "struct" => Token::Struct, 26 | "as" => Token::As, 27 | "fun" => Token::Fun, 28 | 29 | // statement 30 | "if" => Token::If, 31 | "else" => Token::Else, 32 | "while" => Token::While, 33 | "for" => Token::For, 34 | "in" => Token::In, 35 | "break" => Token::Break, 36 | "continue" => Token::Continue, 37 | "return" => Token::Return, 38 | "let" => Token::Let, 39 | 40 | // type 41 | "bool" => Token::Bool, 42 | "true" => Token::True, 43 | "false" => Token::False, 44 | 45 | "string" => Token::String, 46 | "int" => Token::Int(256), 47 | // "int256" => Token::Int(256), 48 | "uint" => Token::Uint(256), 49 | 50 | "$" => Token::Binding, 51 | }; 52 | 53 | impl<'input> Lexer<'input> { 54 | pub fn new(input: &'input str) -> Self { 55 | let mut lexer = Lexer { 56 | input, 57 | chars: input.char_indices().peekable(), 58 | last_tokens: [None, None], 59 | char0: None, 60 | location: Location::new(0, 0), 61 | }; 62 | 63 | lexer.location.reset(); 64 | lexer 65 | } 66 | 67 | fn lex_string( 68 | &mut self, 69 | token_start: usize, 70 | string_start: usize, 71 | ) -> Option, usize), LexicalError>> { 72 | let mut end; 73 | 74 | let mut last_was_escape = false; 75 | 76 | loop { 77 | if let Some((i, ch)) = self.chars.next() { 78 | end = i; 79 | if !last_was_escape { 80 | if ch == '"' { 81 | break; 82 | } 83 | last_was_escape = ch == '\\'; 84 | } else { 85 | last_was_escape = false; 86 | } 87 | } else { 88 | return Some(Err(LexicalError::EndOfFileInString( 89 | token_start, 90 | self.input.len(), 91 | ))); 92 | } 
93 | } 94 | 95 | Some(Ok(( 96 | token_start, 97 | Token::StringLiteral(&self.input[string_start..end]), 98 | end + 1, 99 | ))) 100 | } 101 | 102 | /// Helper function to go to the next character coming up. 103 | #[allow(unused)] 104 | fn next_char(&mut self) -> Option { 105 | let next_char = self.char0; 106 | if next_char == Some('\n') { 107 | self.location.newline(); 108 | } else { 109 | self.location.go_right(); 110 | } 111 | next_char 112 | } 113 | 114 | fn parse_number( 115 | &mut self, 116 | start: usize, 117 | end: usize, 118 | ch: char, 119 | ) -> Option, usize), LexicalError>> { 120 | if ch == '0' { 121 | if let Some((_, 'x')) = self.chars.peek() { 122 | // hex number 123 | self.chars.next(); 124 | 125 | let mut end = match self.chars.next() { 126 | Some((end, ch)) if ch.is_ascii_hexdigit() => end, 127 | Some((_, _)) => { 128 | return Some(Err(LexicalError::MissingNumber(start, start + 1))); 129 | } 130 | None => { 131 | return Some(Err(LexicalError::EndOfFileInHex(start, self.input.len()))); 132 | } 133 | }; 134 | 135 | while let Some((i, ch)) = self.chars.peek() { 136 | if !ch.is_ascii_hexdigit() && *ch != '_' { 137 | break; 138 | } 139 | end = *i; 140 | self.chars.next(); 141 | } 142 | 143 | return Some(Ok(( 144 | start, 145 | Token::HexNumber(&self.input[start..=end]), 146 | end + 1, 147 | ))); 148 | } 149 | } 150 | 151 | let mut end = end; 152 | while let Some((i, ch)) = self.chars.peek() { 153 | if !ch.is_ascii_digit() && *ch != '_' { 154 | break; 155 | } 156 | end = *i; 157 | self.chars.next(); 158 | } 159 | 160 | let base = &self.input[start..=end]; 161 | 162 | let mut exp_start = end + 1; 163 | 164 | if let Some((i, 'e')) = self.chars.peek() { 165 | exp_start = i + 1; 166 | self.chars.next(); 167 | while let Some((i, ch)) = self.chars.peek() { 168 | if !ch.is_ascii_digit() && *ch != '_' { 169 | break; 170 | } 171 | end = *i; 172 | self.chars.next(); 173 | } 174 | 175 | if exp_start > end { 176 | return Some(Err(LexicalError::MissingExponent(start, 
self.input.len()))); 177 | } 178 | } 179 | 180 | let exp = &self.input[exp_start..=end]; 181 | 182 | Some(Ok((start, Token::NumberLiteral(base, exp), end + 1))) 183 | } 184 | 185 | fn next(&mut self) -> Option, usize), LexicalError>> { 186 | loop { 187 | match self.chars.next() { 188 | Some((start, ch)) if ch == '_' || UnicodeXID::is_xid_start(ch) => { 189 | let end; 190 | 191 | loop { 192 | if let Some((i, ch)) = self.chars.peek() { 193 | if !UnicodeXID::is_xid_continue(*ch) { 194 | end = *i; 195 | break; 196 | } 197 | self.chars.next(); 198 | } else { 199 | end = self.input.len(); 200 | break; 201 | } 202 | } 203 | 204 | let id = &self.input[start..end]; 205 | 206 | if id == "unicode" { 207 | if let Some((_, '"')) = self.chars.peek() { 208 | self.chars.next(); 209 | 210 | return self.lex_string(start, start + 8); 211 | } 212 | } 213 | 214 | if id == "hex" { 215 | if let Some((_, '"')) = self.chars.peek() { 216 | self.chars.next(); 217 | 218 | while let Some((i, ch)) = self.chars.next() { 219 | if ch == '"' { 220 | return Some(Ok(( 221 | start, 222 | Token::HexLiteral(&self.input[start..=i]), 223 | i + 1, 224 | ))); 225 | } 226 | 227 | if !ch.is_ascii_hexdigit() && ch != '_' { 228 | // Eat up the remainder of the string (up to and including the closing quote) 229 | while let Some((_, ch)) = self.chars.next() { 230 | if ch == '"' { 231 | break; 232 | } 233 | } 234 | 235 | return Some(Err(LexicalError::InvalidCharacterInHexLiteral( 236 | i, ch, 237 | ))); 238 | } 239 | } 240 | 241 | return Some(Err(LexicalError::EndOfFileInString( 242 | start, 243 | self.input.len(), 244 | ))); 245 | } 246 | } 247 | 248 | return if let Some(w) = KEYWORDS.get(id) { 249 | Some(Ok((start, *w, end))) 250 | } else { 251 | Some(Ok((start, Token::Identifier(id), end))) 252 | }; 253 | } 254 | Some((start, '"')) => { 255 | return self.lex_string(start, start + 1); 256 | } 257 | Some((start, '/')) => { 258 | match self.chars.peek() { 259 | Some((_, '=')) => { 260 | self.chars.next(); 261 | return Some(Ok((start, Token::DivideAssign, 
start + 2))); 262 | } 263 | Some((_, '/')) => { 264 | // line comment 265 | self.chars.next(); 266 | 267 | let doc_comment_start = match self.chars.peek() { 268 | Some((i, '/')) => Some(i + 1), 269 | _ => None, 270 | }; 271 | 272 | let mut last = start + 3; 273 | 274 | while let Some((i, ch)) = self.chars.next() { 275 | if ch == '\n' || ch == '\r' { 276 | break; 277 | } 278 | last = i; 279 | } 280 | 281 | if let Some(doc_start) = doc_comment_start { 282 | if last > doc_start { 283 | return Some(Ok(( 284 | start + 3, 285 | Token::DocComment( 286 | CommentType::Line, 287 | &self.input[doc_start..=last], 288 | ), 289 | last + 1, 290 | ))); 291 | } 292 | } 293 | } 294 | Some((_, '*')) => { 295 | // multiline comment 296 | self.chars.next(); 297 | 298 | let doc_comment_start = match self.chars.peek() { 299 | Some((i, '*')) => Some(i + 1), 300 | _ => None, 301 | }; 302 | 303 | let mut last = start + 3; 304 | let mut seen_star = false; 305 | 306 | loop { 307 | if let Some((i, ch)) = self.chars.next() { 308 | if seen_star && ch == '/' { 309 | break; 310 | } 311 | seen_star = ch == '*'; 312 | last = i; 313 | } else { 314 | return Some(Err(LexicalError::EndOfFileInComment( 315 | start, 316 | self.input.len(), 317 | ))); 318 | } 319 | } 320 | 321 | if let Some(doc_start) = doc_comment_start { 322 | if last > doc_start { 323 | return Some(Ok(( 324 | start + 3, 325 | Token::DocComment( 326 | CommentType::Block, 327 | &self.input[doc_start..last], 328 | ), 329 | last, 330 | ))); 331 | } 332 | } 333 | } 334 | _ => { 335 | return Some(Ok((start, Token::Divide, start + 1))); 336 | } 337 | } 338 | } 339 | Some((_, ch)) if ch.is_whitespace() => (), 340 | Some((start, ch)) if ch.is_ascii_digit() => { 341 | return self.parse_number(start, start, ch) 342 | } 343 | Some((i, ';')) => return Some(Ok((i, Token::Semicolon, i + 1))), 344 | Some((i, ',')) => return Some(Ok((i, Token::Comma, i + 1))), 345 | Some((i, '(')) => return Some(Ok((i, Token::OpenParenthesis, i + 1))), 346 | Some((i, 
')')) => return Some(Ok((i, Token::CloseParenthesis, i + 1))), 347 | Some((i, '{')) => return Some(Ok((i, Token::OpenCurlyBrace, i + 1))), 348 | Some((i, '}')) => return Some(Ok((i, Token::CloseCurlyBrace, i + 1))), 349 | Some((i, '~')) => return Some(Ok((i, Token::Complement, i + 1))), 350 | Some((i, '=')) => { 351 | return match self.chars.peek() { 352 | Some((_, '=')) => { 353 | self.chars.next(); 354 | Some(Ok((i, Token::Equal, i + 2))) 355 | } 356 | Some((_, '>')) => { 357 | self.chars.next(); 358 | Some(Ok((i, Token::Arrow, i + 2))) 359 | } 360 | _ => Some(Ok((i, Token::Assign, i + 1))), 361 | }; 362 | } 363 | Some((i, '!')) => { 364 | if let Some((_, '=')) = self.chars.peek() { 365 | self.chars.next(); 366 | return Some(Ok((i, Token::NotEqual, i + 2))); 367 | } else { 368 | return Some(Ok((i, Token::Not, i + 1))); 369 | } 370 | } 371 | Some((i, '|')) => { 372 | return match self.chars.peek() { 373 | Some((_, '=')) => { 374 | self.chars.next(); 375 | Some(Ok((i, Token::BitwiseOrAssign, i + 2))) 376 | } 377 | Some((_, '|')) => { 378 | self.chars.next(); 379 | Some(Ok((i, Token::Or, i + 2))) 380 | } 381 | _ => Some(Ok((i, Token::BitwiseOr, i + 1))), 382 | }; 383 | } 384 | Some((i, '&')) => { 385 | return match self.chars.peek() { 386 | Some((_, '=')) => { 387 | self.chars.next(); 388 | Some(Ok((i, Token::BitwiseAndAssign, i + 2))) 389 | } 390 | Some((_, '&')) => { 391 | self.chars.next(); 392 | Some(Ok((i, Token::And, i + 2))) 393 | } 394 | _ => Some(Ok((i, Token::BitwiseAnd, i + 1))), 395 | }; 396 | } 397 | Some((i, '+')) => { 398 | return match self.chars.peek() { 399 | Some((_, '=')) => { 400 | self.chars.next(); 401 | Some(Ok((i, Token::AddAssign, i + 2))) 402 | } 403 | Some((_, '+')) => { 404 | self.chars.next(); 405 | Some(Ok((i, Token::Increment, i + 2))) 406 | } 407 | _ => Some(Ok((i, Token::Add, i + 1))), 408 | }; 409 | } 410 | Some((i, '-')) => { 411 | return match self.chars.peek() { 412 | Some((_, '=')) => { 413 | self.chars.next(); 414 | 
Some(Ok((i, Token::SubtractAssign, i + 2))) 415 | } 416 | Some((_, '>')) => { 417 | self.chars.next(); 418 | Some(Ok((i, Token::Rarrow, i + 2))) 419 | } 420 | Some((_, '-')) => { 421 | self.chars.next(); 422 | Some(Ok((i, Token::Decrement, i + 2))) 423 | } 424 | _ => Some(Ok((i, Token::Subtract, i + 1))), 425 | }; 426 | } 427 | Some((i, '*')) => { 428 | return match self.chars.peek() { 429 | Some((_, '=')) => { 430 | self.chars.next(); 431 | Some(Ok((i, Token::MulAssign, i + 2))) 432 | } 433 | Some((_, '*')) => { 434 | self.chars.next(); 435 | Some(Ok((i, Token::Power, i + 2))) 436 | } 437 | _ => Some(Ok((i, Token::Mul, i + 1))), 438 | }; 439 | } 440 | Some((i, '%')) => { 441 | return match self.chars.peek() { 442 | Some((_, '=')) => { 443 | self.chars.next(); 444 | Some(Ok((i, Token::ModuloAssign, i + 2))) 445 | } 446 | _ => Some(Ok((i, Token::Modulo, i + 1))), 447 | }; 448 | } 449 | Some((i, '<')) => { 450 | return match self.chars.peek() { 451 | Some((_, '<')) => { 452 | self.chars.next(); 453 | if let Some((_, '=')) = self.chars.peek() { 454 | self.chars.next(); 455 | Some(Ok((i, Token::ShiftLeftAssign, i + 3))) 456 | } else { 457 | Some(Ok((i, Token::ShiftLeft, i + 2))) 458 | } 459 | } 460 | Some((_, '=')) => { 461 | self.chars.next(); 462 | Some(Ok((i, Token::LessEqual, i + 2))) 463 | } 464 | _ => Some(Ok((i, Token::Less, i + 1))), 465 | }; 466 | } 467 | Some((i, '>')) => { 468 | return match self.chars.peek() { 469 | Some((_, '>')) => { 470 | self.chars.next(); 471 | if let Some((_, '=')) = self.chars.peek() { 472 | self.chars.next(); 473 | Some(Ok((i, Token::ShiftRightAssign, i + 3))) 474 | } else { 475 | Some(Ok((i, Token::ShiftRight, i + 2))) 476 | } 477 | } 478 | Some((_, '=')) => { 479 | self.chars.next(); 480 | Some(Ok((i, Token::MoreEqual, i + 2))) 481 | } 482 | _ => Some(Ok((i, Token::More, i + 1))), 483 | }; 484 | } 485 | Some((i, '.')) => { 486 | return match self.chars.peek() { 487 | Some((_, '.')) => { 488 | self.chars.next(); 489 | Some(Ok((i, 
Token::Range, i + 2))) 490 | } 491 | _ => Some(Ok((i, Token::Member, i + 1))), 492 | }; 493 | } 494 | Some((i, '[')) => return Some(Ok((i, Token::OpenBracket, i + 1))), 495 | Some((i, ']')) => return Some(Ok((i, Token::CloseBracket, i + 1))), 496 | Some((i, ':')) => return Some(Ok((i, Token::Colon, i + 1))), 497 | Some((i, '?')) => return Some(Ok((i, Token::Question, i + 1))), 498 | Some((i, '$')) => return Some(Ok((i, Token::Binding, i + 1))), 499 | Some((start, _)) => { 500 | let mut end; 501 | 502 | loop { 503 | if let Some((i, ch)) = self.chars.next() { 504 | end = i; 505 | 506 | if ch.is_whitespace() { 507 | break; 508 | } 509 | } else { 510 | end = self.input.len(); 511 | break; 512 | } 513 | } 514 | 515 | return Some(Err(LexicalError::UnrecognisedToken( 516 | start, 517 | end, 518 | self.input[start..end].to_owned(), 519 | ))); 520 | } 521 | None => return None, // End of file 522 | } 523 | } 524 | } 525 | } 526 | 527 | pub type Spanned = Result<(Loc, Token, Loc), Error>; 528 | 529 | impl<'input> Iterator for Lexer<'input> { 530 | type Item = Spanned, usize, LexicalError>; 531 | 532 | /// Return the next token 533 | fn next(&mut self) -> Option { 534 | let token = self.next(); 535 | 536 | self.last_tokens = [ 537 | self.last_tokens[1], 538 | match token { 539 | Some(Ok((_, n, _))) => Some(n), 540 | _ => None, 541 | }, 542 | ]; 543 | 544 | token 545 | } 546 | } 547 | -------------------------------------------------------------------------------- /dc_lexer/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub use error::*; 2 | pub use lexer::*; 3 | pub use location::*; 4 | pub use token::*; 5 | 6 | pub mod error; 7 | pub mod lexer; 8 | pub mod location; 9 | pub mod token; 10 | -------------------------------------------------------------------------------- /dc_lexer/src/location.rs: -------------------------------------------------------------------------------- 1 | //! 
Datatypes to support source location information. 2 | use std::fmt; 3 | 4 | #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Default)] 5 | pub struct Loc(pub usize, pub usize); 6 | 7 | impl Loc { 8 | pub fn new(start: usize, end: usize) -> Self { 9 | Loc(start, end) 10 | } 11 | } 12 | 13 | /// A location somewhere in the source code. 14 | #[derive(Clone, Copy, Debug, Default, PartialEq)] 15 | pub struct Location { 16 | row: usize, 17 | column: usize, 18 | } 19 | 20 | impl fmt::Display for Location { 21 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 22 | write!(f, "line {} column {}", self.row, self.column) 23 | } 24 | } 25 | 26 | impl Location { 27 | pub fn visualize(&self, line: &str, desc: &str) -> String { // renders `desc`, then `line`, then an arrow placed under the 1-based `self.column` 28 | format!("{}\n{}\n{}↑", desc, line, " ".repeat(self.column.saturating_sub(1))) // saturating: column 0 (the derived Default) must not underflow 29 | } 30 | } 31 | 32 | impl Location { 33 | pub fn new(row: usize, column: usize) -> Self { 34 | Location { row, column } 35 | } 36 | 37 | pub fn row(&self) -> usize { 38 | self.row 39 | } 40 | 41 | pub fn column(&self) -> usize { 42 | self.column 43 | } 44 | 45 | pub fn reset(&mut self) { 46 | self.row = 1; 47 | self.column = 1; 48 | } 49 | 50 | pub fn go_right(&mut self) { 51 | self.column += 1; 52 | } 53 | 54 | pub fn newline(&mut self) { 55 | self.row += 1; 56 | self.column = 1; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /dc_lexer/src/token.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{self}; 2 | 3 | /// Datum source code can be tokenized into a sequence of these tokens. 
4 | #[derive(Copy, Clone, PartialEq, Debug)] 5 | pub enum Token<'input> { 6 | Identifier(&'input str), 7 | StringLiteral(&'input str), 8 | NumberLiteral(&'input str, &'input str), 9 | HexLiteral(&'input str), 10 | HexNumber(&'input str), 11 | 12 | DocComment(CommentType, &'input str), 13 | 14 | Package, 15 | Import, 16 | Struct, 17 | Default, 18 | As, 19 | Fun, 20 | If, 21 | Else, 22 | While, 23 | For, 24 | In, 25 | Range, 26 | Break, 27 | Continue, 28 | Return, 29 | Let, 30 | Object, 31 | 32 | Rarrow, 33 | 34 | // type 35 | Bool, 36 | True, 37 | False, 38 | String, 39 | Uint(u16), 40 | Int(u16), 41 | Bytes(u8), 42 | DynamicBytes, 43 | 44 | NewLine, 45 | Binding, 46 | OpenParenthesis, 47 | CloseParenthesis, 48 | OpenCurlyBrace, 49 | CloseCurlyBrace, 50 | 51 | Question, 52 | Colon, 53 | 54 | NotEqual, 55 | Less, 56 | Arrow, 57 | LessEqual, 58 | More, 59 | MoreEqual, 60 | BitwiseXor, 61 | 62 | Or, 63 | BitwiseOr, 64 | BitwiseOrAssign, 65 | 66 | And, 67 | BitwiseAnd, 68 | BitwiseAndAssign, 69 | 70 | ShiftLeft, 71 | ShiftLeftAssign, 72 | 73 | ShiftRight, 74 | ShiftRightAssign, 75 | 76 | Add, 77 | AddAssign, 78 | Increment, 79 | 80 | Subtract, 81 | SubtractAssign, 82 | Decrement, 83 | 84 | BitwiseXorAssign, 85 | ModuloAssign, 86 | 87 | Power, 88 | Mul, 89 | MulAssign, 90 | 91 | Divide, 92 | DivideAssign, 93 | 94 | Modulo, 95 | Not, 96 | Complement, 97 | OpenBracket, 98 | CloseBracket, 99 | Member, 100 | Comma, 101 | Equal, 102 | Assign, 103 | Semicolon, 104 | } 105 | 106 | impl<'input> fmt::Display for Token<'input> { 107 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 108 | use Token::*; 109 | match self { 110 | Identifier(id) => write!(f, "{}", id), 111 | StringLiteral(s) => write!(f, "\"{}\"", s), 112 | HexLiteral(hex) => write!(f, "{}", hex), 113 | NumberLiteral(base, exp) if exp.is_empty() => write!(f, "{}", base), 114 | NumberLiteral(base, exp) => write!(f, "{}e{}", base, exp), 115 | HexNumber(n) => write!(f, "{}", n), 116 | 117 | 
DocComment(CommentType::Line, s) => write!(f, "///{}", s), 118 | DocComment(CommentType::Block, s) => write!(f, "/**{}\n*/", s), 119 | 120 | Package => write!(f, "package"), 121 | Import => write!(f, "import"), 122 | Struct => write!(f, "struct"), 123 | Default => write!(f, "default"), 124 | As => write!(f, "as"), 125 | Fun => write!(f, "fun"), 126 | 127 | If => write!(f, "if"), 128 | Else => write!(f, "else"), 129 | While => write!(f, "while"), 130 | For => write!(f, "for"), 131 | In => write!(f, "in"), 132 | Range => write!(f, ".."), 133 | 134 | Break => write!(f, "break"), 135 | Continue => write!(f, "continue"), 136 | Return => write!(f, "return"), 137 | Let => write!(f, "let"), 138 | Object => write!(f, "object"), 139 | 140 | // type 141 | Bool => write!(f, "bool"), 142 | True => write!(f, "true"), 143 | False => write!(f, "false"), 144 | 145 | String => write!(f, "string"), 146 | Uint(w) => write!(f, "uint{}", w), 147 | Int(w) => write!(f, "int{}", w), 148 | Bytes(w) => write!(f, "bytes{}", w), 149 | DynamicBytes => write!(f, "bytes"), 150 | 151 | Binding => write!(f, "$"), 152 | NewLine => write!(f, "NEWLINE"), 153 | 154 | Semicolon => write!(f, ";"), 155 | Comma => write!(f, ","), 156 | OpenParenthesis => write!(f, "("), 157 | CloseParenthesis => write!(f, ")"), 158 | OpenCurlyBrace => write!(f, "{{"), 159 | CloseCurlyBrace => write!(f, "}}"), 160 | BitwiseXor => write!(f, "^"), 161 | 162 | Or => write!(f, "||"), 163 | BitwiseOr => write!(f, "|"), 164 | BitwiseOrAssign => write!(f, "|="), 165 | 166 | And => write!(f, "&&"), 167 | BitwiseAnd => write!(f, "&"), 168 | BitwiseAndAssign => write!(f, "&="), 169 | 170 | Add => write!(f, "+"), 171 | AddAssign => write!(f, "+="), 172 | Increment => write!(f, "++"), 173 | 174 | Subtract => write!(f, "-"), 175 | SubtractAssign => write!(f, "-="), 176 | Decrement => write!(f, "--"), 177 | 178 | Power => write!(f, "**"), 179 | Mul => write!(f, "*"), 180 | MulAssign => write!(f, "*="), 181 | 182 | Divide => write!(f, 
"/"), 183 | DivideAssign => write!(f, "/="), 184 | 185 | Modulo => write!(f, "%"), 186 | Equal => write!(f, "=="), 187 | Assign => write!(f, "="), 188 | NotEqual => write!(f, "!="), 189 | Not => write!(f, "!"), 190 | 191 | ShiftLeft => write!(f, "<<"), 192 | ShiftLeftAssign => write!(f, "<<="), 193 | 194 | ShiftRight => write!(f, "<<"), 195 | ShiftRightAssign => write!(f, "<<="), 196 | 197 | BitwiseXorAssign => write!(f, "^="), 198 | ModuloAssign => write!(f, "%="), 199 | 200 | More => write!(f, ">"), 201 | MoreEqual => write!(f, ">="), 202 | Member => write!(f, "."), 203 | Colon => write!(f, ":"), 204 | OpenBracket => write!(f, "["), 205 | CloseBracket => write!(f, "]"), 206 | Complement => write!(f, "~"), 207 | Question => write!(f, "?"), 208 | Less => write!(f, "<"), 209 | LessEqual => write!(f, "<="), 210 | Arrow => write!(f, "=>"), 211 | Rarrow => f.write_str("'->'"), 212 | } 213 | } 214 | } 215 | 216 | #[derive(Copy, Clone, PartialEq, Debug)] 217 | pub enum CommentType { 218 | Line, 219 | Block, 220 | } 221 | -------------------------------------------------------------------------------- /dc_mir/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dc_mir" 3 | version = "0.1.0" 4 | authors = ["Phodal Huang "] 5 | edition = "2018" 6 | license = "MIT" 7 | readme = "README.md" 8 | repository = "https://github.com/datum-lang/datum" 9 | documentation = "https://github.com/datum-lang/datum" 10 | homepage = "https://github.com/datum-lang/datum" 11 | description = """ 12 | Datum is a language for describe other languages 13 | """ 14 | categories = ["text-processing", "command-line-interface", "development-tools", "parser-implementations", "parsing"] 15 | exclude = [ 16 | "benchmark/*", 17 | "fixtures/*", 18 | ".github/*", 19 | ".gitattributes", 20 | ".adr.json", 21 | ".cargo_vcs_info.json", 22 | ] 23 | 24 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 
25 | 26 | [dependencies] 27 | 28 | num-bigint = { version = "0.3", features = ["serde"] } 29 | serde = { version = "1.0", features = ["derive"] } 30 | 31 | dc_lexer = { path = "../dc_lexer" } 32 | -------------------------------------------------------------------------------- /dc_mir/README.md: -------------------------------------------------------------------------------- 1 | Some of the key characteristics of MIR are: 2 | 3 | - It is based on a control-flow graph. 4 | - It does not have nested expressions. 5 | - All types in MIR are fully explicit. 6 | 7 | Key MIR vocabulary 8 | 9 | - Basic blocks: units of the control-flow graph, consisting of: 10 | - statements: actions with one successor 11 | - terminators: actions with potentially multiple successors; always at the end of a block 12 | 13 | ## Python Samples 14 | 15 | ```rust 16 | LoadName { name: "print", scope: Free }, 17 | LoadConst { value: String { value: "hello,world" } }, 18 | CallFunction { typ: Positional(1) }, 19 | 20 | Pop, 21 | LoadConst { value: None }, 22 | ReturnValue 23 | ``` 24 | 25 | 26 | ## Android Smali Samples 27 | 28 | [Smali ZH](https://ctf-wiki.github.io/ctf-wiki/android/basic_operating_mechanism/java_layer/smali/smali-zh/) 29 | 30 | ### Fields 31 | 32 | ```smali 33 | #instance fields 34 | .field <访问权限修饰符> [非权限修饰符] <字段名>:<字段类型> 35 | ``` 36 | 37 | ### Method 38 | 39 | ```smali 40 | # 描述方法类型 41 | .method <访问权限修饰符> [修饰符] <方法原型> 42 | <.locals> 43 | [.parameter] 44 | [.prologue] 45 | [.line] 46 | <代码逻辑> 47 | [.line] 48 | <代码逻辑> 49 | .end 50 | ``` 51 | 52 | ### Class 53 | 54 | ```smali 55 | .class <访问权限修饰符> [非权限修饰符] <类名> 56 | .super <父类名> 57 | .source <源文件名称> 58 | ``` 59 | 60 | ### Annotations 61 | 62 | ```smali 63 | #annotations 64 | .annotation [注解的属性] <注解范围> 65 | [注解字段=值] 66 | ... 
67 | .end 68 | ``` 69 | -------------------------------------------------------------------------------- /dc_mir/src/basic_block.rs: -------------------------------------------------------------------------------- 1 | use crate::instruction::ExprKind; 2 | 3 | /// which is [basic block](https://en.wikipedia.org/wiki/Basic_block) 4 | #[derive(Clone, Debug)] 5 | pub struct BasicBlock { 6 | pub name: String, 7 | // todo: ConditionKind ? 8 | pub instructions: Vec, 9 | } 10 | 11 | impl Default for BasicBlock { 12 | fn default() -> Self { 13 | BasicBlock { 14 | name: "".to_string(), 15 | instructions: vec![], 16 | } 17 | } 18 | } 19 | 20 | impl BasicBlock {} 21 | -------------------------------------------------------------------------------- /dc_mir/src/instruction.rs: -------------------------------------------------------------------------------- 1 | use num_bigint::BigInt; 2 | use serde::{Deserialize, Serialize}; 3 | 4 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] 5 | pub enum Constant { 6 | Integer { value: BigInt }, 7 | Float { value: f64 }, 8 | Boolean { value: bool }, 9 | String { value: String }, 10 | } 11 | 12 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] 13 | pub enum MIRKind { 14 | Call {}, 15 | Return, 16 | JMP, 17 | } 18 | 19 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] 20 | pub enum ExprKind { 21 | Var { value: String }, 22 | Call { value: String }, 23 | Print { value: String }, 24 | } 25 | 26 | pub enum TerminatorKind {} 27 | -------------------------------------------------------------------------------- /dc_mir/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod basic_block; 2 | pub mod instruction; 3 | -------------------------------------------------------------------------------- /dc_parser/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dc_parser" 3 | version = "0.1.0" 4 | authors = 
["Phodal Huang "] 5 | edition = "2018" 6 | build = "build.rs" # LALRPOP preprocessing 7 | license = "MIT" 8 | readme = "README.md" 9 | repository = "https://github.com/datum-lang/datum" 10 | documentation = "https://github.com/datum-lang/datum" 11 | homepage = "https://github.com/datum-lang/datum" 12 | description = """ 13 | Datum is a language for describe other languages 14 | """ 15 | categories = ["text-processing", "command-line-interface", "development-tools", "parser-implementations", "parsing"] 16 | exclude = [ 17 | "benchmark/*", 18 | "fixtures/*", 19 | ".github/*", 20 | ".gitattributes", 21 | ".adr.json", 22 | ".cargo_vcs_info.json", 23 | ] 24 | 25 | [dependencies] 26 | lalrpop-util = "0.19.6" 27 | unicode-xid = "0.2.0" 28 | phf = { version = "0.8", features = ["macros"] } 29 | num-bigint = "0.3" 30 | num-traits = "0.2" 31 | num-derive = "0.3" 32 | 33 | regex = "1" 34 | 35 | dc_lexer = { path = "../dc_lexer" } 36 | 37 | # Add a build-time dependency on the lalrpop library: 38 | [build-dependencies.lalrpop] 39 | version = "0.19.1" 40 | features = ["lexer"] 41 | -------------------------------------------------------------------------------- /dc_parser/build.rs: -------------------------------------------------------------------------------- 1 | extern crate lalrpop; 2 | 3 | fn main() { 4 | let _result = lalrpop::Configuration::new() 5 | .always_use_colors() 6 | .process_current_dir(); 7 | 8 | // todo: thinking in generate code in codebase 9 | // lalrpop::Configuration::new() 10 | // .generate_in_source_tree() 11 | // .emit_rerun_directives(true) 12 | // .process() 13 | // .unwrap(); 14 | } 15 | -------------------------------------------------------------------------------- /dc_parser/src/datum.lalrpop: -------------------------------------------------------------------------------- 1 | use std::str::FromStr; 2 | use num_bigint::BigInt; 3 | use num_bigint::BigUint; 4 | use num_traits::Pow; 5 | use std::ops::Mul; 6 | 7 | use dc_lexer::location::{Loc, 
Location}; 8 | use dc_lexer::lexer; 9 | use dc_lexer::token::{Token, CommentType}; 10 | use dc_lexer::error::LexicalError; 11 | use crate::parse_tree::*; 12 | 13 | grammar<'input>(input: &'input str); 14 | 15 | pub Datum: Program = { 16 | ProgramUnit + => Program(<>) 17 | }; 18 | 19 | ProgramUnit: ProgramUnit = { 20 | PackageDecl => ProgramUnit::PackageDecl(<>), 21 | ImportDecl => ProgramUnit::ImportDecl(<>), 22 | ObjectDecl => ProgramUnit::ObjectDecl(<>), 23 | StructDecl => ProgramUnit::StructDecl(<>), 24 | FuncDecl => ProgramUnit::FuncDecl(<>), 25 | StructFuncDecl => ProgramUnit::StructFuncDecl(<>), 26 | } 27 | 28 | PackageDecl: Package = { 29 | "package" => Package::Plain(s), 30 | "pkg" => Package::Plain(s), 31 | } 32 | 33 | ImportDecl: Import = { 34 | "import" => Import::Standard(s), 35 | "import" "as" ";" => Import::GlobalSymbol(s, id), 36 | "import" "." "*" "as" ";" => Import::GlobalSymbol(s, id) 37 | } 38 | 39 | StructDecl: Box = { 40 | "struct" "{" )*> "}" => { 41 | Box::new(StructDecl{loc: Loc(l, r), name, fields}) 42 | } 43 | } 44 | 45 | ObjectDecl: Box = { 46 | "object" "{" )*> "}" => { 47 | Box::new(ObjectDecl{loc: Loc(l, r), name, functions}) 48 | } 49 | } 50 | 51 | 52 | FuncDecl: Box = { 53 | "fun" "{" "}" => { 54 | let params = params.unwrap_or(Vec::new()); 55 | let body = body.unwrap_or(Vec::new()); 56 | 57 | Box::new(FuncDecl { 58 | loc: Loc(l, r), 59 | name, 60 | params, 61 | body: body, 62 | }) 63 | } 64 | }; 65 | 66 | StructFuncDecl: Box = { 67 | "$" " Expression)?> "{" "}" => { 68 | let params = params.unwrap_or(Vec::new()); 69 | let body = body.unwrap_or(Vec::new()); 70 | 71 | Box::new(StructFuncDecl{ 72 | loc: Loc(l, r), 73 | name, struct_name, 74 | params, 75 | body, 76 | returns: returns.map(|returns| returns.1), 77 | }) 78 | } 79 | } 80 | 81 | VariableDecl: Statement = { 82 | ":" => { 83 | Statement { 84 | location: Location::new(l, r), 85 | node: StatementType::VariableDecl { 86 | field, 87 | ty 88 | } 89 | } 90 | } 91 | }; 92 | 93 | 
Suite: Suite = { 94 | => s.into_iter().flatten().collect(), 95 | }; 96 | 97 | Statement: Suite = { 98 | => vec![s], 99 | }; 100 | 101 | CompoundStatement: Statement = { 102 | // todo: add support for open || close statement 103 | // OpenStatement, // support like in one line if (a > b) return a 104 | // ClosedStatement, 105 | // 106 | IfStatement, 107 | WhileStatement, 108 | ForStatement, 109 | FlowStatement, 110 | ";" => <>, 111 | } 112 | 113 | VariableDeclaration: Statement = { 114 | "let" ":" "=" => { 115 | Statement { 116 | location: Location::new(l, r), 117 | node: StatementType::Assign { 118 | target: name, 119 | ty: typ, 120 | value: e 121 | }, 122 | } 123 | }, 124 | "let" ":" "=" => { 125 | Statement { 126 | location: Location::new(l, r), 127 | node: StatementType::Assign { 128 | target: name, 129 | ty: typ, 130 | value: e 131 | }, 132 | } 133 | } 134 | } 135 | 136 | EmptyObject: Expression = { 137 | "{" "}" => { 138 | Expression { 139 | location: Location::new(l, r), 140 | node: ExpressionType::EmptyObject 141 | } 142 | } 143 | } 144 | 145 | SimpleStatement: Statement = { 146 | VariableDeclaration, 147 | => { 148 | Statement { 149 | location: Location::new(l, r), 150 | node: StatementType::Expression { expr: e }, 151 | } 152 | } 153 | } 154 | 155 | FlowStatement: Statement = { 156 | "break" => { 157 | Statement { 158 | location: Location::new(l, r), 159 | node: StatementType::Break, 160 | } 161 | }, 162 | "continue" => { 163 | Statement { 164 | location: Location::new(l, r), 165 | node: StatementType::Continue, 166 | } 167 | }, 168 | "return" ";" => { 169 | Statement { 170 | location: Location::new(l, r), 171 | node: StatementType::Return { value }, 172 | } 173 | }, 174 | } 175 | 176 | ReturnList: Expression = { 177 | > => { 178 | Expression { 179 | location: Location::new(l, r), 180 | node: ExpressionType::List { elements }, 181 | } 182 | } 183 | } 184 | 185 | ReturnValue: Expression = { 186 | => e, 187 | } 188 | 189 | IfStatement: Statement = { 190 | 
"if" "(" ")" => { 191 | let mut vec: Vec = Vec::new(); 192 | vec.push(stmt); 193 | 194 | let body = vec as Suite; 195 | 196 | Statement { 197 | location: Location::new(l, r), 198 | node: StatementType::If { 199 | cond, 200 | body, 201 | orelse: None 202 | } 203 | } 204 | }, 205 | "if" "(" ")" "{" "}" => { 206 | let mut last = s3.map(|s| s.2); 207 | 208 | let body = body.unwrap_or(Vec::new()); 209 | 210 | Statement { 211 | location: Location::new(l, r), 212 | node: StatementType::If { 213 | cond, 214 | body, 215 | orelse: last 216 | } 217 | } 218 | } 219 | }; 220 | 221 | WhileStatement: Statement = { 222 | "while" "(" ")" "{" "}" => { 223 | let body = body.unwrap_or(Vec::new()); 224 | Statement { 225 | location: Location::new(l, r), 226 | node: StatementType::While { 227 | cond, 228 | body 229 | } 230 | } 231 | } 232 | }; 233 | 234 | ForStatement: Statement = { 235 | // todo: change target to ExpressionList, 236 | // todo: add support for `for (let i: int = 0; i < 100; i ++) { } 237 | "for" "(" "in" ")" "{" "}" => { 238 | let body = body.unwrap_or(Vec::new()); 239 | 240 | Statement { 241 | location: Location::new(l, r), 242 | node: StatementType::For { 243 | target: Box::new(target), 244 | iter: Box::new(iter), 245 | body 246 | } 247 | } 248 | } 249 | }; 250 | 251 | Expression: Expression = { 252 | UnaryExpr, 253 | } 254 | 255 | UnaryExpr: Expression = { 256 | RangeExpression 257 | } 258 | 259 | RangeExpression: Expression = { 260 | ".." 
=> { 261 | Expression { 262 | location: Location::new(l, r), 263 | node: ExpressionType::Range { start: Box::new(e1), end: Box::new(e2) } 264 | } 265 | }, 266 | OrExpression 267 | } 268 | 269 | OrExpression: Expression = { 270 | "||" => { 271 | let mut values = vec![e1]; 272 | values.push(e2); 273 | 274 | Expression { 275 | location: Location::new(l, r), 276 | node: ExpressionType::BoolOp { op: BooleanOperator::Or, values } 277 | } 278 | }, 279 | AndExpression 280 | } 281 | 282 | AndExpression: Expression = { 283 | "&&" => { 284 | let mut values = vec![e1]; 285 | values.push(e2); 286 | 287 | Expression { 288 | location: Location::new(l, r), 289 | node: ExpressionType::BoolOp { op: BooleanOperator::And, values } 290 | } 291 | }, 292 | CompareExpression 293 | } 294 | 295 | CompareExpression: Expression = { 296 | => { 297 | Expression { 298 | location: Location::new(l, r), 299 | node: ExpressionType::Compare { 300 | op, 301 | left: Box::new(e), 302 | right: Box::new(comparison) 303 | } 304 | } 305 | }, 306 | ShiftExpression, 307 | } 308 | 309 | CompOp: Comparison = { 310 | "==" => Comparison::Equal, 311 | "!=" => Comparison::NotEqual, 312 | "<" => Comparison::Less, 313 | "<=" => Comparison::LessOrEqual, 314 | ">" => Comparison::Greater, 315 | ">=" => Comparison::GreaterOrEqual, 316 | }; 317 | 318 | ShiftExpression: Expression = { 319 | => Expression { 320 | location: Location::new(l, r), 321 | node: ExpressionType::Binop { a: Box::new(e1), op, b: Box::new(e2) } 322 | }, 323 | ArithmeticExpression, 324 | } 325 | 326 | ShiftOp: Operator = { 327 | "<<" => Operator::LShift, 328 | ">>" => Operator::RShift, 329 | }; 330 | 331 | ArithmeticExpression: Expression = { 332 | => Expression { 333 | location: Location::new(l, r), 334 | node: ExpressionType::Binop { a: Box::new(a), op, b: Box::new(b) } 335 | }, 336 | Term 337 | }; 338 | 339 | AddOp: Operator = { 340 | "+" => Operator::Add, 341 | "-" => Operator::Sub, 342 | }; 343 | 344 | Term: Expression = { 345 | => Expression { 
346 | location: Location::new(l, r), 347 | node: ExpressionType::Binop { a: Box::new(a), op, b: Box::new(b) } 348 | }, 349 | NotExpr, 350 | }; 351 | 352 | MulOp: Operator = { 353 | "*" => Operator::Multiply, 354 | "/" => Operator::Divide, 355 | "%" => Operator::Modulo, 356 | }; 357 | 358 | NotExpr: Expression = { 359 | "!" => { 360 | Expression { 361 | location: Location::new(l, r), 362 | node: ExpressionType::Unop { op: UnaryOperator::Not, a: Box::new(e) } 363 | } 364 | }, 365 | FactoryExpr, 366 | } 367 | 368 | FactoryExpr: Expression = { 369 | => { 370 | Expression { 371 | location: Location::new(l, r), 372 | node: ExpressionType::Unop { op, a: Box::new(e) } 373 | } 374 | }, 375 | PostfixUnaryOperator 376 | } 377 | 378 | PostfixUnaryOperator: Expression = { 379 | => { 380 | Expression { 381 | location: Location::new(l, r), 382 | node: ExpressionType::PostUnop { op, a: Box::new(e) } 383 | } 384 | }, 385 | PrimaryExpr 386 | } 387 | 388 | AffixesUnOp: AffixesUnaryOperator = { 389 | "++" => AffixesUnaryOperator::Increment, 390 | "--" => AffixesUnaryOperator::Decrement, 391 | }; 392 | 393 | PrimaryExpr: Expression = { 394 | => <>, 395 | "." 
=> { 396 | Expression { 397 | location: Location::new(l, r), 398 | node: ExpressionType::MemberAccess { value: Box::new(e), name } 399 | } 400 | }, 401 | BoolExpr, 402 | Atom, 403 | ParenthesizedExpression, 404 | } 405 | 406 | BoolExpr: Expression = { 407 | "true" => { 408 | Expression { 409 | location: Location::new(l, r), 410 | node: ExpressionType::Bool { value: true } 411 | } 412 | }, 413 | "false" => { 414 | Expression { 415 | location: Location::new(l, r), 416 | node: ExpressionType::Bool { value: false } 417 | } 418 | }, 419 | } 420 | 421 | ParenthesizedExpression: Expression = { 422 | "(" ")" => e, 423 | } 424 | 425 | FunctionCall: Expression = { 426 | "(" > ")" => { 427 | Expression { 428 | location: Location::new(l, r), 429 | node: ExpressionType::Call { function: Box::new(f), args: a } 430 | } 431 | }, 432 | } 433 | 434 | UnOp: UnaryOperator = { 435 | "+" => UnaryOperator::Pos, 436 | "-" => UnaryOperator::Neg, 437 | "~" => UnaryOperator::Inv, 438 | }; 439 | 440 | Atom: Expression = { 441 | // system type & user custom type 442 | TypeLiteral, 443 | // "string" 444 | => Expression { 445 | location: Location::new(l, r), 446 | node: ExpressionType::String { value: value.to_string() } 447 | }, 448 | "[" > "]" => { 449 | Expression { 450 | location: Location::new(l, r), 451 | node: ExpressionType::List { elements: v }, 452 | } 453 | }, 454 | => { 455 | let base: String = n.0.chars().filter(|v| *v != '_').collect(); 456 | let exp: String = n.1.chars().filter(|v| *v != '_').collect(); 457 | 458 | let n = if exp.is_empty() { 459 | BigInt::from_str(&base).unwrap() 460 | } else { 461 | let base = BigInt::from_str(&base).unwrap(); 462 | let exp = BigInt::from_str("10").unwrap().pow(BigUint::from_str(&exp).unwrap()); 463 | 464 | base.mul(exp) 465 | }; 466 | 467 | Expression { 468 | location: Location::new(l, r), 469 | node: ExpressionType::Number { value: n } 470 | } 471 | }, 472 | } 473 | 474 | TypeLiteral: Expression = { 475 | // bool, int, string 476 | => { 477 | 
Expression { 478 | location: Location::new(l, r), 479 | node: ExpressionType::Type {ty} 480 | } 481 | }, 482 | // name 483 | => { 484 | Expression { 485 | location: Location::new(l, r), 486 | node: ExpressionType::Identifier { id:name } 487 | } 488 | }, 489 | // list 490 | "[" "]" => { 491 | Expression { 492 | location: Location::new(l, r), 493 | // todo: add list value support 494 | node: ExpressionType::List { elements: vec![] }, 495 | } 496 | }, 497 | } 498 | 499 | Type: Type = { 500 | "bool" => Type::Bool, 501 | "string" => Type::String, 502 | Int => Type::Int(<>), 503 | } 504 | 505 | Argument: Argument = { 506 | => { 507 | Argument { location: Location::new(l, r), expr: p } 508 | }, 509 | } 510 | 511 | ParameterList: Vec<(Loc, Option)> = { 512 | "(" ")" => Vec::new(), 513 | "(" ")" => vec!((Loc(l, r), Some(p))), 514 | "(" > ")" => <>, 515 | } 516 | 517 | OptParameter: (Loc, Option) = { 518 | => (Loc(l, r), p), 519 | } 520 | 521 | // A parameter list is used for function arguments, returns, and destructuring statements. 522 | // In destructuring statements, parameters can be optional. 
So, we make parameters optional 523 | // and as an added bonus we can generate error messages about missing parameters/returns 524 | // to functions 525 | Parameter: Parameter = { 526 | => { 527 | let loc = Loc(l, r); 528 | Parameter{loc, ty, name} 529 | } 530 | } 531 | 532 | Identifier: Identifier = { 533 | => Identifier{loc: Loc(l, r), name: n.to_string()} 534 | } 535 | 536 | StringLiteral: StringLiteral = { 537 | => { 538 | StringLiteral{ loc: Loc(l, r), string: s.to_string() } 539 | } 540 | } 541 | 542 | Comma: Vec = { 543 | => Vec::new(), 544 | CommaOne => <>, 545 | }; 546 | 547 | CommaOne: Vec = { 548 | )*> => { 549 | let mut v = v; 550 | v.insert(0, e); 551 | v 552 | } 553 | }; 554 | 555 | #[inline] 556 | OneOrMore: Vec = { 557 | => { 558 | let mut items = vec![i1]; 559 | items.extend(i2.into_iter().map(|e| e.1)); 560 | items 561 | } 562 | }; 563 | 564 | CommaTwo: Vec = { 565 | )+> => { 566 | let mut v = v; 567 | v.insert(0, e); 568 | v 569 | } 570 | }; 571 | 572 | extern { 573 | type Location = usize; 574 | type Error = LexicalError; 575 | 576 | enum Token<'input> { 577 | LexIdentifier => Token::Identifier(<&'input str>), 578 | LexStringLiteral => Token::StringLiteral(<&'input str>), 579 | LexNumber => Token::NumberLiteral(<&'input str>, <&'input str>), 580 | 581 | DocComment => Token::DocComment(, <&'input str>), 582 | // operators symbol 583 | 584 | // keywords 585 | "default" => Token::Default, 586 | "import" => Token::Import, 587 | "package" => Token::Package, 588 | "struct" => Token::Struct, 589 | "pkg" => Token::Package, 590 | "as" => Token::As, 591 | "fun" => Token::Fun, 592 | "let" => Token::Let, 593 | "object" => Token::Object, 594 | 595 | // statement 596 | "if" => Token::If, 597 | "else" => Token::Else, 598 | "while" => Token::While, 599 | "for" => Token::For, 600 | "in" => Token::In, 601 | ".." 
=> Token::Range, 602 | "break" => Token::Break, 603 | "continue" => Token::Continue, 604 | "return" => Token::Return, 605 | 606 | "bool" => Token::Bool, 607 | "true" => Token::True, 608 | "false" => Token::False, 609 | 610 | "string" => Token::String, 611 | "bytes" => Token::DynamicBytes, 612 | Int => Token::Int(), 613 | Uint => Token::Uint(), 614 | Bytes => Token::Bytes(), 615 | 616 | // other symbols 617 | "$" => Token::Binding, 618 | 619 | ";" => Token::Semicolon, 620 | "{" => Token::OpenCurlyBrace, 621 | "}" => Token::CloseCurlyBrace, 622 | "(" => Token::OpenParenthesis, 623 | ")" => Token::CloseParenthesis, 624 | "=" => Token::Assign, 625 | "==" => Token::Equal, 626 | 627 | // todo: thinking in remove 628 | "=>" => Token::Arrow, 629 | 630 | "|=" => Token::BitwiseOrAssign, 631 | "^=" => Token::BitwiseXorAssign, 632 | "&=" => Token::BitwiseAndAssign, 633 | "<<=" => Token::ShiftLeftAssign, 634 | ">>=" => Token::ShiftRightAssign, 635 | "+=" => Token::AddAssign, 636 | "-=" => Token::SubtractAssign, 637 | "*=" => Token::MulAssign, 638 | "/=" => Token::DivideAssign, 639 | "%=" => Token::ModuloAssign, 640 | 641 | "?" => Token::Question, 642 | ":" => Token::Colon, 643 | "||" => Token::Or, 644 | "&&" => Token::And, 645 | "!=" => Token::NotEqual, 646 | "<" => Token::Less, 647 | "<=" => Token::LessEqual, 648 | ">" => Token::More, 649 | ">=" => Token::MoreEqual, 650 | "->" => Token::Rarrow, 651 | "|" => Token::BitwiseOr, 652 | "&" => Token::BitwiseAnd, 653 | "^" => Token::BitwiseXor, 654 | "<<" => Token::ShiftLeft, 655 | ">>" => Token::ShiftRight, 656 | "+" => Token::Add, 657 | "-" => Token::Subtract, 658 | "*" => Token::Mul, 659 | "/" => Token::Divide, 660 | "%" => Token::Modulo, 661 | "**" => Token::Power, 662 | "!" => Token::Not, 663 | "~" => Token::Complement, 664 | "++" => Token::Increment, 665 | "--" => Token::Decrement, 666 | "[" => Token::OpenBracket, 667 | "]" => Token::CloseBracket, 668 | "." 
=> Token::Member, 669 | "," => Token::Comma, 670 | } 671 | } 672 | 673 | -------------------------------------------------------------------------------- /dc_parser/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate lalrpop_util; 3 | extern crate phf; 4 | 5 | lalrpop_mod!( 6 | #[allow(clippy::all)] 7 | #[allow(unused)] 8 | pub datum 9 | ); // synthesized by LALRPOP 10 | 11 | pub mod parse_tree; 12 | pub mod parser; 13 | 14 | pub use parse_tree::*; 15 | -------------------------------------------------------------------------------- /dc_parser/src/parse_tree.rs: -------------------------------------------------------------------------------- 1 | use core::fmt; 2 | 3 | use num_bigint::BigInt; 4 | 5 | use dc_lexer::{Loc, Location}; 6 | 7 | #[derive(Debug, PartialEq)] 8 | pub struct Program(pub Vec); 9 | 10 | #[derive(Debug, PartialEq)] 11 | pub enum ProgramUnit { 12 | PackageDecl(Package), 13 | ImportDecl(Import), 14 | StructFuncDecl(Box), 15 | FuncDecl(Box), 16 | StructDecl(Box), 17 | ObjectDecl(Box), 18 | } 19 | 20 | // todo: inline for support empty stmt, like ';' 21 | pub type Suite = Vec; 22 | 23 | #[derive(Debug, PartialEq)] 24 | pub struct ObjectDecl { 25 | pub loc: Loc, 26 | pub name: Identifier, 27 | pub functions: Vec>, 28 | } 29 | 30 | #[derive(Debug, PartialEq)] 31 | pub struct FuncDecl { 32 | pub loc: Loc, 33 | pub name: Identifier, 34 | pub params: Vec<(Loc, Option)>, 35 | pub body: Suite, 36 | } 37 | 38 | #[derive(Debug, PartialEq)] 39 | pub struct StructFuncDecl { 40 | pub loc: Loc, 41 | pub name: Identifier, 42 | pub struct_name: Identifier, 43 | // todo: thinking in constructor? 44 | pub params: Vec<(Loc, Option)>, 45 | pub body: Suite, 46 | pub returns: Option, 47 | } 48 | 49 | #[derive(Debug, PartialEq, Default)] 50 | pub struct Parameters { 51 | pub args: Vec, 52 | } 53 | 54 | /// A single formal parameter to a function. 
55 | #[derive(Debug, PartialEq)] 56 | pub struct Parameter { 57 | pub loc: Loc, 58 | pub ty: Expression, 59 | pub name: Option, 60 | } 61 | 62 | impl Parameter { 63 | pub fn get_name(&self) -> String { 64 | self.name.as_ref().unwrap().clone().name 65 | } 66 | } 67 | 68 | /// An expression at a given location in the sourcecode. 69 | pub type Expression = Located; 70 | 71 | /// A certain type of expression. 72 | #[derive(Debug, PartialEq)] 73 | pub enum ExpressionType { 74 | Range { 75 | start: Box, 76 | end: Box, 77 | }, 78 | BoolOp { 79 | op: BooleanOperator, 80 | values: Vec, 81 | }, 82 | /// A binary operation on two operands. 83 | /// such as `a * b`, `a + b`, `a · b`. 84 | Binop { 85 | a: Box, 86 | op: Operator, 87 | b: Box, 88 | }, 89 | /// An unary operation. 90 | Unop { 91 | op: UnaryOperator, 92 | a: Box, 93 | }, 94 | /// An post unary operation. 95 | PostUnop { 96 | op: AffixesUnaryOperator, 97 | a: Box, 98 | }, 99 | String { 100 | value: String, 101 | }, 102 | Bool { 103 | value: bool, 104 | }, 105 | /// A numeric literal. 106 | Number { 107 | value: BigInt, 108 | }, 109 | /// A `list` literal value. 110 | List { 111 | elements: Vec, 112 | }, 113 | /// An identifier, designating a certain variable or type. 114 | Identifier { 115 | id: Identifier, 116 | }, 117 | Type { 118 | ty: Type, 119 | }, 120 | /// Attribute access in the form of `value.name`. 121 | MemberAccess { 122 | value: Box, 123 | name: Identifier, 124 | }, 125 | /// A call expression. 
126 | Call { 127 | function: Box, 128 | args: Vec, 129 | // keywords: Vec, 130 | }, 131 | Compare { 132 | op: Comparison, 133 | left: Box, 134 | right: Box, 135 | }, 136 | EmptyObject, 137 | } 138 | 139 | #[derive(Debug, Clone, PartialEq)] 140 | pub enum VariableStorage { 141 | Memory { location: Location }, 142 | Storage { location: Location }, 143 | } 144 | 145 | impl VariableStorage { 146 | pub fn location(&self) -> &Location { 147 | match self { 148 | VariableStorage::Memory { location } => location, 149 | VariableStorage::Storage { location } => location, 150 | } 151 | } 152 | } 153 | 154 | impl fmt::Display for VariableStorage { 155 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 156 | match self { 157 | VariableStorage::Memory { .. } => write!(f, "memory"), 158 | VariableStorage::Storage { .. } => write!(f, "storage"), 159 | } 160 | } 161 | } 162 | 163 | #[derive(Debug, PartialEq)] 164 | pub struct Argument { 165 | pub location: Location, 166 | pub expr: Expression, 167 | } 168 | 169 | #[derive(Debug, PartialEq)] 170 | pub struct Keyword { 171 | pub name: Option, 172 | pub value: Expression, 173 | } 174 | 175 | #[derive(Debug, PartialEq)] 176 | pub struct Located { 177 | pub location: Location, 178 | pub node: T, 179 | } 180 | 181 | pub type Statement = Located; 182 | 183 | #[derive(Debug, PartialEq)] 184 | pub enum StatementType { 185 | VariableDecl { 186 | field: Identifier, 187 | ty: Expression, // type 188 | }, 189 | Break, 190 | Continue, 191 | If { 192 | cond: Expression, 193 | body: Suite, 194 | orelse: Option, 195 | }, 196 | While { 197 | cond: Expression, 198 | body: Suite, 199 | }, 200 | For { 201 | target: Box, 202 | iter: Box, 203 | body: Suite, 204 | }, 205 | Loop, 206 | /// Variable assignment. Note that we can assign to multiple targets. 
207 | Assign { 208 | target: Identifier, 209 | value: Expression, 210 | ty: Expression, 211 | }, 212 | Return { 213 | value: Option, 214 | }, 215 | Expression { 216 | expr: Expression, 217 | }, 218 | } 219 | 220 | #[derive(Debug, PartialEq)] 221 | pub struct StructDecl { 222 | pub loc: Loc, 223 | pub name: Identifier, 224 | pub fields: Vec, 225 | } 226 | 227 | #[derive(Debug, PartialEq)] 228 | pub enum Package { 229 | Plain(Identifier), 230 | } 231 | 232 | #[derive(Debug, PartialEq)] 233 | pub enum Import { 234 | Standard(Identifier), 235 | Remote, 236 | // for such github.com/phodal/coca 237 | GlobalSymbol(StringLiteral, Identifier), 238 | Rename(StringLiteral, Vec<(Identifier, Option)>), 239 | } 240 | 241 | #[derive(Debug, PartialEq, Clone)] 242 | pub struct StringLiteral { 243 | pub loc: Loc, 244 | pub string: String, 245 | } 246 | 247 | #[derive(Debug, PartialEq, Clone, Default)] 248 | pub struct Identifier { 249 | pub loc: Loc, 250 | pub name: String, 251 | } 252 | 253 | #[derive(Debug, PartialEq, Clone)] 254 | pub struct DocComment { 255 | pub offset: usize, 256 | pub tag: String, 257 | pub value: String, 258 | } 259 | 260 | /// A comparison operation. 261 | #[derive(Debug, PartialEq)] 262 | pub enum Comparison { 263 | Equal, 264 | NotEqual, 265 | Less, 266 | LessOrEqual, 267 | Greater, 268 | GreaterOrEqual, 269 | In, 270 | NotIn, 271 | Is, 272 | IsNot, 273 | } 274 | 275 | /// A numeric literal. 276 | #[derive(Debug, PartialEq)] 277 | pub enum Number { 278 | Integer { value: BigInt }, 279 | Float { value: f64 }, 280 | } 281 | 282 | /// An operator for a binary operation (an operation with two operands). 
283 | #[derive(Debug, PartialEq)] 284 | pub enum Operator { 285 | Add, 286 | Sub, 287 | Multiply, 288 | /// `@`, from Python, thinking in remove 289 | MatMult, 290 | Divide, 291 | Modulo, 292 | Pow, 293 | LShift, 294 | RShift, 295 | BitOr, 296 | BitXor, 297 | BitAnd, 298 | /// from Python, thinking in remove 299 | /// also in Java, Math.floorDiv 300 | FloorDiv, 301 | } 302 | 303 | /// An unary operator. This is an operation with only a single operand. 304 | #[derive(Debug, PartialEq)] 305 | pub enum UnaryOperator { 306 | Pos, 307 | Neg, 308 | Not, 309 | Inv, 310 | } 311 | 312 | /// Merge for prefixUnaryOperator `++i` and postfixUnarySuffix `i++` ; 313 | #[derive(Debug, PartialEq)] 314 | pub enum AffixesUnaryOperator { 315 | Increment, 316 | Decrement, 317 | } 318 | 319 | #[derive(Debug, PartialEq, Clone)] 320 | pub enum Type { 321 | Bool, 322 | String, 323 | Int(u16), 324 | Uint(u16), 325 | Bytes(u8), 326 | DynamicBytes, 327 | Void, 328 | } 329 | 330 | /// A boolean operation. 331 | #[derive(Debug, PartialEq)] 332 | pub enum BooleanOperator { 333 | And, 334 | Or, 335 | } 336 | 337 | impl fmt::Display for Type { 338 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 339 | match self { 340 | Type::Bool => write!(f, "bool"), 341 | Type::String => write!(f, "string"), 342 | Type::Int(n) => write!(f, "int{}", n), 343 | Type::Uint(n) => write!(f, "uint{}", n), 344 | Type::Bytes(n) => write!(f, "bytes{}", n), 345 | Type::DynamicBytes => write!(f, "bytes"), 346 | Type::Void => write!(f, "void"), 347 | } 348 | } 349 | } 350 | -------------------------------------------------------------------------------- /dc_parser/src/parser.rs: -------------------------------------------------------------------------------- 1 | use dc_lexer::Diagnostic; 2 | 3 | use crate::datum; 4 | use crate::parse_tree::Program; 5 | 6 | macro_rules! 
do_lalr_parsing { 7 | ($input: expr) => {{ 8 | let lex = dc_lexer::Lexer::new($input); 9 | match datum::DatumParser::new().parse($input, lex) { 10 | Err(err) => Err(Diagnostic::handle_error(err)), 11 | Ok(s) => Ok(s), 12 | } 13 | }}; 14 | } 15 | 16 | pub fn parse_program(source: &str) -> Result { 17 | do_lalr_parsing!(source) 18 | } 19 | 20 | #[cfg(test)] 21 | mod test { 22 | use crate::parse_tree::{Identifier, Package, Program, ProgramUnit}; 23 | use crate::parser::parse_program; 24 | use crate::{ExpressionType, StatementType}; 25 | use dc_lexer::Loc; 26 | 27 | #[test] 28 | #[rustfmt::skip] 29 | fn parse_parse_empty() { 30 | let parse_ast = parse_program(""); 31 | assert!(parse_ast.is_err()); 32 | } 33 | 34 | #[test] 35 | #[rustfmt::skip] 36 | fn parse_parse_package() { 37 | let package = parse_program("package charj"); 38 | assert_eq!(package.unwrap(), Program { 39 | 0: vec![ProgramUnit::PackageDecl(Package::Plain( 40 | Identifier { 41 | loc: Loc(8, 13), 42 | name: "charj".to_string(), 43 | } 44 | ))] 45 | }); 46 | let pkg_alias = parse_program("pkg charj"); 47 | assert!(pkg_alias.is_ok()); 48 | } 49 | 50 | #[test] 51 | #[rustfmt::skip] 52 | fn parse_parse_struct() { 53 | let package = parse_program("struct IO {}"); 54 | assert!(package.is_ok()); 55 | } 56 | 57 | #[test] 58 | #[rustfmt::skip] 59 | fn parse_basic_location() { 60 | let code = parse_program("pkg charj 61 | struct IO {}"); 62 | assert!(code.is_ok()); 63 | } 64 | 65 | #[test] 66 | #[rustfmt::skip] 67 | fn parse_normal_struct_function() { 68 | let normal_struct_fun = parse_program("default$main() {}"); 69 | assert!(normal_struct_fun.is_ok()); 70 | let with_empty_struct_fun = parse_program("default $ main () {}"); 71 | assert!(with_empty_struct_fun.is_ok()); 72 | } 73 | 74 | #[test] 75 | #[rustfmt::skip] 76 | fn parse_function_parameters() { 77 | let params = parse_program("default$main(string name) {}"); 78 | assert!(params.is_ok()); 79 | 80 | let multi_params = parse_program("default$main(string name, 
string first, int id) {}"); 81 | assert!(multi_params.is_ok()); 82 | } 83 | 84 | #[test] 85 | #[rustfmt::skip] 86 | fn parse_comment() { 87 | let comments = parse_program("// this is a comment 88 | pkg comment 89 | "); 90 | assert!(comments.is_ok()); 91 | } 92 | 93 | #[test] 94 | #[rustfmt::skip] 95 | fn parse_if_statement() { 96 | let empty_if = parse_program("default$main(string name) { 97 | if(string == \"name\") { 98 | return; 99 | } 100 | }"); 101 | assert!(empty_if.is_ok()); 102 | 103 | let if_with_expr = parse_program("default$main(string name) { 104 | if( a == true) {} 105 | }"); 106 | assert!(if_with_expr.is_ok()); 107 | } 108 | 109 | #[test] 110 | #[rustfmt::skip] 111 | fn parse_while() { 112 | let empty_if = parse_program("default$main(string name) { 113 | while(string == \"name\") { 114 | return; 115 | } 116 | }"); 117 | assert!(empty_if.is_ok()); 118 | 119 | let if_with_expr = parse_program("default$main(string name) { 120 | while( a == true) {} 121 | }"); 122 | assert!(if_with_expr.is_ok()); 123 | } 124 | 125 | #[test] 126 | #[rustfmt::skip] 127 | fn parse_return() { 128 | let if_return = parse_program("default$main(string name) { 129 | if(a == true) { 130 | return a; 131 | } 132 | }"); 133 | assert!(if_return.is_ok()); 134 | 135 | let if_greater = parse_program("default$main(int a, int b) { 136 | if(a > b) { 137 | return a; 138 | } 139 | }"); 140 | assert!(if_greater.is_ok()); 141 | } 142 | 143 | #[test] 144 | #[rustfmt::skip] 145 | fn parse_if_else() { 146 | let if_else = parse_program("default$compare(int a, int b) { 147 | if(a > b) { 148 | return a; 149 | } else { 150 | return b; 151 | } 152 | }"); 153 | assert!(if_else.is_ok()); 154 | } 155 | 156 | #[test] 157 | #[rustfmt::skip] 158 | fn parse_function_return() { 159 | let function_return = parse_program("default$compare(int a, int b) -> int { 160 | if(a > b) { 161 | return a; 162 | } else { 163 | return b; 164 | } 165 | }"); 166 | 167 | match function_return.unwrap().0.get(0).unwrap() { 168 | 
ProgramUnit::StructFuncDecl(def) => { 169 | let string = format!("{:?}", def.returns.as_ref().unwrap().node); 170 | assert_eq!("Type { ty: Int(256) }", string); 171 | } 172 | _ => { 173 | panic!("expected get StructFuncDef") 174 | } 175 | } 176 | } 177 | 178 | #[test] 179 | #[rustfmt::skip] 180 | fn parse_function_string_return() { 181 | let function_return = parse_program("default$compare(int a, int b) -> string { 182 | }"); 183 | 184 | match function_return.unwrap().0.get(0).unwrap() { 185 | ProgramUnit::StructFuncDecl(def) => { 186 | let string = format!("{:?}", def.returns.as_ref().unwrap().node); 187 | assert_eq!("Type { ty: String }", string); 188 | } 189 | _ => { 190 | panic!("expected get StructFuncDef") 191 | } 192 | } 193 | } 194 | 195 | #[test] 196 | #[rustfmt::skip] 197 | fn parse_parse_import() { 198 | let parse_ast = parse_program("import io"); 199 | assert!(parse_ast.is_ok()); 200 | } 201 | 202 | #[test] 203 | #[rustfmt::skip] 204 | fn parse_function_call() { 205 | let basic_function_call = parse_program("default$main(string name) { 206 | println(\"hello,world\"); 207 | }"); 208 | assert!(basic_function_call.is_ok()); 209 | } 210 | 211 | #[test] 212 | #[rustfmt::skip] 213 | fn parse_utf8_identify() { 214 | let basic_function_call = parse_program("default$主要(string name) { 215 | 显示(\"hello,world\"); 216 | }"); 217 | 218 | assert!(basic_function_call.is_ok()); 219 | } 220 | 221 | #[test] 222 | #[rustfmt::skip] 223 | fn parse_struct_vars() { 224 | let code = parse_program("pkg charj 225 | struct Summary { 226 | Name : string 227 | FanIn : int 228 | FanOut : int 229 | }"); 230 | 231 | match code.unwrap().0.get(1).unwrap() { 232 | ProgramUnit::StructDecl(def) => { 233 | assert_eq!("Summary", def.name.name); 234 | } 235 | _ => { 236 | panic!("expected get StructDef") 237 | } 238 | } 239 | } 240 | 241 | #[test] 242 | #[rustfmt::skip] 243 | fn parse_struct_array_vars() { 244 | let code = parse_program("pkg charj 245 | struct Summary { 246 | Name : []string 
247 | }"); 248 | assert!(code.is_ok()); 249 | } 250 | 251 | #[test] 252 | #[rustfmt::skip] 253 | fn parse_struct_with_method_define() { 254 | let code = parse_program("pkg charj 255 | struct Summary { 256 | Name : string 257 | } 258 | 259 | Summary$constructor(string name) { 260 | } 261 | "); 262 | 263 | match code.unwrap().0.get(2).unwrap() { 264 | ProgramUnit::StructFuncDecl(def) => { 265 | assert_eq!("Summary", def.struct_name.name); 266 | } 267 | _ => { 268 | panic!("expected get StructDef") 269 | } 270 | } 271 | } 272 | 273 | #[test] 274 | #[rustfmt::skip] 275 | fn parse_struct_in_struct() { 276 | let code = parse_program("pkg charj 277 | struct Summary { 278 | Name : string 279 | FanIn : int 280 | FanOut : int 281 | } 282 | 283 | struct Hello { 284 | summary : Summary 285 | } 286 | "); 287 | assert!(code.is_ok()); 288 | } 289 | 290 | #[test] 291 | #[rustfmt::skip] 292 | fn parse_assign() { 293 | let str_assign = parse_program("default$main() { 294 | let words: string = \"hello,world\"; 295 | println(words); 296 | let b: int = 2333; 297 | println(b); 298 | }"); 299 | assert!(str_assign.is_ok()); 300 | } 301 | 302 | #[test] 303 | #[rustfmt::skip] 304 | fn parse_assign_with_sum() { 305 | let str_assign = parse_program("default$main() { 306 | let b: int = 2333 + 5; 307 | let c: int = b - 10; 308 | println(b); 309 | }"); 310 | assert!(str_assign.is_ok()); 311 | 312 | let multiple_expr = parse_program("default$main() { 313 | let b: int = 2333 + 5 - 10 -10 + 5 + 100; 314 | }"); 315 | assert!(multiple_expr.is_ok()); 316 | } 317 | 318 | #[test] 319 | #[rustfmt::skip] 320 | fn parse_mul() { 321 | let mul = parse_program("default$main() { 322 | let b: int = 2333 * 5 - 10 + 100; 323 | println(b); 324 | }"); 325 | assert!(mul.is_ok()); 326 | } 327 | 328 | #[test] 329 | #[rustfmt::skip] 330 | fn parse_basic_div() { 331 | let mul = parse_program("default$main() { 332 | let b: int = 2333 * 5 - 10 + 100 / 5; 333 | println(b); 334 | }"); 335 | assert!(mul.is_ok()); 336 | } 337 
| 338 | #[test] 339 | #[rustfmt::skip] 340 | fn parse_basic_mode() { 341 | let mod_code = parse_program("default$main() { 342 | let b: int = 100 % 5; 343 | println(b); 344 | }"); 345 | assert!(mod_code.is_ok()); 346 | } 347 | 348 | #[test] 349 | #[rustfmt::skip] 350 | fn parse_and_or_symbol() { 351 | let and_symbol = parse_program("default$main() { 352 | let b: bool = a && b; 353 | }"); 354 | assert!(and_symbol.is_ok()); 355 | 356 | let or_symbol = parse_program("default$main() { 357 | let b: bool = a || b; 358 | }"); 359 | assert!(or_symbol.is_ok()); 360 | 361 | let complex = parse_program("default$main() { 362 | let b: bool = a || b && c || d && e || f; 363 | }"); 364 | assert!(complex.is_ok()); 365 | } 366 | 367 | #[test] 368 | #[rustfmt::skip] 369 | fn parse_loop() { 370 | let for_loop = parse_program("default$main(string name) { 371 | for(x in 1..10) { 372 | println(x); 373 | } 374 | }"); 375 | assert!(for_loop.is_ok()); 376 | } 377 | 378 | #[test] 379 | #[rustfmt::skip] 380 | fn parse_not() { 381 | let not_cond = parse_program("default$main(string name) { 382 | if (!true){} 383 | }"); 384 | assert!(not_cond.is_ok()); 385 | } 386 | 387 | #[test] 388 | #[rustfmt::skip] 389 | fn parse_open_cond() { 390 | let open_cond = parse_program("default$main(string name) { 391 | if (!true) return 1; 392 | }"); 393 | assert!(open_cond.is_ok()); 394 | } 395 | 396 | #[test] 397 | #[rustfmt::skip] 398 | fn parse_shift() { 399 | let shift = parse_program("default$main(string name) { 400 | let a: int = 1000 << 0; 401 | let b: int = 1000 >> 1; 402 | }"); 403 | assert!(shift.is_ok()); 404 | } 405 | 406 | #[test] 407 | #[rustfmt::skip] 408 | fn parse_complex_if() { 409 | let complex_not_cond = parse_program("default$main(string name) { 410 | if ((i % 3) == 0) {} 411 | }"); 412 | assert!(complex_not_cond.is_ok()); 413 | } 414 | 415 | #[test] 416 | #[rustfmt::skip] 417 | fn parse_array() { 418 | let array = parse_program("default$main(string name) { 419 | let i: []int = [1, 2, 3]; 
420 | let j: string = [1, 2, 3]; 421 | }"); 422 | assert!(array.is_ok()); 423 | } 424 | 425 | #[test] 426 | #[rustfmt::skip] 427 | fn parse_unop() { 428 | let unop = parse_program("default$main(string name) { 429 | let j: int = -1; 430 | }"); 431 | assert!(unop.is_ok()); 432 | let more_unop = parse_program("default$main(string name) { 433 | let i: int = +1; 434 | let j: bool = !true; 435 | }"); 436 | assert!(more_unop.is_ok()); 437 | } 438 | 439 | #[test] 440 | #[rustfmt::skip] 441 | fn parse_post_unop() { 442 | let post_unop = parse_program("default$main(string name) { 443 | let j: int = -1; 444 | j++; 445 | j--; 446 | }"); 447 | assert!(post_unop.is_ok()); 448 | } 449 | 450 | #[test] 451 | #[rustfmt::skip] 452 | fn parse_multiple_quote() { 453 | let quote = parse_program("default$main(string name) { 454 | ((((((a)))))); 455 | }"); 456 | assert!(quote.is_ok()); 457 | 458 | let error_quote = parse_program("default$main(string name) { 459 | ((((((a))))); 460 | }"); 461 | assert!(error_quote.is_err()); 462 | } 463 | 464 | #[test] 465 | #[rustfmt::skip] 466 | fn parse_object() { 467 | let obj = parse_program("default$main(string name) { 468 | let obj: Object = {}; 469 | }"); 470 | println!("{:?}", obj); 471 | assert!(obj.is_ok()); 472 | } 473 | 474 | #[test] 475 | #[rustfmt::skip] 476 | fn parse_bool_in_expr() { 477 | let bool_return = parse_program("default$main(string name) { 478 | return true; 479 | }"); 480 | match bool_return.unwrap().0.get(0).unwrap() { 481 | ProgramUnit::StructFuncDecl(def) => { 482 | let return_node = &def.body.get(0).unwrap().node; 483 | if let StatementType::Return { value } = return_node { 484 | let expr = &value.as_ref().unwrap().node; 485 | 486 | if let ExpressionType::List { elements } = expr { 487 | let string = format!("{:?}", elements[0]); 488 | assert_eq!(string, "Located { location: Location { row: 39, column: 43 }, node: Bool { value: true } }"); 489 | return; 490 | } 491 | } 492 | } 493 | _ => {} 494 | } 495 | 496 | panic!("not 
return"); 497 | } 498 | } 499 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Documents 2 | 3 | [Architecture Decision Records](adr/) 4 | -------------------------------------------------------------------------------- /docs/adr/0001-newline-or-semicolon.md: -------------------------------------------------------------------------------- 1 | # 1. newline or semicolon 2 | 3 | Date: 2020-10-26 4 | 5 | ## Status 6 | 7 | 2020-10-26 proposed 8 | 9 | 2020-11-04 done 10 | 11 | ## Context 12 | 13 | In order to save packages or files, we need to be compatible with the import styles of different languages, such as Go 14 | 15 | ```go 16 | import ( 17 | "fmt" 18 | "reflect" 19 | "strconv" 20 | 21 | "github.com/antlr/antlr4/runtime/Go/antlr" 22 | ) 23 | ``` 24 | 25 | or Rust 26 | 27 | ```rust 28 | use std::iter::Peekable; 29 | use std::str::CharIndices; 30 | 31 | use phf::phf_map; 32 | use unicode_xid::UnicodeXID; 33 | 34 | use crate::error::LexicalError; 35 | use crate::token::Token; 36 | ``` 37 | 38 | or Java-like 39 | 40 | ```kotlin 41 | import java.nio.charset.StandardCharsets.UTF_8 42 | import java.nio.file.Files 43 | import java.nio.file.Path 44 | import javax.annotation.processing.Filer 45 | import javax.tools.JavaFileObject 46 | import javax.tools.JavaFileObject.Kind 47 | import javax.tools.SimpleJavaFileObject 48 | import javax.tools.StandardLocation 49 | import kotlin.reflect.KClass 50 | ``` 51 | 52 | ## Decision 53 | 54 | If possible, we need to build a FileSystem that replaces '/' with '.'. 55 | 56 | ## Consequences 57 | 58 | Consequences here... 59 | -------------------------------------------------------------------------------- /docs/adr/0002-package-format.md: -------------------------------------------------------------------------------- 1 | # 2.
package format 2 | 3 | Date: 2020-10-26 4 | 5 | ## Status 6 | 7 | 2020-10-26 proposed 8 | 9 | 2020-11-04 accepted 10 | 11 | ## Context 12 | 13 | Context here... 14 | 15 | ## Decision 16 | 17 | Use `ckp` as the package format. 18 | 19 | ## Consequences 20 | 21 | Consequences here... 22 | -------------------------------------------------------------------------------- /docs/adr/0003-standard-library-design.md: -------------------------------------------------------------------------------- 1 | # 3. standard library design 2 | 3 | Date: 2020-10-27 4 | 5 | ## Status 6 | 7 | 2020-10-27 proposed 8 | 9 | 2020-12-17 accepted 10 | 11 | ## Context 12 | 13 | Java uses directories as packages, which leads to fat packages. 14 | 15 | JavaScript doesn't have a useful package structure. 16 | 17 | Rust uses `mod.rs` or `lib.rs`, but that makes it hard to navigate directly to a package. 18 | 19 | Golang mostly uses `bytes.go` for the `bytes` package. 20 | 21 | So, we can try a design like this: 22 | 23 | ``` 24 | - io 25 | - io.cj 26 | - *.cj 27 | ``` 28 | 29 | ## Decision 30 | 31 | Decision here... 32 | 33 | ## Consequences 34 | 35 | Consequences here... 36 | -------------------------------------------------------------------------------- /docs/adr/0004-dependency-manager.md: -------------------------------------------------------------------------------- 1 | # 4. dependency manager 2 | 3 | Date: 2020-10-27 4 | 5 | ## Status 6 | 7 | 2020-10-27 proposed 8 | 9 | ## Context 10 | 11 | As a language, we need a dependency manager. 12 | 13 | ## Decision 14 | 15 | In our case, we will have a very complex dependency manager, and it may also be a build system. 16 | 17 | In the first stage, we plan to manage our dependencies the Maven way, then build a build system the Gradle way. 18 | 19 | But this still needs more time, and will be done after we have finished the compiler. 20 | 21 | ## Consequences 22 | 23 | Consequences here...
24 | -------------------------------------------------------------------------------- /docs/adr/0005-types-system.md: -------------------------------------------------------------------------------- 1 | # 5. types system 2 | 3 | Date: 2020-10-27 4 | 5 | ## Status 6 | 7 | 2020-10-27 proposed 8 | 9 | ## Context 10 | 11 | Go examples: 12 | 13 | ```golang 14 | type uint uint 15 | type byte = uint8 16 | type rune = int32 17 | ``` 18 | 19 | in Go: 20 | 21 | ```markdown 22 | Integers 23 | Signed 24 | int 25 | int8 26 | int16 27 | int32 28 | int64 29 | Unsigned 30 | uint 31 | uint8 32 | uint16 33 | uint32 34 | uint64 35 | uintptr 36 | Floats 37 | float32 38 | float64 39 | Complex Numbers 40 | complex64 41 | complex128 42 | Byte 43 | Rune 44 | String 45 | Boolean 46 | ``` 47 | 48 | Rust: 49 | 50 | ``` 51 | bool : The boolean type. 52 | char : A character type. 53 | i8 : The 8-bit signed integer type. 54 | i16 : The 16-bit signed integer type. 55 | i32 : The 32-bit signed integer type. 56 | i64 : The 64-bit signed integer type. 57 | isize : The pointer-sized signed integer type. 58 | u8 : The 8-bit unsigned integer type. 59 | u16 : The 16-bit unsigned integer type. 60 | u32 : The 32-bit unsigned integer type. 61 | u64 : The 64-bit unsigned integer type. 62 | usize : The pointer-sized unsigned integer type. 63 | f32 : The 32-bit floating point type. 64 | f64 : The 64-bit floating point type. 65 | array : A fixed-size array, denoted [T; N], for the element type, T, and the non-negative compile-time constant size, N. 66 | slice : A dynamically-sized view into a contiguous sequence, [T]. 67 | str : String slices. 68 | tuple : A finite heterogeneous sequence, (T, U, ..). 69 | ``` 70 | 71 | ## Decision 72 | 73 | - `bool` can be either `true` or `false` 74 | 75 | ## Consequences 76 | 77 | Consequences here...
78 | -------------------------------------------------------------------------------- /docs/adr/0006-empty-method.md: -------------------------------------------------------------------------------- 1 | # 6. empty method 2 | 3 | Date: 2020-10-29 4 | 5 | ## Status 6 | 7 | 2020-10-29 proposed 8 | 9 | ## Context 10 | 11 | Cases of empty methods in other languages: 12 | 13 | Ruby: 14 | 15 | ```ruby 16 | class Null 17 | def say 18 | end 19 | end 20 | ``` 21 | 22 | Python: 23 | 24 | ```python 25 | # Correct way of writing empty function 26 | # in Python 27 | def fun(): 28 | pass 29 | ``` 30 | 31 | ## Decision 32 | 33 | Decision here... 34 | 35 | ## Consequences 36 | 37 | Consequences here... 38 | -------------------------------------------------------------------------------- /docs/adr/0007-support-for-newline.md: -------------------------------------------------------------------------------- 1 | # 7. support for newline 2 | 3 | Date: 2020-10-29 4 | 5 | ## Status 6 | 7 | 2020-10-29 proposed 8 | 9 | 2020-11-04 done 10 | 11 | ## Context 12 | 13 | [lalrpop](https://github.com/lalrpop/lalrpop) doesn't provide correct positions for code. 14 | 15 | Different languages have different implementations: 16 | 17 | 1. RustPython uses `'\n'` for new lines 18 | 2. solang 19 | 3. gluon uses [codespan](https://github.com/brendanzab/codespan) 20 | 21 | ## Decision 22 | 23 | Use the lexer with `\n`. 24 | 25 | ## Consequences 26 | 27 | Consequences here... 28 | -------------------------------------------------------------------------------- /docs/adr/0008-constructor-design.md: -------------------------------------------------------------------------------- 1 | # 8. constructor design 2 | 3 | Date: 2020-11-04 4 | 5 | ## Status 6 | 7 | 2020-11-04 proposed 8 | 9 | ## Context 10 | 11 | Context here... 12 | 13 | ## Decision 14 | 15 | Decision here... 16 | 17 | ## Consequences 18 | 19 | Consequences here...
20 | -------------------------------------------------------------------------------- /docs/adr/0009-compiler-design.md: -------------------------------------------------------------------------------- 1 | # 9. compiler design 2 | 3 | Date: 2020-11-05 4 | 5 | ## Status 6 | 7 | 2020-11-05 proposed 8 | 9 | 2020-12-16 done 10 | 11 | ## Context 12 | 13 | Context here... 14 | 15 | ## Decision 16 | 17 | Use llvm with Inkwell for testing 18 | 19 | stage 1.0: lalrpop -> ast -> hir -> mir -> llvm ir 20 | 21 | stage 2.0: typography -> ast -> hir -> mir -> llvm ir 22 | 23 | stage 3.0: typography -> ast -> hir -> mir -> vm / custom backend 24 | 25 | ## Consequences 26 | 27 | Consequences here... 28 | -------------------------------------------------------------------------------- /docs/adr/0010-target.md: -------------------------------------------------------------------------------- 1 | # 10. target 2 | 3 | Date: 2020-11-05 4 | 5 | ## Status 6 | 7 | 2020-11-05 proposed 8 | 9 | 2020-12-16 accepted 10 | 11 | ## Context 12 | 13 | In this version, we use LLVM IR, so we can support: 14 | 15 | - x86 16 | - webassembly 17 | - bpf 18 | 19 | maybe we can also try others. 20 | 21 | ## Decision 22 | 23 | Decision here... 24 | 25 | ## Consequences 26 | 27 | Consequences here... 28 | -------------------------------------------------------------------------------- /docs/adr/0011-does-syntax-close-to-llvm.md: -------------------------------------------------------------------------------- 1 | # 11. does syntax close to LLVM 2 | 3 | Date: 2020-11-06 4 | 5 | ## Status 6 | 7 | 2020-11-06 proposed 8 | 9 | ## Context 10 | 11 | C hello, world: 12 | 13 | ``` 14 | #include 15 | 16 | int main() { 17 | printf("hello world\n"); 18 | return 0; 19 | } 20 | ``` 21 | 22 | LLVM hello, world 23 | 24 | ``` 25 | ; Copied directly from the documentation 26 | ; Declare the string constant as a global constant. 
27 | @.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00" 28 | 29 | ; External declaration of the puts function 30 | declare i32 @puts(i8* nocapture) nounwind 31 | 32 | ; Definition of main function 33 | define i32 @main() { ; i32()* 34 | ; Convert [13 x i8]* to i8 *... 35 | %cast210 = getelementptr [13 x i8],[13 x i8]* @.str, i64 0, i64 0 36 | 37 | ; Call puts function to write out the string to stdout. 38 | call i32 @puts(i8* %cast210) 39 | ret i32 0 40 | } 41 | 42 | ; Named metadata 43 | !0 = !{i32 42, null, !"string"} 44 | !foo = !{!0} 45 | ``` 46 | 47 | ## Decision 48 | 49 | Decision here... 50 | 51 | ## Consequences 52 | 53 | Consequences here... 54 | -------------------------------------------------------------------------------- /docs/adr/0012-builtin-functions.md: -------------------------------------------------------------------------------- 1 | # 12. builtin functions 2 | 3 | Date: 2020-11-09 4 | 5 | ## Status 6 | 7 | 2020-11-09 proposed 8 | 9 | ## Context 10 | 11 | Context here... 12 | 13 | ## Decision 14 | 15 | Decision here... 16 | 17 | ## Consequences 18 | 19 | Consequences here... 20 | -------------------------------------------------------------------------------- /docs/adr/0013-irbuilder.md: -------------------------------------------------------------------------------- 1 | # 13. 
irbuilder 2 | 3 | Date: 2020-11-10 4 | 5 | ## Status 6 | 7 | 2020-11-10 proposed 8 | 9 | ## Context 10 | 11 | Use IRBuilder to build code: 12 | 13 | ```cpp 14 | void createIRWithIRBuilder() { 15 | LLVMContext Context; 16 | Module *mod = new Module("sum.ll", Context); 17 | 18 | //1、创建IRBuilder 19 | IRBuilder<> builder(Context); 20 | //2、创建main函数 21 | FunctionType *ft = FunctionType::get(builder.getInt32Ty(),false); 22 | Function *mainfunc = Function::Create(ft, Function::ExternalLinkage, "main", mod); 23 | //到此为止之创建了main函数,但是函数体内的包含的Instruction没有添加,因此需要添加。 24 | 25 | //3、创建基本块(这个基本块是空的无内容) 26 | BasicBlock *entry = BasicBlock::Create(Context,"entrypoint",mainfunc); 27 | 28 | //4、设置插入点:插入点设置成相应BasicBlock,<#后面用builder创建的指令都会追加到这个BasicBlock里了#> 29 | //!!!: - 理解:上面的方式是通过直接往BasicBloock中添加Instruction方式来构造基本的basicBlock,这里借助IRBuilder方式,往basicBlock中添加命令。 30 | builder.SetInsertPoint(entry); 31 | 32 | //5、添加全局字符串(IR中字符串全部为全局变量,使用数据序列来表示,每个元素是一个char类型) 33 | Value *helloWorld = builder.CreateGlobalStringPtr("hello world!\n"); 34 | //6、创建put函数 35 | //1)指定函数参数类型,装在一个数组中` 36 | std::vector putsargs; 37 | putsargs.push_back(builder.getInt8Ty()->getPointerTo()); 38 | ArrayRef argsRef(putsargs); 39 | //2)指定函数返回值类型 40 | FunctionType *putsType = FunctionType::get(builder.getInt32Ty(),argsRef,false); 41 | //3)创建“函数调用”,而不是创建函数 42 | FunctionCallee putsFunc = mod->getOrInsertFunction("puts", putsType); 43 | 44 | //7、调用函数(<#理解:通过createXXX创建出来的所有指令都在SetInsertPoint后面#>) 45 | builder.CreateCall(putsFunc,helloWorld); //这是创建方法的指令 46 | 47 | //8、创建返回ret指令 48 | ConstantInt *zero = ConstantInt::get(IntegerType::getInt32Ty(Context), 0); 49 | builder.CreateRet(zero); 50 | 51 | //9、验证。这一步待定! 52 | llvm::VerifierAnalysis::Result Res; 53 | Res.IRBroken = llvm::verifyModule(*mod, &dbgs(), &Res.DebugInfoBroken); 54 | 55 | mod->dump(); 56 | 57 | } 58 | ``` 59 | 60 | 61 | ## Decision 62 | 63 | Decision here... 64 | 65 | ## Consequences 66 | 67 | Consequences here... 
68 | -------------------------------------------------------------------------------- /docs/adr/0014-ir-design.md: -------------------------------------------------------------------------------- 1 | # 14. IR Design 2 | 3 | Date: 2020-11-16 4 | 5 | ## Status 6 | 7 | 2020-11-16 proposed 8 | 9 | ## Context 10 | 11 | Rust: [https://blog.rust-lang.org/2016/04/19/MIR.html](https://blog.rust-lang.org/2016/04/19/MIR.html) 12 | 13 | ![Flow](images/flow.svg) 14 | 15 | Rust Old 16 | 17 | "Rust Source" -> "HIR" -> "LLVM IR" -> "Machine Code" 18 | 19 | Rust New 20 | 21 | "Rust Source" -> "HIR" -> "MIR" -> "LLVM IR" -> "Machine Code" 22 | 23 | ## Decision 24 | 25 | Decision here... 26 | 27 | ## Consequences 28 | 29 | Consequences here... 30 | -------------------------------------------------------------------------------- /docs/adr/0015-mir.md: -------------------------------------------------------------------------------- 1 | # 15. mir 2 | 3 | Date: 2020-11-26 4 | 5 | ## Status 6 | 7 | 2020-11-26 proposed 8 | 9 | ## Context 10 | 11 | Context here... 12 | 13 | related: 14 | 15 | 16 | - Rust MIR: [https://rust-lang.github.io/rfcs/1211-mir.html](https://rust-lang.github.io/rfcs/1211-mir.html) 17 | - Common MIR: [https://github.com/vnmakarov/mir](https://github.com/vnmakarov/mir) 18 | 19 | ## Decision 20 | 21 | Decision here... 22 | 23 | ## Consequences 24 | 25 | Consequences here... 26 | -------------------------------------------------------------------------------- /docs/adr/0016-cli-target-improve.md: -------------------------------------------------------------------------------- 1 | # 16. cli target improve 2 | 3 | Date: 2020-12-01 4 | 5 | ## Status 6 | 7 | 2020-12-01 proposed 8 | 9 | Use Rustc like style for build target: https://doc.rust-lang.org/nightly/rustc/targets/index.html 10 | 11 | ```bash 12 | rustc src/main.rs --target=wasm32-unknown-unknown 13 | ``` 14 | Context here... 15 | 16 | ## Decision 17 | 18 | Decision here... 
19 | 20 | ## Consequences 21 | 22 | Consequences here... 23 | -------------------------------------------------------------------------------- /docs/adr/0017-container-parser-syntax.md: -------------------------------------------------------------------------------- 1 | # 17. Container Parser Syntax 2 | 3 | Date: 2020-12-16 4 | 5 | ## Status 6 | 7 | 2020-12-16 proposed 8 | 9 | ## Context 10 | 11 | In current design, we need to: 12 | 13 | 1. provide built-in DSL support for Charj. 14 | 2. implement a Typography syntax with lalrpop. 15 | 16 | It means we can find some way to combine their syntax. For example, 17 | 18 | ```rust 19 | ast { 20 | node parameters { 21 | parameters parameter*; 22 | } 23 | } 24 | ``` 25 | 26 | can be like the following in Rust 27 | 28 | ```rust 29 | struct AST { 30 | nodes: Vec 31 | } 32 | 33 | enum node { 34 | parameters(Vec) 35 | } 36 | 37 | struct parameter { 38 | 39 | } 40 | ``` 41 | 42 | The top level can be like a `struct` or `class`, with some methods in the class. `ast{}` will be equal to `struct ast{}`. 43 | 44 | We can call such a thing a `container`. A container can be a `struct` or an `enum`. 45 | 46 | ## Decision 47 | 48 | Decision here... 49 | 50 | ## Consequences 51 | 52 | Consequences here... 53 | -------------------------------------------------------------------------------- /docs/adr/0018-use-semicolon-for-split-context.md: -------------------------------------------------------------------------------- 1 | # 18. use semicolon for split context 2 | 3 | Date: 2020-12-22 4 | 5 | ## Status 6 | 7 | 2020-12-22 proposed 8 | 9 | ## Context 10 | 11 | In the current design with LALRPOP, an expression like `!function()` will cause issues such as the following. 12 | 13 | ```bash 14 | "!" PrimaryExpr 15 | At that point, if the next token is a `"("`, then the parser can proceed in two different ways.
16 | 17 | First, the parser could execute the production at /Users/fdhuang/charj/charj/dc_parser/src/charj.lalrpop:334:4: 334:14, which would consume the top 1 token(s) from the stack and produce a `NotExpression`. This might then yield a parse tree like 18 | PrimaryExpr ╷ Statement 19 | ├─NotExpression────────┤ │ 20 | ├─Term─────────────────┤ │ 21 | ├─ArithmeticExpression─┤ │ 22 | ├─ShiftExpression──────┤ │ 23 | ├─CompareExpression────┤ │ 24 | ├─AndExpression────────┤ │ 25 | ├─OrExpression─────────┤ │ 26 | ├─RangeExpression──────┤ │ 27 | ├─UnaryExpr────────────┤ │ 28 | ├─Expression───────────┤ │ 29 | ├─ExpressionStatement──┤ │ 30 | ├─CompoundStatement────┤ │ 31 | ├─Statement────────────┤ │ 32 | ├─Statement+───────────┘ │ 33 | └─Statement+─────────────────────┘ 34 | 35 | Alternatively, the parser could shift the `"("` token and later use it to construct a `FunctionCall`. This might then yield a parse tree like 36 | "!" PrimaryExpr "(" Comma ")" 37 | │ ├─FunctionCall────────────────────┤ 38 | │ ├─PrimaryExpr─────────────────────┤ 39 | │ └─NotExpression───────────────────┤ 40 | └─NotExpression───────────────────────┘ 41 | ``` 42 | 43 | ## Decision 44 | 45 | use semicolon for split keywords before we write by ourself. 46 | 47 | ## Consequences 48 | 49 | Consequences here... 50 | -------------------------------------------------------------------------------- /docs/adr/0019-language-server.md: -------------------------------------------------------------------------------- 1 | # 19. language server 2 | 3 | Date: 2021-01-02 4 | 5 | ## Status 6 | 7 | 2021-01-02 proposed 8 | 9 | ## Context 10 | 11 | Context here... 12 | 13 | ## Decision 14 | 15 | Decision here... 16 | 17 | ## Consequences 18 | 19 | Consequences here... 20 | -------------------------------------------------------------------------------- /docs/adr/0020-low-code-design-inside.md: -------------------------------------------------------------------------------- 1 | # 20. 
low-code design inside 2 | 3 | Date: 2021-01-08 4 | 5 | ## Status 6 | 7 | 2021-01-08 proposed 8 | 9 | ## Context 10 | 11 | Context here... 12 | 13 | ## Decision 14 | 15 | Decision here... 16 | 17 | ## Consequences 18 | 19 | Consequences here... 20 | -------------------------------------------------------------------------------- /docs/adr/0021-replace-tower-lsp.md: -------------------------------------------------------------------------------- 1 | # 21. replace tower-lsp 2 | 3 | Date: 2021-08-21 4 | 5 | ## Status 6 | 7 | 2021-08-21 proposed 8 | 9 | ## Context 10 | 11 | Context here... 12 | 13 | ## Decision 14 | 15 | Decision here... 16 | 17 | ## Consequences 18 | 19 | Consequences here... 20 | -------------------------------------------------------------------------------- /docs/adr/0022-design-by-contract-design.md: -------------------------------------------------------------------------------- 1 | # 22. design by contract design 2 | 3 | 4 | Date: 2021-11-01 5 | 6 | ## Status 7 | 8 | 2021-11-01 proposed 9 | 10 | ## Context 11 | 12 | add basic design for contract design, such as : 13 | 14 | ```nix 15 | assert a==b;"a==b" 16 | ``` 17 | 18 | ## Decision 19 | 20 | Decision here... 21 | 22 | ## Consequences 23 | 24 | Consequences here... 25 | -------------------------------------------------------------------------------- /docs/adr/README.md: -------------------------------------------------------------------------------- 1 | # Architecture Decision Records 2 | 3 | * [1. newline-or-semicolon](0001-newline-or-semicolon.md) 4 | * [2. package-format](0002-package-format.md) 5 | * [3. standard-library-design](0003-standard-library-design.md) 6 | * [4. dependency-manager](0004-dependency-manager.md) 7 | * [5. types-system](0005-types-system.md) 8 | * [6. empty-method](0006-empty-method.md) 9 | * [7. support-for-newline](0007-support-for-newline.md) 10 | * [8. constructor-design](0008-constructor-design.md) 11 | * [9. compiler-design](0009-compiler-design.md) 12 | * [10. 
target](0010-target.md) 13 | * [11. does-syntax-close-to-llvm](0011-does-syntax-close-to-llvm.md) 14 | * [12. builtin-functions](0012-builtin-functions.md) 15 | * [13. irbuilder](0013-irbuilder.md) 16 | * [14. ir-design](0014-ir-design.md) 17 | * [15. mir](0015-mir.md) 18 | * [16. cli-target-improve](0016-cli-target-improve.md) 19 | * [17. container-parser-syntax](0017-container-parser-syntax.md) 20 | * [18. use-semicolon-for-split-context](0018-use-semicolon-for-split-context.md) 21 | * [19. language-server](0019-language-server.md) 22 | * [20. low-code-design-inside](0020-low-code-design-inside.md) 23 | * [21. replace-tower-lsp](0021-replace-tower-lsp.md) 24 | * [22. design-by-contract-design](0022-design-by-contract-design.md) 25 | -------------------------------------------------------------------------------- /docs/adr/images/flow.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Produced by OmniGraffle 6.5.2 2016-04-19 21:30:07 +0000flowLayer 1Rust SourceHIRLLVM IRMachine CodeParsing and DesugaringBorrow checkingOptimizationType checkingTranslationRust SourceHIRLLVM IRMachine CodeParsing and DesugaringBorrow checkingOptimizationType checkingOptimizationMIRTodayTomorrow 4 | -------------------------------------------------------------------------------- /docs/book/.gitignore: -------------------------------------------------------------------------------- 1 | book 2 | -------------------------------------------------------------------------------- /docs/book/book.toml: -------------------------------------------------------------------------------- 1 | [book] 2 | authors = ["Phodal Huang"] 3 | language = "en" 4 | multilingual = false 5 | src = "src" 6 | title = "Charj Book" 7 | -------------------------------------------------------------------------------- /docs/book/src/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | - [Chapter 
1](./chapter_1.md) 4 | -------------------------------------------------------------------------------- /docs/book/src/chapter_1.md: -------------------------------------------------------------------------------- 1 | # Chapter 1 2 | -------------------------------------------------------------------------------- /docs/compares/rust/README.md: -------------------------------------------------------------------------------- 1 | output hir & mir 2 | 3 | ``` 4 | rustc +nightly -Zunpretty=mir main.rs >> main.mir 5 | rustc +nightly -Zunpretty=hir main.rs >> main.hir 6 | ``` 7 | 8 | output HIR tree 9 | 10 | ``` 11 | rustc +nightly -Zunpretty=hir-tree main.rs >> main.hir-tree 12 | ``` 13 | -------------------------------------------------------------------------------- /docs/compares/rust/main.hir: -------------------------------------------------------------------------------- 1 | #[prelude_import] 2 | use ::std::prelude::v1::*; 3 | #[macro_use] 4 | extern crate std; 5 | fn main() { 6 | { 7 | ::std::io::_print(::core::fmt::Arguments::new_v1(&["Hello World!\n"], 8 | &match () { 9 | () => 10 | [], 11 | })); 12 | }; 13 | } 14 | -------------------------------------------------------------------------------- /docs/compares/rust/main.mir: -------------------------------------------------------------------------------- 1 | // WARNING: This output format is intended for human consumers only 2 | // and is subject to change without notice. Knock yourself out. 
3 | fn main() -> () { 4 | let mut _0: (); // return place in scope 0 at main.rs:1:11: 1:11 5 | let _1: (); // in scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:9: 96:62 6 | let mut _2: std::fmt::Arguments; // in scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 7 | let mut _3: &[&str]; // in scope 0 at main.rs:2:14: 2:28 8 | let mut _4: &[&str; 1]; // in scope 0 at main.rs:2:14: 2:28 9 | let _5: &[&str; 1]; // in scope 0 at main.rs:2:14: 2:28 10 | let mut _6: &[std::fmt::ArgumentV1]; // in scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 11 | let mut _7: &[std::fmt::ArgumentV1; 0]; // in scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 12 | let _8: &[std::fmt::ArgumentV1; 0]; // in scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 13 | let mut _9: &[std::fmt::ArgumentV1; 0]; // in scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 14 | let mut _10: &[&str; 1]; // in scope 0 at main.rs:2:14: 2:28 15 | 16 | bb0: { 17 | StorageLive(_1); // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:9: 96:62 18 | StorageLive(_2); // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 19 | StorageLive(_3); // scope 0 at main.rs:2:14: 2:28 20 | StorageLive(_4); // scope 0 at main.rs:2:14: 2:28 21 | StorageLive(_5); // scope 0 at main.rs:2:14: 2:28 22 | _10 = const main::promoted[1]; // scope 0 at main.rs:2:14: 2:28 23 | // ty::Const 24 | // + ty: 
&[&str; 1] 25 | // + val: Unevaluated(WithOptConstParam { did: DefId(0:3 ~ main[317d]::main[0]), const_param_did: None }, [], Some(promoted[1])) 26 | // mir::Constant 27 | // + span: main.rs:2:14: 2:28 28 | // + literal: Const { ty: &[&str; 1], val: Unevaluated(WithOptConstParam { did: DefId(0:3 ~ main[317d]::main[0]), const_param_did: None }, [], Some(promoted[1])) } 29 | _5 = _10; // scope 0 at main.rs:2:14: 2:28 30 | _4 = _5; // scope 0 at main.rs:2:14: 2:28 31 | _3 = move _4 as &[&str] (Pointer(Unsize)); // scope 0 at main.rs:2:14: 2:28 32 | StorageDead(_4); // scope 0 at main.rs:2:27: 2:28 33 | StorageLive(_6); // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 34 | StorageLive(_7); // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 35 | StorageLive(_8); // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 36 | _9 = const main::promoted[0]; // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 37 | // ty::Const 38 | // + ty: &[std::fmt::ArgumentV1; 0] 39 | // + val: Unevaluated(WithOptConstParam { did: DefId(0:3 ~ main[317d]::main[0]), const_param_did: None }, [], Some(promoted[0])) 40 | // mir::Constant 41 | // + span: /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 42 | // + literal: Const { ty: &[std::fmt::ArgumentV1; 0], val: Unevaluated(WithOptConstParam { did: DefId(0:3 ~ main[317d]::main[0]), const_param_did: None }, [], Some(promoted[0])) } 43 | _8 = _9; // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 44 | _7 = _8; // scope 0 at 
/Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 45 | _6 = move _7 as &[std::fmt::ArgumentV1] (Pointer(Unsize)); // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 46 | StorageDead(_7); // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:60: 96:61 47 | _2 = Arguments::new_v1(move _3, move _6) -> bb1; // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 48 | // mir::Constant 49 | // + span: /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 50 | // + user_ty: UserType(0) 51 | // + literal: Const { ty: fn(&[&'static str], &[std::fmt::ArgumentV1]) -> std::fmt::Arguments {std::fmt::Arguments::new_v1}, val: Value(Scalar()) } 52 | } 53 | 54 | bb1: { 55 | StorageDead(_6); // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:60: 96:61 56 | StorageDead(_3); // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:60: 96:61 57 | _1 = _print(move _2) -> bb2; // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:9: 96:62 58 | // mir::Constant 59 | // + span: /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:9: 96:27 60 | // + literal: Const { ty: for<'r> fn(std::fmt::Arguments<'r>) {std::io::_print}, val: Value(Scalar()) } 61 | } 62 | 63 | bb2: { 64 | StorageDead(_2); // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:61: 96:62 65 | StorageDead(_8); // 
scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:62: 96:63 66 | StorageDead(_5); // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:62: 96:63 67 | StorageDead(_1); // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:62: 96:63 68 | _0 = const (); // scope 0 at main.rs:1:11: 3:2 69 | return; // scope 0 at main.rs:3:2: 3:2 70 | } 71 | } 72 | 73 | promoted[0] in main: &[ArgumentV1; 0] = { 74 | let mut _0: &[std::fmt::ArgumentV1; 0]; // return place in scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 75 | let mut _1: [std::fmt::ArgumentV1; 0]; // in scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 76 | 77 | bb0: { 78 | _1 = []; // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 79 | _0 = &_1; // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 80 | return; // scope 0 at /Users/fdhuang/.rustup/toolchains/nightly-x86_64-apple-darwin/lib/rustlib/src/rust/library/std/src/macros.rs:96:28: 96:61 81 | } 82 | } 83 | 84 | promoted[1] in main: &[&str; 1] = { 85 | let mut _0: &[&str; 1]; // return place in scope 0 at main.rs:2:14: 2:28 86 | let mut _1: [&str; 1]; // in scope 0 at main.rs:2:14: 2:28 87 | 88 | bb0: { 89 | _1 = [const "Hello World!\n"]; // scope 0 at main.rs:2:14: 2:28 90 | // ty::Const 91 | // + ty: &str 92 | // + val: Value(Slice { data: Allocation { bytes: [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10], relocations: Relocations(SortedMap { data: [] }), init_mask: InitMask { blocks: [8191], len: Size { 
raw: 13 } }, size: Size { raw: 13 }, align: Align { pow2: 0 }, mutability: Not, extra: () }, start: 0, end: 13 }) 93 | // mir::Constant 94 | // + span: main.rs:2:14: 2:28 95 | // + literal: Const { ty: &str, val: Value(Slice { data: Allocation { bytes: [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10], relocations: Relocations(SortedMap { data: [] }), init_mask: InitMask { blocks: [8191], len: Size { raw: 13 } }, size: Size { raw: 13 }, align: Align { pow2: 0 }, mutability: Not, extra: () }, start: 0, end: 13 }) } 96 | _0 = &_1; // scope 0 at main.rs:2:14: 2:28 97 | return; // scope 0 at main.rs:2:14: 2:28 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /docs/compares/rust/main.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("Hello World!"); 3 | } 4 | -------------------------------------------------------------------------------- /docs/compares/solang.md: -------------------------------------------------------------------------------- 1 | Solang does not have complete language support yet. The language features which are supported are clearly documented. See: [https://solang.readthedocs.io/](https://solang.readthedocs.io/) 2 | 3 | Solang tries to be a traditional compiler 4 | 5 | 1. lexer 6 | 2. parser (outputs: AST) 7 | 3. resolver: (outputs: CFG) 8 | 4. code emitter (output: LLVM IR) 9 | 5. llvm wasm codegen 10 | 6. 
linker 11 | 12 | The layout of the source code is as follows: 13 | 14 | ### src/parse/* 15 | 16 | lexer and LALRPOP Solidity grammar 17 | 18 | output: Abstract Syntax Tree 19 | 20 | ### src/resolve/* 21 | 22 | Resolves types, variables, functions etc 23 | 24 | Generates control flow graph 25 | 26 | ### src/emit/* 27 | 28 | Converts Control Flow graph to LLVM IR 29 | 30 | Has to do some tricks to generate PHI nodes 31 | 32 | ABI encoder/decoder (eth/scale) 33 | 34 | ### src/link.rs 35 | 36 | Converts wasm object file to final wasm file 37 | 38 | ### src/abi/* 39 | 40 | Generates and reads ABIs 41 | -------------------------------------------------------------------------------- /docs/compares/solang/resolve.md: -------------------------------------------------------------------------------- 1 | # Resolve 2 | 3 | 1. `contracts_to_resolve` gets all `ContractDefinition` 4 | 2. `resolve_typenames` gets all `fields` 5 | 3. resolve pragmas and imports 6 | 4. we can resolve the structs and events. 7 | 5. resolve the contracts 8 | 6. check state mutability for all contracts 9 | -------------------------------------------------------------------------------- /docs/design.md: -------------------------------------------------------------------------------- 1 | ## More is More 2 | 3 | > Less syntax sugar will make languages easier to understand and convert. 4 | 5 | Charj's IR is defined close to a common IR, so we need to define almost all details. 6 | 7 | A basic `hello, world`:
8 | 9 | ```charj 10 | pkg examples 11 | 12 | default$main() { 13 | println("hello,world") 14 | } 15 | ``` 16 | 17 | It would be better if we changed it to this: 18 | 19 | ``` 20 | pkg examples 21 | 22 | import fmt; 23 | 24 | default$main() { 25 | fmt.println("hello,world") 26 | } 27 | ``` 28 | 29 | HIR will be 30 | 31 | ```charj 32 | pkg examples 33 | 34 | import fmt; 35 | 36 | default$main() { 37 | let text: string = "hello,world"; 38 | fmt.println(text); 39 | } 40 | ``` -------------------------------------------------------------------------------- /docs/examples/func-call.cj: -------------------------------------------------------------------------------- 1 | pkg examples 2 | 3 | default$say_hello() { 4 | println("你好,世界!"); 5 | } 6 | 7 | default$b() { 8 | say_hello(); 9 | } 10 | 11 | default$main() { 12 | b(); 13 | } 14 | -------------------------------------------------------------------------------- /docs/examples/hello-world.cj: -------------------------------------------------------------------------------- 1 | pkg examples 2 | 3 | default$main() { 4 | println("hello,world"); 5 | } 6 | -------------------------------------------------------------------------------- /docs/examples/if-condition.cj: -------------------------------------------------------------------------------- 1 | pkg examples 2 | 3 | default$compare(int a, int b) { 4 | if(a > b) { 5 | return a; 6 | } else { 7 | return b; 8 | } 9 | } 10 | 11 | default$is_success(bool c, bool d) { 12 | if (c && d) { 13 | return "successed"; 14 | } else { 15 | return "failed"; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /docs/examples/multiple-import.cj: -------------------------------------------------------------------------------- 1 | pkg examples 2 | 3 | import ( 4 | fmt, 5 | strings 6 | ) 7 | 8 | default$main() { 9 | println("hello, world"); 10 | } 11 | -------------------------------------------------------------------------------- /docs/examples/struct.cj: 
-------------------------------------------------------------------------------- 1 | pkg charj 2 | 3 | struct Summary { 4 | Name : string 5 | FanIn : int 6 | FanOut : int 7 | } 8 | 9 | struct Hello { 10 | summary : Summary 11 | } 12 | 13 | Summary$new(int a) { 14 | 15 | } 16 | 17 | Summary$analysis() -> int { 18 | 19 | } 20 | 21 | Summary$* { 22 | build(int a) {} 23 | init(int a) -> int {} 24 | } 25 | -------------------------------------------------------------------------------- /docs/langs/111-assignment.md: -------------------------------------------------------------------------------- 1 | 2 | Origin: [Comparison of programming languages](https://en.wikipedia.org/wiki/Assignment_(computer_science)) 3 | 4 | - Augmented assignment: `a = 2*a`, or `a *= 2` 5 | - Chained assignment: `a = b = c = d = f = 0 ` 6 | - Parallel assignment: `a, b := 0, 1` 7 | 8 | 9 | Parallel Assignment in function : 10 | 11 | ```python 12 | def f(): 13 | return 1, 2 14 | a, b = f() 15 | ``` 16 | 17 | | format | Language | 18 | |-|-| 19 | | variable = expression | Fortran, PL/I, C (and descendants such as C++, Java, etc.), Bourne shell, Python, Go (assignment to pre-declared variables), R, PowerShell, etc. | 20 | | variable := expression | ALGOL (and derivatives), Simula, CPL, BCPL, Pascal[23] (and descendants such as Modula), Mary, PL/M, Ada, Smalltalk, Eiffel,[24][25] Oberon, Dylan,[26] Seed7, Python (an assignment expression),[27] Go (shorthand for declaring and defining a variable),[28] Io, AMPL, ML,[29] AutoHotkey etc. 
| 21 | 22 | Other possibilities include a left arrow or a keyword, though there are other, rarer, variants: 23 | 24 | | `_variable_ << _expression_` | [Magik](https://en.wikipedia.org/wiki/Magik_(programming_language) "Magik (programming language)") | 25 | | `_variable_ <- _expression_` | [F#](https://en.wikipedia.org/wiki/F_Sharp_(programming_language) "F Sharp (programming language)"), [OCaml](https://en.wikipedia.org/wiki/OCaml "OCaml"), [R](https://en.wikipedia.org/wiki/R_(programming_language) "R (programming language)"), [S](https://en.wikipedia.org/wiki/S_(programming_language) "S (programming language)") | 26 | | `_variable_ <<- _expression_` | [R](https://en.wikipedia.org/wiki/R_(programming_language) "R (programming language)") | 27 | | `assign("_variable_", _expression_)` | [R](https://en.wikipedia.org/wiki/R_(programming_language) "R (programming language)") | 28 | | `_variable_ ← _expression_` | [APL](https://en.wikipedia.org/wiki/APL_(programming_language) "APL (programming language)"),[[30]](https://en.wikipedia.org/wiki/Assignment_(computer_science)#cite_note-aplbook-31) [Smalltalk](https://en.wikipedia.org/wiki/Smalltalk "Smalltalk"), [BASIC Programming](https://en.wikipedia.org/wiki/BASIC_Programming "BASIC Programming") | 29 | | `_variable_ =: _expression_` | [J](https://en.wikipedia.org/wiki/J_(programming_language) "J (programming language)") | 30 | | `LET _variable_ = _expression_` | [BASIC](https://en.wikipedia.org/wiki/BASIC "BASIC") | 31 | | `let _variable_ := _expression_` | [XQuery](https://en.wikipedia.org/wiki/XQuery "XQuery") | 32 | | `set _variable_ to _expression_` | [AppleScript](https://en.wikipedia.org/wiki/AppleScript "AppleScript") | 33 | | `set _variable_ = _expression_` | [C shell](https://en.wikipedia.org/wiki/C_shell "C shell") | 34 | | `Set-Variable _variable_ _(expression)_` | [PowerShell](https://en.wikipedia.org/wiki/PowerShell "PowerShell") | 35 | | `_variable_ : _expression_` | [Macsyma, 
Maxima](https://en.wikipedia.org/wiki/Macsyma "Macsyma"), [Rebol](https://en.wikipedia.org/wiki/Rebol "Rebol"), [K](https://en.wikipedia.org/wiki/K_(programming_language) "K (programming language)") | 36 | | `var _variable_ _expression_` | [mIRC scripting language](https://en.wikipedia.org/wiki/MIRC_scripting_language "MIRC scripting language") | 37 | | `_reference-variable_ :- _reference-expression_` | [Simula](https://en.wikipedia.org/wiki/Simula "Simula") | 38 | 39 | Mathematical [pseudo code](https://en.wikipedia.org/wiki/Pseudocode#Common_mathematical_symbols "Pseudocode") assignments are generally depicted with a left-arrow. 40 | 41 | Some platforms put the expression on the left and the variable on the right: 42 | 43 | | `MOVE _expression_ TO _variable_` | [COBOL](https://en.wikipedia.org/wiki/COBOL "COBOL") | 44 | | `_expression_ → _variable_` | [TI-BASIC](https://en.wikipedia.org/wiki/TI-BASIC "TI-BASIC"), [Casio](https://en.wikipedia.org/wiki/Casio_graphic_calculators "Casio graphic calculators") BASIC | 45 | | `_expression_ -> _variable_` | [POP-2](https://en.wikipedia.org/wiki/POP-2 "POP-2"), [BETA](https://en.wikipedia.org/wiki/BETA_(programming_language) "BETA (programming language)"), [R](https://en.wikipedia.org/wiki/R_(programming_language) "R (programming language)") | 46 | | `put _expression_ into _variable_` | [LiveCode](https://en.wikipedia.org/wiki/LiveCode "LiveCode") | 47 | 48 | Some expression-oriented languages, such as [Lisp](https://en.wikipedia.org/wiki/Lisp_(programming_language) "Lisp (programming language)")[[31]](https://en.wikipedia.org/wiki/Assignment_(computer_science)#cite_note-clisp-32)[[32]](https://en.wikipedia.org/wiki/Assignment_(computer_science)#cite_note-cmlisp-33) and Tcl, uniformly use prefix (or postfix) syntax for all statements, including assignment. 49 | 50 | | `(setf _variable_ _expression_)` | [Common Lisp](https://en.wikipedia.org/wiki/Common_Lisp "Common Lisp") | 51 | | `(set! 
_variable_ _expression_)` | [Scheme](https://en.wikipedia.org/wiki/Scheme_(programming_language) "Scheme (programming language)")[[33]](https://en.wikipedia.org/wiki/Assignment_(computer_science)#cite_note-scheme-34)[[34]](https://en.wikipedia.org/wiki/Assignment_(computer_science)#cite_note-schemeint-35)[[35]](https://en.wikipedia.org/wiki/Assignment_(computer_science)#cite_note-sussman-36) | 52 | | `set _variable_ _expression_` | [Tcl](https://en.wikipedia.org/wiki/Tcl "Tcl") | 53 | | `_expression_ _variable_ !` | [Forth](https://en.wikipedia.org/wiki/Forth_(programming_language) "Forth (programming language)") | -------------------------------------------------------------------------------- /docs/langs/2-type-system.md: -------------------------------------------------------------------------------- 1 | Origins: [Comparison of programming languages by type system](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system) 2 | 3 | Brief definitions 4 | 5 | * A [nominal type system](https://en.wikipedia.org/wiki/Nominal_type_system "Nominal type system") means that the language decides whether types are compatible and/or equivalent based on explicit declarations and names. 6 | * A [structural type system](https://en.wikipedia.org/wiki/Structural_type_system "Structural type system") means that the language decides whether types are compatible and/or equivalent based on the definition and characteristics of the types. 7 | * _Type checking_ determines whether and when types are verified. Static checking means that type errors are reported based on a program's text (source code). Dynamic checking means that type errors are reported based on a program's dynamic (run-time) behavior. 
8 | 9 | | Language | [Type safety](https://en.wikipedia.org/wiki/Type_safety "Type safety") | [Type expression](https://en.wikipedia.org/wiki/Type_system#Explicit_or_implicit_declaration_and_inference "Type system") | [Type compatibility and equivalence](https://en.wikipedia.org/wiki/Type_equivalence "Type equivalence") | [Type checking](https://en.wikipedia.org/wiki/Type_checking "Type checking") | 10 | | --- | --- | --- | --- | --- | 11 | | [ActionScript](https://en.wikipedia.org/wiki/ActionScript "ActionScript") 3.0 | strong | implicit with optional explicit typing | | static | 12 | | [Ada](https://en.wikipedia.org/wiki/Ada_(programming_language) "Ada (programming language)") | strong[[TS 1]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-1) | explicit | nominal | static | 13 | | [Aldor](https://en.wikipedia.org/wiki/Aldor "Aldor") | weak | implicit | | static | 14 | | [ALGOL 58](https://en.wikipedia.org/wiki/ALGOL_58 "ALGOL 58") | strong | explicit | | static | 15 | | [ALGOL 60](https://en.wikipedia.org/wiki/ALGOL_60 "ALGOL 60") | strong | explicit | | static | 16 | | [ALGOL 68](https://en.wikipedia.org/wiki/ALGOL_68 "ALGOL 68") | strong | explicit | structural | static & [tagged unions](https://en.wikipedia.org/wiki/Tagged_union "Tagged union") | 17 | | [APL](https://en.wikipedia.org/wiki/APL_(programming_language) "APL (programming language)") | strong | | | dynamic | 18 | | [AutoHotkey](https://en.wikipedia.org/wiki/AutoHotkey "AutoHotkey") | typeless | n/a | n/a | n/a | 19 | | [Ateji PX](https://en.wikipedia.org/wiki/Ateji_PX "Ateji PX") | strong | explicit | nominal | static | 20 | | [Bash](https://en.wikipedia.org/wiki/Bash_(Unix_shell) "Bash (Unix shell)") | _**?**_ | _**?**_ | _**?**_ | _**?**_ | 21 | | [BASIC](https://en.wikipedia.org/wiki/BASIC "BASIC") | strong | explicit | nominal | static | 22 | | [BLISS](https://en.wikipedia.org/wiki/BLISS "BLISS") | typeless | n/a | n/a | n/a | 23 | | 
[BeanShell](https://en.wikipedia.org/wiki/BeanShell "BeanShell") | strong | | nominal | dynamic | 24 | | [Boo](https://en.wikipedia.org/wiki/Boo_(programming_language) "Boo (programming language)") | strong | implicit with optional explicit typing | | static with optional dynamic typing | 25 | | [Bro](https://en.wikipedia.org/wiki/Bro_(software) "Bro (software)") | strong | implicit with optional explicit typing | nominal | static | 26 | | [C](https://en.wikipedia.org/wiki/C_(programming_language) "C (programming language)") | weak | explicit | nominal | static | 27 | | [C++](https://en.wikipedia.org/wiki/C%2B%2B "C++") ([ISO/IEC 14882](https://en.wikipedia.org/wiki/ISO/IEC_14882 "ISO/IEC 14882")) | weak | explicit with optional implicit typing (by using auto in C++11) | nominal | static[[TS 2]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-2) | 28 | | [C#](https://en.wikipedia.org/wiki/C_Sharp_(programming_language) "C Sharp (programming language)") | weak[[TS 3]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-r2-3) | implicit with optional explicit typing | nominal | static[[TS 4]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-4) | 29 | | [Clean](https://en.wikipedia.org/wiki/Clean_(programming_language) "Clean (programming language)") | strong | implicit | | static | 30 | | [Clojure](https://en.wikipedia.org/wiki/Clojure "Clojure") | strong | implicit with optional explicit typing | | dynamic | 31 | | [COBOL](https://en.wikipedia.org/wiki/COBOL "COBOL") | strong | explicit | nominal | static | 32 | | [ColdFusion](https://en.wikipedia.org/wiki/ColdFusion_Markup_Language "ColdFusion Markup Language") (CFML) | strong | implicit | | dynamic | 33 | | [Common Lisp](https://en.wikipedia.org/wiki/Common_Lisp "Common Lisp") | strong | implicit with optional explicit typing | structural for implicit typing, nominal for explicit typing | 
dynamic, some static checking(depending on implementation) | 34 | | [Curl](https://en.wikipedia.org/wiki/Curl_(programming_language) "Curl (programming language)") | strong | | nominal | | 35 | | [Cython](https://en.wikipedia.org/wiki/Cython "Cython") | strong | implicit with optional explicit typing | nominal (extension types) and structural (Python) | dynamic with optional static typing | 36 | | [D](https://en.wikipedia.org/wiki/D_(programming_language) "D (programming language)") | weak[[TS 3]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-r2-3) | explicit | nominal | static | 37 | | [Dylan](https://en.wikipedia.org/wiki/Dylan_(programming_language) "Dylan (programming language)") | strong | | | dynamic | 38 | | [Eiffel](https://en.wikipedia.org/wiki/Eiffel_(programming_language) "Eiffel (programming language)") | strong | | nominal | static | 39 | | [Elixir](https://en.wikipedia.org/wiki/Elixir_(programming_language) "Elixir (programming language)") | strong | implicit | | dynamic | 40 | | [Erlang](https://en.wikipedia.org/wiki/Erlang_(programming_language) "Erlang (programming language)") | strong | implicit | | dynamic | 41 | | [Euphoria](https://en.wikipedia.org/wiki/Euphoria_(programming_language) "Euphoria (programming language)") | strong | explicit, implicit with objects | nominal | static, dynamic with objects | 42 | | [F#](https://en.wikipedia.org/wiki/F_Sharp_(programming_language) "F Sharp (programming language)") | strong | implicit | nominal | static | 43 | | [Forth](https://en.wikipedia.org/wiki/Forth_(programming_language) "Forth (programming language)") | typeless | n/a | n/a | n/a | 44 | | [Fortran](https://en.wikipedia.org/wiki/Fortran "Fortran") | strong | explicit[[TS 5]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-5) | nominal | static | 45 | | [Gambas](https://en.wikipedia.org/wiki/Gambas "Gambas") | strong | explicit | nominal | | 46 | | 
[GLBasic](https://en.wikipedia.org/wiki/GLBasic "GLBasic") | strong | explicit. Non-explicit declarations available through project options | nominal | static | 47 | | [Go](https://en.wikipedia.org/wiki/Go_(programming_language) "Go (programming language)")[[1]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-6) | strong | implicit with optional explicit typing | structural | static | 48 | | [Gosu](https://en.wikipedia.org/wiki/Gosu_(programming_language) "Gosu (programming language)") | strong | partially implicit (local type inference) | nominal (subclassing) and structural | static | 49 | | [Groovy](https://en.wikipedia.org/wiki/Groovy_(programming_language) "Groovy (programming language)") | strong | implicit with optional explicit typing | | dynamic with optional static typing | 50 | | [Harbour](https://en.wikipedia.org/wiki/Harbour_(programming_language) "Harbour (programming language)") | strong | implicit with optional explicit typing | | dynamic | 51 | | [Haskell](https://en.wikipedia.org/wiki/Haskell_(programming_language) "Haskell (programming language)") | strong | implicit with optional explicit typing | nominal[[2]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-7)[[3]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-:0-8) | static | 52 | | [Haxe](https://en.wikipedia.org/wiki/Haxe "Haxe") | strong | implicit with optional explicit typing | nominal (subclassing) and structural | static with optional dynamic typing | 53 | | [Io](https://en.wikipedia.org/wiki/Io_(programming_language) "Io (programming language)") | strong | implicit | | dynamic | 54 | | [ISLISP](https://en.wikipedia.org/wiki/ISLISP "ISLISP") | strong | | | dynamic | 55 | | [J](https://en.wikipedia.org/wiki/J_(programming_language) "J (programming language)") | strong | | | dynamic | 56 | | [Java](https://en.wikipedia.org/wiki/Java_(programming_language) "Java 
(programming language)") | strong[[4]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-9) | explicit | nominal | static | 57 | | [JavaScript](https://en.wikipedia.org/wiki/JavaScript "JavaScript") | weak | implicit | n/a | dynamic | 58 | | [Julia](https://en.wikipedia.org/wiki/Julia_(programming_language) "Julia (programming language)") | strong | implicit with optional explicit typing[[5]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-10) | structural for implicit typing, nominal for explicit typing | dynamic | 59 | | [Joy](https://en.wikipedia.org/wiki/Joy_(programming_language) "Joy (programming language)") | strong | | | dynamic | 60 | | [Kotlin](https://en.wikipedia.org/wiki/Kotlin_(programming_language) "Kotlin (programming language)") | strong | partially implicit (local type inference) | nominal | static | 61 | | [LabVIEW](https://en.wikipedia.org/wiki/LabVIEW "LabVIEW") | strong | | | | 62 | | [Lua](https://en.wikipedia.org/wiki/Lua_(programming_language) "Lua (programming language)") | strong | implicit | | dynamic | 63 | | [Maple](https://en.wikipedia.org/wiki/Maple_(programming_language) "Maple (programming language)") | strong | | | dynamic | 64 | | [Mathematica](https://en.wikipedia.org/wiki/Mathematica "Mathematica") | strong | | | dynamic | 65 | | [MATLAB](https://en.wikipedia.org/wiki/MATLAB "MATLAB") M-code | strong | | | dynamic | 66 | | [Modula-2](https://en.wikipedia.org/wiki/Modula-2 "Modula-2") | weak[[TS 3]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-r2-3) | explicit | nominal | static | 67 | | [Modula-3](https://en.wikipedia.org/wiki/Modula-3 "Modula-3") | weak[[TS 3]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-r2-3) | explicit | structural | static | 68 | | [MUMPS](https://en.wikipedia.org/wiki/MUMPS "MUMPS") (M) | typeless | n/a | n/a | n/a | 69 | | 
[Oberon](https://en.wikipedia.org/wiki/Oberon_(programming_language) "Oberon (programming language)") | strong | explicit | nominal | static and partially dynamic[[TS 6]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-11) | 70 | | [Objective-C](https://en.wikipedia.org/wiki/Objective-C "Objective-C") | strong | explicit | nominal | dynamic with optional static typing[[6]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-12) | 71 | | [OCaml](https://en.wikipedia.org/wiki/OCaml "OCaml") | strong | implicit with optional explicit typing | nominal for records,[[7]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-13) structural for objects[[3]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-:0-8)[[8]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-14) | static | 72 | | [Object Pascal](https://en.wikipedia.org/wiki/Object_Pascal "Object Pascal") | strong | explicit | nominal | static | 73 | | [Opa](https://en.wikipedia.org/wiki/Opa_(programming_language) "Opa (programming language)") | strong | implicit with optional explicit typing | structural | static | 74 | | [Oxygene](https://en.wikipedia.org/wiki/Oxygene_(programming_language) "Oxygene (programming language)") | weak | implicit | | static | 75 | | [Oz-Mozart](https://en.wikipedia.org/wiki/Oz_(programming_language) "Oz (programming language)") | strong | implicit | structural | dynamic | 76 | | [Pascal](https://en.wikipedia.org/wiki/Pascal_(programming_language) "Pascal (programming language)") | weak[[TS 3]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-r2-3) | explicit | nominal | static | 77 | | [Perl](https://en.wikipedia.org/wiki/Perl "Perl") 5 | | implicit | | dynamic | 78 | | [PHP](https://en.wikipedia.org/wiki/PHP "PHP") | | implicit with optional 
explicit typing | nominal | dynamic | 79 | | [Plus](https://en.wikipedia.org/wiki/Plus_(programming_language) "Plus (programming language)") | strong | explicit | structural | static, dynamic (optional) | 80 | | [Prolog](https://en.wikipedia.org/wiki/Prolog "Prolog") | | | | dynamic | 81 | | [Pure](https://en.wikipedia.org/wiki/Pure_(programming_language) "Pure (programming language)") | | | | dynamic | 82 | | [Python](https://en.wikipedia.org/wiki/Python_(programming_language) "Python (programming language)") | strong | implicit (with optional explicit typing as of 3.5) | n/a | dynamic | 83 | | [Raku](https://en.wikipedia.org/wiki/Raku_(programming_language) "Raku (programming language)") | | partially implicit[[TS 7]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-15) | | dynamic with optional static typing | 84 | | [REBOL](https://en.wikipedia.org/wiki/REBOL "REBOL") | strong | implicit | | dynamic | 85 | | [Rexx](https://en.wikipedia.org/wiki/Rexx "Rexx") | typeless | n/a, implicit wrt numbers | n/a | static+dynamic wrt numbers | 86 | | [RPG](https://en.wikipedia.org/wiki/IBM_RPG "IBM RPG") | weak | | | static | 87 | | [Ruby](https://en.wikipedia.org/wiki/Ruby_(programming_language) "Ruby (programming language)") | strong | implicit | n/a | dynamic | 88 | | [Rust](https://en.wikipedia.org/wiki/Rust_(programming_language) "Rust (programming language)") | strong | explicit with optional implicit typing[[9]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-16) | mostly nominal | static | 89 | | [S](https://en.wikipedia.org/wiki/S_(programming_language) "S (programming language)") | | | | dynamic | 90 | | [S-Lang](https://en.wikipedia.org/wiki/S-Lang_(programming_library) "S-Lang (programming library)") | strong | implicit | | dynamic | 91 | | [Scala](https://en.wikipedia.org/wiki/Scala_(programming_language) "Scala (programming language)") | strong | partially implicit (local 
type inference) | nominal (subclassing) and structural | static | 92 | | [Scheme](https://en.wikipedia.org/wiki/Scheme_(programming_language) "Scheme (programming language)") | strong | implicit | | dynamic ([latent](https://en.wikipedia.org/wiki/Latent_typing "Latent typing")) | 93 | | [Seed7](https://en.wikipedia.org/wiki/Seed7 "Seed7") | strong | explicit | nominal | static | 94 | | [Simula](https://en.wikipedia.org/wiki/Simula "Simula") | strong | | | static[[TS 8]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-17) | 95 | | [Smalltalk](https://en.wikipedia.org/wiki/Smalltalk "Smalltalk") | strong | implicit | | dynamic | 96 | | [Swift](https://en.wikipedia.org/wiki/Swift_(programming_language) "Swift (programming language)") | strong | partially implicit (local type inference) | nominal (subclassing) and structural | static | 97 | | [Standard ML](https://en.wikipedia.org/wiki/Standard_ML "Standard ML") | strong | implicit with optional explicit typing | structural | static | 98 | | [Tcl](https://en.wikipedia.org/wiki/Tcl "Tcl") | | | | dynamic | 99 | | [TypeScript](https://en.wikipedia.org/wiki/TypeScript "TypeScript") | ? 
| optional | structural | static | 100 | | [Visual Basic](https://en.wikipedia.org/wiki/Visual_Basic "Visual Basic") | strong | implicit with optional explicit typing | nominal | static | 101 | | [Visual Basic .NET](https://en.wikipedia.org/wiki/Visual_Basic_.NET "Visual Basic .NET") | weak[[TS 3]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-r2-3) | explicit | | static | 102 | | [Visual Prolog](https://en.wikipedia.org/wiki/Visual_Prolog "Visual Prolog") | strong | partially implicit | nominal | static | 103 | | [Wolfram Language](https://en.wikipedia.org/wiki/Wolfram_Language "Wolfram Language") | strong | | | dynamic | 104 | | [Windows PowerShell](https://en.wikipedia.org/wiki/Windows_PowerShell "Windows PowerShell") | strong | implicit | | dynamic | 105 | | [XL](https://en.wikipedia.org/wiki/XL_(programming_language) "XL (programming language)") | strong | | nominal | static | 106 | | [Xojo](https://en.wikipedia.org/wiki/Xojo "Xojo") | strong | explicit | nominal | static | 107 | | [XPath](https://en.wikipedia.org/wiki/XPath "XPath")/[XQuery](https://en.wikipedia.org/wiki/XQuery "XQuery") | strong | partially implicit | nominal | dynamic with optional static typing | 108 | | [Dart](https://en.wikipedia.org/wiki/Dart_(programming_language) "Dart (programming language)") | strong[[10]](https://en.wikipedia.org/wiki/Comparison_of_programming_languages_by_type_system#cite_note-18) | gradual typing | nominal | static with optional dynamic typing | 109 | 110 | -------------------------------------------------------------------------------- /docs/langs/zh-cn/0-design-principle.md: -------------------------------------------------------------------------------- 1 | # 设计原则 2 | 3 | ## 组合 & 简洁 + 可读 4 | 5 | 示例: 6 | 7 | ``` 8 | struct Boilerplate { 9 | 10 | } 11 | 12 | Boilerplate$new() { 13 | 14 | } 15 | ``` 16 | 17 | or 18 | 19 | ``` 20 | impl Boilerplate { 21 | new() { 22 | 23 | } 24 | } 25 | ``` 26 | 
-------------------------------------------------------------------------------- /docs/llvm/README.md: -------------------------------------------------------------------------------- 1 | # LLVM 2 | 3 | c to llvm 4 | 5 | ``` 6 | clang -S -emit-llvm hello.c 7 | ``` 8 | 9 | llvm to assembly 10 | 11 | ``` 12 | llc hello.ll 13 | ``` -------------------------------------------------------------------------------- /docs/llvm/hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | printf("Hello World!\n"); 5 | } 6 | 7 | -------------------------------------------------------------------------------- /docs/llvm/hello.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'hello.c' 2 | source_filename = "hello.c" 3 | target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" 4 | target triple = "x86_64-apple-macosx10.15.0" 5 | 6 | @.str = private unnamed_addr constant [14 x i8] c"Hello World!\0A\00", align 1 7 | 8 | ; Function Attrs: noinline nounwind optnone ssp uwtable 9 | define i32 @main() #0 { 10 | %1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0)) 11 | ret i32 0 12 | } 13 | 14 | declare i32 @printf(i8*, ...) 
#1 15 | 16 | attributes #0 = { noinline nounwind optnone ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "darwin-stkchk-strong-link" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "probe-stack"="___chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } 17 | attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "darwin-stkchk-strong-link" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "probe-stack"="___chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } 18 | 19 | !llvm.module.flags = !{!0, !1, !2} 20 | !llvm.ident = !{!3} 21 | 22 | !0 = !{i32 2, !"SDK Version", [3 x i32] [i32 10, i32 15, i32 6]} 23 | !1 = !{i32 1, !"wchar_size", i32 4} 24 | !2 = !{i32 7, !"PIC Level", i32 2} 25 | !3 = !{!"Apple clang version 11.0.3 (clang-1103.0.32.62)"} 26 | -------------------------------------------------------------------------------- /docs/llvm/hello.s: -------------------------------------------------------------------------------- 1 | .section __TEXT,__text,regular,pure_instructions 2 | .macosx_version_min 10, 15 sdk_version 10, 15, 6 3 | .globl _main ## -- Begin function main 4 | .p2align 4, 0x90 5 | _main: ## @main 6 | .cfi_startproc 7 | ## %bb.0: 8 | pushq %rbp 9 | .cfi_def_cfa_offset 16 10 | .cfi_offset %rbp, -16 11 | movq %rsp, %rbp 12 | .cfi_def_cfa_register %rbp 13 | leaq L_.str(%rip), %rdi 14 | movb 
$0, %al 15 | callq _printf 16 | xorl %eax, %eax 17 | popq %rbp 18 | retq 19 | .cfi_endproc 20 | ## -- End function 21 | .section __TEXT,__cstring,cstring_literals 22 | L_.str: ## @.str 23 | .asciz "Hello World!\n" 24 | 25 | .subsections_via_symbols 26 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | tests: 2 | cargo test --all 3 | 4 | build: 5 | cargo build 6 | 7 | @bench: 8 | cargo bench 9 | 10 | @lint: 11 | rustup component add clippy 12 | rustup component add rustfmt 13 | cargo clippy -- -D warnings 14 | cargo clippy --tests 15 | cargo fmt -- --check 16 | 17 | @fix: 18 | cargo fmt --all 19 | 20 | clean: 21 | cargo clean 22 | find . -type f -name "*.orig" -exec rm {} \; 23 | find . -type f -name "*.bk" -exec rm {} \; 24 | find . -type f -name ".*~" -exec rm {} \; 25 | -------------------------------------------------------------------------------- /src/bin/dc.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io::{Read, Write}; 3 | use std::path::PathBuf; 4 | 5 | use clap::{App, Arg, ArgMatches}; 6 | 7 | use dc_compiler::{codegen, process_string, CodegenResult}; 8 | 9 | mod languageserver; 10 | 11 | fn main() { 12 | let matches = App::new("datum") 13 | .version(&*format!("version {}", env!("GIT_HASH"))) 14 | .author(env!("CARGO_PKG_AUTHORS")) 15 | .about(env!("CARGO_PKG_DESCRIPTION")) 16 | .arg( 17 | Arg::with_name("INPUT") 18 | .help("Charj input files") 19 | .required(true) 20 | .conflicts_with("LANGUAGESERVER") 21 | .multiple(true), 22 | ) 23 | .arg( 24 | Arg::with_name("TARGET") 25 | .help("Output target") 26 | .multiple(true) 27 | .last(true) 28 | .default_value("jit"), 29 | ) 30 | .arg( 31 | Arg::with_name("LANGUAGESERVER") 32 | .help("Start language server") 33 | .conflicts_with_all(&["INPUT"]) 34 | .long("language-server"), 35 | ) 36 | .get_matches(); 37 | 38 
| if matches.is_present("LANGUAGESERVER") { 39 | languageserver::start_server(); 40 | } 41 | 42 | // todo: split input handler and namespace actions 43 | for filename in matches.values_of("INPUT").unwrap() { 44 | process_filename(filename, &matches); 45 | } 46 | } 47 | 48 | pub fn process_filename(filename: &str, matches: &ArgMatches) { 49 | if let Err(_) = PathBuf::from(filename).canonicalize() { 50 | panic!("lost file: {:?}", filename); 51 | } 52 | 53 | let path = PathBuf::from(filename).canonicalize().unwrap(); 54 | let mut contents = String::new(); 55 | let mut f = File::open(&path).unwrap(); 56 | if let Err(e) = f.read_to_string(&mut contents) { 57 | panic!("failed to read file ‘{}’: {}", filename, e.to_string()) 58 | } 59 | 60 | let mut ns = process_string(&*contents, filename); 61 | match matches.value_of("TARGET") { 62 | Some("jit") => { 63 | codegen(&mut ns, "jit"); 64 | } 65 | Some("wasm") => { 66 | let result = codegen(&mut ns, "wasm"); 67 | let mut file = File::create("out.wasm").unwrap(); 68 | match &result[0] { 69 | CodegenResult::Wasm { code } => file.write_all(code).unwrap(), 70 | _ => {} 71 | } 72 | } 73 | _ => { 74 | panic!("not support target{:?}", matches.value_of("TARGET")); 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/bin/languageserver/mod.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::path::PathBuf; 3 | use std::sync::Mutex; 4 | 5 | use serde_json::Value; 6 | use tower_lsp::jsonrpc::Result; 7 | use tower_lsp::lsp_types::*; 8 | use tower_lsp::{Client, LanguageServer, LspService, Server}; 9 | 10 | #[derive(Debug)] 11 | pub struct FileOffsets { 12 | _files: Vec>, 13 | } 14 | 15 | #[derive(Debug)] 16 | pub struct Hovers { 17 | _offsets: FileOffsets, 18 | _lookup: Vec<(usize, usize, String)>, 19 | } 20 | 21 | #[derive(Debug)] 22 | struct DatumServer { 23 | client: Client, 24 | files: Mutex>, 25 | 
} 26 | 27 | pub fn start_server() { 28 | let mut rt = tokio::runtime::Runtime::new().unwrap(); 29 | rt.block_on(async { 30 | let stdin = tokio::io::stdin(); 31 | let stdout = tokio::io::stdout(); 32 | 33 | let (service, messages) = LspService::new(|client| DatumServer { 34 | client, 35 | files: Mutex::new(HashMap::new()), 36 | }); 37 | 38 | Server::new(stdin, stdout) 39 | .interleave(messages) 40 | .serve(service) 41 | .await; 42 | }); 43 | std::process::exit(1); 44 | } 45 | 46 | impl DatumServer { 47 | /// Parse file 48 | async fn parse_file(&self, _uri: Url) { 49 | // if let Ok(path) = uri.to_file_path() {} 50 | } 51 | } 52 | 53 | #[tower_lsp::async_trait] 54 | impl LanguageServer for DatumServer { 55 | async fn initialize(&self, _: InitializeParams) -> Result { 56 | Ok(InitializeResult::default()) 57 | } 58 | 59 | async fn initialized(&self, _: InitializedParams) { 60 | self.client 61 | .log_message(MessageType::Info, "server initialized!") 62 | .await; 63 | } 64 | 65 | async fn shutdown(&self) -> Result<()> { 66 | Ok(()) 67 | } 68 | 69 | async fn did_change_workspace_folders(&self, _: DidChangeWorkspaceFoldersParams) { 70 | self.client 71 | .log_message(MessageType::Info, "workspace folders changed!") 72 | .await; 73 | } 74 | 75 | async fn did_change_configuration(&self, _: DidChangeConfigurationParams) { 76 | self.client 77 | .log_message(MessageType::Info, "configuration changed!") 78 | .await; 79 | } 80 | 81 | async fn did_change_watched_files(&self, _: DidChangeWatchedFilesParams) { 82 | self.client 83 | .log_message(MessageType::Info, "watched files have changed!") 84 | .await; 85 | } 86 | 87 | async fn execute_command(&self, _: ExecuteCommandParams) -> Result> { 88 | self.client 89 | .log_message(MessageType::Info, "command executed!") 90 | .await; 91 | Ok(None) 92 | } 93 | 94 | async fn did_open(&self, params: DidOpenTextDocumentParams) { 95 | let uri = params.text_document.uri; 96 | 97 | self.parse_file(uri).await; 98 | } 99 | 100 | async fn 
did_change(&self, params: DidChangeTextDocumentParams) { 101 | let uri = params.text_document.uri; 102 | 103 | self.parse_file(uri).await; 104 | } 105 | 106 | async fn did_save(&self, params: DidSaveTextDocumentParams) { 107 | let uri = params.text_document.uri; 108 | 109 | self.parse_file(uri).await; 110 | } 111 | 112 | async fn did_close(&self, params: DidCloseTextDocumentParams) { 113 | let uri = params.text_document.uri; 114 | 115 | if let Ok(path) = uri.to_file_path() { 116 | if let Ok(mut files) = self.files.lock() { 117 | files.remove(&path); 118 | } 119 | } 120 | } 121 | 122 | async fn completion(&self, _: CompletionParams) -> Result> { 123 | Ok(None) 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod test { 3 | #[test] 4 | fn should_run_hello_world_file() { 5 | use assert_cmd::Command; 6 | 7 | let mut cmd = Command::cargo_bin("dc").unwrap(); 8 | cmd.arg("docs/examples/hello-world.cj").unwrap(); 9 | 10 | cmd.assert().success().stdout("hello,world\n"); 11 | } 12 | 13 | #[test] 14 | fn should_run_call_hello_world_file() { 15 | use assert_cmd::Command; 16 | 17 | let mut cmd = Command::cargo_bin("dc").unwrap(); 18 | cmd.arg("docs/examples/func-call.cj").unwrap(); 19 | 20 | cmd.assert().success().stdout("你好,世界!\n"); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /stdlib/fmt/fmt.cj: -------------------------------------------------------------------------------- 1 | pkg fmt 2 | 3 | 4 | -------------------------------------------------------------------------------- /stdlib/io/io.cj: -------------------------------------------------------------------------------- 1 | pkg io 2 | -------------------------------------------------------------------------------- /stdlib/net/net.cj: 
-------------------------------------------------------------------------------- 1 | pkg net 2 | -------------------------------------------------------------------------------- /stdlib/os/os.cj: -------------------------------------------------------------------------------- 1 | pkg os 2 | -------------------------------------------------------------------------------- /stdlib/reflect/reflect.cj: -------------------------------------------------------------------------------- 1 | pkg reflect 2 | -------------------------------------------------------------------------------- /stdlib/strings/strings.cj: -------------------------------------------------------------------------------- 1 | pkg strings 2 | --------------------------------------------------------------------------------