├── .gitignore ├── .travis.yml ├── Cargo.lock ├── Cargo.toml ├── LICENSE.txt ├── Makefile ├── README.md ├── examples ├── beer.hau ├── factorial.hau ├── fibonacci.hau ├── for-each.hau ├── hailstone.hau └── read.hau ├── fix_indentation.py ├── grammar.ebnf ├── haumea.png ├── haumea.svg ├── src ├── codegen │ ├── c.rs │ └── mod.rs ├── lib.rs ├── main.rs ├── parser.rs └── scanner.rs └── tests ├── test_codegen.rs ├── test_parser.rs └── test_scanner.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | out.c 3 | out 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: 3 | - stable 4 | - beta 5 | - nightly 6 | matrix: 7 | allow_failures: 8 | - rust: nightly -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | [root] 2 | name = "haumea" 3 | version = "0.1.0" 4 | 5 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "haumea" 3 | version = "0.1.0" 4 | authors = ["BookOwl "] 5 | 6 | [dependencies] 7 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Matthew S. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | make: 2 | @cargo build --release 3 | 4 | do: $(file) 5 | @cargo build 6 | @./target/debug/haumea < $(file) > out.c 7 | @cc out.c -o out 8 | @./out 9 | @rm out 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | Haumea 3 |
4 | 5 | [![Join the chat at https://gitter.im/haumea-lang/Lobby](https://badges.gitter.im/haumea-lang/Lobby.svg)](https://gitter.im/haumea-lang/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Build Status](https://travis-ci.org/haumea-lang/haumea-rs.svg?branch=master)](https://travis-ci.org/haumea-lang/haumea-rs) 6 | 7 | Haumea is an experimental language designed to be easy to learn and use. 8 | # Using 9 | 10 | First, make sure that you have Rust and Cargo installed. Then, simply clone this repo- 11 | 12 | ```sh 13 | $ git clone https://github.com/BookOwl/haumea.git 14 | ``` 15 | 16 | -and just use the following to run the Haumea compiler, where `hello.hau` is your Haumea program. (This will compile the Haumea source, if required, compile `hello.hau`, use `cc` to compile the result, and finally run the binary.) 17 | 18 | ```sh 19 | $ make do file=hello.hau 20 | 21 | # or if that doesn't work: 22 | 23 | $ cargo build 24 | $ ./target/debug/haumea < hello.hau > out.c 25 | $ cc out.c -o out 26 | $ ./out 27 | ``` 28 | 29 | # Example programs 30 | 31 | Here is an example program that calculates factorials: 32 | 33 | ``` 34 | to factorial with (n) do 35 | if n = 0 then do 36 | return 1 37 | end 38 | else do 39 | return n * factorial(n - 1) 40 | end 41 | end 42 | 43 | to main do 44 | display(factorial(5)) 45 | end 46 | ``` 47 | 48 | # Find a bug, or want to request a feature? 49 | Please create an issue with your bug report or pull request. 50 | 51 | # Haumea reference. 52 | Please check out the wiki for the Haumea reference and a tutorial. 53 | 54 | # License 55 | Haumea is released under the MIT license. Please see LICENSE.txt for details. 
56 | 57 | # Credits 58 | @BookOwl - Created the language 59 | 60 | @nanalan - Made an amazing logo 61 | 62 | Many other people who have helped with design decisions 63 | -------------------------------------------------------------------------------- /examples/beer.hau: -------------------------------------------------------------------------------- 1 | /* Displays the entire "99 bottles of beer" song */ 2 | /* TODO we need strings to properly do this */ 3 | 4 | to beer with (start) do 5 | for each bottles in start through 3 by -1 do 6 | /* 7 | display(bottles ++ " bottles of beer on the wall, " ++ bottles ++ " bottles of beer.") 8 | display("Take one down and pass it around, " ++ bottles - 1 ++ " bottles of beer on the wall") 9 | */ 10 | end 11 | 12 | /* 13 | display("2 bottles of beer on the wall, 2 bottles of beer.") 14 | display("Take one down and pass it around, 1 bottle of beer on the wall") 15 | 16 | display("1 bottle of beer on the wall, 1 bottle of beer.") 17 | display("Take one down and pass it around, no more bottles of beer on the wall") 18 | 19 | display("No more bottles of beer on the wall, no more bottles of beer.") 20 | display("Go to the store and buy some more, " ++ start ++ " bottles of beer on the wall.") 21 | */ 22 | end 23 | 24 | to main do 25 | beer(99) 26 | end 27 | -------------------------------------------------------------------------------- /examples/factorial.hau: -------------------------------------------------------------------------------- 1 | /* Returns the factorial of n */ 2 | to factorial with (n) do 3 | if n = 0 then return 1 4 | else return n * factorial(n - 1) 5 | end 6 | 7 | to main do 8 | display(factorial(5)) 9 | end 10 | -------------------------------------------------------------------------------- /examples/fibonacci.hau: -------------------------------------------------------------------------------- 1 | to fib with (n) do 2 | if n < 2 then return 1 3 | else return fib(n-1) + fib(n-2) 4 | end 5 | 6 | to main do 7 | 
"""Rewrite every Rust source file under ``src``, replacing tab characters."""
import os
import os.path

# Visit the whole tree below `src`; only files with a `.rs` extension are touched.
for root, _, files in os.walk("src"):
    for file in files:
        # Test the real extension: the earlier `file[-2:] != "rs"` check also
        # accepted any name that merely ended in the letters "rs".
        if not file.endswith(".rs"):
            continue
        path = os.path.join(root, file)
        # Context managers close each handle before the file is reopened.
        with open(path) as source:
            content = source.read()
        content = content.replace("\t", " ")
        with open(path, "w") as target:
            target.write(content)
/// Produces an owned copy of the value stored behind an `Rc`.
///
/// This is a macro rather than a function so that it can be applied to an
/// `Rc` expression in place, without moving the `Rc` into a call.
/// The pointee type must implement `Clone`; the `Rc` itself is not modified.
macro_rules! unwrap_rc {
    // Clone straight through the dereferenced pointer: exactly one copy of the
    // inner value is made. The earlier `Rc::make_mut(&mut rc.clone())` form
    // first bumped the reference count, which forced `make_mut` to deep-copy
    // the value, and then cloned that copy a second time.
    ( $rc:expr ) => ( (*$rc).clone() );
}
compile_statement(&mut self, statement: parser::Statement, indent: i32) { 94 | use parser::Statement; 95 | 96 | match statement { 97 | Statement::Return(exp) => { 98 | let exp = self.compile_expression(exp); 99 | self.out.push_str(&format!("{:}return {:};", 100 | replicate(self.indent, indent), 101 | exp)); 102 | }, 103 | Statement::Do(block) => { 104 | self.out.push_str(&format!("{:}{{\n", replicate(self.indent, indent))); 105 | for sub_statement in block { 106 | let sub = unwrap_rc!(sub_statement); 107 | self.compile_statement(sub, indent+1); 108 | }; 109 | self.out.push_str(&format!("{:}}}\n", replicate(self.indent, indent))); 110 | }, 111 | Statement::Call { 112 | function: func, 113 | arguments: args, 114 | } => { 115 | self.out.push_str(&format!("{:}{:}(", replicate(self.indent, indent), func)); 116 | let len = args.len(); 117 | for (index, arg) in args.into_iter().enumerate() { 118 | let arg = self.compile_expression(arg); 119 | if index == len-1 { 120 | self.out.push_str(&arg); 121 | } else { 122 | self.out.push_str(&format!("{:}, ", arg)); 123 | } 124 | } 125 | self.out.push_str(");\n"); 126 | }, 127 | Statement::Var(ident) => { 128 | self.out.push_str(&format!("{:}long {:};\n", replicate(self.indent, indent), ident)); 129 | }, 130 | Statement::Set(ident, expr) => { 131 | let expr = self.compile_expression(expr); 132 | self.out.push_str(&format!("{:}{:} = {:};\n", 133 | replicate(self.indent, indent), 134 | ident, 135 | expr 136 | )); 137 | }, 138 | Statement::Change(ident, expr) => { 139 | let expr = self.compile_expression(expr); 140 | self.out.push_str(&format!("{:}{:} += {:};\n", 141 | replicate(self.indent, indent), 142 | ident, 143 | expr 144 | )); 145 | }, 146 | Statement::If { 147 | cond, 148 | if_clause, 149 | else_clause, 150 | } => { 151 | let cond = self.compile_expression(cond); 152 | self.out.push_str(&format!("{:}if {:}\n", replicate(self.indent, indent), cond)); 153 | let if_clause = unwrap_rc!(if_clause); 154 | 
self.compile_statement(if_clause, indent+1); 155 | let else_clause = unwrap_rc!(else_clause); 156 | if let Some(else_) = else_clause { 157 | self.out.push_str(&format!("\n{:}else\n", replicate(self.indent, indent))); 158 | self.compile_statement(else_, indent+1); 159 | self.out.push_str("\n"); 160 | } 161 | }, 162 | Statement::Forever(block) => { 163 | self.out.push_str(&format!("{:}while (1)\n", replicate(self.indent, indent))); 164 | let block = unwrap_rc!(block); 165 | self.compile_statement(block, indent+1); 166 | }, 167 | Statement::While { 168 | cond, 169 | body, 170 | } => { 171 | let cond = self.compile_expression(cond); 172 | self.out.push_str(&format!("{:}while {:}\n", replicate(self.indent, indent), 173 | cond)); 174 | let body = unwrap_rc!(body); 175 | self.compile_statement(body, indent+1); 176 | }, 177 | Statement::ForEach { 178 | ident, 179 | start, 180 | end, 181 | by, 182 | range_type, 183 | body, 184 | } => { 185 | let comparitor; 186 | let neg_comparitor; 187 | if range_type == "to" { 188 | comparitor = "<"; 189 | neg_comparitor = ">"; 190 | } else if range_type == "through" { 191 | comparitor = "<="; 192 | neg_comparitor = ">="; 193 | } else { 194 | panic!("Invalid range type {:?}!", range_type) 195 | } 196 | 197 | let start_name = self.get_unique_name(); 198 | let end_name = self.get_unique_name(); 199 | let by_name = self.get_unique_name(); 200 | 201 | let start = self.compile_expression(start); 202 | self.out.push_str(&format!("{:}long {:} = {:};\n", 203 | replicate(self.indent, indent), 204 | start_name, 205 | start, 206 | )); 207 | let end = self.compile_expression(end); 208 | self.out.push_str(&format!("{:}long {:} = {:};\n", 209 | replicate(self.indent, indent), 210 | end_name, 211 | end) 212 | ); 213 | let by = self.compile_expression(by); 214 | self.out.push_str(&format!("{:}long {:} = {:};\n", 215 | replicate(self.indent, indent), 216 | by_name, 217 | by) 218 | ); 219 | let comp = format!("({:} < {:} ? 
/// Replicates a &str t times.
///
/// Returns the empty string when `t` is zero or negative. The earlier
/// recursive version only stopped at exactly `t == 0`, so a negative count
/// never reached the base case, and its recursion depth grew with `t`.
fn replicate(s: &str, t: i32) -> String {
    if t <= 0 {
        return String::new();
    }
    // `t` is strictly positive here, so the cast cannot wrap.
    s.repeat(t as usize)
}
-------------------------------------------------------------------------------- /src/parser.rs: -------------------------------------------------------------------------------- 1 | /// src/parser.rs 2 | /// The parser for the haumea language. 3 | use std::rc::Rc; 4 | use scanner::{Scanner, Token, ScanState}; 5 | 6 | /// A Program is a Vec of Functions 7 | pub type Program = Vec; 8 | 9 | /// A Block is a Vec of Rcs 10 | pub type Block = Vec>; 11 | 12 | /// A Type is a String (for now) 13 | pub type Type = String; 14 | 15 | /// An Ident is a String 16 | pub type Ident = String; 17 | 18 | /// A Signature is a Vec of Strings 19 | pub type Signature = Vec; 20 | 21 | /// A function is a callable unit of code that returns a value 22 | #[derive(Debug, Clone, PartialEq)] 23 | pub struct Function { 24 | /// The name of the function 25 | pub name: String, 26 | /// The signature of the function 27 | /// 28 | /// It is a Some(Signature) when there is a signature, 29 | /// or None if there is no signature, which means that 30 | /// the function takes no arguments and return the Integer 0 31 | pub signature: Option, 32 | /// The code of the function 33 | pub code: Statement, 34 | } 35 | 36 | /// A Haumea statement 37 | #[derive(Debug, Clone, PartialEq)] 38 | pub enum Statement { 39 | /// A return statement 40 | /// 41 | /// return 1 42 | Return(Expression), 43 | /* /// A let statement 44 | /// 45 | /// let x be an Integer 46 | Let(Ident, Type), */ 47 | /// A variable statement 48 | /// 49 | /// variable x 50 | Var(Ident), 51 | /// An assignment statement 52 | /// 53 | /// set x to 5 54 | Set(Ident, Expression), 55 | /// A change statement 56 | /// 57 | /// change x by -2 58 | Change(Ident, Expression), 59 | /// An if statement 60 | /// 61 | /// if True then return 1 62 | /// else return -3 63 | /// (else is optional) 64 | If { 65 | cond: Expression, 66 | if_clause: Rc, 67 | else_clause: Rc>, 68 | }, 69 | /// A Do statement 70 | /// 71 | /// do 72 | /// statement1 73 | /// 
statement2 74 | /// end 75 | Do(Block), 76 | /// A call statment 77 | /// 78 | /// write_ln(1) 79 | Call { 80 | function: Ident, 81 | arguments: Vec, 82 | }, 83 | /// A forever loop 84 | /// 85 | /// forever do ... end 86 | Forever(Rc), 87 | /// A while loop 88 | /// 89 | /// while x < 5 change x by 1 90 | While { 91 | cond: Expression, 92 | body: Rc, 93 | }, 94 | /// A for each loop 95 | ForEach { 96 | ident: Ident, 97 | start: Expression, 98 | end: Expression, 99 | by: Expression, 100 | range_type: String, 101 | body: Rc, 102 | } 103 | } 104 | 105 | /// The operators in Haumea 106 | #[derive(Debug, Clone, PartialEq)] 107 | pub enum Operator { 108 | /// Addition (+) 109 | Add, 110 | /// Subtraction (-) 111 | Sub, 112 | /// Multiplication (*) 113 | Mul, 114 | /// Division (/) 115 | Div, 116 | /// Modulo (modulo) 117 | Modulo, 118 | /// Negation (-) 119 | Negate, 120 | /// Equals (=) 121 | Equals, 122 | ///Not equals (!=) 123 | NotEquals, 124 | /// Greater than (>) 125 | Gt, 126 | /// Lesser than (<) 127 | Lt, 128 | /// Greater than or equal to (>=) 129 | Gte, 130 | /// Lesser than or equal to (<=) 131 | Lte, 132 | /// Logical And (and) 133 | LogicalAnd, 134 | /// Logical OR (or) 135 | LogicalOr, 136 | /// Logical Not (not) 137 | LogicalNot, 138 | /// Binary And (&) 139 | BinaryAnd, 140 | /// Binary Or (|) 141 | BinaryOr, 142 | /// Binary Not (~) 143 | BinaryNot, 144 | } 145 | 146 | #[derive(Debug, Clone, PartialEq)] 147 | pub enum Expression { 148 | /// A binary operation (eg, "1 + 2" or "True or False") 149 | BinaryOp { 150 | operator: Operator, 151 | left: Rc, 152 | right: Rc, 153 | }, 154 | /// A unary operation (eg, "not False" or "-(1 + 2)") 155 | UnaryOp { 156 | operator: Operator, 157 | expression: Rc, 158 | }, 159 | /// An integer literal 160 | Integer(i32), 161 | /// An identifier 162 | Ident(Ident), 163 | /// A function call 164 | Call { 165 | function: Ident, 166 | arguments: Vec>, 167 | }, 168 | } 169 | 170 | pub fn parse(scanner: Scanner) -> Program { 
171 | let mut tokens = scanner.collect::>(); 172 | let mut program = vec![]; 173 | while !tokens.is_empty() { 174 | program.push(parse_function(&mut tokens)); 175 | } 176 | program 177 | } 178 | 179 | fn match_token(mut token_stream: &mut Vec, expected: &Token) -> Result { 180 | let t = token_stream.remove(0); 181 | if t == *expected { 182 | Ok(t) 183 | } else { 184 | Err(t) 185 | } 186 | } 187 | 188 | fn match_panic(mut token_stream: &mut Vec, expected: Token) { 189 | match match_token(&mut token_stream, &expected) { 190 | Ok(_) => (), 191 | Err(t) => panic!(format!("Expected {:?}, but found {:?}!", expected, t)), 192 | } 193 | } 194 | 195 | fn parse_function(mut token_stream: &mut Vec) -> Function { 196 | match_panic(&mut token_stream, Token::Keyword("to".to_string(), ScanState::empty())); 197 | let name = match token_stream.remove(0) { 198 | Token::Ident(s, _) => s, 199 | t => { 200 | let s = t.clone().state(); 201 | panic!("At line {:}:{:}, expected an identifier, but found {:?}!", 202 | s.line, s.column, t) 203 | }, 204 | }; 205 | let signature = parse_signature(&mut token_stream); 206 | let code = parse_statement(&mut token_stream); 207 | Function { 208 | name: name, 209 | signature: signature, 210 | code: code, 211 | } 212 | } 213 | 214 | fn parse_signature(mut token_stream: &mut Vec) -> Option { 215 | if token_stream[0] == Token::Keyword("with".to_string(), ScanState::empty()) { 216 | let mut args = vec![]; 217 | match_panic(&mut token_stream, Token::Keyword("with".to_string(), ScanState::empty())); 218 | match_panic(&mut token_stream, Token::Lp(ScanState::empty())); 219 | loop { 220 | args.push(match token_stream.remove(0) { 221 | Token::Ident(name, _) => name, 222 | Token::Rp(_) => break, 223 | t => { 224 | let s = t.clone().state(); 225 | panic!("At line {:}:{:}, expected an identifier, but found {:?}!", 226 | s.line, s.column, t) 227 | }, 228 | }); 229 | if token_stream[0] == Token::Rp(ScanState::empty()) { 230 | token_stream.remove(0); 231 | break; 232 
| } 233 | match_panic(&mut token_stream, Token::Comma(ScanState::empty())); 234 | } 235 | Some(args) 236 | } else { 237 | None 238 | } 239 | } 240 | 241 | fn parse_statement(mut token_stream: &mut Vec) -> Statement { 242 | match token_stream.remove(0) { 243 | Token::Keyword(t, _) => { 244 | if t == "return" { 245 | parse_return(&mut token_stream) 246 | } else if t == "do" { 247 | parse_do(&mut token_stream) 248 | } else if t == "if" { 249 | parse_if(&mut token_stream) 250 | } else if t == "set" { 251 | parse_set(&mut token_stream) 252 | } else if t == "change" { 253 | parse_change(&mut token_stream) 254 | } else if t == "variable" { 255 | parse_declare(&mut token_stream) 256 | } else if t == "forever" { 257 | parse_forever(&mut token_stream) 258 | } else if t == "while" { 259 | parse_while(&mut token_stream) 260 | } else if t == "for" { 261 | parse_for_each(&mut token_stream) 262 | } else { 263 | panic!("Invalid statement!") 264 | } 265 | } 266 | t @ Token::Ident(..) => { 267 | token_stream.insert(0, t); 268 | parse_call(&mut token_stream) 269 | }, 270 | t => { 271 | let s = t.clone().state(); 272 | panic!("Syntax error at line {:}:{:}, found {:?}", 273 | s.line, s.column, t) 274 | }, 275 | } 276 | } 277 | 278 | fn parse_forever(mut token_stream: &mut Vec) -> Statement { 279 | Statement::Forever(Rc::new(parse_statement(&mut token_stream))) 280 | } 281 | 282 | fn parse_while(mut token_stream: &mut Vec) -> Statement { 283 | Statement::While{ 284 | cond: parse_expression(&mut token_stream), 285 | body: Rc::new(parse_statement(&mut token_stream)) 286 | } 287 | } 288 | 289 | fn parse_for_each(mut token_stream: &mut Vec) -> Statement { 290 | match_panic(&mut token_stream, Token::Keyword("each".to_string(), ScanState::empty())); 291 | let ident = match token_stream.remove(0) { 292 | Token::Ident(name, _) => name, 293 | t => { 294 | let s = t.clone().state(); 295 | panic!("At line {:}:{:}, expected an identifier, but found {:?}!", 296 | s.line, s.column, t) 297 | }, 298 | 
}; 299 | match_panic(&mut token_stream, Token::Keyword("in".to_string(), ScanState::empty())); 300 | let start = parse_expression(&mut token_stream); 301 | 302 | let range_token = token_stream.remove(0); 303 | let end = parse_expression(&mut token_stream); 304 | let range_type; 305 | 306 | if range_token == Token::Keyword("to".to_string(), ScanState::empty()) { 307 | range_type = "to"; 308 | } else if range_token == Token::Keyword("through".to_string(), ScanState::empty()) { 309 | range_type = "through"; 310 | } else { 311 | let s = range_token.clone().state(); 312 | panic!("At line {:}:{:}, expected 'to' or 'through', not {:?}", s.line, s.column, range_token); 313 | } 314 | 315 | let by = match token_stream[0] { 316 | Token::Keyword(ref kw, _) => kw == &"by", 317 | _ => false, 318 | }; 319 | let by = if by { 320 | token_stream.remove(0); 321 | parse_expression(&mut token_stream) 322 | } else { 323 | Expression::Integer(1) 324 | }; 325 | Statement::ForEach { 326 | ident: ident, 327 | start: start, 328 | end: end, 329 | by: by, 330 | range_type: range_type.to_string(), 331 | body: Rc::new(parse_statement(&mut token_stream)) 332 | } 333 | } 334 | 335 | fn parse_return(mut token_stream: &mut Vec) -> Statement { 336 | Statement::Return(parse_expression(&mut token_stream)) 337 | } 338 | 339 | fn parse_declare(mut token_stream: &mut Vec) -> Statement { 340 | let ident = match token_stream.remove(0) { 341 | Token::Ident(ident, _) => ident, 342 | t => { 343 | let s = t.clone().state(); 344 | panic!("At line {:}:{:}, expected an identifier, but found {:?}!", 345 | s.line, s.column, t) 346 | }, 347 | }; 348 | Statement::Var(ident) 349 | } 350 | fn parse_do(mut token_stream: &mut Vec) -> Statement { 351 | let mut block = vec![]; 352 | while token_stream[0] != Token::Keyword("end".to_string(), ScanState::empty()) { 353 | block.push(Rc::new(parse_statement(&mut token_stream))); 354 | } 355 | token_stream.remove(0); 356 | Statement::Do(block) 357 | } 358 | 359 | fn parse_if(mut 
token_stream: &mut Vec) -> Statement { 360 | let cond = parse_expression(&mut token_stream); 361 | match_panic(&mut token_stream, Token::Keyword("then".to_string(), ScanState::empty())); 362 | let if_clause = Rc::new(parse_statement(&mut token_stream)); 363 | let else_clause = Rc::new(if !token_stream.is_empty() && 364 | token_stream[0] == Token::Keyword("else".to_string(), ScanState::empty()) { 365 | match_panic(&mut token_stream, Token::Keyword("else".to_string(), ScanState::empty())); 366 | Some(parse_statement(&mut token_stream)) 367 | } else { 368 | None 369 | }); 370 | Statement::If { 371 | cond: cond, 372 | if_clause: if_clause, 373 | else_clause: else_clause, 374 | } 375 | } 376 | 377 | fn parse_set(mut token_stream: &mut Vec) -> Statement { 378 | let ident = match token_stream.remove(0) { 379 | Token::Ident(ident, _) => ident, 380 | t => { 381 | let s = t.clone().state(); 382 | panic!("At line {:}:{:}, expected an identifier, but found {:?}!", 383 | s.line, s.column, t) 384 | }, 385 | }; 386 | match_panic(&mut token_stream, Token::Keyword("to".to_string(), ScanState::empty())); 387 | let expr = parse_expression(&mut token_stream); 388 | Statement::Set(ident, expr) 389 | } 390 | 391 | fn parse_change(mut token_stream: &mut Vec) -> Statement { 392 | let ident = match token_stream.remove(0) { 393 | Token::Ident(ident, _) => ident, 394 | t => { 395 | let s = t.clone().state(); 396 | panic!("At line {:}:{:}, expected an identifier, but found {:?}!", 397 | s.line, s.column, t) 398 | }, 399 | }; 400 | match_panic(&mut token_stream, Token::Keyword("by".to_string(), ScanState::empty())); 401 | let expr = parse_expression(&mut token_stream); 402 | Statement::Change(ident, expr) 403 | } 404 | 405 | fn parse_call(mut token_stream: &mut Vec) -> Statement { 406 | let ident = match token_stream.remove(0) { 407 | Token::Ident(ident, _) => ident, 408 | t => { 409 | let s = t.clone().state(); 410 | panic!("At line {:}:{:}, expected an identifier, but found {:?}!", 411 | 
s.line, s.column, t) 412 | }, 413 | }; 414 | match_panic(&mut token_stream, Token::Lp(ScanState::empty())); 415 | let mut args = vec![]; 416 | if token_stream[0] != Token::Rp(ScanState::empty()) { 417 | loop { 418 | args.push(parse_expression(&mut token_stream)); 419 | if token_stream[0] == Token::Rp(ScanState::empty()) { 420 | token_stream.remove(0); 421 | break; 422 | } 423 | match_panic(&mut token_stream, Token::Comma(ScanState::empty())); 424 | } 425 | } 426 | Statement::Call{ 427 | function: ident, 428 | arguments: args, 429 | } 430 | } 431 | 432 | fn parse_expression(mut token_stream: &mut Vec) -> Expression { 433 | prec_4(&mut token_stream) 434 | } 435 | 436 | fn prec_0(mut token_stream: &mut Vec) -> Expression { 437 | if token_stream[0] == Token::Lp(ScanState::empty()) { 438 | token_stream.remove(0); 439 | let exp = parse_expression(&mut token_stream); 440 | match_panic(&mut token_stream, Token::Rp(ScanState::empty())); 441 | exp 442 | } else { 443 | match token_stream.remove(0) { 444 | Token::Number(n, _) => Expression::Integer(n), 445 | Token::Operator(op, s) => { 446 | if op == "-" { 447 | Expression::UnaryOp { 448 | operator: Operator::Sub, 449 | expression: Rc::new(parse_expression(&mut token_stream)) 450 | } 451 | } else { 452 | panic!("At line {:}:{:}, expected \"-\", but found {:?}!", 453 | s.line, s.column, op) 454 | } 455 | } 456 | Token::Ident(id, _) => { 457 | if !token_stream.is_empty() && token_stream[0] == Token::Lp(ScanState::empty()) { 458 | match_panic(&mut token_stream, Token::Lp(ScanState::empty())); 459 | let mut args = vec![]; 460 | if token_stream[0] != Token::Rp(ScanState::empty()) { 461 | loop { 462 | args.push(Rc::new(parse_expression(&mut token_stream))); 463 | if token_stream[0] == Token::Rp(ScanState::empty()) { 464 | token_stream.remove(0); 465 | break; 466 | } 467 | match_panic(&mut token_stream, Token::Comma(ScanState::empty())); 468 | } 469 | } else { 470 | token_stream.remove(0); 471 | } 472 | Expression::Call{ 473 | 
function: id, 474 | arguments: args, 475 | } 476 | } else { 477 | Expression::Ident(id) 478 | } 479 | }, 480 | t => { 481 | let s = t.clone().state(); 482 | panic!("At line {:}:{:}, expected an expression, but found {:?}!", 483 | s.line, s.column, t) 484 | }, 485 | } 486 | } 487 | } 488 | 489 | fn prec_1(mut token_stream: &mut Vec) -> Expression { 490 | let lh = prec_0(&mut token_stream); 491 | if !token_stream.is_empty() { 492 | let op = match token_stream.get(0) { 493 | Some(&Token::Operator(ref name, _)) => { 494 | if *name == "*" { 495 | Operator::Mul 496 | } else if *name == "/" { 497 | Operator::Div 498 | } else if *name == "modulo" { 499 | Operator::Modulo 500 | } else { 501 | return lh 502 | } 503 | }, 504 | _ => return lh, 505 | }; 506 | token_stream.remove(0); 507 | let rh = prec_1(&mut token_stream); 508 | Expression::BinaryOp { 509 | operator: op, 510 | left: Rc::new(lh), 511 | right: Rc::new(rh), 512 | } 513 | } else { 514 | lh 515 | } 516 | } 517 | 518 | fn prec_2(mut token_stream: &mut Vec) -> Expression { 519 | let lh = prec_1(&mut token_stream); 520 | if !token_stream.is_empty() { 521 | let op = match token_stream.get(0) { 522 | Some(&Token::Operator(ref name, _)) => { 523 | if *name == "+" { 524 | Operator::Add 525 | } else if *name == "-" { 526 | Operator::Sub 527 | } else { 528 | return lh 529 | } 530 | }, 531 | _ => return lh, 532 | }; 533 | token_stream.remove(0); 534 | let rh = prec_2(&mut token_stream); 535 | Expression::BinaryOp { 536 | operator: op, 537 | left: Rc::new(lh), 538 | right: Rc::new(rh), 539 | } 540 | } else { 541 | lh 542 | } 543 | } 544 | 545 | fn prec_3(mut token_stream: &mut Vec) -> Expression { 546 | let lh = prec_2(&mut token_stream); 547 | if !token_stream.is_empty() { 548 | let op = match token_stream.get(0) { 549 | Some(&Token::Operator(ref name, _)) => { 550 | if *name == ">" { 551 | Operator::Gt 552 | } else if *name == ">=" { 553 | Operator::Gte 554 | } else if *name == "<" { 555 | Operator::Lt 556 | } else if *name 
== "<=" { 557 | Operator::Lte 558 | } else if *name == "=" { 559 | Operator::Equals 560 | } else if *name == "!=" { 561 | Operator::NotEquals 562 | } else { 563 | return lh 564 | } 565 | }, 566 | _ => return lh 567 | }; 568 | token_stream.remove(0); 569 | let rh = prec_3(&mut token_stream); 570 | Expression::BinaryOp { 571 | operator: op, 572 | left: Rc::new(lh), 573 | right: Rc::new(rh), 574 | } 575 | } else { 576 | lh 577 | } 578 | } 579 | 580 | fn prec_4(mut token_stream: &mut Vec) -> Expression { 581 | let lh = prec_3(&mut token_stream); 582 | if !token_stream.is_empty() { 583 | let op = match token_stream.get(0) { 584 | Some(&Token::Operator(ref name, _)) => { 585 | if *name == "and" { 586 | Operator::LogicalAnd 587 | } else if *name == "or" { 588 | Operator::LogicalOr 589 | } else { 590 | return lh 591 | } 592 | }, 593 | _ => return lh 594 | }; 595 | token_stream.remove(0); 596 | let rh = prec_4(&mut token_stream); 597 | Expression::BinaryOp { 598 | operator: op, 599 | left: Rc::new(lh), 600 | right: Rc::new(rh), 601 | } 602 | } else { 603 | lh 604 | } 605 | } 606 | -------------------------------------------------------------------------------- /src/scanner.rs: -------------------------------------------------------------------------------- 1 | //! src/scanner.rs 2 | //! The scanner for the haumea language 3 | 4 | use std::str::Chars; // We need to bring the Chars struct into scope 5 | use std::iter::Peekable; 6 | /// The scanner struct 7 | #[derive(Debug)] 8 | pub struct Scanner<'a> { 9 | /// The source &str used to create the scanner. 
/// The scanner struct
#[derive(Debug)]
pub struct Scanner<'a> {
    /// The source &str used to create the scanner.
    ///
    /// Scanner doesn't do anything with it currently, but it is kept in case clients
    /// want to get back the source code and, more importantly,
    /// to keep it in scope so that the source_chars iterator can work
    pub source_str: &'a str,
    /// An iterator of chars over the source str
    source_chars: std::iter::Peekable<std::str::Chars<'a>>,
    /// A vector of chars that can be in operators
    operator_chars: Vec<char>,
    /// A vector of allowed operators
    operators: Vec<&'static str>,
    /// A vector of chars that can be in identifiers
    ident_chars: Vec<char>,
    /// A vector of keywords in haumea
    reserved_words: Vec<&'static str>,
    /// The look ahead char
    pub peek: Option<char>,
    /// The 1-based column of the look ahead char in the source
    pub column: u32,
    /// The 1-based line of the look ahead char in the source
    pub line: u32,
}

/// A structure containing the state of the scanner when it found a token
#[derive(Debug, Copy, Clone)]
pub struct ScanState {
    /// The line the scanner was on
    pub line: u32,
    /// The column the scanner was on
    pub column: u32,
}

impl ScanState {
    /// Constructs a new ScanState
    pub fn new(line: u32, column: u32) -> ScanState {
        ScanState { line: line, column: column }
    }
    /// Constructs an empty ScanState (line 0, column 0)
    pub fn empty() -> ScanState {
        ScanState::new(0, 0)
    }
}

/// An enum representing the various tokens that can occur
///
/// Every variant carries the `ScanState` of the position where it started.
#[derive(Debug, Clone)]
pub enum Token {
    /// An integer number
    ///
    /// The content is the number read as an i32
    Number(i32, ScanState),
    /// An identifier
    ///
    /// The content is the name of the identifier
    Ident(String, ScanState),
    /// A reserved word (or keyword)
    ///
    /// The content is the name of the keyword
    Keyword(String, ScanState),
    /// An operator
    ///
    /// The content is the name of the operator
    Operator(String, ScanState),
    /// Left parens
    Lp(ScanState),
    /// Right parens
    Rp(ScanState),
    /// A comma
    Comma(ScanState),
    /// An unexpected char was read
    ///
    /// The content is the char read
    Error(char, ScanState),
    /// End of input
    EOF(ScanState),
}

impl Token {
    /// Consumes the token and returns the `ScanState` it was found at
    pub fn state(self) -> ScanState {
        use self::Token::*;
        match self {
            Number(_, s) | Error(_, s) => s,
            Ident(_, s) | Keyword(_, s) | Operator(_, s) => s,
            Lp(s) | Rp(s) | Comma(s) | EOF(s) => s,
        }
    }
}

/// Tokens compare equal when their kind and content match.
///
/// The `ScanState` is deliberately ignored so that tokens can be compared
/// without knowing where they were found.
impl PartialEq for Token {
    fn eq(&self, other: &Token) -> bool {
        use self::Token::*;
        match (self, other) {
            (&Number(ref a, _), &Number(ref b, _)) => a == b,
            (&Ident(ref a, _), &Ident(ref b, _)) => a == b,
            (&Keyword(ref a, _), &Keyword(ref b, _)) => a == b,
            (&Operator(ref a, _), &Operator(ref b, _)) => a == b,
            (&Error(ref a, _), &Error(ref b, _)) => a == b,
            (&Lp(_), &Lp(_)) => true,
            (&Rp(_), &Rp(_)) => true,
            (&Comma(_), &Comma(_)) => true,
            (&EOF(_), &EOF(_)) => true,
            _ => false,
        }
    }
}

impl<'a> Scanner<'a> {
    /// Constructs a new Scanner from a source &str
    ///
    /// The look ahead char starts out as a synthetic space, so nothing is
    /// read from the source until the first token is requested.
    ///
    /// # Examples
    /// ```
    /// # use haumea::scanner::{Scanner, Token};
    /// let source = "1 + 1";
    /// let scanner = Scanner::new(source);
    /// assert_eq!(scanner.source_str, source);
    /// assert_eq!(scanner.peek, Some(' '));
    /// ```
    pub fn new(source: &'a str) -> Scanner<'a> {
        Scanner {
            source_str: source,
            source_chars: source.chars().peekable(),
            operator_chars: vec!['+', '=', '-', '*', '/', '<', '>', '~', '|', '&', '(', ')', '!'],
            operators: vec!["+", "=", "-", "*", "/", "<", ">", ">=", "<=",
                            "~", "|", "&", "and", "or", "not", "(", ")", "!=", "modulo"],
            ident_chars: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"
                .chars()
                .collect::<Vec<char>>(),
            reserved_words: vec!["to", "with", "is", "return", "do", "end",
                                 "if", "then", "else", "let", "be", "forever",
                                 "while", "for", "each", "in",
                                 "set", "through", "change", "by", "variable"],
            peek: Some(' '),
            column: 0,
            line: 1,
        }
    }

    /// Returns the next token in the source. Token::EOF means that all the input has been read
    ///
    /// # Panics
    /// Panics if an integer literal does not fit in an `i32`.
    ///
    /// # Examples
    /// ```
    /// # use haumea::scanner::{Scanner, Token, ScanState};
    /// let mut s = Scanner::new("1 + 1");
    /// assert_eq!(s.next_token(), Token::Number(1, ScanState::empty()));
    /// assert_eq!(s.next_token(), Token::Operator("+".to_string(), ScanState::empty()));
    /// assert_eq!(s.next_token(), Token::Number(1, ScanState::empty()));
    /// assert_eq!(s.next_token(), Token::EOF(ScanState::empty()));
    /// ```
    pub fn next_token(&mut self) -> Token {
        self.skip_white();
        let state = ScanState::new(self.line, self.column);
        let c = match self.peek {
            Some(c) => c,
            None => return Token::EOF(state),
        };
        if self.ident_chars.contains(&c) {
            self.get_ident_token(state)
        } else if c.is_digit(10) {
            Token::Number(self.get_num(), state)
        } else if c == '(' {
            self.get_char();
            Token::Lp(state)
        } else if c == ')' {
            self.get_char();
            Token::Rp(state)
        } else if c == ',' {
            self.get_char();
            Token::Comma(state)
        } else if self.operator_chars.contains(&c) {
            Token::Operator(self.get_op(), state)
        } else {
            self.get_char();
            Token::Error(c, state)
        }
    }

    /// Sets self.peek to be the next char in self.source_chars
    ///
    /// Also keeps `line` and `column` in sync with the new look ahead char.
    fn get_char(&mut self) {
        self.peek = self.source_chars.next();
        self.column += 1;
        if let Some('\n') = self.peek {
            self.line += 1;
            // The newline itself sits at column 0 of the new line so that the
            // first real char of every line is reported at column 1.
            self.column = 0;
        }
    }

    /// Returns true if the look ahead char starts a `/*` comment
    fn at_comment_start(&mut self) -> bool {
        self.peek == Some('/') && self.source_chars.peek() == Some(&'*')
    }

    /// Skips over whitespace and comments in self.source_chars
    ///
    /// Any number of comments may follow each other, with or without
    /// whitespace in between.
    fn skip_white(&mut self) {
        loop {
            while let Some(c) = self.peek {
                if !c.is_whitespace() {
                    break;
                }
                self.get_char();
            }
            if !self.at_comment_start() {
                break;
            }
            self.skip_until_comment_end();
        }
    }

    /// Skips over a single comment in self.source_chars, if one starts here
    fn skip_comments(&mut self) {
        if self.at_comment_start() {
            self.skip_until_comment_end()
        }
    }

    /// Skips until the end of a comment
    ///
    /// Must be called with the look ahead char on the `/` of an opening `/*`.
    /// Nested comments are supported. An unterminated comment extends to the
    /// end of the input instead of looping forever.
    fn skip_until_comment_end(&mut self) {
        self.get_char(); // Move onto the * of the opening /*
        self.get_char(); // Move onto the first char of the comment body
        let mut depth = 1;
        while depth > 0 {
            let next = self.source_chars.peek().cloned();
            match (self.peek, next) {
                // End of input inside a comment: nothing left to skip.
                (None, _) => break,
                (Some('*'), Some('/')) => {
                    self.get_char();
                    self.get_char();
                    depth -= 1;
                },
                (Some('/'), Some('*')) => {
                    self.get_char();
                    self.get_char();
                    depth += 1;
                },
                _ => self.get_char(),
            }
        }
    }

    /// Returns the next number that can be found in self.source_chars
    ///
    /// # Panics
    /// Panics if the digits do not form a value that fits in an `i32`.
    fn get_num(&mut self) -> i32 {
        let (line, column) = (self.line, self.column);
        let mut digits = String::new();
        while let Some(c) = self.peek {
            if !c.is_digit(10) {
                break;
            }
            digits.push(c);
            self.get_char();
        }
        match digits.parse::<i32>() {
            Ok(n) => n,
            Err(_) => panic!("At line {:}:{:}, the number {:?} does not fit in a 32-bit integer!",
                             line, column, digits),
        }
    }

    /// Returns an Token that contains the next identifier in self.source_chars
    ///
    /// It can be one of three Tokens:
    /// 1. Token::Keyword (if the identifier is a reserved word)
    /// 2. Token::Operator (if the identifier is the name of an operator like `and` or `or`)
    /// 3. Token::Ident (otherwise)
    fn get_ident_token(&mut self, state: ScanState) -> Token {
        let mut name = String::new();
        while let Some(c) = self.peek {
            if !self.ident_chars.contains(&c) {
                break;
            }
            name.push(c);
            self.get_char();
        }
        if self.reserved_words.contains(&&name[..]) {
            Token::Keyword(name, state)
        } else if self.operators.contains(&&name[..]) {
            Token::Operator(name, state)
        } else {
            Token::Ident(name, state)
        }
    }

    /// Returns a String containing the next symbol spelt operator
    ///
    /// The first char is always taken. Further chars are only appended while
    /// the result is still the beginning of a known operator, so `2*-3`
    /// scans as `*` followed by `-` and `1+(2)` scans as `+` followed by `(`.
    fn get_op(&mut self) -> String {
        let mut op = String::new();
        op.push(self.peek.unwrap());
        self.get_char();
        while let Some(c) = self.peek {
            if !self.operator_chars.contains(&c) {
                break;
            }
            let mut candidate = op.clone();
            candidate.push(c);
            if !self.operators.iter().any(|known| known.starts_with(&candidate[..])) {
                break;
            }
            op = candidate;
            self.get_char();
        }
        op
    }
}

// Implement Iterator for Scanner
impl<'a> Iterator for Scanner<'a> {
    type Item = Token;

    /// Returns the next token as an Option
    ///
    /// Token::EOF is translated into the end of the iteration
    ///
    /// # Examples
    ///```
    /// # use haumea::scanner::{Scanner, Token, ScanState};
    /// let mut s = Scanner::new("1 + 1");
    /// assert_eq!(s.next(), Some(Token::Number(1, ScanState::empty())));
    /// assert_eq!(s.next(), Some(Token::Operator("+".to_string(), ScanState::empty())));
    /// assert_eq!(s.next(), Some(Token::Number(1, ScanState::empty())));
    /// assert_eq!(s.next(), None);
    ///```
    fn next(&mut self) -> Option<Token> {
        match self.next_token() {
            Token::EOF(_) => None,
            tok => Some(tok),
        }
    }
}
Tests for `haumea::codegen` -------------------------------------------------------------------------------- /tests/test_parser.rs: -------------------------------------------------------------------------------- 1 | //! Tests for `haumea::parser` 2 | extern crate haumea; 3 | 4 | use std::rc::Rc; 5 | 6 | use haumea::scanner::*; 7 | use haumea::parser::*; 8 | use haumea::parser::Statement::*; 9 | use haumea::parser::Operator::*; 10 | use haumea::parser::Expression::*; 11 | 12 | fn assert_parsed_is(source: &str, expected: Vec) { 13 | let scanner = Scanner::new(&source); 14 | let ast: Vec = parse(scanner); 15 | assert_eq!(ast, expected); 16 | } 17 | 18 | #[test] 19 | fn test_display_addition() { 20 | let hello_world_code = "to main do 21 | display(1+2) 22 | end"; 23 | 24 | let expected_ast = vec![ 25 | Function { 26 | name: "main".to_string(), 27 | signature: None, 28 | code: Do(vec![ 29 | Rc::new(Statement::Call { 30 | function: "display".to_string(), 31 | arguments: vec![ 32 | BinaryOp { 33 | operator: Add, 34 | left: Rc::new(Integer(1)), 35 | right: Rc::new(Integer(2)) 36 | } 37 | ] 38 | }) 39 | ]) 40 | } 41 | ]; 42 | 43 | assert_parsed_is(&hello_world_code, expected_ast); 44 | } 45 | -------------------------------------------------------------------------------- /tests/test_scanner.rs: -------------------------------------------------------------------------------- 1 | //! 
Tests for `haumea::scanner` 2 | extern crate haumea; 3 | use haumea::scanner::*; 4 | use haumea::scanner::Token::*; 5 | 6 | fn assert_scan_is(source: &str, expected: Vec) { 7 | let scanner = Scanner::new(source); 8 | let found: Vec = scanner.collect(); 9 | assert_eq!(found, expected); 10 | } 11 | 12 | #[test] 13 | fn test_scanner_simple() { 14 | assert_scan_is("1+1", vec![Number(1, ScanState::empty()), Operator("+".to_string(), ScanState::empty()), Number(1, ScanState::empty())]); 15 | assert_scan_is(" 1 + 16 | 1 ", vec![Number(1, ScanState::empty()), Operator("+".to_string(), ScanState::empty()), Number(1, ScanState::empty())]); 17 | assert_scan_is("foo * I_love_bars", vec![Ident("foo".to_string(), ScanState::empty()), Operator("*".to_string(), ScanState::empty()), Ident("I_love_bars".to_string(), ScanState::empty())]); 18 | } 19 | 20 | #[test] 21 | fn test_keywords() { 22 | let keywords = vec!["to", "with", "is", "return", "do", "end", 23 | "if", "then", "else", "let", "be", "forever", 24 | "while", "for", "each", "in", 25 | "set", "to", "through", "change", "by", "variable"]; 26 | let keywords: Vec = keywords.iter().map(|kw| Keyword(kw.to_string(), ScanState::empty())).collect(); 27 | assert_scan_is("to with is return do end if then else let be forever while for each in set to through change by variable", keywords); 28 | } 29 | 30 | #[test] 31 | fn test_comments() { 32 | assert_scan_is("/* This is a comment 33 | */ 1 /*So is this */ + /* And this*/ 1", vec![Number(1, ScanState::empty()), Operator("+".to_string(), ScanState::empty()), Number(1, ScanState::empty())]); 34 | assert_scan_is("/* This /* is /* a /* very */ nested */ comment */ */ 1+1", vec![Number(1, ScanState::empty()), Operator("+".to_string(), ScanState::empty()), Number(1, ScanState::empty())]); 35 | } 36 | 37 | --------------------------------------------------------------------------------