├── .gitignore
├── examples
    └── foo.f1
├── Cargo.toml
├── src
    ├── main.rs
    ├── ast.rs
    ├── eval.rs
    └── parse.rs
├── README.md
└── Cargo.lock


/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | **/*.rs.bk
3 | 


--------------------------------------------------------------------------------
/examples/foo.f1:
--------------------------------------------------------------------------------
1 | (begin
2 | 	(define foo 1007)
3 | 	(define bar 330)
4 | 	(print (+ foo bar))) ; ~> prints 1337
5 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | authors = ["Will Speak <lithiumflame@gmail.com>"]
3 | edition = "2021"
4 | name = "formula-one"
5 | version = "0.1.0"
6 | 
7 | [dependencies]
8 | codespan = "*"
9 | 


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
 1 | #[deny(missing_docs)]
 2 | mod ast;
 3 | mod eval;
 4 | mod parse;
 5 | 
 6 | use std::fs;
 7 | use std::io::prelude::*;
 8 | 
 9 | /// Program entry point
10 | ///
11 | /// Evaluates each file named on the command line, or runs the REPL if none
12 | fn main() {
13 |     let argv = std::env::args();
14 |     if argv.len() <= 1 {
15 |         // No script arguments were given: interactive mode, one shared env.
16 |         let mut globals = eval::make_global_env();
17 |         loop {
18 |             print(eval::eval_with_env(read(), &mut globals));
19 |         }
20 |     }
21 |     for path in argv.skip(1) {
22 |         let text = fs::read_to_string(&path).expect("Could not read source file");
23 |         print(eval::eval(parse::parse(&text)));
24 |     }
25 | }
26 | 
27 | /// Prompt for one line of input on stdin and parse it
28 | ///
29 | /// Exits the process with status 0 when stdin reaches end of input.
30 | /// Panics if the prompt cannot be flushed or stdin cannot be read.
31 | fn read() -> ast::Expr {
32 |     let mut buff = String::new();
33 |     print!("\u{1F3CE}  > ");
34 |     std::io::stdout().flush().unwrap();
35 |     // `read_line` reports zero bytes only at end of input. The empty buffer
36 |     // would otherwise reach the parser, which panics when given no tokens.
37 |     if std::io::stdin().read_line(&mut buff).unwrap() == 0 {
38 |         println!();
39 |         std::process::exit(0);
40 |     }
41 |     parse::parse(&buff)
42 | }
35 | 
36 | /// Report an evaluation outcome: values after ` ~> `, errors after ` !! `
37 | fn print(result: eval::EvalResult) {
38 |     match &result {
39 |         Err(failure) => println!(" !! {}", failure),
40 |         Ok(outcome) => println!(" ~> {}", outcome),
41 |     }
42 | }
43 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 🏎 Formula One - An Experimental LISP 🏎
 2 | 
 3 | Formula One is an experiment in ways to ergonomically build syntax trees and transformations in Rust.
 4 | 
 5 | ```
 6 | 🏎  > (begin (define foo 1007) (define bar 330) (+ foo bar))
 7 |  ~> 1337
 8 | ```
 9 | 
10 | ## Blog Post
11 | 
12 | The early development of this language is discussed on my blog in [Lisp in Two Days with Rust][blogpost]
13 | 
14 |  [blogpost]: https://willspeak.me/2019/07/10/lisp-in-two-days-with-rust.html
15 |  
16 | ## Features
17 | 
18 | The language is a small subset of the LISP described in <https://norvig.com/lispy.html>. Notably it supports the following special forms:
19 | 
20 |  * `(if <cond> <then> <elze>)` for conditional evaluation of `<then>` or `<elze>`
21 |  * `(define <sym> <expr>)` binding a value to a symbol
22 |  * `(<sym> <args>...)` for calling a named function `<sym>`
23 | 
24 | All evaluation takes place in a single global environment. The language does not support user-defined functions with `lambda` or the nested environments that they would entail. Quoting of values with `'` or `quote` is also not supported. The parser recognises comments and whitespace but is yet to bind them to primary tokens as trivia.
25 | 
26 | ## 🐉 Here be Dragons 🐉
27 | 
28 | This is only intended as an experiment to develop techniques for building syntax trees in code. It isn't intended for production use.
29 | 


--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
 1 | # This file is automatically @generated by Cargo.
 2 | # It is not intended for manual editing.
 3 | version = 3
 4 | 
 5 | [[package]]
 6 | name = "codespan"
 7 | version = "0.11.1"
 8 | source = "registry+https://github.com/rust-lang/crates.io-index"
 9 | checksum = "3362992a0d9f1dd7c3d0e89e0ab2bb540b7a95fea8cd798090e758fda2899b5e"
10 | dependencies = [
11 |  "codespan-reporting",
12 | ]
13 | 
14 | [[package]]
15 | name = "codespan-reporting"
16 | version = "0.11.1"
17 | source = "registry+https://github.com/rust-lang/crates.io-index"
18 | checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e"
19 | dependencies = [
20 |  "termcolor",
21 |  "unicode-width",
22 | ]
23 | 
24 | [[package]]
25 | name = "formula-one"
26 | version = "0.1.0"
27 | dependencies = [
28 |  "codespan",
29 | ]
30 | 
31 | [[package]]
32 | name = "termcolor"
33 | version = "1.1.3"
34 | source = "registry+https://github.com/rust-lang/crates.io-index"
35 | checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
36 | dependencies = [
37 |  "winapi-util",
38 | ]
39 | 
40 | [[package]]
41 | name = "unicode-width"
42 | version = "0.1.9"
43 | source = "registry+https://github.com/rust-lang/crates.io-index"
44 | checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"
45 | 
46 | [[package]]
47 | name = "winapi"
48 | version = "0.3.9"
49 | source = "registry+https://github.com/rust-lang/crates.io-index"
50 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
51 | dependencies = [
52 |  "winapi-i686-pc-windows-gnu",
53 |  "winapi-x86_64-pc-windows-gnu",
54 | ]
55 | 
56 | [[package]]
57 | name = "winapi-i686-pc-windows-gnu"
58 | version = "0.4.0"
59 | source = "registry+https://github.com/rust-lang/crates.io-index"
60 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
61 | 
62 | [[package]]
63 | name = "winapi-util"
64 | version = "0.1.5"
65 | source = "registry+https://github.com/rust-lang/crates.io-index"
66 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
67 | dependencies = [
68 |  "winapi",
69 | ]
70 | 
71 | [[package]]
72 | name = "winapi-x86_64-pc-windows-gnu"
73 | version = "0.4.0"
74 | source = "registry+https://github.com/rust-lang/crates.io-index"
75 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
76 | 


--------------------------------------------------------------------------------
/src/ast.rs:
--------------------------------------------------------------------------------
 1 | //! Formula One Syntax Tree
 2 | //!
 3 | //! This module contains the types which define the syntax tree for
 4 | //! the language. It's basically an `enum` with the possible node
 5 | //! types.
 6 | //!
 7 | //! The LISP we have to parse is fairly simplified. Token wise we only have:
 8 | //!
 9 | //!  * `(` and `)` - punctuation
10 | //!  * `[0-9]+` - number literals
11 | //!  * Everything else is a symbol
12 | //!
13 | //! Tokens are intended to carry a list of leading and trailing trivia,
14 | //! which can include whitespace and comments; this is not yet implemented.
15 | //!
16 | //! Expression wise we have the following forms:
17 | //!
18 | //!  * `<symbol>` - reference to the variable `<symbol>`
19 | //!  * `<number>` - reference to a numeric literal
20 | //!  * `(if <cond> <then> <else>)` - condition expression.
21 | //!  * `(define <symbol> <expr>)` - defines a variable to a given
22 | //!                                 value
23 | //!  * `(<symbol> <arg>...)` - Procedure call to `<symbol>`
24 | 
25 | use codespan::*;
26 | 
27 | /// A single lexical token in the source text
28 | ///
29 | /// Each token represents a single logical item in the source text. A
30 | /// token is currently made up of two things:
31 | ///
32 | ///  * `kind` - the type of token
33 | ///  * `span` - the location of the token in the text
34 | ///
35 | /// Leading and trailing trivia are not stored on the token.
36 | #[derive(Debug, PartialEq)]
37 | pub struct Token {
38 |     pub kind: TokenKind,
39 |     span: Span,
40 | }
41 | 
42 | /// The kind of a token, one of four, with any payload it carries
43 | #[derive(Debug, PartialEq)]
44 | pub enum TokenKind {
45 |     /// The token is the `(` bracket
46 |     LeftBracket,
47 |     /// The token is the `)` bracket
48 |     RightBracket,
49 |     /// The token is a numeric literal, carrying its value
50 |     Number(i64),
51 |     /// The token is a symbol, carrying its text
52 |     Symbol(String),
53 | }
54 | 
55 | impl Token {
56 |     /// Build a token of the given `kind` located at `span`
57 |     pub fn with_span(kind: TokenKind, span: Span) -> Self {
58 |         Self { kind, span }
59 |     }
60 | }
61 | 
62 | /// Syntax expression enum
63 | ///
64 | /// Represents one of the expression forms in the language.
65 | #[derive(Debug, PartialEq)]
66 | pub enum Expr {
67 |     /// A direct reference to a variable symbol: its token and name
68 |     Symbol(Token, String),
69 |     /// A numeric literal: its token and value
70 |     Number(Token, i64),
71 |     /// A conditional: `(`, `if`, condition, then branch, else branch, close
72 |     If(Token, Token, Box<Expr>, Box<Expr>, Box<Expr>, Token),
73 |     /// A variable definition: `(`, `define`, name token, value, close
74 |     Define(Token, Token, Token, Box<Expr>, Token),
75 |     /// A function call expression: `(`, callee token, arguments, close
76 |     Call(Token, Token, Vec<Expr>, Token),
77 | }
78 | 


--------------------------------------------------------------------------------
/src/eval.rs:
--------------------------------------------------------------------------------
  1 | //! Execution and Evaluation
  2 | //!
  3 | //! This module is responsible for walking expression trees and
  4 | //! evaluating the programs that they represent. It revolves around
  5 | //! the `eval` method.
  6 | 
  7 | use super::ast;
  8 | 
  9 | use std::collections::HashMap;
 10 | use std::fmt;
 11 | 
 12 | /// A runtime value. This can be the result of evaluating an
 13 | /// expression or a binding stored in an environment. Values are
 14 | /// `Copy`, so they are passed around by value.
 15 | #[derive(Debug, PartialEq, Copy, Clone)]
 16 | pub enum Value {
 17 |     /// A numeric value
 18 |     Number(i64),
 19 |     /// A callable value, held as a function pointer
 20 |     Callable(Callable),
 21 |     /// The empty list and an invalid or placeholder value
 22 |     Nil,
 23 | }
 24 | 
 25 | impl Value {
 26 |     /// Report whether this value counts as true in a condition
 27 |     ///
 28 |     /// Only the number zero is false; every other value is true.
 29 |     fn is_truthy(&self) -> bool {
 30 |         !matches!(self, Value::Number(0))
 31 |     }
 32 | 
 33 |     /// Extract the integer held by a `Number`
 34 |     ///
 35 |     /// Panics when the value is any other variant.
 36 |     fn into_num(self) -> i64 {
 37 |         if let Value::Number(n) = self {
 38 |             return n;
 39 |         }
 40 |         panic!("can't use {:?}, it isn't a number", self)
 41 |     }
 42 | }
 43 | 
 44 | impl fmt::Display for Value {
 45 |     fn fmt(&self, out: &mut fmt::Formatter) -> fmt::Result {
 46 |         match self {
 47 |             Value::Nil => out.write_str("nil"),
 48 |             Value::Callable(func) => write!(out, "<callable {:x?}>", func),
 49 |             Value::Number(num) => write!(out, "{}", num),
 50 |         }
 51 |     }
 52 | }
 53 | 
 54 | /// Evaluation error value
 55 | ///
 56 | /// Wraps the human-readable message describing what went wrong while
 57 | /// evaluating an expression.
 58 | #[derive(Debug, PartialEq)]
 59 | pub struct EvalError(String);
 60 | 
 61 | impl fmt::Display for EvalError {
 62 |     fn fmt(&self, out: &mut fmt::Formatter) -> fmt::Result {
 63 |         out.write_fmt(format_args!("error: {}", self.0))
 64 |     }
 65 | }
 66 | 
 67 | /// Evaluation Result Type
 68 | ///
 69 | /// Returned by `eval`, `eval_with_env` and every callable. Holds a
 70 | /// `Value` on success or an `EvalError` on failure.
 71 | pub type EvalResult = Result<Value, EvalError>;
 72 | 
 73 | /// The type of a function in our LISP: evaluated arguments in, result out
 74 | type Callable = fn(Vec<Value>) -> EvalResult;
 75 | 
 76 | /// Simple Evaluation
 77 | ///
 78 | /// Evaluates `expr` in a freshly created global environment, which is
 79 | /// discarded afterwards. The main driver uses this for source files.
 80 | pub fn eval(expr: ast::Expr) -> EvalResult {
 81 |     let mut globals = make_global_env();
 82 |     eval_with_env(expr, &mut globals)
 83 | }
 84 | 
 85 | /// Main evaluation function. Walks a parsed syntax tree and reduces it
 86 | /// to a single `Value`, reading and updating bindings in the given
 87 | /// environment.
 88 | pub fn eval_with_env(expr: ast::Expr, env: &mut HashMap<String, Value>) -> EvalResult {
 89 |     match expr {
 90 |         ast::Expr::Number(_, n) => Ok(Value::Number(n)),
 91 |         ast::Expr::Symbol(_, name) => match env.get(&name) {
 92 |             Some(bound) => Ok(*bound),
 93 |             None => Err(EvalError(format!("eval: Undefined symbol {}", name))),
 94 |         },
 95 |         ast::Expr::If(_, _, cond, then, elz, _) => {
 96 |             let branch = if eval_with_env(*cond, env)?.is_truthy() {
 97 |                 then
 98 |             } else {
 99 |                 elz
100 |             };
101 |             eval_with_env(*branch, env)
102 |         }
103 |         ast::Expr::Define(_, _, sym, value, _) => {
104 |             let bound = eval_with_env(*value, env)?;
105 |             env.insert(to_sym(sym)?, bound);
106 |             Ok(bound)
107 |         }
108 |         ast::Expr::Call(_, sym, args, _) => {
109 |             let name = to_sym(sym)?;
110 |             let callee = match env.get(&name) {
111 |                 Some(Value::Callable(c)) => *c,
112 |                 _ => return Err(EvalError(format!("eval: Invalid function {}", name))),
113 |             };
114 |             let values: Result<Vec<_>, _> = args.into_iter().map(|arg| eval_with_env(arg, env)).collect();
115 |             callee(values?)
116 |         }
117 |     }
118 | }
119 | 
120 | /// Convert a token to a symbol.
121 | fn to_sym(token: ast::Token) -> Result<String, EvalError> {
122 |     match token.kind {
123 |         ast::TokenKind::Symbol(s) => Ok(s),
124 |         other => Err(EvalError(format!("Token '{:?}' is not symbol", other))),
125 |     }
126 | }
127 | 
128 | /// Return the final element of `values`, or `Nil` when it is empty
129 | fn last_or_nil(values: Vec<Value>) -> Value {
130 |     values.last().map_or(Value::Nil, |&last| last)
131 | }
132 | 
133 | /// Create the global environment. This is the root environment and
134 | /// has the builtin operators and functions defined in it.
135 | ///
136 | /// The arithmetic builtins (`+`, `*`, `-`, `/`) and `exit` report an
137 | /// `EvalError` instead of panicking when an argument is not a number,
138 | /// and the arithmetic builtins also do so when a result does not fit in
139 | /// an `i64` or a divisor is zero.
140 | pub fn make_global_env() -> HashMap<String, Value> {
141 |     /// Read `value` as a number, or report that it isn't one
142 |     fn num(value: &Value) -> Result<i64, EvalError> {
143 |         match value {
144 |             // `into_num` cannot panic here: the variant was just checked.
145 |             Value::Number(_) => Ok(value.into_num()),
146 |             other => Err(EvalError(format!("can't use {:?}, it isn't a number", other))),
147 |         }
148 |     }
149 | 
150 |     /// Turn the `None` of a checked operation into an overflow error
151 |     fn checked(op: &str, result: Option<i64>) -> Result<i64, EvalError> {
152 |         result.ok_or_else(|| EvalError(format!("Integer overflow: {}", op)))
153 |     }
154 | 
155 |     /// Divide, rejecting a zero divisor and an unrepresentable quotient
156 |     fn div(lhs: i64, rhs: i64) -> Result<i64, EvalError> {
157 |         if rhs == 0 {
158 |             return Err(EvalError("Division by zero".into()));
159 |         }
160 |         checked("/", lhs.checked_div(rhs))
161 |     }
162 | 
163 |     /// Combine `first` with each of `rest` in order using `op`
164 |     fn reduce(
165 |         first: i64,
166 |         rest: &[Value],
167 |         op: fn(i64, i64) -> Result<i64, EvalError>,
168 |     ) -> EvalResult {
169 |         let mut acc = first;
170 |         for value in rest {
171 |             acc = op(acc, num(value)?)?;
172 |         }
173 |         Ok(Value::Number(acc))
174 |     }
175 | 
176 |     let mut env = HashMap::new();
177 | 
178 |     env.insert(
179 |         "print".into(),
180 |         Value::Callable(|values| {
181 |             for value in values.iter() {
182 |                 println!("{}", value);
183 |             }
184 |             Ok(last_or_nil(values))
185 |         }),
186 |     );
187 |     env.insert(
188 |         "exit".into(),
189 |         Value::Callable(|values| {
190 |             // The last argument is the status; with no arguments it is 0.
191 |             let status = values.last().map_or(Ok(0), num)?;
192 |             // `as` keeps only the low 32 bits of an out-of-range status.
193 |             std::process::exit(status as i32)
194 |         }),
195 |     );
196 |     env.insert(
197 |         "begin".into(),
198 |         Value::Callable(|values| Ok(last_or_nil(values))),
199 |     );
200 |     env.insert(
201 |         "+".into(),
202 |         Value::Callable(|values| reduce(0, &values, |a, b| checked("+", a.checked_add(b)))),
203 |     );
204 |     env.insert(
205 |         "*".into(),
206 |         Value::Callable(|values| reduce(1, &values, |a, b| checked("*", a.checked_mul(b)))),
207 |     );
208 |     env.insert(
209 |         "-".into(),
210 |         Value::Callable(|values| match values.split_first() {
211 |             // (-) ~> 0 ; apparently
212 |             None => Ok(Value::Number(0)),
213 |             Some((first, [])) => checked("-", num(first)?.checked_neg()).map(Value::Number),
214 |             Some((first, rest)) => {
215 |                 reduce(num(first)?, rest, |a, b| checked("-", a.checked_sub(b)))
216 |             }
217 |         }),
218 |     );
219 |     env.insert(
220 |         "/".into(),
221 |         Value::Callable(|values| match values.split_first() {
222 |             None => Err(EvalError("Wrong number of arguments: /, 0".into())),
223 |             // (/ x) is the reciprocal, 1 / x
224 |             Some((first, [])) => div(1, num(first)?).map(Value::Number),
225 |             Some((first, rest)) => reduce(num(first)?, rest, div),
226 |         }),
227 |     );
228 | 
229 |     env
230 | }
200 | 


--------------------------------------------------------------------------------
/src/parse.rs:
--------------------------------------------------------------------------------
  1 | //! Syntax Parser
  2 | //!
  3 | //! The syntax parser is responsible for taking buffers of characters
  4 | //! and returning structured syntax trees.
  5 | 
  6 | use super::ast;
  7 | use codespan::*;
  8 | 
  9 | /// Tokenisation state
 10 | ///
 11 | /// Each variant represents a state in the DFA used by the tokeniser
 12 | /// to recognise source text.
 13 | enum TokeniseState {
 14 |     /// Initial token state. This is not a final state.
 15 |     Start,
 16 |     /// Left parenthesis seen. This is a final state.
 17 |     Lparen,
 18 |     /// Right parenthesis seen. This is a final state.
 19 |     Rparen,
 20 |     /// One or more digits seen. This is a final state.
 21 |     Number,
 22 |     /// One or more symbol characters seen. This is a final state.
 23 |     Symbol,
 24 |     /// One or more whitespace characters seen. Produces no token.
 25 |     Whitespace,
 26 |     /// A `;` and the rest of its line seen. Produces no token.
 27 |     Comment,
 28 | }
 29 | 
 30 | /// Tokenise a given string
 31 | ///
 32 | /// Runs the state machine over `source` repeatedly, taking the longest
 33 | /// match each time. Whitespace and comments produce no tokens.
 34 | fn tokenise(source: &str) -> Vec<ast::Token> {
 35 |     use TokeniseState::*;
 36 | 
 37 |     let mut result = Vec::new();
 38 |     let mut start = 0;
 39 | 
 40 |     loop {
 41 |         let mut state = Start;
 42 |         let mut end = start;
 43 | 
 44 |         // Search through the remaining characters until the state
 45 |         // machine can make no further transitions.
 46 |         for c in source[start as usize..].chars() {
 47 |             // This two-level match encodes the state transitions for
 48 |             // the automaton. First we dispatch based on the current
 49 |             // state, then the character we are looking at.
 50 |             let next = match state {
 51 |                 Start => match c {
 52 |                     '(' => Some(Lparen),
 53 |                     ')' => Some(Rparen),
 54 |                     '0'..='9' => Some(Number),
 55 |                     'a'..='z'
 56 |                     | 'A'..='Z'
 57 |                     | '!'
 58 |                     | '%'
 59 |                     | '&'
 60 |                     | '*'
 61 |                     | '+'
 62 |                     | '-'
 63 |                     | '.'
 64 |                     | '/'
 65 |                     | ':'
 66 |                     | '<'
 67 |                     | '='
 68 |                     | '>'
 69 |                     | '?'
 70 |                     | '@'
 71 |                     | '$'
 72 |                     | '^' => Some(Symbol),
 73 |                     ';' => Some(Comment),
 74 |                     c if c.is_whitespace() => Some(Whitespace),
 75 |                     _ => None, // unrecognised character: no transition
 76 |                 },
 77 |                 Lparen | Rparen => None,
 78 |                 Number => match c {
 79 |                     '0'..='9' => Some(Number),
 80 |                     _ => None,
 81 |                 },
 82 |                 Symbol => match c {
 83 |                     'A'..='Z'
 84 |                     | 'a'..='z'
 85 |                     | '!'
 86 |                     | '%'
 87 |                     | '&'
 88 |                     | '*'
 89 |                     | '+'
 90 |                     | '-'
 91 |                     | '.'
 92 |                     | '/'
 93 |                     | ':'
 94 |                     | '<'
 95 |                     | '='
 96 |                     | '>'
 97 |                     | '?'
 98 |                     | '@'
 99 |                     | '$'
100 |                     | '^'
101 |                     | '0'..='9' => Some(Symbol),
102 |                     _ => None,
103 |                 },
104 |                 Whitespace => {
105 |                     if c.is_whitespace() {
106 |                         Some(Whitespace)
107 |                     } else {
108 |                         None
109 |                     }
110 |                 }
111 |                 Comment => {
112 |                     if c == '\r' || c == '\n' {
113 |                         None
114 |                     } else {
115 |                         Some(Comment)
116 |                     }
117 |                 }
118 |             };
119 | 
120 |             // If we transitioned then accept the character by moving
121 |             // our `end` index past it; otherwise the match is complete.
122 |             if let Some(next_state) = next {
123 |                 state = next_state;
124 |                 end += c.len_utf8();
125 |             } else {
126 |                 break;
127 |             }
128 |         }
129 | 
130 |         let token_str = &source[start..end];
131 |         let span = Span::new((start as u32) + 1, (end as u32) + 1); // 1-based byte offsets
132 | 
133 |         start = end;
134 | 
135 |         // Every state other than `Start` ends a match. Choose the
136 |         // token kind from the state we have landed in; whitespace
137 |         // and comments are discarded.
138 |         let kind = match state {
139 |             // No transition was followed from the start state: the input
140 |             // is exhausted or the next character starts no token. Stop.
141 |             Start => break,
142 |             Lparen => ast::TokenKind::LeftBracket,
143 |             Rparen => ast::TokenKind::RightBracket,
144 |             Number => ast::TokenKind::Number(token_str.parse().unwrap()), // panics on i64 overflow
145 |             Symbol => ast::TokenKind::Symbol(token_str.into()),
146 |             // Skip whitespace and comments for now
147 |             Whitespace | Comment => continue,
148 |         };
149 | 
150 |         result.push(ast::Token::with_span(kind, span));
151 |     }
152 | 
153 |     result
154 | }
155 | 
156 | /// Parser state structure
157 | ///
158 | /// Contains the lookahead information for the parser
159 | struct ParseState<I: Iterator<Item = ast::Token>>(std::iter::Peekable<I>);
160 | 
161 | impl<I> ParseState<I>
162 | where
163 |     I: Iterator<Item = ast::Token>,
164 | {
165 |     /// Parse one expression from the front of the token stream
166 |     ///
167 |     /// Panics if no tokens remain or the next token is a `)`.
168 |     fn parse_expr(&mut self) -> ast::Expr {
169 |         use ast::TokenKind as Kind;
170 |         let token = match self.0.next() {
171 |             Some(token) => token,
172 |             None => panic!("invalid expression."),
173 |         };
174 | 
175 |         match token.kind {
176 |             Kind::LeftBracket => self.parse_form(token),
177 |             Kind::RightBracket => panic!("unexpected token!"),
178 |             Kind::Number(value) => ast::Expr::Number(token, value),
179 |             Kind::Symbol(ref name) => {
180 |                 // Copy the text out before the token is moved into the node.
181 |                 let name = name.clone();
182 |                 ast::Expr::Symbol(token, name)
183 |             }
184 |         }
185 |     }
186 | 
187 |     /// Parse the rest of a bracketed form; `open` is its already-consumed `(`
188 |     ///
189 |     /// The next token must be a symbol. `if` and `define` select the special
190 |     /// forms and any other symbol is parsed as a call. Panics when the head
191 |     /// is not a symbol or the tokens run out before the form is complete.
192 |     fn parse_form(&mut self, open: ast::Token) -> ast::Expr {
193 |         // Classify the head while it is only borrowed, so that it can be
194 |         // consumed afterwards without cloning its text.
195 |         let head = match self.0.peek() {
196 |             Some(ast::Token { kind: ast::TokenKind::Symbol(name), .. }) => name.as_str(),
197 |             _ => panic!("invalid expression"),
198 |         };
199 |         let (is_if, is_define) = (head == "if", head == "define");
200 |         let head_tok = self.0.next().unwrap();
201 | 
202 |         if is_if {
203 |             // (if <cond> <then> <else>)
204 |             let cond = Box::new(self.parse_expr());
205 |             let if_true = Box::new(self.parse_expr());
206 |             let if_false = Box::new(self.parse_expr());
207 |             // The closing token is taken as-is, without checking it is `)`.
208 |             let close = self.0.next().unwrap();
209 |             return ast::Expr::If(open, head_tok, cond, if_true, if_false, close);
210 |         }
211 | 
212 |         if is_define {
213 |             // (define <symbol> <expr>): the name token is stored unchecked
214 |             let sym_tok = self.0.next().unwrap();
215 |             let value = Box::new(self.parse_expr());
216 |             let close = self.0.next().unwrap();
217 |             return ast::Expr::Define(open, head_tok, sym_tok, value, close);
218 |         }
219 | 
220 |         // (<symbol> <arg>...): gather arguments up to the closing bracket
221 |         let mut args = Vec::new();
222 |         while self.0.peek().map_or(false, |next| next.kind != ast::TokenKind::RightBracket) {
223 |             args.push(self.parse_expr());
224 |         }
225 |         // Running out of tokens before the `)` makes this `unwrap` panic.
226 |         let close = self.0.next().unwrap();
227 |         ast::Expr::Call(open, head_tok, args, close)
228 |     }
229 | }
231 | 
232 | /// Parse source text into a structured AST expression
233 | ///
234 | /// Tokenises `source` and parses the first expression from the resulting
235 | /// tokens; any tokens after that expression are ignored.
236 | pub fn parse(source: &str) -> ast::Expr {
237 |     let mut state = ParseState(tokenise(source).into_iter().peekable());
238 |     state.parse_expr()
239 | }
240 | 
241 | #[cfg(test)]
242 | mod test {
243 | 
244 |     use super::*;
245 | 
246 |     /// Build the token expected for `kind` with the span `start..end`
247 |     fn tok(kind: ast::TokenKind, start: u32, end: u32) -> ast::Token {
248 |         ast::Token::with_span(kind, Span::new(ByteIndex(start), ByteIndex(end)))
249 |     }
250 | 
251 |     /// Shorthand for an expected symbol token
252 |     fn sym(text: &str, start: u32, end: u32) -> ast::Token {
253 |         tok(ast::TokenKind::Symbol(text.into()), start, end)
254 |     }
255 | 
256 |     /// Shorthand for an expected `(` token starting at `at`
257 |     fn open(at: u32) -> ast::Token {
258 |         tok(ast::TokenKind::LeftBracket, at, at + 1)
259 |     }
260 | 
261 |     /// Shorthand for an expected `)` token starting at `at`
262 |     fn close(at: u32) -> ast::Token {
263 |         tok(ast::TokenKind::RightBracket, at, at + 1)
264 |     }
265 | 
266 |     /// The expression expected for a number literal whose span ends at `end`
267 |     fn num_expr(value: i64, end: u32) -> ast::Expr {
268 |         ast::Expr::Number(tok(ast::TokenKind::Number(value), 1, end), value)
269 |     }
270 | 
271 |     #[test]
272 |     fn tokenise_number_literals() {
273 |         assert_eq!(vec![tok(ast::TokenKind::Number(0), 1, 2)], tokenise("0"));
274 |         assert_eq!(vec![tok(ast::TokenKind::Number(1234), 1, 5)], tokenise("1234"));
275 |     }
276 | 
277 |     #[test]
278 |     fn tokenise_symbols() {
279 |         assert_eq!(vec![sym("hello/world", 1, 12)], tokenise("hello/world"));
280 |         assert_eq!(vec![sym("hello", 1, 6), sym("world", 7, 12)], tokenise("hello world"));
281 |         assert_eq!(vec![sym("hello.world", 1, 12)], tokenise("hello.world"));
282 |         assert_eq!(vec![sym("+", 1, 2)], tokenise("+"))
283 |     }
284 | 
285 |     #[test]
286 |     fn tokenise_brackets() {
287 |         assert_eq!(vec![open(1)], tokenise("("));
288 |         assert_eq!(vec![close(1)], tokenise(")"));
289 |         assert_eq!(vec![open(1), close(2)], tokenise("()"));
290 |         assert_eq!(
291 |             vec![open(1), open(2), open(3), close(4), close(5), close(6)],
292 |             tokenise("((()))")
293 |         );
294 |     }
295 | 
296 |     #[test]
297 |     fn tokenise_comments() {
298 |         let none = Vec::<ast::Token>::new();
299 |         assert_eq!(none, tokenise("; hello world"));
300 |         assert_eq!(none, tokenise("; hello world\n; another comment\r\n; windows eol"));
301 |     }
302 | 
303 |     #[test]
304 |     fn parse_atoms() {
305 |         assert_eq!(num_expr(64, 3), parse("64"));
306 |         assert_eq!(num_expr(12364, 6), parse("12364"));
307 |         assert_eq!(num_expr(9223372036854775807, 20), parse("9223372036854775807"));
308 |     }
309 | }
405 | 


--------------------------------------------------------------------------------