├── .editorconfig ├── .gitignore ├── Cargo.toml ├── README.md └── src ├── compiler ├── memory.rs ├── mod.rs └── naive.rs ├── lib.rs ├── parser.rs └── syntax.rs /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | root = true 3 | 4 | [*.rs] 5 | trim_trailing_whitespace = true 6 | insert_final_newline = true 7 | max_line_length = 80 8 | 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | target/ 12 | **/*.rs.bk 13 | Cargo.lock 14 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "smoltok" 3 | version = "0.1.0" 4 | authors = ["Matt Parsons "] 5 | 6 | [dependencies] 7 | combine = "2.5.*" 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # smoltok 2 | 3 | An implementation of Smalltalk 80 in Rust based on the freely available [blue book](http://www.mirandabanda.org/bluebook/). 4 | 5 | ## Parser 6 | 7 | The parser is implemented using [`combine`](https://github.com/Marwes/combine). 8 | I'm coming from Haskell, so the Parsec inspiration was familiar and welcome. 9 | 10 | The parser was initially implemented by reading the first chapter on syntax and following along. 
11 | That proved to be a bit treacherous, with quite a few subtle implementation bugs. 12 | Fortunately, the back of the book provided a railway syntax chart, which made the parser quite easy to write. 13 | -------------------------------------------------------------------------------- /src/compiler/memory.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | #[derive(Debug, PartialEq, Clone, Eq, Hash)] 4 | pub struct Object { 5 | pub class: Box, 6 | pub fields: Vec 7 | } 8 | 9 | /// A newtype wrapper around `usize`, used for accessing values in the `Heap`. 10 | #[derive(Debug, PartialEq, Clone, Eq, Hash)] 11 | pub struct Pointer(usize); 12 | 13 | #[derive(Debug, PartialEq, Clone, Eq, Hash)] 14 | /// A `Class` is a newtype wrapper around an Object. All classes are objects, 15 | /// after all -- but sometimes it helps to distinguish them. 16 | pub struct Class { obj: Object } 17 | 18 | #[derive(Debug, PartialEq, Clone)] 19 | /// The `Heap` record is the heap of objects for the Smoltok runtime.
It 20 | /// contains methods that operate on those objects. 21 | pub struct Heap { 22 | pub objects: HashMap<Pointer, Object>, 23 | pub curr_idx: Pointer, 24 | } 25 | 26 | impl Heap { 27 | pub fn new() -> Self { 28 | Heap { 29 | objects: HashMap::new(), 30 | curr_idx: Pointer(0) 31 | } 32 | } 33 | 34 | fn access(&self, ptr: &Pointer) -> Option<&Object> { 35 | self.objects.get(ptr) 36 | } 37 | 38 | // object pointer access 39 | /// Returns the object referenced by field `field_idx` of `of_object`, or `None` when the index is out of range or the pointer is not in the heap. 40 | pub fn fetch_pointer(&self, field_idx: usize, of_object: Object) 41 | -> Option<&Object> { 42 | let ptr = of_object.fields.get(field_idx); 43 | ptr.and_then(|p| self.access(p)) 44 | } 45 | 46 | // pub fn store_pointer(mut self, ix: usize, object: Object, value: Object) { 47 | // let ptr = object.fields[ix]; 48 | // 49 | // 50 | // } 51 | } 52 | -------------------------------------------------------------------------------- /src/compiler/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod naive; 2 | pub mod memory; 3 | -------------------------------------------------------------------------------- /src/compiler/naive.rs: -------------------------------------------------------------------------------- 1 | // a naive translation of the smalltalk compiler 2 | 3 | pub struct Interpreter { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate combine; 3 | 4 | pub mod syntax; 5 | pub mod compiler; 6 | pub mod parser; 7 | 8 | #[cfg(test)] 9 | mod tests { 10 | #[test] 11 | fn it_works() { 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/parser.rs: -------------------------------------------------------------------------------- 1 | /// Parser for the Smalltalk programming language.
2 | 3 | use combine::{none_of, many, many1, try, token, optional}; 4 | use combine::Parser; 5 | use combine::primitives::Stream; 6 | use combine::combinator::*; 7 | use combine::char::*; 8 | 9 | use syntax::*; 10 | 11 | parser! { 12 | fn expr[I]()(I) -> Expr 13 | where [I: Stream] 14 | { 15 | try( 16 | (ident(), assignment(), expr()) 17 | .map(|t| Expr::Assign(t.0, Box::new(t.2))) 18 | ).or(try(cascaded_message_expr())) 19 | .or(try(message_expr())) 20 | .or(try(primary())) 21 | .or(try(method_p().map(Expr::Method))) 22 | } 23 | } 24 | 25 | parser! { 26 | fn unary_object[I]()(I) -> Expr 27 | where [I: Stream] 28 | { 29 | look_ahead(any()).then(|_|primary().or(unary_expr())) 30 | } 31 | } 32 | 33 | /// The mutual recursion between unary object and unary expr is the cause of the 34 | /// problem. 35 | parser! { 36 | fn unary_expr[I]()(I) -> Expr 37 | where [I: Stream] 38 | { 39 | (unary_object(), unary_selector()) 40 | .map(|(o, s)| 41 | Expr::Message { 42 | receiver: Box::new(o), 43 | selector: s 44 | } 45 | ) 46 | } 47 | } 48 | 49 | parser! { 50 | fn unary_selector[I]()(I) -> Msg 51 | where [I: Stream] 52 | { 53 | ident().map(Msg::Unary) 54 | } 55 | } 56 | 57 | parser! { 58 | fn binary_object[I]()(I) -> Expr 59 | where [I: Stream] 60 | { 61 | spaces().then(|_| look_ahead(any()).then(|_| unary_object().or(binary_expr()))) 62 | } 63 | } 64 | 65 | parser! { 66 | fn binary_expr[I]()(I) -> Expr 67 | where [I: Stream] 68 | { 69 | (binary_object(), binary_selector(), unary_object()) 70 | .map(|(bin_o, bin_sel, obj)| 71 | Expr::Message { 72 | receiver: Box::new(bin_o), 73 | selector: Msg::Binary(bin_sel, Box::new(obj)) 74 | } 75 | ) 76 | } 77 | } 78 | 79 | parser! 
{ 80 | fn keyword_expr[I]()(I) -> Expr 81 | where [I: Stream] 82 | { 83 | (binary_object(), 84 | many1( 85 | (spaces(), keyword_lit(), binary_object()) 86 | .map(|(_, s, o)| Keyword { 87 | id: Ident(s), 88 | val: o 89 | }) 90 | ) 91 | ).map(|(bin_obj, exprs): (_, Vec)| 92 | Expr::Message { 93 | receiver: Box::new(bin_obj), 94 | selector: Msg::Kwargs(exprs) 95 | } 96 | ) 97 | } 98 | } 99 | 100 | parser! { 101 | fn message_expr[I]()(I) -> Expr 102 | where [I: Stream] 103 | { 104 | try(keyword_expr()) 105 | .or(try(binary_expr())) 106 | .or(unary_expr()) 107 | } 108 | } 109 | 110 | parser! { 111 | fn cascaded_message_expr[I]()(I) -> Expr 112 | where [I: Stream] 113 | { 114 | let next = ( 115 | optional(token(';')), 116 | unary_selector() 117 | .or( 118 | (binary_selector(), unary_object()) 119 | .map(|(bin_sel, expr)| 120 | Msg::Binary(bin_sel, Box::new(expr)) 121 | ) 122 | ).or( 123 | many1( 124 | (keyword_lit(), binary_object()) 125 | .map(|(id, val)| Keyword { id: Ident(id), val }) 126 | ).map(Msg::Kwargs) 127 | ) 128 | ).map(|t| t.1); 129 | (message_expr(), many1(next)) 130 | .map(|(a, b): (_, Vec)| { 131 | b.iter().fold(a, |acc, msg| Expr::Message { 132 | receiver: Box::new(acc), 133 | selector: msg.clone() 134 | }) 135 | }) 136 | } 137 | } 138 | 139 | parser! { 140 | fn keyword_lit[I]()(I) -> String 141 | where [I: Stream] 142 | { 143 | (ident(), token(':'), spaces()).map(|(Ident(i), _, _)| format!("{}:", i)) 144 | } 145 | } 146 | 147 | parser! { 148 | fn primary[I]()(I) -> Expr 149 | where [I: Stream] 150 | { 151 | ident().map(Expr::Id) 152 | .or(literal().map(Expr::Lit)) 153 | .or(block()) 154 | .or( 155 | between( 156 | (token('('), spaces()), 157 | token(')'), 158 | expr() 159 | ) 160 | ) 161 | } 162 | } 163 | 164 | parser! 
{ 165 | fn block[I]()(I) -> Expr 166 | where [I: Stream] 167 | { 168 | between( 169 | (token('['), spaces()), 170 | token(']'), 171 | (block_vars(), token('|'), spaces(), statements()) 172 | .map(|(vars, _, _, statements)| Expr::Block { vars, statements }) 173 | ) 174 | } 175 | } 176 | 177 | parser! { 178 | fn statements[I]()(I) -> Vec 179 | where [I: Stream] 180 | { 181 | (token('^'), spaces(), expr()).map(|(_, _, e)| vec![Statement::Ret(e)]) 182 | .or( 183 | try((expr(), token('.'), spaces(), statements())) 184 | .map(|(e, _, _, s)| { 185 | let mut m = Vec::new(); 186 | m.push(Statement::E(e)); 187 | m.extend(s); 188 | m 189 | }) 190 | ).or( 191 | expr().map(|e| vec![Statement::E(e)]) 192 | ).or(value(vec![])) 193 | } 194 | } 195 | 196 | 197 | parser! { 198 | fn block_vars[I]()(I) -> Vec 199 | where [I: Stream] 200 | { 201 | many1((token(':'), ident()).map(|t| t.1)) 202 | } 203 | } 204 | 205 | 206 | /// Parse an identifier. 207 | parser! { 208 | fn ident[I]()(I) -> Ident 209 | where [I: Stream] 210 | { 211 | (letter(), many(alpha_num()), spaces()).map(|(c, cs, _): (char, String, _)| 212 | Ident(format!("{}{}", c, cs)) 213 | ) 214 | } 215 | } 216 | 217 | /// Parse the assignment arrow `<-` and any whitespace that follows it. The 218 | /// target identifier is parsed by the caller, so this parser yields `()`. 219 | parser! { 220 | fn assignment[I]()(I) -> () 221 | where [I: Stream] 222 | { 223 | ( string("<-"), 224 | spaces(), 225 | ).map(|(_, e)| e) 226 | } 227 | } 228 | 229 | /// Parse an integral number. 230 | parser! { 231 | fn digits[I]()(I) -> u32 232 | where [I: Stream] 233 | { 234 | many1(digit()) 235 | .and_then(|s: String| s.parse()) 236 | } 237 | } 238 | 239 | /// Parse an uppercase character or a digit. 240 | parser! { 241 | fn upper_digit[I]()(I) -> char 242 | where [I: Stream] 243 | { 244 | digit().or(upper()) 245 | } 246 | } 247 | 248 | /// Parse a Smalltalk number. 249 | parser!
{ 250 | fn number[I]()(I) -> Num 251 | where [I: Stream] 252 | { 253 | struct_parser!{ 254 | Num { 255 | radix: optional(try( 256 | (digits(), 257 | token('r') 258 | ).map(|t| t.0 as u8) 259 | )), 260 | integer: many1(upper_digit()), 261 | mantissa: optional( 262 | (token('.'), 263 | many1(upper_digit()) 264 | ).map(|t| t.1)), 265 | exponent: optional( 266 | (token('e'), 267 | digits() 268 | ).map(|t| t.1) 269 | ) 270 | } 271 | } 272 | } 273 | } 274 | 275 | /// Parse a Smalltalk character. 276 | parser! { 277 | fn sm_char[I]()(I) -> Literal 278 | where [I:Stream] 279 | { 280 | (token('$'), 281 | any() 282 | ).map(|t| Literal::Char(t.1)) 283 | } 284 | } 285 | 286 | /// Parse a Smalltalk string. 287 | parser! { 288 | fn sm_string[I]()(I) -> Literal 289 | where [I:Stream] 290 | { 291 | (token('\''), 292 | many( 293 | none_of("'".chars()) 294 | .or(try(string("''").map(|_| '\'' ))) 295 | ), 296 | token('\'') 297 | ).map(|t| Literal::Str(t.1)) 298 | } 299 | } 300 | 301 | parser! { 302 | fn array[I]()(I) -> Literal 303 | where [I:Stream] 304 | { 305 | between( 306 | token('('), 307 | token(')'), 308 | sep_by( 309 | number().map(Literal::Number) 310 | .or(symbol()) 311 | .or(sm_string()) 312 | .or(sm_char()) 313 | .or(array()), 314 | spaces() 315 | ) 316 | ).map(Literal::Array) 317 | } 318 | } 319 | 320 | parser! { 321 | fn symbol[I]()(I) -> Literal 322 | where [I:Stream] 323 | { 324 | ident().map(|Ident(i)| Literal::Symbol(i)) 325 | .or(binary_selector().map(Literal::Symbol)) 326 | .or( 327 | many1(keyword_lit()) 328 | .map(|kws: Vec<_>| Literal::Symbol(kws.join(""))) 329 | ) 330 | 331 | } 332 | } 333 | 334 | parser! { 335 | fn binary_selector[I]()(I) -> String 336 | where [I:Stream] 337 | { 338 | spaces().then(|_| (special_char(), optional(special_char()), spaces()) 339 | .or(token('-').map(|t| (t, None, ()))) 340 | .map(|(c, mc, _)| match mc { 341 | Some(x) => format!("{}{}", c, x), 342 | None => format!("{}", c) 343 | })) 344 | 345 | } 346 | } 347 | 348 | parser! 
{ 349 | fn special_char[I]()(I) -> char 350 | where [I:Stream] 351 | { 352 | one_of("+/\\*~<>=@%|&?!".chars()) 353 | } 354 | } 355 | 356 | parser! { 357 | fn message_pattern[I]()(I) -> MsgPat 358 | where [I:Stream] 359 | { 360 | let kwargs = many1( 361 | try((keyword_lit(), ident())) 362 | .map(|(k, var)| KeyPat { keyword: Ident(k), var }) 363 | ).map(MsgPat::Kwargs); 364 | 365 | let bin = (binary_selector(), ident()).map(|(a, b)| MsgPat::Bin(Ident(a), b)); 366 | try(kwargs) 367 | .or(try(bin)) 368 | .or(ident().map(MsgPat::Unary)) 369 | } 370 | } 371 | 372 | parser! { 373 | fn temporaries[I]()(I) -> Vec 374 | where [I:Stream] 375 | { 376 | between( 377 | token('|').then(|_| spaces()), 378 | token('|').then(|_| spaces()), 379 | many1(ident()) 380 | ) 381 | } 382 | } 383 | 384 | parser! { 385 | fn method_p[I]()(I) -> Method 386 | where [I:Stream] 387 | { 388 | ( message_pattern(), 389 | optional(temporaries()), 390 | optional(statements()) 391 | ).map(|(sig, temps, stmts)| 392 | Method { sig, temps, stmts } 393 | ) 394 | } 395 | } 396 | 397 | /// Parse any kind of Smalltalk literal. Don't worry. Just throw whatever you 398 | /// got at it. 399 | parser! 
{ 400 | fn literal[I]()(I) -> Literal 401 | where [I:Stream] 402 | { 403 | spaces().then(|_| number().map(Literal::Number) 404 | .or(sm_char()) 405 | .or(sm_string()) 406 | .or((token('#'), array().or(symbol())).map(|t| t.1)) 407 | ) 408 | } 409 | } 410 | 411 | #[cfg(test)] 412 | mod tests { 413 | use super::*; 414 | 415 | fn is_err(x : Result) -> bool { 416 | match x { 417 | Ok(_) => false, 418 | _ => true, 419 | } 420 | } 421 | 422 | #[test] 423 | fn test_digits() { 424 | let ans: u32 = 10; 425 | let res = digits().parse("10"); 426 | assert_eq!(res, Ok((ans, ""))); 427 | } 428 | 429 | #[test] 430 | fn test_bare_number() { 431 | let res = number().parse("10"); 432 | let ans = Num::int_from_str("10"); 433 | assert_eq!(res, Ok((ans, ""))); 434 | } 435 | 436 | #[test] 437 | fn test_exponent() { 438 | let res = number().parse("10e3"); 439 | let ans = Num { 440 | integer: String::from("10"), 441 | exponent: Some(3), 442 | mantissa: None, 443 | radix: None, 444 | }; 445 | assert_eq!(res, Ok((ans, ""))); 446 | } 447 | 448 | #[test] 449 | fn test_full_number() { 450 | let res = number().parse("10r10.5e3"); 451 | let ans = Num { 452 | integer: String::from("10"), 453 | exponent: Some(3), 454 | mantissa: Some(String::from("5")), 455 | radix: Some(10), 456 | }; 457 | assert_eq!(res, Ok((ans, ""))); 458 | } 459 | 460 | #[test] 461 | fn test_float() { 462 | let res = number().parse("123.456"); 463 | let ans = Num { 464 | integer: String::from("123"), 465 | exponent: None, 466 | mantissa: Some(String::from("456")), 467 | radix: None, 468 | }; 469 | assert_eq!(res, Ok((ans, ""))); 470 | } 471 | 472 | #[test] 473 | fn test_radix() { 474 | let res = number().parse("16rAC.DCe10"); 475 | let ans = Num { 476 | integer: String::from("AC"), 477 | exponent: Some(10), 478 | mantissa: Some(String::from("DC")), 479 | radix: Some(16), 480 | }; 481 | assert_eq!(res, Ok((ans, ""))); 482 | } 483 | 484 | #[test] 485 | fn test_char() { 486 | let res = sm_char().parse("$a"); 487 | let ans = 
Literal::Char('a'); 488 | assert_eq!(res, Ok((ans, ""))); 489 | } 490 | 491 | #[test] 492 | fn test_string() { 493 | let res = sm_string().parse("'hello world'"); 494 | let ans = Literal::Str(String::from("hello world")); 495 | assert_eq!(res, Ok((ans, ""))); 496 | } 497 | 498 | #[test] 499 | fn test_string_quotes() { 500 | let res = sm_string().parse("'hello ''world'''"); 501 | let ans = Literal::Str(String::from("hello 'world'")); 502 | assert_eq!(res, Ok((ans, ""))); 503 | } 504 | 505 | #[test] 506 | fn test_symbol() { 507 | let res = literal().parse("#foobar123"); 508 | let ans = Literal::Symbol(String::from("foobar123")); 509 | assert_eq!(res, Ok((ans, ""))); 510 | } 511 | 512 | #[test] 513 | fn test_literal() { 514 | let res = literal().parse("#('hello' 123 world)"); 515 | let ans = Literal::Array(vec![ 516 | Literal::Str(String::from("hello")), 517 | Literal::Number(Num::int_from_str("123")), 518 | Literal::Symbol(String::from("world")), 519 | ]); 520 | assert_eq!(res, Ok((ans, ""))); 521 | } 522 | 523 | #[test] 524 | fn test_ident() { 525 | let res = ident().parse("index"); 526 | let ans = mk_ident("index"); 527 | assert_eq!(res, Ok((ans, ""))) 528 | } 529 | 530 | #[test] 531 | fn test_single_assignment() { 532 | let res = expr().parse("foo <- bar"); 533 | let ans = Expr::Assign(mk_ident("foo"), Box::new(mk_ident_expr("bar"))); 534 | assert_eq!(res, Ok((ans, ""))) 535 | } 536 | 537 | #[test] 538 | fn test_expr_assigment() { 539 | let res = expr().parse("foo <- 'hello world'"); 540 | let ans = Expr::Assign( 541 | mk_ident("foo"), 542 | Box::new(Expr::Lit(Literal::Str(String::from("hello world")))), 543 | ); 544 | assert_eq!(res, Ok((ans, ""))); 545 | } 546 | 547 | #[test] 548 | fn test_assign_number() { 549 | let res = expr().parse("foo <- 3r2e3"); 550 | let ans = Expr::Assign( 551 | mk_ident("foo"), 552 | Box::new(Expr::Lit(Literal::Number(Num { 553 | radix: Some(3), 554 | integer: String::from("2"), 555 | mantissa: None, 556 | exponent: Some(3), 557 | 
}))), 558 | ); 559 | assert_eq!(res, Ok((ans, ""))); 560 | } 561 | 562 | #[test] 563 | fn test_multiple_assignment() { 564 | let res = expr().parse("foo <- bar <- 'hello world'"); 565 | let ans = Expr::Assign( 566 | mk_ident("foo"), 567 | Box::new(Expr::Assign( 568 | mk_ident("bar"), 569 | Box::new( 570 | Expr::Lit(Literal::Str(String::from("hello world"))), 571 | ), 572 | )), 573 | ); 574 | assert_eq!(res, Ok((ans, ""))); 575 | } 576 | 577 | #[test] 578 | fn test_unary_message_expr() { 579 | let res = expr().parse("theta sin"); 580 | let ans = Expr::Message { 581 | receiver: Box::new(mk_ident_expr("theta")), 582 | selector: Msg::Unary(mk_ident("sin")), 583 | }; 584 | assert_eq!(res, Ok((ans, ""))); 585 | } 586 | 587 | #[test] 588 | fn test_binary_expr_num() { 589 | let res = binary_expr().parse("3 + 2"); 590 | let ans = Expr::Message { 591 | receiver: Box::new(Expr::Lit(Literal::Number(Num::int_from_str("3")))), 592 | selector: Msg::Binary(String::from("+"), Box::new(Expr::Lit(Literal::Number(Num::int_from_str("2"))))) 593 | }; 594 | assert_eq!(res, Ok((ans, ""))); 595 | } 596 | 597 | #[test] 598 | fn test_binary_expr() { 599 | let res = expr().parse("foo + 2"); 600 | let ans = Expr::Message { 601 | receiver: Box::new(mk_ident_expr("foo")), 602 | selector: Msg::Binary(String::from("+"), Box::new(Expr::Lit(Literal::Number(Num::int_from_str("2"))))) 603 | }; 604 | assert_eq!(res, Ok((ans, ""))); 605 | } 606 | 607 | #[test] 608 | fn test_keyword_message() { 609 | let res = expr().parse("a b: 2"); 610 | let ans = Expr::Message { 611 | receiver: Box::new(mk_ident_expr("a")), 612 | selector: Msg::Kwargs(vec![ 613 | Keyword { 614 | id: mk_ident("b:"), 615 | val: Expr::Lit(Literal::Number(Num::int_from_str("2"))) 616 | }, 617 | ]) 618 | }; 619 | assert_eq!(res, Ok((ans, ""))); 620 | } 621 | 622 | #[test] 623 | fn test_keyword_messages() { 624 | let res = expr().parse("a b: 2 c: 3"); 625 | let ans = Expr::Message { 626 | receiver: Box::new(mk_ident_expr("a")), 627 | 
selector: Msg::Kwargs(vec![ 628 | Keyword { 629 | id: mk_ident("b:"), 630 | val: Expr::Lit(Literal::Number(Num::int_from_str("2"))) 631 | }, 632 | Keyword { 633 | id: mk_ident("c:"), 634 | val: Expr::Lit(Literal::Number(Num::int_from_str("3"))) 635 | }, 636 | ]) 637 | }; 638 | assert_eq!(res, Ok((ans, ""))); 639 | } 640 | 641 | #[test] 642 | fn test_many_unary_messages() { 643 | let res = expr().parse("theta sin round"); 644 | let ans = Expr::Message { 645 | receiver: Box::new(Expr::Message { 646 | receiver: Box::new(mk_ident_expr("theta")), 647 | selector: Msg::Unary(mk_ident("sin")), 648 | }), 649 | selector: Msg::Unary(mk_ident("round")), 650 | }; 651 | assert_eq!(res, Ok((ans, ""))); 652 | } 653 | 654 | #[test] 655 | fn test_empty_statements() { 656 | let res = statements().parse(""); 657 | let ans = vec![]; 658 | assert_eq!(res, Ok((ans, ""))); 659 | } 660 | 661 | #[test] 662 | fn test_empty_expr() { 663 | let res = expr().parse(""); 664 | assert!(is_err(res)); 665 | } 666 | 667 | #[test] 668 | fn test_empty_ident() { 669 | assert!(is_err(ident().parse(""))); 670 | } 671 | 672 | #[test] 673 | fn test_empty_primary() { 674 | assert!(is_err(primary().parse(""))); 675 | } 676 | 677 | #[test] 678 | fn test_empty_message_expr() { 679 | assert!(is_err(message_expr().parse(""))); 680 | } 681 | 682 | #[test] 683 | fn test_empty_keyword_expr() { 684 | assert!(is_err(keyword_expr().parse(""))); 685 | } 686 | 687 | #[test] 688 | fn test_empty_binary_object() { 689 | assert!(is_err(binary_object().parse(""))); 690 | } 691 | 692 | #[test] 693 | fn test_empty_unary_object() { 694 | assert!(is_err(unary_object().parse(""))); 695 | } 696 | 697 | #[test] 698 | fn test_empty_cascaded_message_expr() { 699 | assert!(is_err(cascaded_message_expr().parse(""))); 700 | } 701 | 702 | #[test] 703 | fn test_expr_statement() { 704 | let res = statements().parse("what"); 705 | let ans = vec![Statement::E(mk_ident_expr("what"))]; 706 | assert_eq!(res, Ok((ans, ""))); 707 | } 708 | 709 | 
710 | #[test] 711 | fn test_return_statement() { 712 | let res = statements().parse("^ 'foo'"); 713 | let ans = vec![Statement::Ret(Expr::Lit(Literal::Str(String::from("foo"))))]; 714 | assert_eq!(res, Ok((ans, ""))); 715 | } 716 | 717 | #[test] 718 | fn test_many_statements() { 719 | let res = statements().parse("foo <- bar. ^ foo"); 720 | let ans = vec![ 721 | Statement::E(Expr::Assign( 722 | mk_ident("foo"), 723 | Box::new(mk_ident_expr("bar")), 724 | )), 725 | Statement::Ret(mk_ident_expr("foo")), 726 | ]; 727 | assert_eq!(res, Ok((ans, ""))); 728 | } 729 | 730 | #[test] 731 | fn test_message_pattern_unary() { 732 | let res = message_pattern().parse("hello"); 733 | let ans = MsgPat::Unary(mk_ident("hello")); 734 | assert_eq!(res, Ok((ans, ""))); 735 | } 736 | 737 | #[test] 738 | fn test_message_pattern_binary() { 739 | let res = message_pattern().parse("+ hello"); 740 | let ans = MsgPat::Bin(mk_ident("+"), mk_ident("hello")); 741 | assert_eq!(res, Ok((ans, ""))); 742 | } 743 | 744 | #[test] 745 | fn test_mssage_pattern_kwargs() { 746 | let res = message_pattern().parse("foo: bar"); 747 | let ans = MsgPat::Kwargs(vec![ 748 | KeyPat { 749 | keyword: mk_ident("foo:"), 750 | var: mk_ident("bar") 751 | } 752 | ]); 753 | assert_eq!(res, Ok((ans, ""))); 754 | } 755 | 756 | #[test] 757 | fn test_method() { 758 | let res = method_p().parse("foo ^ bar"); 759 | let ans = Method { 760 | sig: MsgPat::Unary(mk_ident("foo")), 761 | temps: None, 762 | stmts: Some(vec![ 763 | Statement::Ret(mk_ident_expr("bar")) 764 | ]) 765 | }; 766 | assert_eq!(res, Ok((ans, ""))); 767 | } 768 | 769 | #[test] 770 | fn test_method_temps() { 771 | let res = method_p().parse("foo |asdf| ^ bar"); 772 | let ans = Method { 773 | sig: MsgPat::Unary(mk_ident("foo")), 774 | temps: Some(vec![mk_ident("asdf")]), 775 | stmts: Some(vec![ 776 | Statement::Ret(mk_ident_expr("bar")) 777 | ]) 778 | }; 779 | assert_eq!(res, Ok((ans, ""))); 780 | } 781 | 782 | #[test] 783 | fn test_method_bare_ret_kwargs() { 
784 | let res = method_p().parse("foo: asdf bar"); 785 | let ans = Method { 786 | sig: MsgPat::Kwargs(vec![ 787 | KeyPat { 788 | keyword: mk_ident("foo:"), 789 | var: mk_ident("asdf"), 790 | } 791 | ]), 792 | temps: None, 793 | stmts: Some(vec![ 794 | Statement::E(mk_ident_expr("bar")) 795 | ]) 796 | }; 797 | assert_eq!(res, Ok((ans, ""))); 798 | } 799 | 800 | #[test] 801 | fn test_method_bare_ret() { 802 | let res = method_p().parse("foo bar"); 803 | let ans = Method { 804 | sig: MsgPat::Unary(mk_ident("foo")), 805 | temps: None, 806 | stmts: Some(vec![ 807 | Statement::E(mk_ident_expr("bar")) 808 | ]) 809 | }; 810 | assert_eq!(res, Ok((ans, ""))); 811 | } 812 | 813 | #[test] 814 | fn test_method_kwargs() { 815 | let res = method_p().parse("foo: asdf ^ bar"); 816 | let ans = Method { 817 | sig: MsgPat::Kwargs(vec![ 818 | KeyPat { 819 | keyword: mk_ident("foo:"), 820 | var: mk_ident("asdf"), 821 | } 822 | ]), 823 | temps: None, 824 | stmts: Some(vec![ 825 | Statement::Ret(mk_ident_expr("bar")) 826 | ]) 827 | }; 828 | assert_eq!(res, Ok((ans, ""))); 829 | } 830 | 831 | #[test] 832 | fn test_temporaries_empty() { 833 | let res = temporaries().parse(""); 834 | assert!(is_err(res)); 835 | } 836 | 837 | #[test] 838 | fn test_temporaries() { 839 | let res = temporaries().parse("| foo |"); 840 | let ans = vec![mk_ident("foo")]; 841 | assert_eq!(res, Ok((ans, ""))); 842 | } 843 | 844 | 845 | #[test] 846 | fn test_any_whitespace() { 847 | assert!(is_err(any().parse(""))); 848 | } 849 | 850 | #[test] 851 | fn test_rectangle_constructor() { 852 | let res = expr().parse("Rectangle 853 | origin: (Point x:0 y:10) 854 | extent: (Point x:5 y:15)"); 855 | let ans = Expr::Message { 856 | receiver: Box::new( 857 | mk_ident_expr("Rectangle") 858 | ), 859 | selector: Msg::Kwargs(vec![ 860 | Keyword { 861 | id: mk_ident("origin:"), 862 | val: Expr::Message { 863 | receiver: Box::new(mk_ident_expr("Point")), 864 | selector: Msg::Kwargs(vec![ 865 | Keyword { 866 | id: mk_ident("x:"), 867 
| val: Num::int_from_str("0").to_expr(), 868 | }, 869 | Keyword { 870 | id: mk_ident("y:"), 871 | val: Num::int_from_str("10").to_expr(), 872 | } 873 | ]), 874 | } 875 | }, 876 | Keyword { 877 | id: mk_ident("extent:"), 878 | val: Expr::Message { 879 | receiver: Box::new(mk_ident_expr("Point")), 880 | selector: Msg::Kwargs(vec![ 881 | Keyword { 882 | id: mk_ident("x:"), 883 | val: Num::int_from_str("5").to_expr(), 884 | }, 885 | Keyword { 886 | id: mk_ident("y:"), 887 | val: Num::int_from_str("15").to_expr() 888 | } 889 | ]), 890 | } 891 | }, 892 | ]) 893 | }; 894 | 895 | assert_eq!(res, Ok((ans, ""))); 896 | } 897 | } 898 | -------------------------------------------------------------------------------- /src/syntax.rs: -------------------------------------------------------------------------------- 1 | // Syntax data types for the Smoltok programming language. 2 | 3 | /// The datatype representing valid syntax in Smoltok. Currently, we don't have 4 | /// a type for declarations. 5 | pub enum Syntax { 6 | Expr(Expr) 7 | } 8 | 9 | #[derive(Debug, PartialEq, Clone)] 10 | pub enum Expr { 11 | Id(Ident), 12 | Assign(Ident, Box), 13 | Lit(Literal), 14 | Message { receiver: Box, selector: Msg }, 15 | Block { vars: Vec, statements: Vec}, 16 | Method(Method), 17 | } 18 | 19 | #[derive(Debug, PartialEq, Clone)] 20 | pub enum MsgPat { 21 | Unary(Ident), 22 | Bin(Ident, Ident), 23 | Kwargs(Vec) 24 | } 25 | 26 | #[derive(Debug, PartialEq, Clone)] 27 | pub struct KeyPat { 28 | pub keyword: Ident, 29 | pub var: Ident, 30 | } 31 | 32 | #[derive(Debug, PartialEq, Clone)] 33 | pub enum Statement { 34 | E(Expr), 35 | Ret(Expr) 36 | } 37 | 38 | #[derive(Debug, PartialEq, Clone)] 39 | pub enum Msg { 40 | Unary(Ident), 41 | Binary(String, Box), 42 | Kwargs(Vec), 43 | } 44 | 45 | #[derive(Debug, PartialEq, Clone)] 46 | pub struct Keyword { 47 | pub id: Ident, 48 | pub val: Expr, 49 | } 50 | 51 | #[derive(Debug, PartialEq, Clone)] 52 | pub enum Literal { 53 | Number(Num), 54 | 
Char(char), 55 | Str(String), 56 | Symbol(String), 57 | Array(Vec), 58 | } 59 | 60 | #[derive(Debug, PartialEq, Clone)] 61 | pub struct Method { 62 | pub sig: MsgPat, 63 | pub temps: Option>, 64 | pub stmts: Option>, 65 | } 66 | 67 | #[derive(Debug, PartialEq, Clone)] 68 | pub struct Ident(pub String); 69 | 70 | /// Create an Expr from a string. 71 | /// 72 | /// # Examples 73 | /// 74 | /// ``` 75 | /// use smoltok::syntax::*; 76 | /// assert_eq!( 77 | /// mk_ident_expr("hey"), 78 | /// Expr::Id(Ident(String::from("hey"))) 79 | /// ); 80 | /// ``` 81 | pub fn mk_ident_expr(s: &str) -> Expr { 82 | Expr::Id(mk_ident(s)) 83 | } 84 | 85 | pub fn mk_ident(s: &str) -> Ident { 86 | Ident(String::from(s)) 87 | } 88 | 89 | #[derive(Debug, PartialEq, Clone)] 90 | pub struct Num { 91 | /// Smalltalk numbers can include an optional radix to specify the base of 92 | /// the number. This is given as `Nr` where `N` is the base. 93 | pub radix: Option, 94 | /// The integral part of the number is kept as a `String`. This is done to 95 | /// permit bases greater than 10. 96 | pub integer: String, 97 | /// For floating point numbers, the mantissa may be represented as `.N`, 98 | /// where `N` is some number permitted by the given base. 99 | pub mantissa: Option, 100 | /// Finally, the exponent is available as `eN` where `N` is some number 101 | /// permitted by the given base. 102 | pub exponent: Option, 103 | } 104 | 105 | impl Num { 106 | /// Convenient alias for creating a base 10 integral number from a string. 107 | pub fn int_from_str(s: &str) -> Self { 108 | Num { 109 | integer: String::from(s), 110 | radix: None, 111 | mantissa: None, 112 | exponent: None 113 | } 114 | } 115 | 116 | pub fn to_expr(self) -> Expr { 117 | Expr::Lit(Literal::Number(self)) 118 | } 119 | } 120 | --------------------------------------------------------------------------------