├── .gitignore ├── README.md ├── LICENSE ├── turnstile.rs └── regex.rs /.gitignore: -------------------------------------------------------------------------------- 1 | regex 2 | turnstile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Regex Session Artifacts 2 | 3 | Artifacts of that [Regex Tsoding Session](https://www.youtube.com/watch?v=MH56D5M9xSQ). 4 | 5 | [![screencast](http://i3.ytimg.com/vi/MH56D5M9xSQ/hqdefault.jpg)](https://www.youtube.com/watch?v=MH56D5M9xSQ) 6 | 7 | ## Quick Start 8 | 9 | Make sure you have `rustc` available in `$PATH`. 10 | 11 | ### Turnstile Example 12 | 13 | ```console 14 | $ rustc turnstile.rs 15 | $ ./turnstile 16 | ``` 17 | 18 | ### Regex Example 19 | 20 | ```console 21 | $ rustc regex.rs 22 | $ ./regex 23 | ``` 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2021 Alexey Kutepov 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /turnstile.rs: -------------------------------------------------------------------------------- 1 | use std::io::{self, BufRead, Write}; 2 | 3 | enum State { 4 | Locked, 5 | Unlocked, 6 | } 7 | 8 | const LOCKED: usize = 0; 9 | const UNLOCKED: usize = 1; 10 | const STATES_COUNT: usize = 2; 11 | 12 | const PUSH: usize = 0; 13 | const COIN: usize = 1; 14 | const EVENTS_COUNT: usize = 2; 15 | 16 | const FSM: [[usize; EVENTS_COUNT]; STATES_COUNT] = [ 17 | // PUSH COIN 18 | [LOCKED, UNLOCKED], // LOCKED 19 | [LOCKED, UNLOCKED], // UNLOCKED 20 | ]; 21 | 22 | fn next_state(state: usize, event: usize) -> usize { 23 | FSM[state][event] 24 | } 25 | 26 | fn state_to_str(state: usize) -> &'static str { 27 | match state { 28 | LOCKED => "Locked", 29 | UNLOCKED => "Unlocked", 30 | _ => unreachable!() 31 | } 32 | } 33 | 34 | fn main() { 35 | let mut state = LOCKED; 36 | 37 | println!("State: {}", state_to_str(state)); 38 | print!("> "); 39 | io::stdout().flush().unwrap(); 40 | for line in io::stdin().lock().lines() { 41 | match line.unwrap().as_str() { 42 | "coin" => state = next_state(state, COIN), 43 | "push" => state = next_state(state, PUSH), 44 | "quit" => return, 45 | unknown => { 46 | eprintln!("ERROR: Unknown event {}", unknown); 47 | } 48 | } 49 | 50 | println!("State: {}", state_to_str(state)); 51 | print!("> "); 52 | io::stdout().flush().unwrap(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /regex.rs: -------------------------------------------------------------------------------- 1 | type FsmIndex = usize; 2 | 3 | const FSM_COLUMN_SIZE: usize = 130; 4 | const FSM_LINEEND: FsmIndex = 129; 5 | 6 | #[derive(Default, Clone, Copy)] 7 | struct FsmAction { 8 | next: FsmIndex, 9 | offset: i32, 10 | } 11 | 12 | #[derive(Clone)] 13 | struct FsmColumn { 14 | ts: [FsmAction; FSM_COLUMN_SIZE], 15 | } 16 | 17 | impl FsmColumn { 18 | fn new() -> Self { 19 | Self { 20 | ts: [Default::default(); FSM_COLUMN_SIZE] 21 | } 22 | } 23 | } 24 | 25 | struct Regex { 26 | cs: Vec 27 | } 28 | 29 | #[derive(Debug, Copy, Clone)] 30 | enum Atom { 31 | Any, 32 | EndLine, 33 | Char(char), 34 | } 35 | 36 | #[derive(Debug)] 37 | enum Token { 38 | Atom(Atom), 39 | Star(Atom), 40 | } 41 | 42 | fn lexer(src: &str) -> Vec { 43 | let bytes = src.as_bytes(); 44 | 45 | let mut result = Vec::new(); 46 | let mut i = 0; 47 | while i < bytes.len() { 48 | let atom = match bytes[i] as char { 49 | '.' => Atom::Any, 50 | '$' => Atom::EndLine, 51 | '*' | '+' => panic!("Invalid target for quantifier"), 52 | x => Atom::Char(x), 53 | }; 54 | 55 | i += 1; 56 | 57 | match bytes.get(i).map(|x| *x as char) { 58 | Some('*') => { 59 | i += 1; 60 | result.push(Token::Star(atom)); 61 | } 62 | Some('+') => { 63 | i += 1; 64 | result.push(Token::Atom(atom)); 65 | result.push(Token::Star(atom)); 66 | } 67 | _ => { 68 | result.push(Token::Atom(atom)); 69 | } 70 | }; 71 | } 72 | result 73 | } 74 | 75 | fn compile_atom(atom: &Atom, success: FsmIndex) -> FsmColumn { 76 | use Atom::*; 77 | 78 | let mut column = FsmColumn::new(); 79 | 80 | match atom { 81 | Any => { 82 | for i in 32..127 { 83 | column.ts[i] = FsmAction { 84 | next: success, 85 | offset: 1, 86 | }; 87 | } 88 | }, 89 | EndLine => { 90 | column.ts[FSM_LINEEND] = FsmAction { 91 | next: success, 92 | offset: 1, 93 | }; 94 | } 95 | Char(x) => { 96 | column.ts[*x as usize] = FsmAction { 97 | next: success, 98 | offset: 1, 99 | }; 100 | } 101 | } 102 | 103 | column 104 | } 105 | 106 | impl Regex { 107 | fn compile(src: &str) -> Self { 108 | let tokens = lexer(src); 109 | let mut fsm = Self { cs: Vec::new() }; 110 | fsm.cs.push(FsmColumn::new()); // default failed state 111 | 112 | for token in tokens.iter() { 113 | let current_state = fsm.cs.len(); 114 | let next_state = fsm.cs.len() + 1; 115 | 116 | match token { 117 | Token::Atom(atom) => { 118 | let column = compile_atom(atom, next_state); 119 | fsm.cs.push(column); 120 | }, 121 | 122 | Token::Star(atom) => { 123 | let mut column = compile_atom(atom, current_state); 124 | for action in column.ts.iter_mut() { 125 | if action.next == 0 { 126 | action.next = next_state; 127 | } else { 128 | assert!(action.next == current_state); 129 | } 130 | } 131 | fsm.cs.push(column); 132 | } 133 | } 134 | } 135 | 136 | fsm 137 | } 138 | 139 | fn match_str(&self, input: &str) -> bool { 140 | let mut state = 1; 141 | let mut head = 0; 142 | let chars = input.chars().collect::>(); 143 | let n = chars.len(); 144 | 145 | while 0 < state && state < self.cs.len() && head < n { 146 | let action = self.cs[state].ts[chars[head] as usize]; 147 | state = action.next; 148 | head = (head as i32 + action.offset) as usize; 149 | } 150 | 151 | if state == 0 { 152 | return false; 153 | } 154 | 155 | if state < self.cs.len() { 156 | let action = self.cs[state].ts[FSM_LINEEND]; 157 | state = action.next; 158 | } 159 | 160 | return state >= self.cs.len(); 161 | } 162 | 163 | #[allow(dead_code)] 164 | fn dump(&self) { 165 | for symbol in 0..FSM_COLUMN_SIZE { 166 | print!("{:03} => ", symbol); 167 | for column in self.cs.iter() { 168 | print!("({}, {}) ", 169 | column.ts[symbol].next, 170 | column.ts[symbol].offset); 171 | } 172 | println!(); 173 | } 174 | } 175 | } 176 | 177 | fn test_regex(regex_src: &str, test_cases: &[(&str, bool)]) { 178 | let regex = Regex::compile(regex_src); 179 | 180 | println!("Testing {:?}", regex_src); 181 | for (input, expected_outcome) in test_cases { 182 | println!(" input: {:?}", input); 183 | println!(" match: {:?}", *expected_outcome); 184 | assert_eq!(regex.match_str(input), *expected_outcome); 185 | println!(); 186 | } 187 | } 188 | 189 | fn main() { 190 | let tests = vec!{ 191 | ("a+bc$", vec![ 192 | ("Hello, World", false), 193 | ("bc", false), 194 | ("abc", true), 195 | ("aabc", true), 196 | ("aaabc", true), 197 | ("bbc", false), 198 | ("cbc", false), 199 | ("cbd", false), 200 | ("cbt", false), 201 | ("abcd", false), 202 | ], false), 203 | (".*bc", vec![ 204 | ("bc", true), 205 | ("abc", true), 206 | ("aabc", true), 207 | ], true), 208 | }; 209 | 210 | for (regex_src, test_cases, ignored) in tests.iter() { 211 | if !ignored { 212 | test_regex(regex_src, &test_cases); 213 | } 214 | } 215 | } 216 | --------------------------------------------------------------------------------