├── .gitignore ├── return_2.c ├── README.md ├── Cargo.toml ├── src ├── compiler.rs ├── main.rs ├── tokenizer.rs └── driver.rs └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /return_2.c: -------------------------------------------------------------------------------- 1 | int main(void){ 2 | return 2; 3 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rustcc 2 | 3 | a educational minimal C compiler using rust 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustcc" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | regex = "1.10.6" 10 | -------------------------------------------------------------------------------- /src/compiler.rs: -------------------------------------------------------------------------------- 1 | use crate::tokenizer; 2 | 3 | pub fn compile(_output_name: &str, source_code: &String, running_mode: Option<&str>) { 4 | println!("compiling the source code ..."); 5 | // Running compiler partial with options 6 | if let Some(matched) = running_mode { 7 | match matched { 8 | "lex" => tokenizer::tokenizer_lexer(source_code), 9 | _ => (), 10 | } 11 | } else { 12 | // Run the hole compilation 13 | tokenizer::tokenizer_lexer(source_code); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.3" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "memchr" 16 | version = "2.7.4" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 19 | 20 | [[package]] 21 | name = "regex" 22 | version = "1.10.6" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" 25 | dependencies = [ 26 | "aho-corasick", 27 | "memchr", 28 | "regex-automata", 29 | "regex-syntax", 30 | ] 31 | 32 | [[package]] 33 | name = "regex-automata" 34 | version = "0.4.7" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" 37 | dependencies = [ 38 | "aho-corasick", 39 | "memchr", 40 | "regex-syntax", 41 | ] 42 | 43 | [[package]] 44 | name = "regex-syntax" 45 | version = "0.8.4" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" 48 | 49 | [[package]] 50 | name = "rustcc" 51 | version = "0.1.0" 52 | dependencies = [ 53 | "regex", 54 | ] 55 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::env::args; 2 | 3 | mod compiler; 4 | mod driver; 5 | mod tokenizer; 6 | 7 | fn main() { 8 | let argc: Vec = args().collect(); 9 | if argc.len() < 2 { 10 | panic!("Wrong number of argument, rustcc ./source_code.c "); 11 | } 12 | let mut running_mode: Option<&str> = None; 13 | let mut only_assembly: bool = false; 14 | let mut help_mode: bool = false; 15 | 16 | for arg in &argc[1..] { 17 | match arg.as_str() { 18 | "--help" => help_mode = true, 19 | "--lex" => running_mode = Some("lex"), 20 | "--parse" => running_mode = Some("parse"), 21 | "--codegen" => running_mode = Some("codegen"), 22 | "-S" => only_assembly = true, 23 | _ => (), 24 | } 25 | } 26 | if help_mode { 27 | println!(" 28 | USAGE: 29 | rustcc ./source.c [OPTIONS] 30 | 31 | OPTIONS: 32 | --help Show the list of commands 33 | --lex Directs it to run the lexer, but stop before parsing 34 | --parse Directs it to run the lexer and parser, but stop before assembly generation 35 | --codegen Directs it to perform lexing, parsing, and assembly generation, but stop before code emission 36 | -S directs your compiler to emit an assembly file, but not assemble or link it 37 | "); 38 | return; 39 | } 40 | 41 | let file_path: &str = argc[1].as_str(); 42 | 43 | let output = driver::run(file_path, running_mode, only_assembly); 44 | match output { 45 | Ok(()) => println!("File Compiled Successfully"), 46 | Err(e) => println!("{}", e), 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/tokenizer.rs: -------------------------------------------------------------------------------- 1 | use core::panic; 2 | use regex::{Match, Regex}; 3 | use std::collections::HashMap; 4 | 5 | pub fn tokenizer_lexer(source_code: &String) { 6 | let tokens: HashMap<&str, &str> = HashMap::from([ 7 | ("Identifier", r"[a-zA-Z_]\w*\b"), 8 | ("Constant", r"[0-9]+\b"), 9 | ("int keyword", r"int\b"), 10 | ("void keyword", r"void\b"), 11 | ("return keyword", r"return\b"), 12 | ("Open parenthesis", r"\("), 13 | ("Close parenthesis", r"\)"), 14 | ("Open brace", r"\{"), 15 | ("Close brace", r"\}"), 16 | ("Semicolon", r";"), 17 | ]); 18 | let mut valid_tokens: Vec<(&str, &str)> = Vec::new(); 19 | 20 | let input = source_code.as_str(); 21 | let input_lines = input.split("\n"); 22 | 23 | for (index, original_line) in input_lines.enumerate() { 24 | let mut line = original_line; 25 | while !line.is_empty() { 26 | line = line.trim_start(); 27 | let mut max_size: usize = 0; 28 | let mut token: Option<(Match<'_>, &str)> = None; 29 | 30 | for (key, value) in tokens.clone().into_iter() { 31 | let token_regex = Regex::new(value).unwrap(); 32 | if let Some(matched) = token_regex.find_at(line, 0) { 33 | // Check match for any tokens 34 | if matched.len() > max_size && matched.start() == 0 { 35 | max_size = matched.len(); 36 | token = Some((matched, key)); 37 | } 38 | // Check keyword 39 | if let Some((old_matched, _old_key)) = token { 40 | if matched.start() == 0 41 | && key.contains("keyword") == true 42 | && old_matched.len() == matched.len() 43 | { 44 | max_size = matched.len(); 45 | token = Some((matched, key)); 46 | } 47 | } 48 | }; 49 | } 50 | if let Some((matched, key)) = token { 51 | valid_tokens.push((key, matched.as_str())); 52 | line = &line[matched.len()..]; 53 | } else if !line.is_empty() { 54 | tokenizer_error_handler(index + 1, original_line); 55 | } 56 | } 57 | } 58 | println!("{:#?}", valid_tokens) 59 | } 60 | 61 | fn tokenizer_error_handler(line: usize, source_line: &str) { 62 | panic!( 63 | " 64 | Compiler Error: Invalid token 65 | 66 | {} | {} 67 | ", 68 | line, source_line 69 | ); 70 | } 71 | -------------------------------------------------------------------------------- /src/driver.rs: -------------------------------------------------------------------------------- 1 | use crate::compiler; 2 | use core::result::Result; 3 | use std::{fs, process::Command}; 4 | 5 | const PREPROCESSED_FILE_NAME: &str = "preprocessed_file.c"; 6 | const ASSEMBLY_OUTPUT_FILE_NAME: &str = "assembly_file.s"; 7 | 8 | pub fn run(file_path: &str, running_mode: Option<&str>, only_assembly: bool) -> Result<(), String> { 9 | preprocessor(file_path, PREPROCESSED_FILE_NAME)?; 10 | 11 | let (output_name, source_code) = read_source_code(file_path)?; 12 | compiler::compile(ASSEMBLY_OUTPUT_FILE_NAME, &source_code, running_mode); 13 | 14 | if !only_assembly { 15 | assembly_linker(ASSEMBLY_OUTPUT_FILE_NAME, output_name)?; 16 | } 17 | Ok(()) 18 | } 19 | 20 | pub fn read_source_code(file_path: &str) -> Result<(String, String), String> { 21 | let source_code = 22 | fs::read_to_string(PREPROCESSED_FILE_NAME).expect("Failed to read the preprocessed file"); 23 | fs::remove_file(PREPROCESSED_FILE_NAME).expect("Failed to remove the preprocessed file"); 24 | 25 | let mut output_vector: Vec<&str> = file_path.split("/").collect(); 26 | output_vector = output_vector 27 | .last() 28 | .expect("Error While reading filename") 29 | .split('.') 30 | .collect(); 31 | let output_name = output_vector 32 | .first() 33 | .expect("Error While reading filename") 34 | .to_string(); 35 | 36 | Ok((output_name, source_code)) 37 | } 38 | 39 | pub fn assembly_linker( 40 | input_assembly_source: &str, 41 | output_executable: String, 42 | ) -> Result<(), String> { 43 | println!("linking the compiled assembly to a executable with gcc ..."); 44 | 45 | let output = Command::new("gcc") 46 | .arg(input_assembly_source) 47 | .arg("-o") 48 | .arg(output_executable) 49 | .spawn(); 50 | 51 | let mut child = match output { 52 | Ok(child) => child, 53 | Err(error) => return Err(format!("Compiler Driver Error: {:#?}", error)), 54 | }; 55 | let exit_status = match child.wait() { 56 | Ok(exit_status) => exit_status, 57 | Err(error) => return Err(format!("Failed to wait for assembly process: {:#?}", error)), 58 | }; 59 | if exit_status.success() == true { 60 | Ok(()) 61 | } else { 62 | Err(format!("Failed to run the assembly linker")) 63 | } 64 | } 65 | 66 | pub fn preprocessor(file_path: &str, output_file: &str) -> Result<(), String> { 67 | println!("running the preprocessor with gcc ..."); 68 | 69 | let output = Command::new("gcc") 70 | .arg("-E") 71 | .arg("-P") 72 | .arg(file_path) 73 | .arg("-o") 74 | .arg(output_file) 75 | .spawn(); 76 | 77 | let mut child = match output { 78 | Ok(child) => child, 79 | Err(error) => return Err(format!("Compiler Driver Error: {:#?}", error)), 80 | }; 81 | let exit_status = match child.wait() { 82 | Ok(exit_status) => exit_status, 83 | Err(error) => return Err(format!("Failed to wait for process: {:#?}", error)), 84 | }; 85 | if exit_status.success() == true { 86 | Ok(()) 87 | } else { 88 | Err(format!("Failed to run the preprocessor")) 89 | } 90 | } 91 | --------------------------------------------------------------------------------