├── Cargo.toml ├── README.md ├── examples ├── cloc.rs ├── cloc_git.rs └── stylo.rs ├── resources └── test.rs └── src └── lib.rs /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "unsafe-unicorn" 3 | version = "0.0.1" 4 | authors = ["ddh "] 5 | description = "library for analyzing usage of unsafe code in rust projects" 6 | readme = "./README.md" 7 | keywords = ["unsafe"] 8 | categories = ["text-processing"] 9 | repository = "https://github.com/avadacatavra/unsafe-unicorn" 10 | license = "MPL-2.0" 11 | 12 | [dependencies] 13 | lazy_static = "0.2" 14 | regex = "0.2" 15 | prettytable-rs = "^0.6" 16 | 17 | [dev-dependencies] 18 | git2 = "0.6" 19 | 20 | [[example]] 21 | name = "cloc" 22 | 23 | [[example]] 24 | name = "cloc_git" 25 | 26 | [[example]] 27 | name = "stylo" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A crate for analyzing the usage of unsafe code in Rust, based on [cloc-rust](https://github.com/avadacatavra/cloc-rust). 2 | 3 | Currently, this is based on a textual analysis of code. In the future, this could be expanded to use the AST for further analysis. 
4 | 5 | For more information on unsafe code: 6 | - [Meet Safe and Unsafe](https://doc.rust-lang.org/nomicon/meet-safe-and-unsafe.html) 7 | - [Rust book](https://doc.rust-lang.org/book/second-edition/ch19-01-unsafe-rust.html) 8 | - [Rust by Example](https://rustbyexample.com/unsafe.html) 9 | 10 | 11 | 12 | ## Example 13 | 14 | ```rust 15 | extern crate unsafe_unicorn; 16 | 17 | use unsafe_unicorn::{ClocStats, Cloc, ClocVerbosity}; 18 | 19 | fn main() { 20 | 21 | // Get the stats for a single file 22 | let c = ClocStats::from_file("./resources/test.rs").unwrap(); 23 | println!("{}", c); 24 | 25 | // Get the stats for the resources directory 26 | let mut cloc = Cloc::new(); 27 | cloc.analyze_dir("./resources").unwrap(); 28 | println!("{}", cloc); 29 | 30 | // Change the verbosity to be file based and then get stats file by file for resources dir 31 | cloc.set_verbose(ClocVerbosity::File); 32 | cloc.analyze_dir("./resources").unwrap(); 33 | println!("{}", cloc) 34 | 35 | } 36 | ``` 37 | 38 | More examples are available in `examples/` 39 | 40 | 41 | ## TODO 42 | - [ ] make PR for [tokei](https://github.com/Aaronepower/tokei/tree/master/src) 43 | - [ ] add dependency analysis 44 | - [ ] expand tests 45 | - [ ] add docs 46 | - [ ] clean up code 47 | - [ ] look for c-like array iteration? 
48 | -------------------------------------------------------------------------------- /examples/cloc.rs: -------------------------------------------------------------------------------- 1 | extern crate unsafe_unicorn; 2 | 3 | use unsafe_unicorn::{ClocStats, Cloc, ClocVerbosity}; 4 | 5 | 6 | fn main() { 7 | 8 | // Get the stats for a single file 9 | let c = ClocStats::from_file("./resources/test.rs").unwrap(); 10 | println!("{}", c); 11 | 12 | // Get the stats for the resources directory 13 | let mut cloc = Cloc::new(); 14 | cloc.analyze_dir("./resources").unwrap(); 15 | println!("{}", cloc); 16 | 17 | // Add the stats for the source directory 18 | // FIXME it's counting the unsafe regexes I think 19 | cloc.analyze_dir("./src").unwrap(); 20 | println!("{}", cloc); 21 | 22 | // Change the verbosity to be file based and then get stats file by file for resources dir 23 | cloc.set_verbose(ClocVerbosity::File); 24 | cloc.analyze_dir("./resources").unwrap(); 25 | println!("{}", cloc) 26 | 27 | } 28 | -------------------------------------------------------------------------------- /examples/cloc_git.rs: -------------------------------------------------------------------------------- 1 | extern crate unsafe_unicorn; 2 | extern crate git2; 3 | 4 | use git2::Repository; 5 | use std::fs; 6 | use unsafe_unicorn::Cloc; 7 | 8 | fn git_example(url: &str) { 9 | // clone into a temporary repository 10 | Repository::clone(url, "./cloc-git-tmp").unwrap(); 11 | 12 | let mut c = Cloc::new(); 13 | c.analyze_dir("./cloc-git-tmp").unwrap(); 14 | println!("{}", c); 15 | fs::remove_dir_all("./cloc-git-tmp").unwrap(); 16 | } 17 | 18 | fn main() { 19 | // let servo_url = "https://github.com/servo/servo.git"; 20 | // let rust_url = "https://github.com/rust-lang/rust"; 21 | let wr_url = "https://github.com/servo/webrender"; 22 | 23 | git_example(wr_url); 24 | } 25 | -------------------------------------------------------------------------------- /examples/stylo.rs: 
--------------------------------------------------------------------------------
extern crate unsafe_unicorn;

use unsafe_unicorn::{Cloc, ClocVerbosity};

/// Checkout that is analyzed when no path is given on the command line.
const DEFAULT_ROOT: &'static str = "/Users/ddh/mozilla/stylo";

// what files account for the unsafety?
// pull out the top 5 files for each directory analyzed in main
/// Prints up to five files below `dir` that contain unsafe code, as selected
/// by `Cloc::top_unsafe`, or a short notice when there are none.
///
/// # Panics
///
/// Panics if `dir` cannot be analyzed.
fn analyze_files(dir: &str) {
    let mut cloc = Cloc::new();
    cloc.set_verbose(ClocVerbosity::File);
    cloc.analyze_dir(dir).unwrap();

    let top_cloc = cloc.top_unsafe(5);

    if top_cloc.len() > 0 {
        println!("{}", top_cloc)
    } else {
        println!("Nothing unsafe here!");
    }
}

/// Analyzes the directory named by the first command-line argument
/// (default: [`DEFAULT_ROOT`]) and then drills into every reported entry.
fn main() {
    let root = std::env::args()
        .nth(1)
        .unwrap_or_else(|| DEFAULT_ROOT.to_owned());

    let mut cloc = Cloc::new();
    cloc.analyze_dir(&root).unwrap();
    println!("{}", cloc);

    for s in cloc.stats() {
        println!("Top unsafe files for {:?}", s.name());
        // Lossy conversion: a non-UTF-8 path must not abort the whole report
        analyze_files(&s.name().to_string_lossy())
    }
}

--------------------------------------------------------------------------------
/resources/test.rs:
--------------------------------------------------------------------------------
use std::vec;

pub fn foo() {
    let x = vec!(0, 1, 2, 3, 4, 5, 6, 7);
    let y = 0;
    for ele in x.iter() {
        y += ele
    }
}

pub unsafe fn unsafe_foo() {
    let x = vec!(0, 1, 2, 3, 4, 5, 6, 7);
    let y = 0;
    for ele in x.iter() {
        y += ele
    }

}

// comment test
pub fn bar() {
    let x = vec!(0, 1, 2, 3, 4, 5, 6, 7);

    unsafe {
        let y = 0;
        for ele in x.iter() {
            y += ele
        }
    }
}
/* total functions: 3
 * not safe functions: 1
 * not safe blocks: 1
 * total lines of not safe: 9 (counting the line with close bracket)
 */

--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
#[macro_use] extern crate lazy_static;
extern crate regex;
3 | 4 | use std::fmt; 5 | use std::io::{self, Read}; 6 | use std::path::{Path, PathBuf}; 7 | use std::fs::{self,File}; 8 | use std::result::Result::Err; 9 | use regex::RegexSet; 10 | 11 | type ClocResult = Result; 12 | 13 | //TODO dependency analysis 14 | //TODO maybe use pretty table? 15 | //TODO expand unit tests 16 | 17 | lazy_static!{ 18 | static ref REGEXES: RegexSet = RegexSet::new(&[ 19 | r"```", // block comment 20 | r"^//|^\s/\*|^\s\*|^\s\*/", //comment 21 | r"\s*fn\s+[a-zA-Z_]*", //function 22 | r"\s*unsafe impl.*for.*", //unsafe impl 23 | r"\s*unsafe\s*\{.*\}", //unsafe one liner 24 | r".*unsafe\s*\{", //more unsafe 25 | r"panic", //panic 26 | ]).unwrap(); 27 | } 28 | 29 | /// exclude tests etc from analysis 30 | lazy_static!{ 31 | static ref EXCLUDE: Vec<&'static str> = vec!( 32 | ".git", 33 | "tests", 34 | "examples", 35 | "benches" 36 | ); 37 | } 38 | 39 | /// Determine how to summarize and display statistics 40 | /// File: show unsafe info file by file 41 | /// Crate: (default) show info 'crate' by 'crate' 42 | /// TopLevel: combine all subdirectory stats into one toplevel output 43 | #[derive(Copy, Clone, Debug, PartialEq)] 44 | pub enum ClocVerbosity { 45 | File, 46 | Crate, 47 | TopLevel, 48 | } 49 | 50 | // cloc should be the struct that you actually interact with 51 | // so you set the verbosity and call it on a path, then it figures out how to split all of the data up 52 | #[derive(Debug)] 53 | pub struct Cloc { 54 | verbose: ClocVerbosity, 55 | stats: Vec, 56 | } 57 | 58 | impl Cloc { 59 | pub fn new() -> Cloc { 60 | Cloc { 61 | verbose: ClocVerbosity::Crate, 62 | stats: vec!(), 63 | } 64 | } 65 | 66 | pub fn stats(&self) -> &Vec { 67 | &self.stats 68 | } 69 | 70 | pub fn set_verbose(&mut self, level: ClocVerbosity) { 71 | self.verbose = level; 72 | } 73 | 74 | pub fn add_stats(&mut self, stats: ClocStats) { 75 | self.stats.push(stats); 76 | } 77 | 78 | pub fn clear_stats(&mut self) { 79 | self.stats.clear() 80 | } 81 | 82 | pub fn len(&self) 
-> usize { 83 | self.stats.len() 84 | } 85 | 86 | pub fn analyze_dir(&mut self, dir: &str) -> Result<(), io::Error> { 87 | 88 | let mut c = ClocStats::new(PathBuf::from(dir)); 89 | let mut subdirs = vec!(); 90 | subdirs.push((dir.to_owned(), fs::read_dir(&Path::new(dir))?)); 91 | 92 | while !subdirs.is_empty(){ 93 | let (dir_name, paths) = subdirs.pop().unwrap(); 94 | 95 | // when you switch subdirectories, check to see if you need a new CLocStats 96 | if PathBuf::from(&dir_name).join("Cargo.toml").exists() && self.verbose == ClocVerbosity::Crate { 97 | if !(c.is_empty()) { 98 | self.add_stats(c.clone()); 99 | } 100 | c = ClocStats::new(PathBuf::from(dir_name)); 101 | } 102 | 103 | for p in paths { 104 | let p = p.unwrap(); 105 | 106 | if p.file_type().unwrap().is_dir(){ 107 | if !(EXCLUDE.contains(&p.path().file_name().unwrap().to_str().unwrap())) { 108 | let ppath = p.path(); 109 | let subdir_name = ppath.to_str().unwrap(); 110 | subdirs.push((subdir_name.to_owned(), fs::read_dir(subdir_name).unwrap())); 111 | } 112 | } else { 113 | if p.path().extension().unwrap_or_default() == "rs" { 114 | match self.verbose { 115 | ClocVerbosity::File => { 116 | let path = p.path(); 117 | let c = ClocStats::from_file(path.to_str().unwrap()).unwrap(); 118 | self.add_stats(c); 119 | }, 120 | _ => c.cloc_file(&mut File::open(p.path()).expect("Couldn't open file")), 121 | 122 | }; 123 | 124 | } 125 | } 126 | } 127 | 128 | } 129 | if !(c.is_empty()) { 130 | self.add_stats(c.clone()); 131 | } 132 | Ok(()) 133 | } 134 | 135 | pub fn sort_stats(&mut self) { 136 | self.stats.sort_by(|a, b| { 137 | b.unsafe_ratio().partial_cmp(&a.unsafe_ratio()).unwrap() 138 | }); 139 | } 140 | 141 | // returns a Cloc object to make output better 142 | pub fn top_unsafe(&mut self, num: usize) -> Cloc { 143 | let mut c = Cloc::new(); 144 | c.set_verbose(self.verbose); 145 | 146 | self.sort_stats(); 147 | for s in self.stats.iter() { 148 | if c.len() == num { 149 | break; 150 | } 151 | if s.num_unsafe > 0 
{ 152 | c.add_stats(s.clone()); 153 | } 154 | } 155 | c 156 | } 157 | } 158 | 159 | impl fmt::Display for Cloc { 160 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 161 | let header = ["\t", "#files", "blank", "comment", "code", "unsafe", "%unsafe", 162 | "#fns", "#unsafe fns", "%unsafe fns", "#panics"]; 163 | for h in header.iter() { 164 | write!(f, "{}\t", h)?; 165 | } 166 | write!(f, "\n")?; 167 | for s in &self.stats { 168 | write!(f, "{}\t", s.name().file_name().unwrap().to_str().unwrap())?; 169 | for val in s.summarize(){ 170 | match val { 171 | SummaryType::Ratio(x) => write!(f, "{:.*}\t", 2, x)?, 172 | SummaryType::Int(x) => write!(f, "{}\t", x)?, 173 | }; 174 | } 175 | write!(f, "\n")?; 176 | 177 | } 178 | Ok(()) 179 | } 180 | } 181 | 182 | #[derive(Clone, Debug, PartialEq, Eq)] 183 | pub struct ClocStats { 184 | name: PathBuf, 185 | pub num_unsafe: usize, 186 | unsafe_fns: usize, 187 | total_fns: usize, 188 | blank: usize, 189 | comment: usize, 190 | files: usize, 191 | code: usize, 192 | panics: usize, 193 | } 194 | 195 | // helper type to store all summary values in a vec 196 | #[derive(Debug, PartialEq)] 197 | pub enum SummaryType { 198 | Ratio(f64), 199 | Int(usize) 200 | } 201 | 202 | impl ClocStats { 203 | pub fn new(dir_name: PathBuf) -> ClocStats { 204 | ClocStats { 205 | name: dir_name.to_owned(), 206 | num_unsafe: 0, 207 | unsafe_fns: 0, 208 | total_fns: 0, 209 | blank: 0, 210 | comment: 0, 211 | files: 0, 212 | code: 0, 213 | panics: 0, 214 | } 215 | 216 | } 217 | 218 | pub fn name(&self) -> &PathBuf { 219 | &self.name 220 | } 221 | 222 | pub fn count_fns(&self) -> usize { 223 | self.total_fns 224 | } 225 | 226 | pub fn count_unsafe_fns(&self) -> usize { 227 | self.unsafe_fns 228 | } 229 | 230 | pub fn to_vec(&self) -> Vec { 231 | vec!(self.files, self.blank, self.comment, self.code, 232 | self.num_unsafe, self.total_fns, self.unsafe_fns, self.panics) 233 | } 234 | 235 | // Consider empty if there haven't been any functions 236 | pub 
fn is_empty(&self) -> bool { 237 | !(self.total_fns > 0) 238 | } 239 | 240 | pub fn summarize(&self) -> Vec { 241 | let mut unsafe_ratio = self.num_unsafe as f64 / self.code as f64 * 100.0; 242 | let mut fn_ratio = self.unsafe_fns as f64 / self.total_fns as f64 * 100.0; 243 | if unsafe_ratio.is_nan() { 244 | unsafe_ratio = 0.0; 245 | } 246 | if fn_ratio.is_nan() { 247 | fn_ratio = 0.0; 248 | } 249 | vec!( 250 | SummaryType::Int(self.files), 251 | SummaryType::Int(self.blank), 252 | SummaryType::Int(self.comment), 253 | SummaryType::Int(self.code), 254 | SummaryType::Int(self.num_unsafe), 255 | SummaryType::Ratio(unsafe_ratio), 256 | SummaryType::Int(self.total_fns), 257 | SummaryType::Int(self.unsafe_fns), 258 | SummaryType::Ratio(fn_ratio), 259 | SummaryType::Int(self.panics)) 260 | } 261 | 262 | /// Gets stats for a single file 263 | pub fn from_file(filename: &str) -> ClocResult { 264 | let file_path = Path::new(filename); 265 | if file_path.extension().unwrap().to_str().unwrap() != "rs" { 266 | return Err("Not a rust file".to_owned()); 267 | } 268 | let mut f = File::open(filename).expect("Couldn't open file"); 269 | 270 | let mut c = ClocStats::new(PathBuf::from(filename)); 271 | c.cloc_file(&mut f); 272 | Ok(c) 273 | } 274 | 275 | /// Aggregates stats for an entire directory 276 | pub fn from_directory(dir: &str) -> ClocResult { 277 | let mut c = ClocStats::new(PathBuf::from(dir)); 278 | let mut subdirs = vec!(); 279 | subdirs.push(fs::read_dir(&Path::new(dir)).unwrap()); 280 | 281 | while !subdirs.is_empty(){ 282 | let paths = subdirs.pop(); 283 | for p in paths.unwrap() { 284 | let p = p.unwrap(); 285 | if p.file_type().unwrap().is_dir(){ 286 | if p.path().to_str().unwrap().contains(".git") {continue} 287 | //TODO ignore git 288 | subdirs.push(fs::read_dir(p.path()).unwrap()); 289 | } else { 290 | if p.path().extension().unwrap_or_default() == "rs" { 291 | c.cloc_file(&mut File::open(p.path()).expect("Couldn't open file")); 292 | } 293 | } 294 | } 295 | } 
296 | 297 | Ok(c) 298 | 299 | } 300 | 301 | // TODO compare performance with BufReader -- isolate read for benchmarking 302 | // TODO will if/else work better than continue? 303 | fn cloc_file(&mut self, f: &mut File) { 304 | self.files += 1; 305 | let mut contents = String::new(); 306 | 307 | // track brackets for unsafe blocks, fns etc 308 | let mut bracket_count = 0; 309 | // track comment flag 310 | let mut comment_flag = false; // handles ```...``` 311 | let mut block_flag = false; //not totally sure if i need 2 flags? might be able to reuse 312 | 313 | f.read_to_string(&mut contents).expect( 314 | "something went wrong reading the file", 315 | ); 316 | 317 | // TODO could probably split into methods if i store the flag/count in the struct 318 | for line in contents.lines() { 319 | let contains = REGEXES.matches(line); 320 | // skip content lines 321 | if contains.matched(0) { 322 | self.comment += 1; 323 | comment_flag = !comment_flag; 324 | continue; 325 | } 326 | if contains.matched(1) { 327 | self.comment += 1; 328 | continue; 329 | } 330 | //skip blank lines 331 | if line.len() == 0 { 332 | self.blank += 1; 333 | continue; 334 | } 335 | self.code += 1; 336 | if block_flag { 337 | if line.contains("{") { 338 | bracket_count += 1; 339 | } 340 | if line.contains("}") { 341 | bracket_count -= 1; 342 | } 343 | if bracket_count == 0 { 344 | block_flag = false; 345 | } else { 346 | self.num_unsafe += 1 347 | } 348 | } 349 | if contains.matched(3) { 350 | self.num_unsafe += 1; //TODO is this always a 1 liner 351 | } 352 | if contains.matched(2) { 353 | self.total_fns += 1; 354 | if line.contains("unsafe") { 355 | block_flag = true; 356 | bracket_count += 1; 357 | self.unsafe_fns += 1; 358 | } 359 | } else if contains.matched(4) { 360 | self.num_unsafe += 1; 361 | } else if contains.matched(5) { 362 | block_flag = true; 363 | bracket_count += 1; 364 | } 365 | if contains.matched(6) { 366 | self.panics += 1; 367 | } 368 | } 369 | 370 | } 371 | 372 | /// Compute 
ratio of unsafe code to total code 373 | pub fn unsafe_ratio(&self) -> f64 { 374 | match self.code { 375 | 0 => 0.0, 376 | _ => self.num_unsafe as f64 / self.code as f64 377 | } 378 | } 379 | } 380 | 381 | impl fmt::Display for ClocStats { 382 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 383 | write!( 384 | f, 385 | "{}, {}, {}, {}, {}, {}, {}, {}", 386 | self.num_unsafe, 387 | self.unsafe_fns, 388 | self.total_fns, 389 | self.blank, 390 | self.comment, 391 | self.files, 392 | self.code, 393 | self.panics 394 | ) 395 | } 396 | } 397 | 398 | #[cfg(test)] 399 | mod tests { 400 | use super::*; 401 | 402 | #[test] 403 | fn it_works() { 404 | let c = ClocStats::from_file("./resources/test.rs").unwrap(); 405 | assert_eq!(c.to_vec(), vec!(1, 5, 5, 25, 9, 3, 1, 0) ); 406 | } 407 | } 408 | --------------------------------------------------------------------------------