├── Cargo.toml
├── README.md
├── examples
    ├── cloc.rs
    ├── cloc_git.rs
    └── stylo.rs
├── resources
    └── test.rs
└── src
    └── lib.rs


/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "unsafe-unicorn"
 3 | version = "0.0.1"
 4 | authors = ["ddh <dianehosfelt@gmail.com>"]
 5 | description = "library for analyzing usage of unsafe code in rust projects"
 6 | readme = "./README.md"
 7 | keywords = ["unsafe"]
 8 | categories = ["text-processing"]
 9 | repository = "https://github.com/avadacatavra/unsafe-unicorn"
10 | license = "MPL-2.0"
11 | 
12 | [dependencies]
13 | lazy_static = "0.2"
14 | regex = "0.2"
15 | prettytable-rs = "^0.6"
16 | 
17 | [dev-dependencies]
18 | git2 = "0.6"
19 | 
20 | [[example]]
21 | name = "cloc"
22 | 
23 | [[example]]
24 | name = "cloc_git"
25 | 
26 | [[example]]
27 | name = "stylo"


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | A crate for analyzing the usage of unsafe code in Rust, based on [cloc-rust](https://github.com/avadacatavra/cloc-rust).
 2 | 
 3 | Currently, the analysis is purely textual: each source line is classified with regular expressions, so the reported counts are approximate. In the future, this could be expanded to use the AST for further analysis.
 4 | 
 5 | For more information on unsafe code:
 6 | - [Meet Safe and Unsafe](https://doc.rust-lang.org/nomicon/meet-safe-and-unsafe.html)
 7 | - [Rust book](https://doc.rust-lang.org/book/second-edition/ch19-01-unsafe-rust.html)
 8 | - [Rust by Example](https://rustbyexample.com/unsafe.html)
 9 | 
10 | 
11 | 
12 | ## Example
13 | 
14 | ```rust
15 | extern crate unsafe_unicorn;
16 | 
17 | use unsafe_unicorn::{ClocStats, Cloc, ClocVerbosity};
18 | 
19 | fn main() {
20 | 
21 |     // Get the stats for a single file
22 |     let c = ClocStats::from_file("./resources/test.rs").unwrap();
23 |     println!("{}", c);
24 | 
25 |     // Get the stats for the resources directory
26 |     let mut cloc = Cloc::new();
27 |     cloc.analyze_dir("./resources").unwrap();
28 |     println!("{}", cloc);
29 | 
30 |     // Change the verbosity to be file based and then get stats file by file for resources dir
31 |     cloc.set_verbose(ClocVerbosity::File);
32 |     cloc.analyze_dir("./resources").unwrap();
33 |     println!("{}", cloc)
34 | 
35 | }
36 | ```
37 | 
38 | More examples are available in `examples/`
39 | 
40 | 
41 | ## TODO
42 | - [ ] make PR for [tokei](https://github.com/Aaronepower/tokei/tree/master/src)
43 | - [ ] add dependency analysis
44 | - [ ] expand tests
45 | - [ ] add docs
46 | - [ ] clean up code
47 | - [ ] look for c-like array iteration?
48 | 


--------------------------------------------------------------------------------
/examples/cloc.rs:
--------------------------------------------------------------------------------
 1 | extern crate unsafe_unicorn;
 2 | 
 3 | use unsafe_unicorn::{ClocStats, Cloc, ClocVerbosity};
 4 | 
 5 | 
 6 | fn main() {
 7 | 
 8 |     // Get the stats for a single file
 9 |     let c = ClocStats::from_file("./resources/test.rs").unwrap();
10 |     println!("{}", c);
11 | 
12 |     // Get the stats for the resources directory
13 |     let mut cloc = Cloc::new();
14 |     cloc.analyze_dir("./resources").unwrap();
15 |     println!("{}", cloc);
16 | 
17 |     // Add the stats for the source directory
18 |     // FIXME it's counting the unsafe regexes I think
19 |     cloc.analyze_dir("./src").unwrap();
20 |     println!("{}", cloc);
21 | 
22 |     // Change the verbosity to be file based and then get stats file by file for resources dir
23 |     cloc.set_verbose(ClocVerbosity::File);
24 |     cloc.analyze_dir("./resources").unwrap();
25 |     println!("{}", cloc)
26 | 
27 | }
28 | 


--------------------------------------------------------------------------------
/examples/cloc_git.rs:
--------------------------------------------------------------------------------
 1 | extern crate unsafe_unicorn;
 2 | extern crate git2;
 3 | 
 4 | use git2::Repository;
 5 | use std::fs;
 6 | use unsafe_unicorn::Cloc;
 7 | 
 8 | fn git_example(url: &str) {
 9 |     // clone into a temporary repository
10 |     Repository::clone(url, "./cloc-git-tmp").unwrap();
11 | 
12 |     let mut c = Cloc::new();
13 |     c.analyze_dir("./cloc-git-tmp").unwrap();
14 |     println!("{}", c);
15 |     fs::remove_dir_all("./cloc-git-tmp").unwrap();
16 | }
17 | 
18 | fn main() {
19 |     // let servo_url = "https://github.com/servo/servo.git";
20 |     // let rust_url = "https://github.com/rust-lang/rust";
21 |     let wr_url = "https://github.com/servo/webrender";
22 | 
23 |     git_example(wr_url);
24 | }
25 | 


--------------------------------------------------------------------------------
/examples/stylo.rs:
--------------------------------------------------------------------------------
 1 | extern crate unsafe_unicorn;
 2 | 
 3 | use unsafe_unicorn::{Cloc, ClocVerbosity};
 4 | 
 5 | // what files account for the unsafety?
 6 | // pull out the top 5 files for each directory analyzed in main
 7 | fn analyze_files(dir: &str) {
 8 |     let mut cloc = Cloc::new();
 9 |     cloc.set_verbose(ClocVerbosity::File);
10 |     cloc.analyze_dir(dir).unwrap();
11 | 
12 |     let top_cloc = cloc.top_unsafe(5);
13 | 
14 |     if top_cloc.len() > 0 {
15 |         println!("{}", top_cloc)
16 |     } else {
17 |         println!("Nothing unsafe here!");
18 |     }
19 | 
20 | }
21 | 
22 | fn main() {
23 | 
24 |     let mut cloc = Cloc::new();
25 |     cloc.analyze_dir("/Users/ddh/mozilla/stylo").unwrap();
26 |     println!("{}", cloc);
27 | 
28 |     for s in cloc.stats() {
29 |         println!("Top unsafe files for {:?}", s.name());
30 |         analyze_files(s.name().to_str().unwrap())
31 |     }
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/resources/test.rs:
--------------------------------------------------------------------------------
 1 | use std::vec;
 2 | 
 3 | pub fn foo() {
 4 |  let x = vec!(0, 1, 2, 3, 4, 5, 6, 7);
 5 |  let y = 0;
 6 |  for ele in x.iter() {
 7 |      y += ele
 8 |  }
 9 | }
10 | 
11 | pub unsafe fn unsafe_foo() {
12 |  let x = vec!(0, 1, 2, 3, 4, 5, 6, 7);
13 |  let y = 0;
14 |  for ele in x.iter() {
15 |      y += ele
16 |  }
17 | 
18 | }
19 | 
20 | // comment test
21 | pub fn bar() {
22 |     let x = vec!(0, 1, 2, 3, 4, 5, 6, 7);
23 | 
24 |     unsafe {
25 |         let y = 0;
26 |         for ele in x.iter() {
27 |             y += ele
28 |         }
29 |     }
30 | }
31 | /* total functions: 3
32 |  * not safe functions: 1
33 |  * not safe blocks: 1
34 |  * total lines of not safe: 9 (counting the line with close bracket)
35 |  */
36 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | #[macro_use] extern crate lazy_static;
  2 | extern crate regex;
  3 | 
  4 | use std::fmt;
  5 | use std::io::{self, Read};
  6 | use std::path::{Path, PathBuf};
  7 | use std::fs::{self,File};
  8 | use std::result::Result::Err;
  9 | use regex::RegexSet;
 10 | 
/// Result of building a `ClocStats`; the error side is a plain message string.
type ClocResult = Result<ClocStats, String>;
 12 | 
 13 | //TODO dependency analysis
 14 | //TODO maybe use pretty table?
 15 | //TODO expand unit tests
 16 | 
// Line classifiers used by `ClocStats::cloc_file`. That function refers to the
// patterns by their position in this list (`matched(0)` .. `matched(6)`), so
// the order below must not change.
lazy_static!{
  static ref REGEXES: RegexSet = RegexSet::new(&[
    r"```",                     // 0: line containing a ``` fence; counted as a comment line
    r"^//|^\s/\*|^\s\*|^\s\*/", // 1: comment: line starting with `//`, or with one whitespace char followed by `/*` or `*`
    r"\s*fn\s+[a-zA-Z_]*",      // 2: function declaration (`fn` followed by whitespace)
    r"\s*unsafe impl.*for.*",   // 3: unsafe impl
    r"\s*unsafe\s*\{.*\}",      // 4: unsafe block opened and closed on the same line
    r".*unsafe\s*\{",           // 5: line that opens an unsafe block
    r"panic",                   // 6: any occurrence of the text `panic`
  ]).unwrap();
}
 28 | 
/// Directory names that `Cloc::analyze_dir` does not descend into (version
/// control data, tests, examples and benches). Each entry is compared against
/// the final path component of a subdirectory.
lazy_static!{
    static ref EXCLUDE: Vec<&'static str> = vec!(
        ".git",
        "tests",
        "examples",
        "benches"
        );
}
 38 | 
/// Determines how statistics are grouped when a directory is analyzed and
/// displayed.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum ClocVerbosity {
    /// Show unsafe info file by file: every `.rs` file gets its own entry.
    File,
    /// (default) Show info 'crate' by 'crate': a new entry is started at each
    /// directory that contains a `Cargo.toml`.
    Crate,
    /// Combine all subdirectory stats into one top-level entry.
    TopLevel,
}
 49 | 
/// The struct callers are meant to interact with: set the verbosity, call it
/// on a path, and it works out how to split the gathered data into
/// `ClocStats` entries.
#[derive(Debug)]
pub struct Cloc {
    // grouping level applied by `analyze_dir`
    verbose: ClocVerbosity,
    // gathered entries; what one entry covers depends on `verbose`
    stats: Vec<ClocStats>,
}
 57 | 
 58 | impl Cloc {
 59 |     pub fn new() -> Cloc {
 60 |         Cloc {
 61 |             verbose: ClocVerbosity::Crate,
 62 |             stats: vec!(),
 63 |         }
 64 |     }
 65 | 
 66 |     pub fn stats(&self) -> &Vec<ClocStats> {
 67 |         &self.stats
 68 |     }
 69 | 
 70 |     pub fn set_verbose(&mut self, level: ClocVerbosity) {
 71 |         self.verbose = level;
 72 |     }
 73 | 
 74 |     pub fn add_stats(&mut self, stats: ClocStats) {
 75 |         self.stats.push(stats);
 76 |     }
 77 | 
 78 |     pub fn clear_stats(&mut self) {
 79 |         self.stats.clear()
 80 |     }
 81 | 
 82 |     pub fn len(&self) -> usize {
 83 |         self.stats.len()
 84 |     }
 85 | 
 86 |     pub fn analyze_dir(&mut self, dir: &str) -> Result<(), io::Error> {
 87 | 
 88 |         let mut c = ClocStats::new(PathBuf::from(dir));
 89 |         let mut subdirs = vec!();
 90 |         subdirs.push((dir.to_owned(), fs::read_dir(&Path::new(dir))?));
 91 | 
 92 |         while !subdirs.is_empty(){
 93 |             let (dir_name, paths) = subdirs.pop().unwrap();
 94 | 
 95 |             // when you switch subdirectories, check to see if you need a new CLocStats
 96 |             if PathBuf::from(&dir_name).join("Cargo.toml").exists() && self.verbose == ClocVerbosity::Crate {
 97 |                     if !(c.is_empty()) {
 98 |                         self.add_stats(c.clone());
 99 |                     }
100 |                     c = ClocStats::new(PathBuf::from(dir_name));
101 |                 }
102 | 
103 |             for p in paths {
104 |                 let p = p.unwrap();
105 |                 
106 |                 if p.file_type().unwrap().is_dir(){
107 |                     if !(EXCLUDE.contains(&p.path().file_name().unwrap().to_str().unwrap())) {
108 |                         let ppath = p.path();
109 |                         let subdir_name = ppath.to_str().unwrap();
110 |                         subdirs.push((subdir_name.to_owned(), fs::read_dir(subdir_name).unwrap()));
111 |                     }
112 |                 } else {
113 |                     if p.path().extension().unwrap_or_default() == "rs" {
114 |                         match self.verbose {
115 |                             ClocVerbosity::File => {
116 |                                 let path = p.path();
117 |                                 let c = ClocStats::from_file(path.to_str().unwrap()).unwrap();
118 |                                 self.add_stats(c);
119 |                             },
120 |                             _ => c.cloc_file(&mut File::open(p.path()).expect("Couldn't open file")),
121 |                         
122 |                         };
123 | 
124 |                     }
125 |                 }
126 |             }
127 | 
128 |         }
129 |         if !(c.is_empty()) {
130 |                 self.add_stats(c.clone());
131 |         }
132 |         Ok(())
133 |     }
134 | 
135 |     pub fn sort_stats(&mut self) {
136 |         self.stats.sort_by(|a, b| {
137 |             b.unsafe_ratio().partial_cmp(&a.unsafe_ratio()).unwrap()
138 |         });
139 |     }
140 | 
141 |     // returns a Cloc object to make output better
142 |     pub fn top_unsafe(&mut self, num: usize) -> Cloc {
143 |         let mut c = Cloc::new();
144 |         c.set_verbose(self.verbose);
145 | 
146 |         self.sort_stats();
147 |         for s in self.stats.iter() {
148 |             if c.len() == num {
149 |                 break;
150 |             }
151 |             if s.num_unsafe > 0 {
152 |                 c.add_stats(s.clone());
153 |             }
154 |         }
155 |         c
156 |     }
157 | }
158 | 
159 | impl fmt::Display for Cloc {
160 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
161 |         let header = ["\t", "#files", "blank", "comment", "code", "unsafe", "%unsafe", 
162 |                       "#fns", "#unsafe fns", "%unsafe fns", "#panics"];
163 |         for h in header.iter() {
164 |             write!(f, "{}\t", h)?;
165 |         }
166 |         write!(f, "\n")?;
167 |         for s in &self.stats {
168 |             write!(f, "{}\t", s.name().file_name().unwrap().to_str().unwrap())?;
169 |             for val in s.summarize(){
170 |                 match val {
171 |                     SummaryType::Ratio(x) => write!(f, "{:.*}\t", 2, x)?,
172 |                     SummaryType::Int(x) => write!(f, "{}\t", x)?,
173 |                 };
174 |             }
175 |             write!(f, "\n")?;
176 | 
177 |         }
178 |         Ok(())
179 |     }
180 | }
181 | 
/// Counters gathered for one file or for one group of files.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ClocStats {
    // path of the file or directory these counters describe
    name: PathBuf,
    /// Number of code lines counted as unsafe.
    pub num_unsafe: usize,
    // function declarations whose line also contains `unsafe`
    unsafe_fns: usize,
    // all function declarations
    total_fns: usize,
    // empty lines
    blank: usize,
    // lines classified as comments
    comment: usize,
    // number of files folded into these counters
    files: usize,
    // lines that are neither comment nor blank
    code: usize,
    // code lines containing the text `panic`
    panics: usize,
}
194 | 
/// Helper type that lets `ClocStats::summarize` store counts and percentages
/// side by side in one `Vec`.
#[derive(Debug, PartialEq)]
pub enum SummaryType {
    /// A percentage value.
    Ratio(f64),
    /// A plain count.
    Int(usize)
}
201 | 
202 | impl ClocStats {
203 |     pub fn new(dir_name: PathBuf) -> ClocStats {
204 |         ClocStats {
205 |             name: dir_name.to_owned(),
206 |             num_unsafe: 0,
207 |             unsafe_fns: 0,
208 |             total_fns: 0,
209 |             blank: 0,
210 |             comment: 0,
211 |             files: 0,
212 |             code: 0,
213 |             panics: 0,
214 |         }
215 | 
216 |     }
217 | 
218 |     pub fn name(&self) -> &PathBuf {
219 |         &self.name
220 |     }
221 | 
222 |     pub fn count_fns(&self) -> usize {
223 |         self.total_fns
224 |     }
225 | 
226 |     pub fn count_unsafe_fns(&self) -> usize {
227 |         self.unsafe_fns
228 |     }
229 | 
230 |     pub fn to_vec(&self) -> Vec<usize> {
231 |         vec!(self.files, self.blank, self.comment, self.code, 
232 |              self.num_unsafe, self.total_fns, self.unsafe_fns, self.panics)
233 |     }
234 | 
235 |     // Consider empty if there haven't been any functions
236 |     pub fn is_empty(&self) -> bool {
237 |         !(self.total_fns > 0)
238 |     }
239 | 
240 |     pub fn summarize(&self) -> Vec<SummaryType> {
241 |         let mut unsafe_ratio = self.num_unsafe as f64 / self.code as f64 * 100.0;
242 |         let mut fn_ratio = self.unsafe_fns as f64 / self.total_fns as f64 * 100.0;
243 |         if unsafe_ratio.is_nan() {
244 |             unsafe_ratio = 0.0;
245 |         }
246 |         if fn_ratio.is_nan() {
247 |             fn_ratio = 0.0;
248 |         }
249 |         vec!(
250 |             SummaryType::Int(self.files), 
251 |             SummaryType::Int(self.blank),
252 |             SummaryType::Int(self.comment),
253 |             SummaryType::Int(self.code),
254 |             SummaryType::Int(self.num_unsafe),
255 |             SummaryType::Ratio(unsafe_ratio),
256 |             SummaryType::Int(self.total_fns),
257 |             SummaryType::Int(self.unsafe_fns),
258 |             SummaryType::Ratio(fn_ratio), 
259 |             SummaryType::Int(self.panics))
260 |     }
261 | 
262 |     /// Gets stats for a single file
263 |     pub fn from_file(filename: &str) -> ClocResult {
264 |         let file_path = Path::new(filename);
265 |         if file_path.extension().unwrap().to_str().unwrap() != "rs" {
266 |             return Err("Not a rust file".to_owned());
267 |         } 
268 |         let mut f = File::open(filename).expect("Couldn't open file");
269 | 
270 |         let mut c = ClocStats::new(PathBuf::from(filename));
271 |         c.cloc_file(&mut f);
272 |         Ok(c)
273 |     }
274 | 
275 |     /// Aggregates stats for an entire directory
276 |     pub fn from_directory(dir: &str) -> ClocResult {
277 |         let mut c = ClocStats::new(PathBuf::from(dir));
278 |         let mut subdirs = vec!();
279 |         subdirs.push(fs::read_dir(&Path::new(dir)).unwrap());
280 | 
281 |         while !subdirs.is_empty(){
282 |             let paths = subdirs.pop();
283 |             for p in paths.unwrap() {
284 |                 let p = p.unwrap();
285 |                 if p.file_type().unwrap().is_dir(){
286 |                     if p.path().to_str().unwrap().contains(".git") {continue}
287 |                     //TODO ignore git
288 |                     subdirs.push(fs::read_dir(p.path()).unwrap());
289 |                 } else {
290 |                     if p.path().extension().unwrap_or_default() == "rs" {
291 |                         c.cloc_file(&mut File::open(p.path()).expect("Couldn't open file"));
292 |                     }
293 |                 }
294 |             }
295 |         }
296 | 
297 |         Ok(c)
298 | 
299 |     }
300 | 
301 |     // TODO compare performance with BufReader -- isolate read for benchmarking
302 |     // TODO will if/else work better than continue?
303 |     fn cloc_file(&mut self, f: &mut File) {
304 |         self.files += 1;
305 |         let mut contents = String::new();
306 | 
307 |         // track brackets for unsafe blocks, fns etc
308 |         let mut bracket_count = 0;
309 |         // track comment flag
310 |         let mut comment_flag = false; // handles ```...```
311 |         let mut block_flag = false; //not totally sure if i need 2 flags? might be able to reuse
312 | 
313 |         f.read_to_string(&mut contents).expect(
314 |             "something went wrong reading the file",
315 |         );
316 | 
317 |         // TODO could probably split into methods if i store the flag/count in the struct
318 |         for line in contents.lines() {
319 |             let contains = REGEXES.matches(line);
320 |             // skip content lines
321 |             if contains.matched(0) {
322 |                 self.comment += 1;
323 |                 comment_flag = !comment_flag;
324 |                 continue;
325 |             }
326 |             if contains.matched(1) {
327 |                 self.comment += 1;
328 |                 continue;
329 |             }
330 |             //skip blank lines
331 |             if line.len() == 0 {
332 |                 self.blank += 1;
333 |                 continue;
334 |             }
335 |             self.code += 1;
336 |             if block_flag {
337 |                 if line.contains("{") {
338 |                     bracket_count += 1;
339 |                 }
340 |                 if line.contains("}") {
341 |                     bracket_count -= 1;
342 |                 }
343 |                 if bracket_count == 0 {
344 |                     block_flag = false;
345 |                 } else {
346 |                     self.num_unsafe += 1
347 |                 }
348 |             }
349 |             if contains.matched(3) {
350 |                 self.num_unsafe += 1;   //TODO is this always a 1 liner
351 |             }
352 |             if contains.matched(2) {
353 |                 self.total_fns += 1;
354 |                 if line.contains("unsafe") {
355 |                     block_flag = true;
356 |                     bracket_count += 1;
357 |                     self.unsafe_fns += 1;
358 |                 }
359 |             } else if contains.matched(4) {
360 |                 self.num_unsafe += 1;
361 |             } else if contains.matched(5) {
362 |                 block_flag = true;
363 |                 bracket_count += 1;
364 |             }
365 |             if contains.matched(6) {
366 |                 self.panics += 1;
367 |             }
368 |         }
369 | 
370 |     }
371 | 
372 |     /// Compute ratio of unsafe code to total code
373 |     pub fn unsafe_ratio(&self) -> f64 {
374 |         match self.code {
375 |             0 => 0.0,
376 |             _ => self.num_unsafe as f64 / self.code as f64
377 |         }
378 |     }
379 | }
380 | 
381 | impl fmt::Display for ClocStats {
382 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
383 |         write!(
384 |             f,
385 |             "{}, {}, {}, {}, {}, {}, {}, {}",
386 |             self.num_unsafe,
387 |             self.unsafe_fns,
388 |             self.total_fns,
389 |             self.blank,
390 |             self.comment,
391 |             self.files,
392 |             self.code,
393 |             self.panics
394 |         )
395 |     }
396 | }
397 | 
#[cfg(test)]
mod tests {
    use super::ClocStats;

    /// Checks the counters for the bundled fixture, in `to_vec` order:
    /// files, blank, comment, code, unsafe lines, fns, unsafe fns, panics.
    #[test]
    fn it_works() {
        let expected: Vec<usize> = vec![1, 5, 5, 25, 9, 3, 1, 0];
        let stats = ClocStats::from_file("./resources/test.rs").unwrap();
        assert_eq!(stats.to_vec(), expected);
    }
}
408 | 


--------------------------------------------------------------------------------