├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── definitions.toml ├── src ├── git.rs └── main.rs └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | /target/ 3 | **/*.rs.bk 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sourcesecrets" 3 | version = "0.1.0" 4 | authors = ["Lander Brandt "] 5 | 6 | [dependencies] 7 | csv = "1.0.0-beta.5" 8 | clap = "2.29" 9 | regex = "1" 10 | toml = "0.4" 11 | serde = "1.0" 12 | serde_derive = "1.0" 13 | serde_json = "1.0" 14 | pbr = "1.0" 15 | base64 = "0.9" 16 | scopeguard = "0.3" 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Lander Brandt 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sourcesecrets 2 | 3 | A tool for finding patterns of text in Git history 4 | 5 | ## Rationale 6 | 7 | Repository maintainers sometimes do not fully consider the impact of the "secret" they are redacting or may not realize they have committed the removal of a secret from source code without fully redacting it or mitigating the issue. `sourcesecrets` allows you to provide a file extension or regular expression to search for in every single commit ever made to a repository, in order to find secrets left in source code history. 8 | 9 | ## Usage 10 | 11 | Example 1: 12 | 13 | ``` 14 | sourcesecrets -o secrets.csv repos/*/ 15 | ``` 16 | 17 | Example 2: 18 | 19 | ``` 20 | sourcesecrets -o secrets.csv -d definitions.private.toml repo_path 21 | ``` 22 | 23 | ## Defining patterns 24 | 25 | Patterns you want to have hits on need to be defined in a TOML file and either placed in the application's executable directory or provided with the `-d/--definitions` flag on the command line. An example definitions file looks like so: 26 | 27 | ```toml 28 | [[patterns]] 29 | description = "Password in code" 30 | pattern = "Password = \"[^\"]+\"[^;]+" 31 | 32 | [[files]] 33 | description = "Private key file" 34 | extension = "pfx" 35 | binary = true 36 | 37 | [[filters]] 38 | description = "Remove bad hits in documentation" 39 | pattern = "" 40 | ``` 41 | 42 | The patterns section defines content patterns to hit on, files are file extensions to match on, and filters are negative patterns for any content pattern match.
43 | 44 | A [definitions.toml](https://github.com/landaire/sourcesecrets/blob/master/definitions.toml) file useful for ASP.NET repositories has already been provided. 45 | 46 | ## Improvements to be made 47 | 48 | 1. The `git` utility is invoked for *every* commit to get contents and other details. Using some `libgit2` bindings or another library may provide benefits over the overhead of invoking a new process for every commit. 49 | 2. Add a "deleted-log" command that simply logs all files that were deleted 50 | -------------------------------------------------------------------------------- /definitions.toml: -------------------------------------------------------------------------------- 1 | [[patterns]] 2 | description = "Password properties" 3 | pattern = "Password = \"[^\"]+\"[^;]+" 4 | 5 | [[patterns]] 6 | description = "Password nodes" 7 | pattern = "description=\"[^\"]+Password\"" 8 | 9 | [[patterns]] 10 | description = "Machine keys useful for RCE" 11 | pattern = ">, 18 | } 19 | 20 | pub struct GitClient { 21 | pub repo_path: String, 22 | } 23 | 24 | impl GitClient { 25 | pub fn new(repo_path: String) -> GitClient { 26 | GitClient { repo_path } 27 | } 28 | 29 | pub fn get_commits(&self, since_date: Option<&str>, until_date: Option<&str>) -> Vec { 30 | let mut args: Vec = vec![ 31 | "log".to_string(), 32 | "--format=%H %aI".to_string(), 33 | "--branches=*".to_string(), 34 | ]; 35 | 36 | if let Some(date) = since_date { 37 | // could totally do command injection here 38 | args.push(format!("--since=\"{}\"", date)); 39 | } 40 | 41 | if let Some(date) = until_date { 42 | args.push(format!("--until=\"{}\"", date)); 43 | } 44 | 45 | let result = self.exec(args.as_slice()); 46 | 47 | str::lines(&String::from_utf8(result.stdout).unwrap()) 48 | .map(|l| { 49 | let mut parts = l.split_whitespace(); 50 | Commit { 51 | hash: parts.next().unwrap().to_string(), 52 | date: parts.next().unwrap().to_string(), 53 | client: None, 54 | } 55 | }).collect::>() 56 | } 57 | 58 | 
pub fn get_commit_content(&self, commit: &Commit) -> String { 59 | let args = vec![ 60 | "diff".to_string(), 61 | "-U0".to_string(), 62 | format!("{}^!", commit.hash), 63 | ]; 64 | String::from_utf8_lossy(&self.exec(&args).stdout).into_owned() 65 | } 66 | 67 | pub fn get_file_at_commit(&self, commit: &str, filename: Option<&String>) -> Vec { 68 | let commit = match filename { 69 | Some(path) => format!("{}:{}", commit.to_string(), path), 70 | None => commit.to_string(), 71 | }; 72 | let args = vec!["show".to_string(), commit]; 73 | let output = self.exec(&args); 74 | // if output.stderr.len() != 0 { 75 | // eprintln!("{}", String::from_utf8_lossy(&output.stderr)); 76 | // } 77 | 78 | output.stdout 79 | } 80 | 81 | pub fn get_file_names_for_commit(&self, commit: &Commit) -> VecDeque { 82 | let args = vec![ 83 | "diff".to_string(), 84 | "--name-only".to_string(), 85 | commit.hash.clone(), 86 | ]; 87 | let output = self.exec(&args); 88 | str::lines(&String::from_utf8_lossy(&output.stdout).into_owned()) 89 | .map(|line| line.to_string()) 90 | .collect() 91 | } 92 | 93 | fn exec(&self, args: &[String]) -> Output { 94 | Command::new("git") 95 | .args(args) 96 | .current_dir(&self.repo_path) 97 | .output() 98 | .expect("failed to execute git") 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "aho-corasick" 3 | version = "0.6.4" 4 | source = "registry+https://github.com/rust-lang/crates.io-index" 5 | dependencies = [ 6 | "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", 7 | ] 8 | 9 | [[package]] 10 | name = "ansi_term" 11 | version = "0.10.2" 12 | source = "registry+https://github.com/rust-lang/crates.io-index" 13 | 14 | [[package]] 15 | name = "atty" 16 | version = "0.2.6" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | dependencies = [ 19 | "libc 
0.2.36 (registry+https://github.com/rust-lang/crates.io-index)", 20 | "termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)", 21 | "winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", 22 | ] 23 | 24 | [[package]] 25 | name = "base64" 26 | version = "0.9.0" 27 | source = "registry+https://github.com/rust-lang/crates.io-index" 28 | dependencies = [ 29 | "byteorder 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 30 | "safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", 31 | ] 32 | 33 | [[package]] 34 | name = "bitflags" 35 | version = "1.0.1" 36 | source = "registry+https://github.com/rust-lang/crates.io-index" 37 | 38 | [[package]] 39 | name = "byteorder" 40 | version = "1.2.1" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | 43 | [[package]] 44 | name = "clap" 45 | version = "2.29.2" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | dependencies = [ 48 | "ansi_term 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)", 49 | "atty 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", 50 | "bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", 51 | "strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", 52 | "textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 53 | "unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", 54 | "vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", 55 | ] 56 | 57 | [[package]] 58 | name = "csv" 59 | version = "1.0.0-beta.5" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | dependencies = [ 62 | "csv-core 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", 63 | "serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", 64 | ] 65 | 66 | [[package]] 67 | name = "csv-core" 68 | version = "0.1.4" 69 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 70 | dependencies = [ 71 | "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", 72 | ] 73 | 74 | [[package]] 75 | name = "dtoa" 76 | version = "0.4.2" 77 | source = "registry+https://github.com/rust-lang/crates.io-index" 78 | 79 | [[package]] 80 | name = "itoa" 81 | version = "0.3.4" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | 84 | [[package]] 85 | name = "kernel32-sys" 86 | version = "0.2.2" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | dependencies = [ 89 | "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", 90 | "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", 91 | ] 92 | 93 | [[package]] 94 | name = "lazy_static" 95 | version = "1.0.0" 96 | source = "registry+https://github.com/rust-lang/crates.io-index" 97 | 98 | [[package]] 99 | name = "libc" 100 | version = "0.2.36" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | 103 | [[package]] 104 | name = "memchr" 105 | version = "2.0.1" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | dependencies = [ 108 | "libc 0.2.36 (registry+https://github.com/rust-lang/crates.io-index)", 109 | ] 110 | 111 | [[package]] 112 | name = "num-traits" 113 | version = "0.1.42" 114 | source = "registry+https://github.com/rust-lang/crates.io-index" 115 | 116 | [[package]] 117 | name = "pbr" 118 | version = "1.0.0" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | dependencies = [ 121 | "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", 122 | "libc 0.2.36 (registry+https://github.com/rust-lang/crates.io-index)", 123 | "time 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", 124 | "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", 125 | ] 126 | 127 | [[package]] 128 | name = "quote" 129 | version = "0.3.15" 130 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 131 | 132 | [[package]] 133 | name = "redox_syscall" 134 | version = "0.1.37" 135 | source = "registry+https://github.com/rust-lang/crates.io-index" 136 | 137 | [[package]] 138 | name = "redox_termios" 139 | version = "0.1.1" 140 | source = "registry+https://github.com/rust-lang/crates.io-index" 141 | dependencies = [ 142 | "redox_syscall 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", 143 | ] 144 | 145 | [[package]] 146 | name = "regex" 147 | version = "1.0.3" 148 | source = "registry+https://github.com/rust-lang/crates.io-index" 149 | dependencies = [ 150 | "aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", 151 | "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", 152 | "regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", 153 | "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", 154 | "utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 155 | ] 156 | 157 | [[package]] 158 | name = "regex-syntax" 159 | version = "0.6.2" 160 | source = "registry+https://github.com/rust-lang/crates.io-index" 161 | dependencies = [ 162 | "ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", 163 | ] 164 | 165 | [[package]] 166 | name = "safemem" 167 | version = "0.2.0" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | 170 | [[package]] 171 | name = "scopeguard" 172 | version = "0.3.3" 173 | source = "registry+https://github.com/rust-lang/crates.io-index" 174 | 175 | [[package]] 176 | name = "serde" 177 | version = "1.0.27" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | 180 | [[package]] 181 | name = "serde_derive" 182 | version = "1.0.27" 183 | source = "registry+https://github.com/rust-lang/crates.io-index" 184 | dependencies = [ 185 | "quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", 
186 | "serde_derive_internals 0.19.0 (registry+https://github.com/rust-lang/crates.io-index)", 187 | "syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)", 188 | ] 189 | 190 | [[package]] 191 | name = "serde_derive_internals" 192 | version = "0.19.0" 193 | source = "registry+https://github.com/rust-lang/crates.io-index" 194 | dependencies = [ 195 | "syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)", 196 | "synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)", 197 | ] 198 | 199 | [[package]] 200 | name = "serde_json" 201 | version = "1.0.9" 202 | source = "registry+https://github.com/rust-lang/crates.io-index" 203 | dependencies = [ 204 | "dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", 205 | "itoa 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", 206 | "num-traits 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", 207 | "serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", 208 | ] 209 | 210 | [[package]] 211 | name = "sourcesecrets" 212 | version = "0.1.0" 213 | dependencies = [ 214 | "base64 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 215 | "clap 2.29.2 (registry+https://github.com/rust-lang/crates.io-index)", 216 | "csv 1.0.0-beta.5 (registry+https://github.com/rust-lang/crates.io-index)", 217 | "pbr 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 218 | "regex 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", 219 | "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", 220 | "serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", 221 | "serde_derive 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", 222 | "serde_json 1.0.9 (registry+https://github.com/rust-lang/crates.io-index)", 223 | "toml 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", 224 | ] 225 | 226 | [[package]] 227 | name = "strsim" 228 | version = "0.6.0" 229 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 230 | 231 | [[package]] 232 | name = "syn" 233 | version = "0.11.11" 234 | source = "registry+https://github.com/rust-lang/crates.io-index" 235 | dependencies = [ 236 | "quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", 237 | "synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)", 238 | "unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)", 239 | ] 240 | 241 | [[package]] 242 | name = "synom" 243 | version = "0.11.3" 244 | source = "registry+https://github.com/rust-lang/crates.io-index" 245 | dependencies = [ 246 | "unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)", 247 | ] 248 | 249 | [[package]] 250 | name = "termion" 251 | version = "1.5.1" 252 | source = "registry+https://github.com/rust-lang/crates.io-index" 253 | dependencies = [ 254 | "libc 0.2.36 (registry+https://github.com/rust-lang/crates.io-index)", 255 | "redox_syscall 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", 256 | "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", 257 | ] 258 | 259 | [[package]] 260 | name = "textwrap" 261 | version = "0.9.0" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | dependencies = [ 264 | "unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", 265 | ] 266 | 267 | [[package]] 268 | name = "thread_local" 269 | version = "0.3.6" 270 | source = "registry+https://github.com/rust-lang/crates.io-index" 271 | dependencies = [ 272 | "lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 273 | ] 274 | 275 | [[package]] 276 | name = "time" 277 | version = "0.1.39" 278 | source = "registry+https://github.com/rust-lang/crates.io-index" 279 | dependencies = [ 280 | "libc 0.2.36 (registry+https://github.com/rust-lang/crates.io-index)", 281 | "redox_syscall 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", 282 | 
"winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", 283 | ] 284 | 285 | [[package]] 286 | name = "toml" 287 | version = "0.4.5" 288 | source = "registry+https://github.com/rust-lang/crates.io-index" 289 | dependencies = [ 290 | "serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", 291 | ] 292 | 293 | [[package]] 294 | name = "ucd-util" 295 | version = "0.1.1" 296 | source = "registry+https://github.com/rust-lang/crates.io-index" 297 | 298 | [[package]] 299 | name = "unicode-width" 300 | version = "0.1.4" 301 | source = "registry+https://github.com/rust-lang/crates.io-index" 302 | 303 | [[package]] 304 | name = "unicode-xid" 305 | version = "0.0.4" 306 | source = "registry+https://github.com/rust-lang/crates.io-index" 307 | 308 | [[package]] 309 | name = "utf8-ranges" 310 | version = "1.0.0" 311 | source = "registry+https://github.com/rust-lang/crates.io-index" 312 | 313 | [[package]] 314 | name = "vec_map" 315 | version = "0.8.0" 316 | source = "registry+https://github.com/rust-lang/crates.io-index" 317 | 318 | [[package]] 319 | name = "winapi" 320 | version = "0.2.8" 321 | source = "registry+https://github.com/rust-lang/crates.io-index" 322 | 323 | [[package]] 324 | name = "winapi" 325 | version = "0.3.4" 326 | source = "registry+https://github.com/rust-lang/crates.io-index" 327 | dependencies = [ 328 | "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 329 | "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 330 | ] 331 | 332 | [[package]] 333 | name = "winapi-build" 334 | version = "0.1.1" 335 | source = "registry+https://github.com/rust-lang/crates.io-index" 336 | 337 | [[package]] 338 | name = "winapi-i686-pc-windows-gnu" 339 | version = "0.4.0" 340 | source = "registry+https://github.com/rust-lang/crates.io-index" 341 | 342 | [[package]] 343 | name = "winapi-x86_64-pc-windows-gnu" 344 | version = "0.4.0" 345 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 346 | 347 | [metadata] 348 | "checksum aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6531d44de723825aa81398a6415283229725a00fa30713812ab9323faa82fc4" 349 | "checksum ansi_term 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6b3568b48b7cefa6b8ce125f9bb4989e52fbcc29ebea88df04cc7c5f12f70455" 350 | "checksum atty 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8352656fd42c30a0c3c89d26dea01e3b77c0ab2af18230835c15e2e13cd51859" 351 | "checksum base64 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "229d032f1a99302697f10b27167ae6d03d49d032e6a8e2550e8d3fc13356d2b4" 352 | "checksum bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b3c30d3802dfb7281680d6285f2ccdaa8c2d8fee41f93805dba5c4cf50dc23cf" 353 | "checksum byteorder 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "652805b7e73fada9d85e9a6682a4abd490cb52d96aeecc12e33a0de34dfd0d23" 354 | "checksum clap 2.29.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4151c5790817c7d21bbdc6c3530811f798172915f93258244948b93ba19604a6" 355 | "checksum csv 1.0.0-beta.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e7a9e063dcebdb56c306f23e672bfd31df3da8ec5f6d696b35f2c29c2a9572f0" 356 | "checksum csv-core 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4dd8e6d86f7ba48b4276ef1317edc8cc36167546d8972feb4a2b5fec0b374105" 357 | "checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab" 358 | "checksum itoa 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8324a32baf01e2ae060e9de58ed0bc2320c9a2833491ee36cd3b4c414de4db8c" 359 | "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" 360 | "checksum lazy_static 1.0.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "c8f31047daa365f19be14b47c29df4f7c3b581832407daabe6ae77397619237d" 361 | "checksum libc 0.2.36 (registry+https://github.com/rust-lang/crates.io-index)" = "1e5d97d6708edaa407429faa671b942dc0f2727222fb6b6539bf1db936e4b121" 362 | "checksum memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "796fba70e76612589ed2ce7f45282f5af869e0fdd7cc6199fa1aa1f1d591ba9d" 363 | "checksum num-traits 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "9936036cc70fe4a8b2d338ab665900323290efb03983c86cbe235ae800ad8017" 364 | "checksum pbr 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e048e3afebb6c454bb1c5d0fe73fda54698b4715d78ed8e7302447c37736d23a" 365 | "checksum quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a" 366 | "checksum redox_syscall 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)" = "0d92eecebad22b767915e4d529f89f28ee96dbbf5a4810d2b844373f136417fd" 367 | "checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" 368 | "checksum regex 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3d8c9f33201f46669484bacc312b00e7541bed6aaf296dffe2bb4e0ac6b8ce2a" 369 | "checksum regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "747ba3b235651f6e2f67dfa8bcdcd073ddb7c243cb21c442fc12395dfcac212d" 370 | "checksum safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e27a8b19b835f7aea908818e871f5cc3a5a186550c30773be987e155e8163d8f" 371 | "checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" 372 | "checksum serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)" = 
"db99f3919e20faa51bb2996057f5031d8685019b5a06139b1ce761da671b8526" 373 | "checksum serde_derive 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)" = "f4ba7591cfe93755e89eeecdbcc668885624829b020050e6aec99c2a03bd3fd0" 374 | "checksum serde_derive_internals 0.19.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6e03f1c9530c3fb0a0a5c9b826bdd9246a5921ae995d75f512ac917fc4dd55b5" 375 | "checksum serde_json 1.0.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c9db7266c7d63a4c4b7fe8719656ccdd51acf1bed6124b174f933b009fb10bcb" 376 | "checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694" 377 | "checksum syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad" 378 | "checksum synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a393066ed9010ebaed60b9eafa373d4b1baac186dd7e008555b0f702b51945b6" 379 | "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" 380 | "checksum textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c0b59b6b4b44d867f1370ef1bd91bfb262bf07bf0ae65c202ea2fbc16153b693" 381 | "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" 382 | "checksum time 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "a15375f1df02096fb3317256ce2cee6a1f42fc84ea5ad5fc8c421cfe40c73098" 383 | "checksum toml 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a7540f4ffc193e0d3c94121edb19b055670d369f77d5804db11ae053a45b6e7e" 384 | "checksum ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd2be2d6639d0f8fe6cdda291ad456e23629558d466e2789d2c3e9892bda285d" 385 | "checksum unicode-width 0.1.4 
(registry+https://github.com/rust-lang/crates.io-index)" = "bf3a113775714a22dcb774d8ea3655c53a32debae63a063acc00a91cc586245f" 386 | "checksum unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8c1f860d7d29cf02cb2f3f359fd35991af3d30bac52c57d265a3c461074cb4dc" 387 | "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" 388 | "checksum vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "887b5b631c2ad01628bbbaa7dd4c869f80d3186688f8d0b6f58774fbe324988c" 389 | "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" 390 | "checksum winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "04e3bd221fcbe8a271359c04f21a76db7d0c6028862d1bb5512d85e1e2eb5bb3" 391 | "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" 392 | "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 393 | "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 394 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate clap; 2 | extern crate csv; 3 | extern crate regex; 4 | extern crate toml; 5 | #[macro_use] 6 | extern crate serde_derive; 7 | extern crate base64; 8 | extern crate pbr; 9 | extern crate serde_json; 10 | #[macro_use(defer)] 11 | extern crate scopeguard; 12 | 13 | mod git; 14 | 15 | use base64::encode; 16 | use clap::{App, Arg}; 17 | use pbr::ProgressBar; 18 | use 
regex::Regex; 19 | use std::collections::VecDeque; 20 | use std::env::current_exe; 21 | use std::fs::File; 22 | use std::io::prelude::*; 23 | use std::io::{stdout, Write}; 24 | use std::iter::FromIterator; 25 | use std::path::Path; 26 | use std::process::exit; 27 | use std::str; 28 | use std::sync::atomic::{AtomicUsize, Ordering, ATOMIC_USIZE_INIT}; 29 | use std::sync::{Arc, Mutex, RwLock}; 30 | use std::thread; 31 | use std::vec::Vec; 32 | 33 | use git::{ChangeType, Commit, GitClient}; 34 | 35 | const NUM_THREADS: usize = 6; 36 | const MAX_LINE_LENGTH: usize = 5000; 37 | static mut VERBOSE: bool = false; 38 | static THREAD_DONE_COUNT: AtomicUsize = ATOMIC_USIZE_INIT; 39 | 40 | macro_rules! verbose_print( 41 | ($($arg:tt)*) => { { 42 | let mut v = Default::default(); 43 | unsafe { 44 | v = VERBOSE; 45 | } 46 | if v { 47 | let r = writeln!(&mut ::std::io::stdout(), $($arg)*); 48 | r.expect("failed printing to stdout"); 49 | } 50 | } } 51 | ); 52 | 53 | #[derive(Clone, Serialize, PartialEq, Debug)] 54 | pub enum MatchType { 55 | Pattern, 56 | File, 57 | } 58 | 59 | #[derive(Debug, Default, Deserialize)] 60 | struct Config { 61 | patterns: Option>, 62 | filters: Option>, 63 | files: Option>, 64 | } 65 | 66 | #[derive(Debug, Default, Serialize, Deserialize, Clone)] 67 | struct Pattern { 68 | description: String, 69 | pattern: String, 70 | enabled: Option, 71 | case_sensitive: Option, 72 | 73 | #[serde(skip_deserializing, skip_serializing)] 74 | regex: Option, 75 | } 76 | 77 | #[derive(Debug, Default, Serialize, Deserialize, Clone)] 78 | struct FilePattern { 79 | description: String, 80 | extension: String, 81 | binary: Option, 82 | } 83 | 84 | #[derive(Clone, Serialize)] 85 | struct PatternMatch { 86 | description: String, 87 | text: String, 88 | repo_path: String, 89 | file: String, 90 | full_path: String, 91 | match_type: MatchType, 92 | change_type: ChangeType, 93 | commit_hash: String, 94 | commit_date: String, 95 | } 96 | 97 | fn main() { 98 | let args = 
App::new("Source Secrets") 99 | .version("1.0") 100 | .author("Lander Brandt ") 101 | .about("Searches a git repository for secrets") 102 | .arg( 103 | Arg::with_name("repos") 104 | .value_name("GIT_REPO_PATH") 105 | .help("Sets the path of the git repository") 106 | .multiple(true) 107 | .required(true), 108 | ).arg( 109 | Arg::with_name("definitions") 110 | .short("d") 111 | .value_name("definitions.toml") 112 | .help("File containing pattern definitions") 113 | .takes_value(true), 114 | ).arg( 115 | Arg::with_name("output_file") 116 | .short("o") 117 | .value_name("OUTPUT_FILE") 118 | .help("File to output data to write results to (use - for stdout)") 119 | .takes_value(true) 120 | .required(true), 121 | ).arg( 122 | Arg::with_name("since") 123 | .short("s") 124 | .value_name("DATE") 125 | .help("Look at commits since this date (e.g. \"Jan 1, 2018\" or \"2 weeks ago\")") 126 | .takes_value(true), 127 | ).arg( 128 | Arg::with_name("until") 129 | .short("u") 130 | .value_name("DATE") 131 | .help("Look at commits before this date (e.g. 
\"Jan 1, 2018\" or \"2 weeks ago\")") 132 | .takes_value(true), 133 | ).arg( 134 | Arg::with_name("verbose") 135 | .short("v") 136 | .value_name("VERBOSE") 137 | .help("Set verbose output (shows results as they come in)") 138 | .takes_value(false), 139 | ).get_matches(); 140 | unsafe { 141 | VERBOSE = args.is_present("verbose"); 142 | } 143 | 144 | let repos = args.values_of_lossy("repos").unwrap(); 145 | 146 | let output_file = match args.value_of("output_file").unwrap() { 147 | "-" => Box::new(stdout()) as Box, 148 | filename => { 149 | Box::new(File::create(filename).expect("Unable to create output file")) as Box 150 | } 151 | }; 152 | 153 | let definitions_path = match args.value_of("definitions") { 154 | Some(p) => p.to_owned(), 155 | None => { 156 | let mut p = current_exe() 157 | .unwrap() 158 | .parent() 159 | .unwrap() 160 | .join("definitions.toml"); 161 | p.to_str().unwrap().to_owned() 162 | } 163 | }; 164 | 165 | let mut definitions_file = File::open(definitions_path).expect("definitions.toml not found"); 166 | let mut config_contents = String::new(); 167 | definitions_file 168 | .read_to_string(&mut config_contents) 169 | .expect("error while reading definitions file"); 170 | 171 | let pattern_config = toml::from_str(&config_contents); 172 | 173 | if let Err(err) = pattern_config { 174 | eprintln!("Error parsing config: {:?}", err); 175 | exit(1); 176 | } 177 | 178 | let pattern_config: Config = pattern_config.unwrap(); 179 | let mut patterns = pattern_config.patterns.unwrap(); 180 | let mut filters: Option> = pattern_config.filters; 181 | let mut files = pattern_config.files.unwrap(); 182 | 183 | for file in &mut files { 184 | file.extension = ".".to_owned() + &file.extension; 185 | } 186 | 187 | // loop over all of the patterns to compile their regexes 188 | for pattern in &mut patterns { 189 | compile_patterns(&mut filters.as_mut().unwrap()); 190 | } 191 | 192 | // loop over all of the patterns to compile their regexes 193 | if filters.is_some() { 
194 | compile_patterns(&mut filters.as_mut().unwrap()); 195 | } 196 | 197 | let mut all_commits = Vec::new(); 198 | let mut clients = Vec::new(); 199 | 200 | // ensure all of the repos exist 201 | for repo in &repos { 202 | let repo_path = Path::new(&repo); 203 | // not being pedantic and checking if .git path is a folder here 204 | // if a .git file exists in a folder I want to see how this thing blows up 205 | // TODO: add test for .git file, not folder, existing in repo path 206 | if !repo_path.exists() || !repo_path.join(".git").exists() { 207 | eprintln!("Repo path {} does not exist", repo); 208 | continue; 209 | } 210 | verbose_print!("Getting data for repo {}", repo); 211 | 212 | let client = Arc::new(GitClient::new(repo.to_string())); 213 | 214 | let mut commits = client 215 | .clone() 216 | .get_commits(args.value_of("since"), args.value_of("until")); 217 | all_commits.reserve(commits.len()); 218 | 219 | for mut commit in commits { 220 | commit.client = Some(client.clone()); 221 | all_commits.push(commit); 222 | } 223 | 224 | clients.push(client); 225 | } 226 | 227 | let mut threads = Vec::new(); 228 | // set up the progress bar for all threads + commits 229 | let pb = Arc::new(Mutex::new(ProgressBar::new( 230 | (all_commits.len() + NUM_THREADS as usize) as u64, 231 | ))); 232 | let found_matches = Arc::new(RwLock::new(VecDeque::new() as VecDeque)); 233 | 234 | if all_commits.is_empty() { 235 | println!("No commits found to search"); 236 | return; 237 | } 238 | 239 | let commits_per_thread = all_commits.len() / NUM_THREADS; 240 | let last_thread_commit_count = commits_per_thread + (all_commits.len() % NUM_THREADS); 241 | for i in 0..NUM_THREADS { 242 | let mut num_commits = if i == NUM_THREADS - 1 { 243 | last_thread_commit_count 244 | } else { 245 | commits_per_thread 246 | }; 247 | 248 | let commits: VecDeque = VecDeque::from_iter(all_commits.drain(0..num_commits)); 249 | let patterns = patterns.clone(); 250 | let found_matches = found_matches.clone(); 
let pb = pb.clone();
        let files = files.clone();

        threads.push(thread::spawn(move || {
            pattern_matcher_thread(
                commits,
                &patterns,
                &files,
                &pb,
                move |matched: PatternMatch| {
                    found_matches.write().unwrap().push_back(matched);
                },
            )
        }));
    }
    // this should be empty here -- let's explicitly get rid of this resource
    drop(all_commits);

    let mut csv_writer = csv::Writer::from_writer(output_file);
    let found_matches = found_matches.clone();
    while found_matches.read().unwrap().len() > 0
        || THREAD_DONE_COUNT.load(Ordering::Relaxed) != NUM_THREADS
    {
        let mut matches = found_matches.write().unwrap();
        'outer: loop {
            match matches.pop_front() {
                Some(pattern_match) => {
                    if pattern_match.match_type == MatchType::Pattern && filters.as_ref().is_some()
                    {
                        let filters = filters.as_ref().unwrap();
                        for filter in filters {
                            if filter.regex.as_ref().unwrap().is_match(&pattern_match.text) {
                                continue 'outer;
                            }
                        }
                    }
                    verbose_print!(
                        "{:?} {} in repo {}",
                        &pattern_match.match_type,
                        &pattern_match.file,
                        &pattern_match.repo_path
                    );

                    csv_writer
                        .serialize(pattern_match)
                        .expect("failed to serialize pattern");
                }
                None => {
                    csv_writer.flush().unwrap();
                    break;
                }
            }
        }
    }

    for thread in threads {
        if let Err(err) = thread.join() {
            eprintln!("Error joining thread: {:?}", err);
        }
    }
}

/// Compiles the regex of every enabled pattern in `patterns`, in place.
///
/// * Patterns with `enabled == Some(false)` are skipped; their `regex` is left untouched.
/// * Unless `case_sensitive` is `Some(true)`, the pattern text is prefixed with the inline
///   `(?i)` flag so the compiled regex ignores case.
/// * A pattern that fails to compile is reported on stderr and its `regex` is set to `None`.
fn compile_patterns(patterns: &mut [Pattern]) {
    for pattern in patterns.iter_mut() {
        if !pattern.enabled.unwrap_or(true) {
            continue;
        }

        // Only add the flag once, so compiling the same slice again does not keep
        // growing the stored pattern text.
        if !pattern.case_sensitive.unwrap_or(false) && !pattern.pattern.starts_with("(?i)") {
            pattern.pattern.insert_str(0, "(?i)");
        }

        pattern.regex = match Regex::new(&pattern.pattern) {
            Ok(regex) => Some(regex),
            Err(e) => {
                eprintln!("Could not compile pattern {}: {}", pattern.description, e);
                None
            }
        };
    }
}

/// Joins a repository path and a repository-relative file name into a single string.
fn join_repo_path(repo_path: &str, file: &str) -> String {
    Path::new(repo_path)
        .join(file)
        .to_string_lossy()
        .into_owned()
}

/// Worker body: scans every commit in `commits` and reports hits through `on_found`.
///
/// For each commit the progress bar is advanced, the commit content is fetched from the
/// commit's `client`, and the content is walked line by line:
///
/// * A `diff --git` line starts a new file. If the file name ends with one of the
///   extensions in `files`, the blob id is read from the following `index ` line, the file
///   content is fetched and reported as a `MatchType::File` match (base64-encoded when the
///   file pattern is flagged `binary`).
/// * Every other line no longer than `MAX_LINE_LENGTH` is handed to [`check_patterns`].
///
/// When the function exits, the progress bar is advanced one extra time and
/// `THREAD_DONE_COUNT` is incremented.
///
/// # Panics
///
/// Panics if a commit has no `client` attached or if the progress-bar mutex is poisoned.
fn pattern_matcher_thread<F, T>(
    mut commits: VecDeque<Commit>,
    patterns: &[Pattern],
    files: &[FilePattern],
    pb: &Arc<Mutex<ProgressBar<T>>>,
    on_found: F,
) where
    F: Fn(PatternMatch),
    T: Write,
{
    defer!({
        // this one is faked because otherwise the user gets a false impression that all work is done
        let mut pb = pb.lock().unwrap();
        pb.inc();
        drop(pb);

        THREAD_DONE_COUNT.fetch_add(1, Ordering::SeqCst);
    });

    while let Some(commit) = commits.pop_front() {
        let client = commit
            .client
            .as_ref()
            .expect("commit has no git client attached");
        pb.lock().unwrap().inc();

        let content = client.get_commit_content(&commit);

        // Per-commit parser state.
        // `file_info` is `Some` while we are waiting to capture a file matched by extension.
        let mut file_info: Option<&FilePattern> = None;
        let mut file_name: Option<String> = None;
        let mut file_index: Option<String> = None;

        for line in content.lines() {
            let line = line.trim();

            // a `diff --git` line means we're on a new file boundary
            if line.starts_with("diff --git") {
                // NOTE: this could easily be broken by paths with spaces...
                // we're going to assume that the repos do not contain any folder
                // ending with " b/"

                // 13 is the length of "diff --git a/"
                let after_prefix: String = line.chars().skip(13).collect();
                let name = after_prefix.split(" b/").next().unwrap_or("").to_string();

                // Reset everything remembered about the previous file so a stale blob id
                // or file pattern can never be attributed to this one.
                file_index = None;
                file_info = files
                    .iter()
                    .find(|file| name.ends_with(file.extension.as_str()));
                file_name = Some(name);
                continue;
            }

            if let Some(info) = file_info {
                if line.starts_with("index ") {
                    // The line looks like `index <old>..<new>[ <mode>]`; keep only <new>.
                    // A line without a usable id yields "", which makes the lookup below
                    // fall back to the commit hash + file name.
                    let blob = line
                        .split("..")
                        .nth(1)
                        .and_then(|rest| rest.split_whitespace().next())
                        .unwrap_or("");
                    file_index = Some(blob.to_string());
                    continue;
                }

                if let Some(index) = file_index.take() {
                    // we're in a file that we have a pattern for -- we need to get its
                    // contents now
                    let fname = file_name.clone().unwrap_or_default();

                    let mut file_data = client.get_file_at_commit(&index, None);
                    if file_data.is_empty() {
                        file_data = client.get_file_at_commit(&commit.hash, Some(&fname));
                    }

                    let text = if info.binary.unwrap_or(false) {
                        // if it's a binary file we need to encode as base64
                        encode(&file_data)
                    } else {
                        String::from_utf8_lossy(&file_data).into_owned()
                    };

                    on_found(PatternMatch {
                        description: info.description.clone(),
                        text,
                        match_type: MatchType::File,
                        repo_path: client.repo_path.clone(),
                        full_path: join_repo_path(&client.repo_path, &fname),
                        file: fname,
                        change_type: ChangeType::Unknown,
                        commit_hash: commit.hash.clone(),
                        commit_date: commit.date.clone(),
                    });

                    // The file has been captured; the rest of its diff is scanned normally.
                    file_info = None;
                }

                continue;
            }

            if line.len() > MAX_LINE_LENGTH {
                verbose_print!("Skipping line -- too long");
                continue;
            }

            // Lines that precede the first `diff --git` header belong to no file and
            // are skipped instead of panicking on a missing file name.
            if let Some(name) = file_name.as_ref() {
                check_patterns(patterns, line, &on_found, &client.repo_path, name, &commit);
            }
        }
    }
}

/// Tests one diff line against every enabled, compiled pattern and reports each hit.
///
/// * `line` is expected to start with a diff marker: `+` is reported as
///   `ChangeType::Addition` and `-` as `ChangeType::Removal`. Lines starting with `@` and
///   empty lines are ignored; any other first character is reported on stderr and skipped.
/// * The reported `text` is exactly the matched part of the line, without the leading
///   diff marker and with surrounding whitespace trimmed.
/// * `repo_path`, `file_name` and `commit` are copied into the emitted `PatternMatch`.
///
/// Patterns that are disabled or whose regex is `None` (not compiled) are skipped.
fn check_patterns<F>(
    patterns: &[Pattern],
    line: &str,
    on_found: &F,
    repo_path: &str,
    file_name: &str,
    commit: &Commit,
) where
    F: Fn(PatternMatch),
{
    for pattern in patterns {
        if !pattern.enabled.unwrap_or(true) {
            continue;
        }

        let regex = match pattern.regex.as_ref() {
            Some(regex) => regex,
            None => continue,
        };

        let mat = match regex.find(line) {
            Some(mat) => mat,
            None => continue,
        };

        let change_type = match line.chars().next() {
            Some('+') => ChangeType::Addition,
            Some('-') => ChangeType::Removal,
            Some('@') | None => continue,
            Some(unknown_type) => {
                eprintln!("Unexpected value for change type: {:?}", unknown_type);
                continue;
            }
        };

        // `start()`/`end()` are byte offsets. Byte 0 is the one-byte `+`/`-` marker, so a
        // match that begins there is clamped to begin right after it.
        let start = mat.start().max(1);
        let end = mat.end().max(start);

        on_found(PatternMatch {
            description: pattern.description.clone(),
            text: line[start..end].trim().to_owned(),
            match_type: MatchType::Pattern,
            repo_path: repo_path.to_string(),
            file: file_name.to_owned(),
            full_path: join_repo_path(repo_path, file_name),
            change_type,
            commit_hash: commit.hash.clone(),
            commit_date: commit.date.clone(),
        });
    }
}
--------------------------------------------------------------------------------