├── .devcontainer ├── Dockerfile └── devcontainer.json ├── .github ├── dependabot.yml └── workflows │ └── base.yml ├── .gitignore ├── BUCK ├── CODE-OF-CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE ├── LICENSE-MIT ├── README.md ├── assets └── image-20230605104420197.png ├── examples └── test_gitleaks.toml ├── gitleaks.toml └── src ├── api.rs ├── entity ├── mod.rs └── models.rs ├── errors.rs ├── lib.rs ├── main.rs ├── routes ├── rules.rs └── scan.rs ├── service ├── db_service.rs ├── detect_service.rs ├── git_service.rs └── mod.rs └── utils ├── detect_utils.rs ├── git_util.rs └── mod.rs /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM almalinux:8.10-20250307 2 | 3 | # Install tools and dependencies 4 | RUN dnf update -y \ 5 | && dnf group install -y "Development Tools" \ 6 | && dnf install -y glibc-langpack-en sudo tzdata \ 7 | && dnf install -y curl openssl-devel 8 | 9 | # Create and switch to user 10 | ARG USERNAME 11 | ARG USER_UID 12 | RUN useradd -m -s /bin/bash -u $USER_UID $USERNAME \ 13 | && mkdir -p /etc/sudoers.d \ 14 | && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ 15 | && chmod 0440 /etc/sudoers.d/$USERNAME 16 | USER $USERNAME 17 | 18 | # Install Rust, set environment variable 19 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 20 | ENV PATH="/home/$USERNAME/.cargo/bin:${PATH}" 21 | 22 | # Create and set permissions for workspace directory 23 | USER root 24 | RUN mkdir -p /workspace && chown $USERNAME:$USERNAME /workspace 25 | USER $USERNAME 26 | 27 | WORKDIR /workspace 28 | 29 | ENV TZ=Asia/Shanghai 30 | ENV LANG=en_US.UTF-8 31 | ENV LC_ALL=en_US.UTF-8 32 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sensleak Development", 3 | "build": { 4 | "dockerfile": 
"./Dockerfile", 5 | "args": { 6 | "USERNAME": "rust", // Used for creating "/home/$USERNAME" directory within container 7 | "USER_UID": "1000" 8 | } 9 | }, 10 | "workspaceFolder": "/workspace", 11 | "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind", 12 | "mounts": [], 13 | "remoteUser": "rust", 14 | "customizations": { 15 | "vscode": { 16 | "extensions": [ 17 | "fill-labs.dependi", 18 | "ms-azuretools.vscode-docker", 19 | "saoudrizwan.claude-dev", 20 | "tamasfe.even-better-toml" 21 | ] 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "cargo" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /.github/workflows/base.yml: -------------------------------------------------------------------------------- 1 | # Based on https://github.com/actions-rs/meta/blob/master/recipes/quickstart.md 2 | # 3 | # History: 4 | # 1. 
2023-02-14: Created at 2023-02-14T16:00:00Z by Quanyi Ma 5 | # 6 | # 7 | # 8 | 9 | on: [ push, pull_request ] 10 | 11 | name: Base GitHub Action for Check, Test and Lints 12 | 13 | jobs: 14 | check: 15 | name: Check 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v2 19 | - uses: actions-rs/toolchain@v1 20 | with: 21 | profile: minimal 22 | toolchain: stable 23 | override: true 24 | - uses: actions-rs/cargo@v1 25 | with: 26 | command: check 27 | 28 | test: 29 | name: Test Suite 30 | runs-on: ubuntu-latest 31 | steps: 32 | - uses: actions/checkout@v2 33 | - uses: actions-rs/toolchain@v1 34 | with: 35 | profile: minimal 36 | toolchain: stable 37 | override: true 38 | - uses: actions-rs/cargo@v1 39 | with: 40 | command: test 41 | 42 | clippy: 43 | name: Clippy 44 | runs-on: ubuntu-latest 45 | steps: 46 | - uses: actions/checkout@v2 47 | - uses: actions-rs/toolchain@v1 48 | with: 49 | profile: minimal 50 | toolchain: stable 51 | override: true 52 | - run: rustup component add clippy 53 | - uses: actions-rs/cargo@v1 54 | with: 55 | command: clippy 56 | args: -- -D warnings -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | 13 | # MSVC Windows builds of rustc generate these, which store debugging information 14 | *.pdb 15 | 16 | # Compiled binary file 17 | .gitignore 18 | 19 | log.sh -------------------------------------------------------------------------------- /BUCK: -------------------------------------------------------------------------------- 1 | 
load("@prelude//rust:cargo_package.bzl", "cargo") 2 | 3 | # package definitions 4 | filegroup( 5 | name = "sensleak-0.3.0.crate", 6 | srcs = glob(["src/**/*.rs"]), 7 | ) 8 | 9 | pkg_deps = [ 10 | "//third-party:actix-cors", 11 | "//third-party:actix-web", 12 | "//third-party:assert_cmd", 13 | "//third-party:chrono", 14 | "//third-party:clap", 15 | "//third-party:csv", 16 | "//third-party:env_logger", 17 | "//third-party:git2", 18 | "//third-party:log", 19 | "//third-party:mockito", 20 | "//third-party:postgres", 21 | "//third-party:rayon", 22 | "//third-party:regex", 23 | "//third-party:sea-orm", 24 | "//third-party:serde", 25 | "//third-party:serde_json", 26 | "//third-party:tempfile", 27 | "//third-party:tokio", 28 | "//third-party:toml", 29 | "//third-party:utoipa", 30 | "//third-party:utoipa-swagger-ui", 31 | "//third-party:walkdir", 32 | ] 33 | 34 | # targets 35 | cargo.rust_library( 36 | name = "sensleak", 37 | srcs = [":sensleak-0.3.0.crate"], 38 | crate_root = "sensleak-0.3.0.crate/src/lib.rs", 39 | edition = "2024", 40 | deps = pkg_deps, 41 | visibility = ["PUBLIC"], 42 | ) 43 | 44 | cargo.rust_binary( 45 | name = "api", 46 | srcs = [":sensleak-0.3.0.crate"], 47 | crate_root = "sensleak-0.3.0.crate/src/api.rs", 48 | edition = "2024", 49 | deps = [":sensleak"] + pkg_deps, 50 | visibility = ["PUBLIC"], 51 | ) 52 | 53 | cargo.rust_binary( 54 | name = "scan", 55 | srcs = [":sensleak-0.3.0.crate"], 56 | crate_root = "sensleak-0.3.0.crate/src/main.rs", 57 | edition = "2024", 58 | deps = [":sensleak"] + pkg_deps, 59 | visibility = ["PUBLIC"], 60 | ) 61 | -------------------------------------------------------------------------------- /CODE-OF-CONDUCT.md: -------------------------------------------------------------------------------- 1 | # CODE OF CONDUCT 2 | 3 | This code of conduct outlines the expected behavior of all members of Open Rust Initiative to ensure a safe, productive, and inclusive environment for everyone. 
4 | 5 | All members of Open Rust Initiative, including employees, contractors, interns, volunteers, and anyone else represents the company, are expected to behave in a professional, respectful, considerate, and collaborative manner. Harassment, discrimination, or toxic behavior of any kind will not be tolerated. 6 | 7 | Open Rust Initiative is committed to providing an environment free of harassment and discrimination for everyone, regardless of gender, gender identity and expression, sexual orientation, disability, physical appearance, body size, race, age, or religion. We do not tolerate harassment of participants in any form. Harassment includes offensive comments related to these characteristics, as well as deliberate intimidation, stalking, following, harassing photography or recording, sustained disruption of talks or other events, inappropriate physical contact, and unwelcome sexual attention. 8 | 9 | If you experience or witness unacceptable behavior, see something that makes you feel unsafe, or have concerns about the well-being of a participant, please report it to Eli Ma or Charles Feng immediately. All reports will be handled confidentially. 10 | 11 | We value diverse opinions, skills, and experiences. We strive to build an inclusive environment where everyone feels safe and respected. Together, we can achieve great things. 12 | 13 | THANK YOU FOR YOUR COOPERATION IN ADVANCING OUR COMMITMENT TO INCLUSION AND RESPECT. 14 | 15 | Responsibilities 16 | 17 | All members of Open Rust Initiative are expected to: 18 | 19 | - Treat all people with respect and consideration, valuing a diversity of views and opinions. 20 | • Communicate openly and thoughtfully. 21 | • Avoid personal attacks directed at other participants. 22 | • Be mindful of your surroundings and your fellow participants. Alert Eli Ma if you notice a dangerous situation or someone in distress. 23 | • Respect personal space and property. 
24 | • Refrain from demeaning, discriminatory, or harassing behavior, speech, and imagery. 25 | • Be considerate in your use of space and resources. For example, avoid excessive noise from conversations, laptops, and other electronic devices. Be courteous when taking up shared space such as tables and walkways. 26 | • Follow the instructions of Open Rust Initiative staff and security. 27 | • Avoid using language that reinforces social and cultural structures of domination related to gender, gender identity and expression, sexual orientation, disability, physical appearance, body size, race, age, religion, or other personal characteristics. 28 | 29 | Consequences 30 | 31 | Failure to comply with this Code of Conduct may result in disciplinary action, including removal from Open Rust Initiative spaces and events and prohibition from future participation. 32 | 33 | Contact Information 34 | 35 | If you have questions or concerns about this Code of Conduct, contact Eli Ma or Charles Feng. 36 | 37 | # Enforcement 38 | 39 | Open Rust Initiative prioritizes creating a safe and positive experience for everyone. We do not tolerate harassment or discrimination of any kind. 40 | 41 | We expect participants to follow these rules at all Open Rust Initiative venues and events. Open Rust Initiative staff will enforce this Code of Conduct. 42 | 43 | If a participant engages in harassing or discriminatory behavior, Open Rust Initiative staff will take reasonable action they deem appropriate, including warning the offender, expulsion from an event, or banning them from future events. 44 | 45 | At their discretion, Open Rust Initiative staff may report offenders to local law enforcement. Open Rust Initiative staff may take action against participants for other behaviors that violate this Code of Conduct or negatively impact the safety and inclusion of event participants. 
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTING 2 | 3 | Thank you for your interest in contributing to this project. There are many ways you can contribute, from writing tutorials or blog posts, improving the documentation, submitting bug reports and feature requests, all the way to developing code which can be incorporated into the project. 4 | 5 | As a contributor, you agree to abide by the Code of Conduct enforced in this community. 6 | 7 | ## How to contribute 8 | 9 | Here are some guidelines for contributing to this project: 10 | 11 | 1. Report issues/bugs: If you find any issues or bugs in the project, please report them by creating an issue on the issue tracker. Describe the issue in detail and also mention the steps to reproduce it. The more details you provide, the easier it will be for me to investigate and fix the issue. 12 | 2. Suggest enhancements: If you have an idea to enhance or improve this project, you can suggest it by creating an issue on the issue tracker. Explain your enhancement in detail along with its use cases and benefits. I appreciate well-thought-out enhancement suggestions. 13 | 3. Contribute code: If you want to develop and contribute code, follow these steps: 14 | - Fork the repository and clone it locally. 15 | - Create a new branch for your feature/bugfix. 16 | - Make necessary changes and commit them with proper commit messages. 17 | - Push your changes to your fork and create a pull request. 18 | - I will review your changes and merge the PR if found suitable. Please ensure your code is properly formatted and follows the same style as the existing codebase. 19 | 1. Write tutorials/blog posts: You can contribute by writing tutorials or blog posts to help users get started with this project. Submit your posts on the issue tracker for review and inclusion. 
High quality posts that provide value to users are highly appreciated. 20 | 2. Improve documentation: If you find any gaps in the documentation or think any part can be improved, you can make changes to files in the documentation folder and submit a PR. Ensure the documentation is up-to-date with the latest changes. 21 | 22 | Your contributions are highly appreciated. Feel free to ask any questions if you have any doubts or facing issues while contributing. The more you contribute, the more you will learn and improve your skills. -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sensleak" 3 | version = "0.3.0" 4 | edition = "2024" 5 | 6 | [[bin]] 7 | name = "scan" 8 | path = "src/main.rs" 9 | 10 | [[bin]] 11 | name = "api" 12 | path = "src/api.rs" 13 | 14 | [dependencies] 15 | actix-cors = "0.7" 16 | actix-web = "4.10" 17 | assert_cmd = "2.0" 18 | chrono = "0.4" 19 | clap = { version = "4.5", features = ["derive"] } 20 | csv = "1.3" 21 | env_logger = "0.11" 22 | git2 = "0.20" 23 | log = "0.4" 24 | mockito = "1.7" 25 | postgres = { version = "0.19" } 26 | rayon = "1.10" 27 | regex = "1.11" 28 | sea-orm = { version = "1.1", features = [ 29 | "runtime-tokio-rustls", 30 | "sqlx-postgres", 31 | ] } 32 | serde = { version = "1.0", features = ["derive"] } 33 | serde_json = "1.0" 34 | tempfile = "3.19" 35 | tokio = { version = "1.44", features = ["full"] } 36 | toml = "0.8" 37 | utoipa = { version = "5.3", features = ["actix_extras"] } 38 | utoipa-swagger-ui = { version = "9.0", features = ["actix-web"] } 39 | walkdir = "2.5" 40 | zip = "=2.4.2" # ! 
41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 open-rust-initiative 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 - 2023 Open Rust Initiative 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sensleak - scan git repo secrets 2 | 3 | sensleak is a Rust-based tool that scans Git repositories for sensitive data, specifically targeting sensitive information such as passwords, API keys, certificates, and private keys embedded within code. 4 | 5 | ## Background 6 | 7 | Many developers store sensitive information such as keys and certificates in their code, which poses security risks. 
Therefore, there are commercial services like GitGuardian scanning GitHub and GitLab, as well as open-source components like truffleHog and Gitleaks that support similar functionalities. 8 | 9 | ## Feature 10 | 11 | - **Enhanced Security.** Develop the tool in Rust to ensure improved security and memory safety. 12 | - **Command-line Interface**. Create a user-friendly command-line tool that generates a comprehensive test report. 13 | - **REST API with Access Control**. Enable the tool to run as a service and provide access control through a REST API. Utilize Swagger to generate API documentation. 14 | - **Concurrent Scanning**. Utilize a thread pool to control concurrent scanning of secrets, thereby improving overall efficiency. 15 | - **Batch Processing**. Implement batch processing of files to further optimize the scanning process and enhance efficiency. 16 | 17 | ## Technology 18 | 19 | - Development Language: Rust 20 | - Command-line Interaction: [clap.rs](https://github.com/clap-rs/clap) 21 | - Git Repository Operations: [git2](https://github.com/rust-lang/git2-rs) 22 | - Web Framework: [actix-web](https://actix.rs) 23 | - Auto-generated OpenAPI Documentation: [utoipa](https://github.com/juhaku/utoipa) 24 | 25 | ## Usage 26 | 27 | ### CLI Usage 28 | 29 | Running the tool in the command-line interface (CLI) to perform sensitive data checks. 
30 | 31 | ``` 32 | cargo run --bin scan -- -help 33 | ``` 34 | 35 | ```shell 36 | Usage: scan [OPTIONS] --repo 37 | 38 | Options: 39 | --repo Target repository 40 | --config Config path [default: gitleaks.toml] 41 | --threads Maximum number of threads sensleak spawns [default: 10] 42 | --chunk The number of files processed in each batch [default: 10] 43 | --report Path to write json leaks file 44 | --report-format json, csv, sarif [default: json] 45 | -v, --verbose Show verbose output from scan 46 | --pretty Pretty print json if leaks are present 47 | --commit sha of commit to scan 48 | --commits comma separated list of a commits to scan 49 | --commits-file file of new line separated list of a commits to scan 50 | --commit-since Scan commits more recent than a specific date. Ex: '2006-01-02' or '2023-01-02T15:04:05-0700' format 51 | --commit-until Scan commits older than a specific date. Ex: '2006-01-02' or '2006-10-02T15:04:05-0700' format 52 | --commit-from Commit to start scan from 53 | --commit-to Commit to stop scan 54 | --branch Branch to scan 55 | --uncommitted Run sensleak on uncommitted code 56 | --user Set user to scan [default: ] 57 | --repo-config Load config from target repo. Config file must be ".gitleaks.toml" or "gitleaks.toml" 58 | --debug log debug messages 59 | --disk Clones repo(s) to disk 60 | --to-db Output to database 61 | -h, --help Print help (see more with '--help') 62 | -V, --version Print version 63 | 64 | run 'cargo run --bin api' to get REST API. 65 | Repository: https://github.com/open-rust-initiative/sensleak-rs 66 | 67 | ``` 68 | 69 | Example: 70 | 71 | Test https://github.com/sonichen/Expiry-Reminder-Assistant.git 72 | 73 | ```shell 74 | $ cargo run --bin scan -- --repo="D:/Workplace/Java/project/ExpiryReminderAssistant" -v --pretty 75 | ``` 76 | 77 | ```shell 78 | [INFO][2023-06-05 09:59:59] Clone repo ... 
79 | [ 80 | Leak { 81 | line: " String secret = \"1708b0314f18f420d3fe8128652af43c\"; //自己小程序的SECRET", 82 | line_number: 67, 83 | offender: "secret = \"1708b0314f18f420d3fe8128652af43c\"", 84 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 85 | repo: "ExpiryReminderAssistant", 86 | rule: "Generic API Key", 87 | commit_message: "submit code\n", 88 | author: "sonichen", 89 | email: "1606673007@qq.com", 90 | file: "/backend/src/main/java/com/cyj/controller/login/WXLoginController.java", 91 | date: "2023-05-31 18:09:42 -08:00", 92 | }, 93 | Leak { 94 | line: " businessException.apiResponse = apiResponse;", 95 | line_number: 64, 96 | offender: "apiResponse = apiResponse;", 97 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 98 | repo: "ExpiryReminderAssistant", 99 | rule: "Generic API Key", 100 | commit_message: "submit code\n", 101 | author: "sonichen", 102 | email: "1606673007@qq.com", 103 | file: "/backend/src/main/java/com/cyj/exception/BusinessException.java", 104 | date: "2023-05-31 18:09:42 -08:00", 105 | }, 106 | Leak { 107 | line: "// app_secret:bm92ZWk2WFdoR3RkV3ZiUk5SUnVXUT09", 108 | line_number: 5, 109 | offender: "secret:bm92ZWk2WFdoR3RkV3ZiUk5SUnVXUT09", 110 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 111 | repo: "ExpiryReminderAssistant", 112 | rule: "Generic API Key", 113 | commit_message: "submit code\n", 114 | author: "sonichen", 115 | email: "1606673007@qq.com", 116 | file: "/backend/src/main/java/com/cyj/utils/constants/DevelopConstants.java", 117 | date: "2023-05-31 18:09:42 -08:00", 118 | }, 119 | Leak { 120 | line: " public static final String APP_SECRET=\"bm92ZWk2WFdoR3RkV3ZiUk5SUnVXUT09\";", 121 | line_number: 7, 122 | offender: "SECRET=\"bm92ZWk2WFdoR3RkV3ZiUk5SUnVXUT09\"", 123 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 124 | repo: "ExpiryReminderAssistant", 125 | rule: "Generic API Key", 126 | commit_message: "submit code\n", 127 | author: "sonichen", 128 | email: "1606673007@qq.com", 129 | file: 
"/backend/src/main/java/com/cyj/utils/constants/DevelopConstants.java", 130 | date: "2023-05-31 18:09:42 -08:00", 131 | }, 132 | Leak { 133 | line: "// public static final String APPSECRET = \"94f391d306875101822ffa1b2c3cff09\";", 134 | line_number: 17, 135 | offender: "SECRET = \"94f391d306875101822ffa1b2c3cff09\"", 136 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 137 | repo: "ExpiryReminderAssistant", 138 | rule: "Generic API Key", 139 | commit_message: "submit code\n", 140 | author: "sonichen", 141 | email: "1606673007@qq.com", 142 | file: "/backend/src/main/java/com/cyj/utils/secret/AuthUtil.java", 143 | date: "2023-05-31 18:09:42 -08:00", 144 | }, 145 | Leak { 146 | line: " secret: \"c6e1180dda3eaca49f3d7ed912718e4d\" #小程序密钥", 147 | line_number: 36, 148 | offender: "secret: \"c6e1180dda3eaca49f3d7ed912718e4d\"", 149 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 150 | repo: "ExpiryReminderAssistant", 151 | rule: "Generic API Key", 152 | commit_message: "submit code\n", 153 | author: "sonichen", 154 | email: "1606673007@qq.com", 155 | file: "/backend/src/main/resources/application.yaml", 156 | date: "2023-05-31 18:09:42 -08:00", 157 | }, 158 | Leak { 159 | line: " secret: \"c6e1180dda3eaca49f3d7ed912718e4d\" #小程序密钥", 160 | line_number: 36, 161 | offender: "secret: \"c6e1180dda3eaca49f3d7ed912718e4d\"", 162 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 163 | repo: "ExpiryReminderAssistant", 164 | rule: "Generic API Key", 165 | commit_message: "submit code\n", 166 | author: "sonichen", 167 | email: "1606673007@qq.com", 168 | file: "/backend/target/classes/application.yaml", 169 | date: "2023-05-31 18:09:42 -08:00", 170 | }, 171 | ] 172 | [WARN][2023-06-05 10:00:02]7 leaks detected. 1 commits scanned in 1.2538834s 173 | ``` 174 | 175 | ### API Document 176 | 177 | Run the following code to read the project document. 
178 | 179 | ```shell 180 | cargo run --bin api 181 | ``` 182 | 183 | The API document is located at http://localhost:7000/swagger-ui/#/ 184 | 185 | ### Project Document 186 | 187 | Run the following code to read the project document. 188 | 189 | ```shell 190 | cargo doc --document-private-items --open 191 | ``` 192 | 193 | ### Configuration 194 | 195 | Use the [gitleaks configuration](https://github.com/gitleaks/gitleaks#configuration) in this project. The difference is that in this project, the paths need to start with a "/". 196 | 197 | ```toml 198 | # Title for the gitleaks configuration file. 199 | title = "Gitleaks title" 200 | 201 | # Extend the base (this) configuration. When you extend a configuration 202 | # the base rules take precedence over the extended rules. I.e., if there are 203 | # duplicate rules in both the base configuration and the extended configuration 204 | # the base rules will override the extended rules. 205 | # Another thing to know with extending configurations is you can chain together 206 | # multiple configuration files to a depth of 2. Allowlist arrays are appended 207 | # and can contain duplicates. 208 | # useDefault and path can NOT be used at the same time. Choose one. 209 | [extend] 210 | # useDefault will extend the base configuration with the default gitleaks config: 211 | # https://github.com/zricethezav/gitleaks/blob/master/config/gitleaks.toml 212 | useDefault = true 213 | # or you can supply a path to a configuration. Path is relative to where gitleaks 214 | # was invoked, not the location of the base config. 215 | path = "common_config.toml" 216 | 217 | # An array of tables that contain information that define instructions 218 | # on how to detect secrets 219 | [[rules]] 220 | 221 | # Unique identifier for this rule 222 | id = "awesome-rule-1" 223 | 224 | # Short human readable description of the rule. 225 | description = "awesome rule 1" 226 | 227 | # Golang regular expression used to detect secrets. 
Note Golang's regex engine 228 | # does not support lookaheads. 229 | regex = '''one-go-style-regex-for-this-rule''' 230 | 231 | # Golang regular expression used to match paths. This can be used as a standalone rule or it can be used 232 | # in conjunction with a valid `regex` entry. 233 | path = '''a-file-path-regex''' 234 | 235 | # Array of strings used for metadata and reporting purposes. 236 | tags = ["tag","another tag"] 237 | 238 | # Int used to extract secret from regex match and used as the group that will have 239 | # its entropy checked if `entropy` is set. 240 | secretGroup = 3 241 | 242 | # Float representing the minimum shannon entropy a regex group must have to be considered a secret. 243 | entropy = 3.5 244 | 245 | # Keywords are used for pre-regex check filtering. Rules that contain 246 | # keywords will perform a quick string compare check to make sure the 247 | # keyword(s) are in the content being scanned. Ideally these values should 248 | # either be part of the idenitifer or unique strings specific to the rule's regex 249 | # (introduced in v8.6.0) 250 | keywords = [ 251 | "auth", 252 | "password", 253 | "token", 254 | ] 255 | 256 | # You can include an allowlist table for a single rule to reduce false positives or ignore commits 257 | # with known/rotated secrets 258 | [rules.allowlist] 259 | description = "ignore commit A" 260 | commits = [ "commit-A", "commit-B"] 261 | paths = [ 262 | '''\go\.mod''', 263 | '''\go\.sum''' 264 | ] 265 | # note: (rule) regexTarget defaults to check the _Secret_ in the finding. 266 | # if regexTarget is not specified then _Secret_ will be used. 267 | # Acceptable values for regexTarget are "match" and "line" 268 | regexTarget = "match" 269 | regexes = [ 270 | '''process''', 271 | '''getenv''', 272 | ] 273 | # note: stopwords targets the extracted secret, not the entire regex match 274 | # like 'regexes' does. 
(stopwords introduced in 8.8.0) 275 | stopwords = [ 276 | '''client''', 277 | '''endpoint''', 278 | ] 279 | 280 | 281 | # This is a global allowlist which has a higher order of precedence than rule-specific allowlists. 282 | # If a commit listed in the `commits` field below is encountered then that commit will be skipped and no 283 | # secrets will be detected for said commit. The same logic applies for regexes and paths. 284 | [allowlist] 285 | description = "global allow list" 286 | commits = [ "commit-A", "commit-B", "commit-C"] 287 | paths = [ 288 | '''gitleaks\.toml''', 289 | '''(.*?)(jpg|gif|doc)''' 290 | ] 291 | 292 | # note: (global) regexTarget defaults to check the _Secret_ in the finding. 293 | # if regexTarget is not specified then _Secret_ will be used. 294 | # Acceptable values for regexTarget are "match" and "line" 295 | regexTarget = "match" 296 | 297 | regexes = [ 298 | '''219-09-9999''', 299 | '''078-05-1120''', 300 | '''(9[0-9]{2}|666)-\d{2}-\d{4}''', 301 | ] 302 | # note: stopwords targets the extracted secret, not the entire regex match 303 | # like 'regexes' does. (stopwords introduced in 8.8.0) 304 | stopwords = [ 305 | '''client''', 306 | '''endpoint''', 307 | ] 308 | ``` 309 | 310 | ## Contributing 311 | 312 | The project relies on community contributions and aims to simplify getting started. To use sensleak, clone the repo, install dependencies, and run sensleak. Pick an issue, make changes, and submit a pull request for community review. 313 | 314 | To contribute to rkos, you should: 315 | 316 | - Familiarize yourself with the [Code of Conduct](https://github.com/open-rust-initiative/rkos/blob/main/CODE-OF-CONDUCT.md). sensleak-rs has a strict policy against abusive, unethical, or illegal behavior. 317 | - Review the [Contributing Guidelines](https://github.com/open-rust-initiative/rkos/blob/main/CONTRIBUTING.md). This document outlines the process for submitting bug reports, feature requests, and pull requests to sensleak-rs. 
318 | - Sign the [Developer Certificate of Origin](https://developercertificate.org) (DCO) by adding a `Signed-off-by` line to your commit messages. This certifies that you wrote or have the right to submit the code you are contributing to the project. 319 | - Choose an issue to work on. Issues labeled `good first issue` are suitable for newcomers. You can also look for issues marked `help wanted`. 320 | - Fork the sensleak-rs repository and create a branch for your changes. 321 | - Make your changes and commit them with a clear commit message. 322 | - Push your changes to GitHub and open a pull request. 323 | - Respond to any feedback on your pull request. The sensleak-rs maintainers will review your changes and may request modifications before merging. 324 | - Once your pull request is merged, you will be listed as a contributor in the project repository and documentation. 325 | 326 | To comply with the requirements, contributors must include both a `Signed-off-by` line and a PGP signature in their commit messages. You can find more information about how to generate a PGP key [here](https://docs.github.com/en/github/authenticating-to-github/managing-commit-signature-verification/generating-a-new-gpg-key). 327 | 328 | Git even has a `-s` command line option to append this automatically to your commit message, and `-S` to sign your commit with your PGP key. For example: 329 | 330 | ```shell 331 | $ git commit -S -s -m 'This is my commit message' 332 | ``` 333 | 334 | ## License 335 | 336 | sensleak-rs is licensed under this licensed: 337 | 338 | - MIT LICENSE ( https://opensource.org/licenses/MIT) 339 | 340 | ## References 341 | 342 | 1. [What is Gitleaks and how to use it?](https://akashchandwani.medium.com/what-is-gitleaks-and-how-to-use-it-a05f2fb5b034) 343 | 2. 
[Gitleaks.tools](https://github.com/gitleaks/gitleaks) 344 | -------------------------------------------------------------------------------- /assets/image-20230605104420197.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crates-pro/sensleak-rs/4006679cc127fa47ad1ae6d5ec0a0437c0fa82c1/assets/image-20230605104420197.png -------------------------------------------------------------------------------- /examples/test_gitleaks.toml: -------------------------------------------------------------------------------- 1 | [[rules]] 2 | description = "11111111111" 3 | id = "stripe-access-token" 4 | keywords = ["adafruit"] 5 | regex = '''(?i)(?:adafruit)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)''' 6 | 7 | [[rules]] 8 | description = "Generic API Key" 9 | entropy = 3.5 10 | id = "generic-api-key" 11 | keywords = [ 12 | "key", 13 | "api", 14 | "token", 15 | "secret", 16 | "client", 17 | "passwd", 18 | "password", 19 | "auth", 20 | "access", 21 | ] 22 | regex = '''(?i)(?:key|api|token|secret|client|passwd|password|auth|access)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([0-9a-z\-_.=]{10,150})(?:['|\"|\n|\r|\s|\x60|;]|$)''' 23 | secretGroup = 1 24 | 25 | [[rules]] 26 | description = "Digit" 27 | id = "key" 28 | keywords = ["1212"] 29 | regex = '\d+' 30 | secretGroup = 1 31 | 32 | [rules.allowlist] 33 | regexTarget = "line" 34 | stopwords = ["token"] 35 | 36 | [[rules]] 37 | description = "Adafruit API Key" 38 | id = "adafruit-api-key" 39 | keywords = ["adafruit"] 40 | regex = '''(?i)(?:adafruit)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)''' 41 | 42 | [[rules]] 43 | description = "Adafruit API Key" 44 | id = "adafruit-api-key" 45 | keywords = ["adafruit"] 46 | regex = 
'''(?i)(?:adafruit)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)''' 47 | 48 | [allowlist] 49 | description = "global allow lists" 50 | paths = [ 51 | "(.*?)(jpg|gif|doc|docx|zip|xls|pdf|bin|svg|socket)$", 52 | "gradle.lockfile", 53 | "node_modules", 54 | "package-lock.json", 55 | ] 56 | stopwords = ["token"] 57 | -------------------------------------------------------------------------------- /src/api.rs: -------------------------------------------------------------------------------- 1 | use sensleak::start; 2 | #[tokio::main] 3 | async fn main() -> Result<(), Box> { 4 | println!("The API document is located at http://localhost:7000/swagger-ui/#/"); 5 | start().await?; 6 | Ok(()) 7 | } 8 | 9 | -------------------------------------------------------------------------------- /src/entity/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod models; 2 | -------------------------------------------------------------------------------- /src/entity/models.rs: -------------------------------------------------------------------------------- 1 | use chrono::{DateTime, FixedOffset}; 2 | use clap::Parser; 3 | use serde::{Deserialize, Serialize}; 4 | use utoipa::{ToSchema}; 5 | use sea_orm::{entity::prelude::*, ActiveValue}; 6 | /// Represents the configuration for sensleaks tool. 7 | #[derive(Parser, Debug)] 8 | #[command( 9 | author = "yjchen", 10 | version = "0.3.0", 11 | about = "sensleaks-rs", 12 | long_about = "sensleaks: A tool to detect sensitive information in Git repository", 13 | after_help = "run 'cargo run --bin api' to get REST API.\nRepository: https://github.com/open-rust-initiative/sensleak-rs" 14 | )] 15 | #[derive(Deserialize, Serialize)] 16 | pub struct Config { 17 | /// Target repository. 
18 | #[arg(long)] 19 | pub repo: String, 20 | 21 | /// Config path 22 | #[arg(long, default_value = "gitleaks.toml")] 23 | pub config: String, 24 | 25 | /// Maximum number of threads sensleak spawns 26 | #[arg(long, default_value = "10")] 27 | pub threads: Option, 28 | 29 | /// The number of files processed in each batch 30 | #[arg(long, default_value = "10")] 31 | pub chunk: Option, 32 | 33 | /// Path to write json leaks file. 34 | #[arg(long)] 35 | pub report: Option, 36 | 37 | /// json, csv, sarif 38 | #[arg(long, default_value = "json")] 39 | pub report_format: Option, 40 | 41 | /// Show verbose output from scan. 42 | #[arg(short, long, default_value = "false")] 43 | pub verbose: bool, 44 | 45 | /// Pretty print json if leaks are present. 46 | #[arg(long, default_value = "false")] 47 | pub pretty: bool, 48 | 49 | /// sha of commit to scan 50 | #[arg(long)] 51 | pub commit: Option, 52 | 53 | /// comma separated list of a commits to scan 54 | #[arg(long)] 55 | pub commits: Option, 56 | 57 | /// file of new line separated list of a commits to scan 58 | #[arg(long)] 59 | pub commits_file: Option, 60 | 61 | /// Scan commits more recent than a specific date. Ex: '2006-01-02' or '2023-01-02T15:04:05-0700' format. 62 | #[arg(long)] 63 | pub commit_since: Option, 64 | 65 | /// Scan commits older than a specific date. Ex: '2006-01-02' or '2006-10-02T15:04:05-0700' format. 66 | #[arg(long)] 67 | pub commit_until: Option, 68 | 69 | /// Commit to start scan from 70 | #[arg(long)] 71 | pub commit_from: Option, 72 | 73 | /// Commit to stop scan 74 | #[arg(long)] 75 | pub commit_to: Option, 76 | 77 | /// Branch to scan 78 | #[arg(long)] 79 | pub branch: Option, 80 | 81 | /// Run sensleak on uncommitted code 82 | #[arg(long, default_value = "false")] 83 | // pub uncommitted: bool , 84 | pub uncommitted: bool, 85 | 86 | /// Set user to scan 87 | #[arg(long, default_value = "")] 88 | pub user: Option, 89 | 90 | /// Load config from target repo. 
Config file must be ".gitleaks.toml" or "gitleaks.toml" 91 | #[arg(long)] 92 | pub repo_config: bool, 93 | 94 | /// log debug messages. 95 | #[arg(long, default_value = "false")] 96 | pub debug: bool, 97 | 98 | /// Clones repo(s) to disk. 99 | #[arg(long)] 100 | pub disk: Option, 101 | 102 | /// Output to database 103 | #[arg(long)] 104 | pub to_db: bool, 105 | // /// Start API 106 | // #[arg(long, default_value = "false")] 107 | // pub api: bool, 108 | } 109 | impl Default for Config { 110 | fn default() -> Self { 111 | Config { 112 | repo: String::default(), 113 | config: String::from("gitleaks.toml"), 114 | threads: Some(50), 115 | chunk: Some(10), 116 | report: None, 117 | report_format: Some(String::from("json")), 118 | verbose: false, 119 | pretty: false, 120 | commit: None, 121 | commits: None, 122 | commits_file: None, 123 | commit_since: None, 124 | commit_until: None, 125 | commit_from: None, 126 | commit_to: None, 127 | branch: None, 128 | uncommitted: false, 129 | user: Some("".to_string()), 130 | repo_config: false, 131 | debug: false, 132 | disk: None, 133 | to_db: false, 134 | // api: false, 135 | } 136 | } 137 | } 138 | 139 | /// # An array of tables that contain information that define instructions on how to detect secrets. 140 | #[derive(Debug, Serialize, Clone, Deserialize,ToSchema)] 141 | pub struct Rule { 142 | /// Short human readable description of the rule. 143 | pub description: String, 144 | 145 | /// Unique identifier for this rule. 146 | pub id: String, 147 | 148 | /// Regular expression used to detect secrets. 149 | pub regex: String, 150 | 151 | // /// Float representing the minimum shannon entropy a regex group must have to be considered a secret. 152 | // pub entropy: Option, 153 | 154 | /// Keywords are used for pre-regex check filtering. Rules that contain keywords will perform a quick string compare check to make sure the keyword(s) are in the content being scanned. 
Ideally these values should either be part of the idenitifer or unique strings specific to the rule's regex 155 | pub keywords: Vec, 156 | 157 | /// You can include an allowlist table for a single rule to reduce false positives or ignore commits with known/rotated secrets. 158 | pub allowlist: Option, 159 | } 160 | 161 | impl Rule { 162 | pub fn new() -> Rule { 163 | Rule { 164 | description: String::from("11"), 165 | id: String::from("11"), 166 | regex: String::from("(?i)(?:key|api|token|secret|client|passwd|password|auth|access)"), 167 | // entropy: Some(3.1), 168 | keywords: Vec::new(), 169 | allowlist: None, 170 | } 171 | } 172 | } 173 | 174 | impl Default for Rule { 175 | fn default() -> Self { 176 | Self::new() 177 | } 178 | } 179 | 180 | /// Skip the allowlist 181 | #[derive(Debug, Deserialize, Serialize, Clone,ToSchema)] 182 | pub struct Allowlist { 183 | /// Skip the paths. 184 | pub paths: Vec, 185 | 186 | /// Skip the commits. 187 | pub commits: Vec, 188 | 189 | /// Acceptable values for regexTarget are "match" and "line". 190 | pub regex_target: String, 191 | 192 | /// Skip the secrets that satisfy the regexes. 193 | pub regexes: Vec, 194 | 195 | /// Skip the secrets that contain the stopwords. 
196 | pub stopwords: Vec, 197 | } 198 | impl Allowlist { 199 | pub fn new() -> Allowlist { 200 | Allowlist { 201 | paths: Vec::new(), 202 | commits: Vec::new(), 203 | regex_target: String::from("match"), 204 | regexes: Vec::new(), 205 | stopwords: Vec::new(), 206 | } 207 | } 208 | } 209 | impl Default for Allowlist { 210 | fn default() -> Self { 211 | Self::new() 212 | } 213 | } 214 | 215 | /// Sea-orm Entity 216 | #[derive(Clone, Debug, PartialEq, Eq, DeriveEntityModel)] 217 | #[sea_orm(table_name = "leaks")] 218 | pub struct Model { 219 | #[sea_orm(primary_key)] 220 | pub id: i32, 221 | pub line: String, 222 | pub line_number: u32, 223 | pub offender: String, 224 | pub commit: String, 225 | pub repo: String, 226 | pub rule: String, 227 | pub commit_message: String, 228 | pub author: String, 229 | pub email: String, 230 | pub file: String, 231 | pub date: String, 232 | } 233 | 234 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 235 | pub enum Relation {} 236 | 237 | impl ActiveModelBehavior for ActiveModel {} 238 | 239 | /// Represents an item in the scanned output. 240 | #[derive(Debug, Serialize, Deserialize, Clone, ToSchema)] 241 | pub struct Leak { 242 | /// The line containing the sensitive information. 243 | pub line: String, 244 | 245 | /// The line number where the sensitive information is found. 246 | pub line_number: u32, 247 | 248 | /// The sensitive information detected. 249 | pub offender: String, 250 | 251 | /// The commit info. 252 | pub commit: String, 253 | 254 | /// The repository where the sensitive information is found. 255 | pub repo: String, 256 | 257 | /// The rule used to detect the sensitive information. 258 | pub rule: String, 259 | 260 | /// The commit message associated with the sensitive information. 261 | pub commit_message: String, 262 | 263 | /// The author of the commit. 264 | pub author: String, 265 | 266 | /// The email of the commit author. 
267 | pub email: String, 268 | 269 | /// The file path where the sensitive information is found. 270 | pub file: String, 271 | 272 | /// The date of the commit. 273 | pub date: String, 274 | } 275 | 276 | impl Leak { 277 | pub fn to_active_model(&self) -> ActiveModel { 278 | ActiveModel { 279 | line: ActiveValue::set(self.line.clone()), 280 | line_number: ActiveValue::set(self.line_number), 281 | offender: ActiveValue::set(self.offender.clone()), 282 | commit: ActiveValue::set(self.commit.clone()), 283 | repo: ActiveValue::set(self.repo.clone()), 284 | rule: ActiveValue::set(self.rule.clone()), 285 | commit_message: ActiveValue::set(self.commit_message.clone()), 286 | author: ActiveValue::set(self.author.clone()), 287 | email: ActiveValue::set(self.email.clone()), 288 | file: ActiveValue::set(self.file.clone()), 289 | date: ActiveValue::set(self.date.clone()), 290 | ..Default::default() 291 | } 292 | } 293 | } 294 | 295 | /// The scan condition 296 | #[derive(Debug, Clone)] 297 | pub struct Scan { 298 | /// allow list 299 | pub allowlist: Allowlist, 300 | 301 | /// the rules list 302 | pub ruleslist: Vec, 303 | 304 | pub threads: Option, 305 | pub chunk: Option, 306 | } 307 | impl Scan { 308 | pub fn new() -> Self { 309 | Scan { 310 | allowlist: Allowlist::new(), 311 | ruleslist: Vec::new(), 312 | // keywords:Vec::new(), 313 | threads: Some(10), 314 | chunk: Some(10), 315 | } 316 | } 317 | } 318 | 319 | impl Default for Scan { 320 | fn default() -> Self { 321 | Self::new() 322 | } 323 | } 324 | 325 | /// The commit info 326 | #[derive(Debug, Clone)] 327 | pub struct CommitInfo { 328 | /// repo name 329 | pub repo: String, 330 | 331 | /// commit id 332 | pub commit: git2::Oid, 333 | 334 | /// author name 335 | pub author: String, 336 | 337 | /// the email of author 338 | pub email: String, 339 | 340 | /// commit message 341 | pub commit_message: String, 342 | 343 | /// commit date 344 | pub date: DateTime, 345 | 346 | /// file 347 | pub files: Vec<(String, String)>, 
348 | } 349 | 350 | /// The Results of the project 351 | #[derive(Debug)] 352 | pub struct Results { 353 | /// The number of commits being scanned 354 | pub commits_number: usize, 355 | 356 | /// The leaks 357 | pub outputs: Vec, 358 | } 359 | impl Results { 360 | pub fn new() -> Self { 361 | Results { 362 | commits_number: 0, 363 | outputs: Vec::new(), 364 | } 365 | } 366 | } 367 | impl Default for Results { 368 | fn default() -> Self { 369 | Self::new() 370 | } 371 | } 372 | /// CSV Object 373 | #[derive(Debug, Serialize, Deserialize)] 374 | pub struct CsvResult { 375 | /// The line containing the sensitive information. 376 | pub line: String, 377 | 378 | /// The line number where the sensitive information is found. 379 | pub line_number: u32, 380 | 381 | /// The sensitive information detected. 382 | pub offender: String, 383 | 384 | /// The commit info. 385 | pub commit: String, 386 | 387 | /// The repository where the sensitive information is found. 388 | pub repo: String, 389 | 390 | /// The rule used to detect the sensitive information. 391 | pub rule: String, 392 | 393 | /// The commit message associated with the sensitive information. 394 | pub commit_message: String, 395 | 396 | /// The author of the commit. 397 | pub author: String, 398 | 399 | /// The email of the commit author. 400 | pub email: String, 401 | 402 | /// The file path where the sensitive information is found. 403 | pub file: String, 404 | 405 | /// The date of the commit. 
406 | pub date: String, 407 | } 408 | 409 | /// Config to connect to the database 410 | #[derive(Debug, Default,Serialize, Deserialize)] 411 | pub struct ConnectDbConfig { 412 | /// The host of the database 413 | pub host: String, 414 | /// The user of the database 415 | pub user: String, 416 | /// The password of the database 417 | pub password: String, 418 | /// The name of the database 419 | pub dbname: String, 420 | /// The port of the database 421 | pub port: String, 422 | } 423 | 424 | impl ConnectDbConfig { 425 | /// Translate the config to connection url 426 | pub fn to_connection_url(&self) -> String { 427 | format!( 428 | "postgresql://{}:{}@{}:{}/{}", 429 | self.user, self.password, self.host, self.port, self.dbname 430 | ) 431 | } 432 | pub fn new() -> Self { 433 | ConnectDbConfig { 434 | host: String::from(""), 435 | user: String::from(""), 436 | password: String::from(""), 437 | dbname: String::from(""), 438 | port: String::from(""), 439 | } 440 | } 441 | } 442 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::fmt; 3 | 4 | /// CustomError represents custom errors that can occur in the application. 5 | #[derive(Debug)] 6 | pub enum CustomError { 7 | EmptyFileError, 8 | EmptyConfigFileError, 9 | 10 | FailDeleteDir, 11 | FailCreateDir, 12 | FailLoadRepo, 13 | FailCloneRepo, 14 | InvalidRepoName, 15 | ObjectNotFound, 16 | RepoInternalError, 17 | ObjectNotAccess, 18 | ObjectConvertFail, 19 | AccessWalkerError, 20 | RepoCommitError, 21 | WalkerSortError, 22 | PushWalkerHeadError, 23 | InvalidDateFormat, 24 | InvalidTimeFormat, 25 | InvalidTomlFile, 26 | 27 | ExportCsvError, 28 | ExportSarifError, 29 | ExportJsonError, 30 | } 31 | 32 | impl fmt::Display for CustomError { 33 | /// Formats the error message for display. 
34 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 35 | let error_message = match *self { 36 | CustomError::EmptyFileError => "Empty file", 37 | CustomError::EmptyConfigFileError => "Empty Config file", 38 | 39 | CustomError::ExportCsvError => "Export CSV Error", 40 | CustomError::ExportSarifError => "Export Sarif Error", 41 | CustomError::ExportJsonError => "Export Json Error", 42 | 43 | CustomError::FailDeleteDir => "Failed to delete directory", 44 | CustomError::FailCreateDir => "Failed to create directory", 45 | CustomError::FailLoadRepo => "Failed to load repository", 46 | CustomError::FailCloneRepo => "Failed to clone repository", 47 | CustomError::InvalidRepoName => "Invalid repository name", 48 | CustomError::RepoInternalError => "Internal error within the repository", 49 | CustomError::ObjectNotFound => { 50 | "Failure to find a blob or tree object in the repository" 51 | } 52 | CustomError::ObjectNotAccess => "Failed to access the repository's object database", 53 | CustomError::ObjectConvertFail => "Failed to convert object to commit", 54 | CustomError::AccessWalkerError => "Failure to create or access the revision walker", 55 | CustomError::RepoCommitError => "Failed to find a commit in the repository", 56 | CustomError::WalkerSortError => { 57 | "Failed to set the sorting order of the revision walker" 58 | } 59 | CustomError::PushWalkerHeadError => { 60 | "Failed to push the HEAD reference to the revision walker" 61 | } 62 | CustomError::InvalidDateFormat => "Invalid date format", 63 | CustomError::InvalidTimeFormat => "Invalid time format", 64 | CustomError::InvalidTomlFile => "Invalid TOML file", 65 | }; 66 | write!(f, "{}", error_message) 67 | } 68 | } 69 | 70 | impl Error for CustomError {} 71 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | mod errors; 2 | 3 | mod utils { 4 | pub mod detect_utils; 5 | pub 
mod git_util; 6 | } 7 | 8 | pub mod entity{ 9 | pub mod models; 10 | } 11 | 12 | pub mod service{ 13 | pub mod detect_service; 14 | pub mod git_service; 15 | pub mod db_service; 16 | } 17 | 18 | pub use entity::models; 19 | pub use errors::*; 20 | pub use utils::detect_utils; 21 | pub use utils::git_util; 22 | pub use git_util::*; 23 | pub use models::*; 24 | 25 | use actix_web::{App, HttpServer}; 26 | use actix_cors::Cors; 27 | use utoipa::OpenApi; 28 | use utoipa_swagger_ui::SwaggerUi; 29 | 30 | mod routes{ 31 | pub mod scan; 32 | pub mod rules; 33 | } 34 | pub use routes::scan::*; 35 | pub use routes::rules::*; 36 | 37 | use crate::routes::*; 38 | 39 | pub async fn start() -> Result<(), Box> { 40 | #[derive(OpenApi)] 41 | #[openapi( 42 | paths( 43 | scan::scan_repo, 44 | rules::get_all, 45 | rules::add_rules, 46 | rules::delete_rules_by_id, 47 | rules::update_rules 48 | ), 49 | components( 50 | schemas(ConfigDto,ScanResponse,RulesDto,JsonResponse,Rule,Allowlist,Leak) 51 | ), 52 | 53 | tags( 54 | (name = "scan", description = "Scan Git repositories API"), 55 | (name = "rules", description = "Rules management API"), 56 | ) 57 | )] 58 | struct ApiDoc; 59 | 60 | HttpServer::new(|| { 61 | let cors = Cors::default() 62 | .allow_any_origin() 63 | .allow_any_method() 64 | .allow_any_header(); 65 | 66 | App::new() 67 | .wrap(cors) 68 | .service( 69 | SwaggerUi::new("/swagger-ui/{_:.*}") 70 | .url("/api-docs/openapi.json", ApiDoc::openapi()) 71 | ) 72 | .service(scan_repo) 73 | .service(rules::get_all) 74 | .service(rules::add_rules) 75 | .service(rules::delete_rules_by_id) 76 | .service(rules::update_rules) 77 | }) 78 | .bind("0.0.0.0:7000")? 
79 | .run() 80 | .await?; 81 | 82 | Ok(()) 83 | } 84 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use sensleak::service::detect_service::sensleaks; 2 | 3 | /// The entry of the project 4 | #[tokio::main] 5 | async fn main() { 6 | sensleaks().await; 7 | } 8 | 9 | -------------------------------------------------------------------------------- /src/routes/rules.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::detect_utils::*; 2 | use crate::models::{Allowlist, Rule}; 3 | use actix_web::{post, web, HttpResponse, Responder}; 4 | use serde::{Deserialize, Serialize}; 5 | use utoipa::ToSchema; 6 | 7 | /// Rules Dto 8 | #[derive(Serialize, Deserialize, Debug, ToSchema)] 9 | pub struct RulesDto { 10 | config: String, 11 | rule: Option, 12 | rule_id: Option, 13 | } 14 | 15 | /// The response object 16 | #[derive(Serialize, ToSchema)] 17 | pub struct JsonResponse { 18 | code: usize, 19 | allowlist: Option, 20 | ruleslist: Option>, 21 | message: Option, 22 | } 23 | 24 | /// Load the rules 25 | /// 26 | /// Load the allowlists and ruleslist. 
27 | #[utoipa::path( 28 | post, 29 | path = "/rules/get_all", 30 | request_body = RulesDto, 31 | responses( 32 | (status = 200, description = "success", body = JsonResponse), 33 | (status = 400, description = "fail", body = JsonResponse) 34 | ) 35 | )] 36 | #[post("/rules/get_all")] 37 | pub async fn get_all(body: web::Json) -> impl Responder { 38 | match load_config_file(&body.config) { 39 | Ok(scan) => HttpResponse::Ok().json(JsonResponse { 40 | code: 200, 41 | allowlist: Some(scan.allowlist), 42 | ruleslist: Some(scan.ruleslist), 43 | message: None, 44 | }), 45 | Err(err) => HttpResponse::BadRequest().json(JsonResponse { 46 | code: 400, 47 | message: Some(err.to_string()), 48 | allowlist: None, 49 | ruleslist: None, 50 | }), 51 | } 52 | } 53 | 54 | /// Add rules. 55 | /// 56 | /// Add one single rule. 57 | #[utoipa::path( 58 | post, 59 | path = "/rules/add_rules", 60 | request_body = RulesDto, 61 | responses( 62 | (status = 200, description = "success", body = JsonResponse), 63 | (status = 400, description = "fail", body = JsonResponse) 64 | ) 65 | )] 66 | #[post("/rules/add_rules")] 67 | pub async fn add_rules(body: web::Json) -> impl Responder { 68 | let rule: Rule = match &body.rule { 69 | Some(value) => value.clone(), 70 | None => { 71 | return HttpResponse::BadRequest().json(JsonResponse { 72 | code: 400, 73 | message: Some("It is not a Rule struct".to_string()), 74 | allowlist: None, 75 | ruleslist: None, 76 | }) 77 | } 78 | }; 79 | 80 | match append_rule_to_toml(&rule, &body.config) { 81 | Ok(_) => HttpResponse::Ok().json(JsonResponse { 82 | code: 200, 83 | message: Some("success".to_string()), 84 | allowlist: None, 85 | ruleslist: None, 86 | }), 87 | Err(err) => HttpResponse::BadRequest().json(JsonResponse { 88 | code: 400, 89 | message: Some(err.to_string()), 90 | allowlist: None, 91 | ruleslist: None, 92 | }), 93 | } 94 | } 95 | 96 | /// Delete rules. 97 | /// 98 | /// Delete one rule by id. 
99 | #[utoipa::path( 100 | post, 101 | path = "/rules/delete_rules_by_id", 102 | request_body = RulesDto, 103 | responses( 104 | (status = 200, description = "success", body = JsonResponse), 105 | (status = 400, description = "fail", body = JsonResponse) 106 | ) 107 | )] 108 | #[post("/rules/delete_rules_by_id")] 109 | pub async fn delete_rules_by_id(body: web::Json) -> impl Responder { 110 | let rule_id = match &body.rule_id { 111 | Some(value) => value.clone(), 112 | None => { 113 | return HttpResponse::BadRequest().json(JsonResponse { 114 | code: 400, 115 | message: Some("It is not a rule id".to_string()), 116 | allowlist: None, 117 | ruleslist: None, 118 | }) 119 | } 120 | }; 121 | 122 | match delete_rule_by_id(&body.config, &rule_id) { 123 | Ok(_) => HttpResponse::Ok().json(JsonResponse { 124 | code: 200, 125 | message: Some("success".to_string()), 126 | allowlist: None, 127 | ruleslist: None, 128 | }), 129 | Err(err) => HttpResponse::BadRequest().json(JsonResponse { 130 | code: 400, 131 | message: Some(err.to_string()), 132 | allowlist: None, 133 | ruleslist: None, 134 | }), 135 | } 136 | } 137 | 138 | /// Update rules. 139 | /// 140 | /// Update one rule by id. 
141 | #[utoipa::path( 142 | post, 143 | path = "/rules/update", 144 | request_body = RulesDto, 145 | responses( 146 | (status = 200, description = "success", body = JsonResponse), 147 | (status = 400, description = "fail", body = JsonResponse) 148 | ) 149 | )] 150 | #[post("/rules/update")] 151 | pub async fn update_rules(body: web::Json) -> impl Responder { 152 | let rule_id = match &body.rule_id { 153 | Some(value) => value.clone(), 154 | None => { 155 | return HttpResponse::BadRequest().json(JsonResponse { 156 | code: 400, 157 | message: Some("It is not a rule id".to_string()), 158 | allowlist: None, 159 | ruleslist: None, 160 | }) 161 | } 162 | }; 163 | 164 | let rule: Rule = match &body.rule { 165 | Some(value) => value.clone(), 166 | None => { 167 | return HttpResponse::BadRequest().json(JsonResponse { 168 | code: 400, 169 | message: Some("It is not a Rule struct".to_string()), 170 | allowlist: None, 171 | ruleslist: None, 172 | }) 173 | } 174 | }; 175 | 176 | match update_rule_by_id(&body.config, &rule_id, &rule) { 177 | Ok(_) => HttpResponse::Ok().json(JsonResponse { 178 | code: 200, 179 | message: Some("success".to_string()), 180 | allowlist: None, 181 | ruleslist: None, 182 | }), 183 | Err(err) => HttpResponse::BadRequest().json(JsonResponse { 184 | code: 400, 185 | message: Some(err.to_string()), 186 | allowlist: None, 187 | ruleslist: None, 188 | }), 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /src/routes/scan.rs: -------------------------------------------------------------------------------- 1 | use actix_web::{post, web, HttpResponse, Responder}; 2 | use serde::{Deserialize, Serialize}; 3 | use utoipa::ToSchema; 4 | 5 | use crate::service::detect_service::detect; 6 | use crate::{Config, Leak}; 7 | 8 | /// The scan configuration 9 | #[derive(Deserialize, Serialize, ToSchema)] 10 | pub struct ConfigDto { 11 | /// Target repository. 
12 | pub repo: String, 13 | /// Config path 14 | pub config: String, 15 | /// Maximum number of threads sensleak spawns 16 | pub report: Option, 17 | /// The number of git files processed in each batch 18 | pub report_format: Option, 19 | /// Path to write json leaks file. 20 | pub repo_config: Option, 21 | /// json, csv, sarif 22 | pub threads: Option, 23 | /// Show verbose output from scan. 24 | pub chunk: Option, 25 | /// Pretty print json if leaks are present. 26 | pub commit: Option, 27 | /// comma separated list of a commits to scan 28 | pub commits: Option, 29 | /// file of new line separated list of a commits to scan 30 | pub commits_file: Option, 31 | /// Scan commits more recent than a specific date. Ex: '2006-01-02' or '2023-01-02T15:04:05-0700' format. 32 | pub commit_since: Option, 33 | /// Scan commits older than a specific date. Ex: '2006-01-02' or '2006-10-02T15:04:05-0700' format. 34 | pub commit_until: Option, 35 | 36 | /// Commit to start scan from 37 | pub commit_from: Option, 38 | /// Commit to stop scan 39 | pub commit_to: Option, 40 | /// Branch to scan 41 | pub branch: Option, 42 | /// Run sensleak on uncommitted code 43 | pub uncommitted: Option, 44 | /// Set user to scan 45 | pub user: Option, 46 | 47 | /// Clones repo(s) to disk. 48 | pub disk: Option, 49 | 50 | /// Output to database 51 | pub to_db: bool, 52 | } 53 | 54 | /// The return results of the scan. 55 | #[derive(Deserialize, Serialize, ToSchema)] 56 | pub struct ScanResponse { 57 | /// 200-success, 400-fail 58 | code: usize, 59 | /// the leaks number 60 | leaks_number: Option, 61 | /// the number of scanned commits 62 | commits_number: Option, 63 | /// leaks 64 | leaks: Option>, 65 | /// message 66 | message: Option, 67 | } 68 | 69 | /// Scan the repo. 70 | /// 71 | /// Scan Git repositories for sensitive data. 
72 | #[utoipa::path( 73 | post, 74 | path = "/scan", 75 | request_body = ConfigDto, 76 | responses( 77 | (status = 200, description = "success", body = ScanResponse), 78 | (status = 400, description = "fail", body = ScanResponse) 79 | ) 80 | )] 81 | #[post("/scan")] 82 | pub async fn scan_repo(json_config: web::Json) -> impl Responder { 83 | let mut config: Config = Default::default(); 84 | config.repo = json_config.repo.clone(); 85 | config.config = json_config.config.clone(); 86 | config.report = json_config.report.clone(); 87 | config.threads = json_config.threads; 88 | config.chunk = json_config.chunk; 89 | config.report_format = json_config.report_format.clone(); 90 | config.commit = json_config.commit.clone(); 91 | config.commits = json_config.commits.clone(); 92 | config.commit_from = json_config.commit_from.clone(); 93 | config.commit_to = json_config.commit_to.clone(); 94 | config.commit_since = json_config.commit_since.clone(); 95 | config.commits_file = json_config.commits_file.clone(); 96 | config.branch = json_config.branch.clone(); 97 | config.uncommitted = false; 98 | config.user = json_config.user.clone(); 99 | config.disk = json_config.disk.clone(); 100 | config.repo_config = json_config.repo_config.unwrap_or(false); 101 | config.to_db = json_config.to_db; 102 | 103 | match detect(config).await { 104 | Ok(results) => HttpResponse::Ok().json(ScanResponse { 105 | code: 200, 106 | leaks_number: Some(results.outputs.len()), 107 | commits_number: Some(results.commits_number), 108 | leaks: Some(results.outputs), 109 | message: None, 110 | }), 111 | Err(err) => HttpResponse::BadRequest().json(ScanResponse { 112 | code: 400, 113 | message: Some(err.to_string()), 114 | leaks_number: None, 115 | commits_number: None, 116 | leaks: None, 117 | }), 118 | } 119 | } 120 | 121 | 122 | 123 | #[cfg(test)] 124 | mod tests { 125 | // use super::*; 126 | // use actix_web::{test, web, App}; 127 | // use actix_web::http::StatusCode; 128 | 129 | // #[actix_web::test] 130 
| // async fn test_scan_repo_success() { 131 | // let config = ConfigDto { 132 | // repo: String::from("example/repo"), 133 | // config: String::from("example/config"), 134 | // report: Some(String::from("example/report")), 135 | // report_format: Some(String::from("json")), 136 | // repo_config: Some(true), 137 | // threads: Some(4), 138 | // chunk: Some(10), 139 | // commit: Some(String::from("abcd1234")), 140 | // commits: Some(String::from("commit1,commit2")), 141 | // commits_file: Some(String::from("path/to/file")), 142 | // commit_since: Some(String::from("2023-01-01")), 143 | // commit_until: Some(String::from("2023-01-31")), 144 | // commit_from: Some(String::from("abcd1234")), 145 | // commit_to: Some(String::from("efgh5678")), 146 | // branch: Some(String::from("main")), 147 | // uncommitted: Some(false), 148 | // user: Some(String::from("john")), 149 | // disk: Some(String::from("path/to/disk")), 150 | // to_db: false, 151 | // }; 152 | // 153 | // let app = test::init_service( 154 | // App::new().service(scan_repo) 155 | // ).await; 156 | // 157 | // let req = test::TestRequest::post() 158 | // .uri("/scan") 159 | // .set_json(&config) 160 | // .to_request(); 161 | // 162 | // let resp = test::call_service(&app, req).await; 163 | // assert_eq!(resp.status(), StatusCode::OK); 164 | // 165 | // let body: ScanResponse = test::read_body_json(resp).await; 166 | // assert_eq!(body.code, 200); 167 | // assert_eq!(body.leaks_number, Some(10)); 168 | // assert_eq!(body.commits_number, Some(2)); 169 | // assert_eq!(body.message, None); 170 | // } 171 | } 172 | -------------------------------------------------------------------------------- /src/service/db_service.rs: -------------------------------------------------------------------------------- 1 | use crate::models::{ConnectDbConfig, Entity as Leaks, Leak}; 2 | use chrono::Local; 3 | use sea_orm::*; 4 | use std::env; 5 | 6 | /// Sets up the database connection using the application's configuration settings. 
7 | /// 8 | /// This function attempts to establish a connection to the database using environment variables for the database configuration. 9 | /// It reads configuration values such as host, port, user, password, and database name from environment variables 10 | /// and uses them to construct the database URL. 11 | /// 12 | /// # Returns 13 | /// 14 | /// Returns a `Result`: 15 | /// - `Ok(DatabaseConnection)` if the connection is successfully established. 16 | /// - `Err(DbErr)` if there is an error connecting to the database. 17 | pub async fn set_up_db() -> Result { 18 | let config = get_db_config(); 19 | let db_url = config.to_connection_url(); 20 | let db = Database::connect(&db_url).await?; 21 | Ok(db) 22 | } 23 | 24 | /// Inserts a vector of `Leak` entities into the database and ensures that the `Leaks` table exists. 25 | /// 26 | /// This function first checks if the `Leaks` table exists in the database and creates it if not. 27 | /// Then, it proceeds to insert the provided vector of `Leak` entities into the `Leaks` table. 28 | /// 29 | /// # Arguments 30 | /// 31 | /// * `_leaks` - A reference to a vector of `Leak` entities to be inserted into the database. 32 | /// 33 | /// # Returns 34 | /// 35 | /// Returns a `Result<(), DbErr>` indicating the outcome of the operation: 36 | /// - `Ok(())` if the insertion is successful and the `Leaks` table is either found or successfully created. 37 | /// - `Err(DbErr)` if there is an error during the table check/creation or insertion process. 
38 | pub async fn insert_leaks(_leaks: &[Leak]) -> Result<(), DbErr> { 39 | let db = match set_up_db().await { 40 | Ok(db) => db, 41 | Err(err) => panic!("{}", err), 42 | }; 43 | 44 | // Check if the table Leaks exists and create it if not 45 | let builder = db.get_database_backend(); 46 | let schema = Schema::new(builder); 47 | 48 | let stmt = schema 49 | .create_table_from_entity(Leaks) 50 | .if_not_exists() 51 | .to_owned(); 52 | 53 | let stmt = builder.build(&stmt); 54 | 55 | db.execute(stmt).await?; 56 | 57 | println!( 58 | "\x1b[34m[INFO]\x1b[0m[{}] Create Success ...", 59 | Local::now().format("%Y-%m-%d %H:%M:%S"), 60 | ); 61 | 62 | // Insert leaks 63 | for leak in _leaks.iter() { 64 | let active_model = leak.to_active_model(); 65 | 66 | let insert_result = Leaks::insert(active_model) 67 | .exec(&db) 68 | .await?; 69 | println!("Inserted leak with result: {:?}", insert_result); 70 | } 71 | 72 | println!( 73 | "\x1b[34m[INFO]\x1b[0m[{}] Insert Success ...", 74 | Local::now().format("%Y-%m-%d %H:%M:%S"), 75 | ); 76 | 77 | Ok(()) 78 | } 79 | 80 | /// Retrieves database connection configuration from environment variables. 81 | /// 82 | /// This function constructs a `ConnectDbConfig` struct with database connection details 83 | /// such as host, port, username, password, and database name, reading the values from 84 | /// environment variables. If an environment variable is not set, it defaults to a predefined value. 85 | /// 86 | /// # Returns 87 | /// 88 | /// Returns a `ConnectDbConfig` struct populated with the database connection details. 
89 | fn get_db_config() -> ConnectDbConfig { 90 | ConnectDbConfig { 91 | host: env::var("PG_HOST").unwrap_or("localhost".to_string()), 92 | port: env::var("PG_PORT").unwrap_or("5432".to_string()), 93 | user: env::var("PG_USER").unwrap_or("postgres".to_string()), 94 | password: env::var("PG_PASSWORD").unwrap_or("postgres".to_string()), 95 | dbname: env::var("PG_DBNAME").unwrap_or("postgres".to_string()) 96 | } 97 | } -------------------------------------------------------------------------------- /src/service/detect_service.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::CustomError; 2 | use crate::models::{Allowlist, CommitInfo, Config, Leak, Results, Rule, Scan}; 3 | use crate::service::git_service::*; 4 | use crate::utils::detect_utils::{ 5 | is_commit_in_allowlist, is_contains_strs, is_link, is_path_in_allowlist, is_string_matched, 6 | load_config, remove_duplicates, write_csv_report, write_json_report, write_sarif_report, 7 | }; 8 | use crate::utils::git_util::{clone_or_load_repository, extract_repo_name}; 9 | use crate::service::db_service::insert_leaks; 10 | use chrono::Local; 11 | use clap::Parser; 12 | use git2::Repository; 13 | use rayon::ThreadPoolBuilder; 14 | use regex::Regex; 15 | use std::error::Error; 16 | use std::fs; 17 | use std::sync::{Arc, Mutex}; 18 | use std::time::Instant; 19 | 20 | /// Starts the Git detector application. 21 | pub async fn sensleaks() { 22 | let args = Config::parse(); 23 | 24 | match detect(args).await { 25 | Ok(results) => results, 26 | Err(err) => { 27 | eprintln!("Application: {}", err); 28 | std::process::exit(0); 29 | } 30 | }; 31 | } 32 | 33 | /// Searches for sensitive information in a repository. 34 | /// 35 | /// # Arguments 36 | /// 37 | /// * `config` - A `Config` struct containing the configuration settings for the detection process. 
38 | /// 39 | /// # Returns 40 | /// 41 | /// Returns the detection results as a `Result` containing the scan results or an error. 42 | /// 43 | pub async fn detect(config: Config) -> Result> { 44 | // load repo and record the time of clone repo 45 | let start_clone_repo = Instant::now(); 46 | let repo = clone_or_load_repository(&config)?; 47 | let duration_repo: std::time::Duration = Instant::now().duration_since(start_clone_repo); 48 | 49 | // load scan, which contains allowlist, ruleslist, keywords 50 | let mut scan = load_config(&repo, &config)?; 51 | 52 | // Set threads and chunk in scan 53 | scan.threads = config.threads; 54 | scan.chunk = config.chunk; 55 | 56 | // Record the start time of the scan 57 | let start_scan = Instant::now(); 58 | 59 | // Scan 60 | let results = process_scan(&config, repo, scan)?; 61 | 62 | // To output content in the console. 63 | config_info_after_detect(&config, &results, start_scan, duration_repo).await?; 64 | 65 | Ok(results) 66 | } 67 | 68 | /// Processes the scan based on the provided configuration, repository, and scan settings. 69 | /// 70 | /// # Arguments 71 | /// 72 | /// * `config` - A reference to the `Config` object containing the scan configuration settings. 73 | /// * `repo` - The `Repository` object representing the repository to scan. 74 | /// * `scan` - The `Scan` object containing additional scan settings such as allowlist, ruleslist, and keywords. 75 | /// 76 | /// # Returns 77 | /// 78 | /// Returns the scan results as a `Result` containing the `Results` or an error. 79 | fn process_scan(config: &Config, repo: Repository, scan: Scan) -> Result> { 80 | // Scan the files that have not been submitted. 
81 | if config.uncommitted { 82 | return handle_uncommitted_files(repo, &config.repo, scan); 83 | } 84 | 85 | match ( 86 | &config.commit, 87 | &config.commits, 88 | &config.commits_file, 89 | &config.commit_since, 90 | &config.commit_until, 91 | &config.commit_from, 92 | &config.commit_to, 93 | &config.uncommitted, 94 | &config.user, 95 | &config.branch, 96 | ) { 97 | (Some(commit), _, _, _, _, _, _, _, Some(user), _) => { 98 | handle_single_commit(repo, commit, scan, user) 99 | } 100 | (_, Some(commits), _, _, _, _, _, _, Some(user), _) => { 101 | let commit_ids: Vec<&str> = commits.split(',').collect(); 102 | handle_multiple_commits(repo, &commit_ids, scan, user) 103 | } 104 | (_, _, Some(file_path), _, _, _, _, _, Some(user), _) => { 105 | handle_commits_file(repo, file_path, scan, user) 106 | } 107 | (_, _, _, Some(since), Some(until), _, _, _, Some(user), _) => { 108 | handle_commit_range_by_time(repo, since, until, scan, user) 109 | } 110 | (_, _, _, _, _, Some(commit_from), Some(commit_to), _, Some(user), _) => { 111 | handle_commit_range( 112 | repo, 113 | Some(commit_from.clone()), 114 | Some(commit_to.clone()), 115 | scan, 116 | user, 117 | ) 118 | } 119 | (_, _, _, _, _, _, _, _, Some(_user), Some(branch)) => { 120 | handle_branches_by_name(repo, branch, scan) 121 | } 122 | (_, _, _, _, _, _, _, _, Some(user), _) => handle_all_commits(repo, scan, user), 123 | 124 | _ => handle_all_commits(repo, scan, ""), 125 | } 126 | } 127 | 128 | /// Detects leaks in the provided file contents based on the specified rules and configurations. 129 | /// 130 | /// 131 | /// The function utilizes a thread pool to execute detection operations concurrently, improving performance. 132 | /// Detected leaks are stored in a shared mutable vector wrapped in an `Arc`. 133 | /// 134 | /// # Arguments 135 | /// 136 | /// * `contents` - The contents of the file to be scanned for leaks. 137 | /// * `path` - The path to the file being scanned. 
138 | /// * `ruleslist` - A slice of `Rule` objects representing the rules to be applied during the detection process. 139 | /// * `allowlist` - An `Allowlist` object containing patterns to exclude from the detection process. 140 | /// * `commit_info` - A reference to the `CommitInfo` object containing information about the commit associated with the file. 141 | /// * `threads` - An optional `usize` value specifying the number of threads to use in the thread pool. Default is 50. 142 | /// 143 | /// # Returns 144 | /// 145 | /// Returns a `Result` containing a cloned vector of `Leak` objects representing the detected leaks, or an error. 146 | /// 147 | /// # Errors 148 | /// 149 | /// This function can return an error if there are any issues during the detection process. 150 | /// 151 | pub fn detect_file( 152 | contents: &str, 153 | path: &str, 154 | ruleslist: &[Rule], 155 | allowlist: &Allowlist, 156 | commit_info: &CommitInfo, 157 | threads: Option, 158 | ) -> Result, Box> { 159 | // Create a shared mutable vector to store detection results 160 | let detect_info: Arc>> = Arc::new(Mutex::new(Vec::new())); 161 | 162 | // Create a thread pool with the setting threads 163 | let thread_pool = ThreadPoolBuilder::new() 164 | .num_threads(threads.unwrap_or(50)) 165 | .build() 166 | .unwrap(); 167 | 168 | // Use the thread pool to execute the detection operations 169 | thread_pool.scope(|s| { 170 | for rule in ruleslist { 171 | // Check if the contents contain any keywords from the rule 172 | if is_contains_strs(&rule.keywords, contents) { 173 | let cloned_path = path.to_string(); 174 | let cloned_rule = rule.clone(); 175 | let cloned_contents = contents.to_string(); 176 | let cloned_allowlist = allowlist.clone(); 177 | let cloned_commits = commit_info.commit.to_string(); 178 | let cloned_commit_info = commit_info.clone(); 179 | let detect_info_clone = Arc::clone(&detect_info); 180 | 181 | // Spawn a thread to perform the detection using regex 182 | s.spawn(move |_| { 
183 | let results = detect_by_regex( 184 | &cloned_path, 185 | &cloned_rule, 186 | &cloned_contents, 187 | &cloned_allowlist, 188 | &cloned_commits, 189 | ); 190 | 191 | // Acquire the lock for detection results and update the vector 192 | let mut detect_info = detect_info_clone.lock().unwrap(); 193 | for (line_number, line, matched) in results.iter() { 194 | let output_item = Leak { 195 | line: line.to_string(), 196 | line_number: *line_number as u32, 197 | offender: matched.to_string(), 198 | commit: cloned_commit_info.commit.to_string(), 199 | repo: cloned_commit_info.repo.to_string(), 200 | rule: cloned_rule.description.to_string(), 201 | commit_message: cloned_commit_info.commit_message.to_string(), 202 | author: cloned_commit_info.author.to_string(), 203 | email: cloned_commit_info.email.to_string(), 204 | file: cloned_path.to_string(), 205 | date: cloned_commit_info.date.to_string(), 206 | }; 207 | detect_info.push(output_item); 208 | } 209 | }); 210 | } 211 | } 212 | }); 213 | 214 | // Acquire the lock for detection results and return a clone of the results 215 | let detect_info = detect_info.lock().unwrap(); 216 | Ok(detect_info.clone()) 217 | } 218 | 219 | /// Searches a string for matches of a given regular expression and returns a vector of tuples. 220 | /// 221 | /// # Arguments 222 | /// 223 | /// * `path` - The path to the file being searched. This is used for allowlist checks. 224 | /// * `rules` - A `Rule` object representing the rule to apply during the detection process. It contains the regular expression to match against. 225 | /// * `contents` - A string containing the contents to search for matches. 226 | /// * `allowlist` - An `Allowlist` object containing the allowlist configurations. 227 | /// 228 | /// # Returns 229 | /// 230 | /// A vector of tuples `(usize, &str, &str)`, where each tuple represents a match found in the string. 
231 | /// The first element of the tuple is the line number (1-indexed), the second element is the matched line, and the third element is the matched substring. 232 | /// 233 | fn detect_by_regex<'a>( 234 | path: &str, 235 | rules: &Rule, 236 | contents: &'a str, 237 | allowlist: &Allowlist, 238 | commits: &str, 239 | ) -> Vec<(usize, &'a str, &'a str)> { 240 | // Create a regular expression object. 241 | let regex = Regex::new(&rules.regex).unwrap(); 242 | 243 | // Iterate over the lines in the string. 244 | let results: Vec<(usize, &str, &str)> = contents 245 | .lines() 246 | .enumerate() 247 | .filter_map(|(i, line)| { 248 | // Match the regular expression against each line. 249 | regex 250 | .captures(line) 251 | .and_then(|captures| captures.get(0)) 252 | .map(|matched| (i + 1, line, matched.as_str())) 253 | }) 254 | .collect(); 255 | if results.is_empty() { 256 | return Vec::new(); 257 | } 258 | 259 | // The secrets that should be skipped 260 | let mut filtered_results: Vec<(usize, &str, &str)> = Vec::new(); 261 | 262 | // Handle global allowlist 263 | if allowlist.regex_target == "line" { 264 | for (line_number, line, matched) in &results { 265 | if (allowlist.regexes.is_empty() || allowlist.stopwords.is_empty()) 266 | && (is_string_matched(&allowlist.regexes, line) 267 | || is_contains_strs(&allowlist.stopwords, line)) 268 | { 269 | filtered_results.push((*line_number, line, matched)); 270 | } 271 | } 272 | } else { 273 | for (line_number, line, matched) in &results { 274 | if (allowlist.regexes.is_empty() || allowlist.stopwords.is_empty()) 275 | && (is_string_matched(&allowlist.regexes, matched) 276 | || is_contains_strs(&allowlist.stopwords, matched)) 277 | { 278 | filtered_results.push((*line_number, line, matched)); 279 | } 280 | } 281 | } 282 | 283 | // Handle rules.allowlist 284 | if let Some(rules_allowlist) = &rules.allowlist { 285 | // check commits and paths 286 | if (is_path_in_allowlist(path, &rules_allowlist.paths)) 287 | || 
(is_commit_in_allowlist(commits, &rules_allowlist.commits)) 288 | { 289 | return vec![]; 290 | } 291 | 292 | // check regexes and stopwords 293 | if rules_allowlist.regex_target == "line" { 294 | for (line_number, line, matched) in &results { 295 | if (rules_allowlist.regexes.is_empty() || rules_allowlist.stopwords.is_empty()) 296 | && (is_string_matched(&rules_allowlist.regexes, line) 297 | || is_contains_strs(&rules_allowlist.stopwords, line)) 298 | { 299 | filtered_results.push((*line_number, line, matched)); 300 | } 301 | } 302 | } else { 303 | for (line_number, line, matched) in &results { 304 | if (rules_allowlist.regexes.is_empty() || rules_allowlist.stopwords.is_empty()) 305 | && (is_string_matched(&rules_allowlist.regexes, matched) 306 | || is_contains_strs(&rules_allowlist.stopwords, matched)) 307 | { 308 | filtered_results.push((*line_number, line, matched)); 309 | } 310 | } 311 | } 312 | } 313 | 314 | if filtered_results.is_empty() { 315 | results 316 | } else { 317 | remove_duplicates(results, filtered_results) 318 | } 319 | } 320 | 321 | /// Detects uncommitted files for sensitive information leaks. 322 | /// 323 | /// # Arguments 324 | /// 325 | /// * `contents` - A string slice representing the contents of the file. 326 | /// * `path` - A string slice representing the path of the file. 327 | /// * `ruleslist` - A reference to a slice of `Rule` objects to match against. 328 | /// * `allowlist` - A reference to an `Allowlist` object for paths that should be skipped. 329 | /// 330 | /// # Returns 331 | /// 332 | /// Returns a `Result` containing a vector of `Leak` objects if sensitive information leaks are detected, 333 | /// or an empty vector if no leaks are found. 
334 | pub fn detect_uncommitted_file( 335 | contents: &str, 336 | path: &str, 337 | ruleslist: &[Rule], 338 | allowlist: &Allowlist, 339 | threads: Option, 340 | ) -> Result, Box> { 341 | // Create a shared mutable vector to store detection results 342 | let detect_info: Arc>> = Arc::new(Mutex::new(Vec::new())); 343 | 344 | // Create a thread pool with the setting threads 345 | let thread_pool = ThreadPoolBuilder::new() 346 | .num_threads(threads.unwrap_or(50)) 347 | .build() 348 | .unwrap(); 349 | 350 | // Use the thread pool to execute the detection operations 351 | thread_pool.scope(|s| { 352 | for rule in ruleslist { 353 | // Check if the contents contain any keywords from the rule 354 | if is_contains_strs(&rule.keywords, contents) { 355 | let cloned_path = path.to_string(); 356 | let cloned_rule = rule.clone(); 357 | let cloned_contents = contents.to_string(); 358 | let cloned_allowlist = allowlist.clone(); 359 | let detect_info_clone = Arc::clone(&detect_info); 360 | 361 | // Spawn a thread to perform the detection using regex 362 | s.spawn(move |_| { 363 | let results = detect_by_regex( 364 | &cloned_path, 365 | &cloned_rule, 366 | &cloned_contents, 367 | &cloned_allowlist, 368 | "", 369 | ); 370 | 371 | // Acquire the lock for detection results and update the vector 372 | let mut detect_info = detect_info_clone.lock().unwrap(); 373 | for (line_number, line, matched) in results.iter() { 374 | let output_item = Leak { 375 | line: line.to_string(), 376 | line_number: *line_number as u32, 377 | offender: matched.to_string(), 378 | commit: "".to_string(), 379 | repo: "".to_string(), 380 | rule: cloned_rule.description.to_string(), 381 | commit_message: "".to_string(), 382 | author: "".to_string(), 383 | email: "".to_string(), 384 | file: cloned_path.to_string(), 385 | date: "".to_string(), 386 | }; 387 | detect_info.push(output_item); 388 | } 389 | }); 390 | } 391 | } 392 | }); 393 | 394 | // Acquire the lock for detection results and return a clone of the 
results 395 | let detect_info = detect_info.lock().unwrap(); 396 | Ok(detect_info.clone()) 397 | } 398 | 399 | /// Handles post-detection configuration information and performs actions based on the configuration settings. 400 | /// 401 | /// # Arguments 402 | /// 403 | /// * `config` - A reference to the `Config` object containing the scan configuration settings. 404 | /// * `results` - A reference to the `Results` object containing the detection results. 405 | /// * `start_scan` - The start time of the scan as an `Instant` object. 406 | /// * `duration_repo` - The duration of the repository scanning process as a `std::time::Duration` object. 407 | /// 408 | /// # Returns 409 | /// 410 | /// Returns `Ok(())` if the post-detection actions are performed successfully, or an error of type `Box` if any issues occur. 411 | /// 412 | /// # Errors 413 | /// 414 | /// This function can return an error if there are any issues during the post-detection actions, such as writing reports. 415 | /// 416 | async fn config_info_after_detect( 417 | config: &Config, 418 | results: &Results, 419 | start_scan: Instant, 420 | duration_repo: std::time::Duration, 421 | ) -> Result<(), Box> { 422 | // Calculate the scan duration 423 | let duration_scan = Instant::now().duration_since(start_scan); 424 | 425 | // If the verbose flag is set, print the scan results to the console 426 | if config.verbose { 427 | if config.pretty { 428 | println!("{:#?}", results.outputs); 429 | } else { 430 | println!("{:?}", results.outputs); 431 | } 432 | } 433 | 434 | // If the debug flag is set, print the scan results to the console 435 | if config.debug { 436 | debug_info(duration_repo, duration_scan, results.commits_number); 437 | } 438 | 439 | // Output to database 440 | if config.to_db { 441 | insert_leaks(&results.outputs).await?; 442 | } 443 | 444 | // Write output report 445 | if let Some(report) = &config.report { 446 | if let Some(format) = &config.report_format { 447 | if format == "sarif" { 448 | 
if write_sarif_report(report, &results.outputs).is_err() { 449 | return Err(Box::new(CustomError::ExportSarifError)); 450 | } 451 | } else if format == "csv" { 452 | if write_csv_report(report, &results.outputs).is_err() { 453 | return Err(Box::new(CustomError::ExportCsvError)); 454 | } 455 | } else if write_json_report(report, &results.outputs).is_err() { 456 | return Err(Box::new(CustomError::ExportJsonError)); 457 | } 458 | }; 459 | } 460 | 461 | println!( 462 | "\x1b[38;5;208m[WARN]\x1b[0m[{}]{} leaks detected. {} commits scanned in {:?}", 463 | Local::now().format("%Y-%m-%d %H:%M:%S"), 464 | results.outputs.len(), 465 | results.commits_number, 466 | duration_scan 467 | ); 468 | 469 | match &config.disk { 470 | Some(_disk) => {} 471 | None => { 472 | if is_link(&config.repo) { 473 | let dest = "workplace/"; 474 | let mut repo_path = String::new(); 475 | if let Some(name) = extract_repo_name(&config.repo) { 476 | repo_path = format!("{}{}", dest, name); 477 | } 478 | match fs::remove_dir_all(repo_path) { 479 | Ok(_) => {} 480 | Err(e) => eprintln!("Delete dir fail: {}", e), 481 | } 482 | } 483 | } 484 | }; 485 | Ok(()) 486 | } 487 | 488 | /// Prints debug information. 489 | /// 490 | /// # Arguments 491 | /// 492 | /// * `total_clone_time` - The total time taken for repository cloning, represented as a `Duration` object. 493 | /// * `total_scan_time` - The total time taken for the scan, represented as a `Duration` object. 494 | /// * `commits` - The number of commits. 
495 | fn debug_info( 496 | total_clone_time: std::time::Duration, 497 | total_scan_time: std::time::Duration, 498 | commits: usize, 499 | ) { 500 | let timestamp = Local::now().format("%Y-%m-%dT%H:%M:%S%.3f%:z").to_string(); 501 | println!( 502 | "\x1b[34m[DEBUG]\x1b[0m[{}] -------------------------", 503 | timestamp 504 | ); 505 | println!( 506 | "\x1b[34m[DEBUG]\x1b[0m[{}] | Times and Commit Counts|", 507 | timestamp 508 | ); 509 | println!( 510 | "\x1b[34m[DEBUG]\x1b[0m[{}] -------------------------", 511 | timestamp 512 | ); 513 | println!("totalScanTime: {:?}", total_scan_time); 514 | println!("totalCloneTime: {:?}", total_clone_time); 515 | println!("totalCommits: {}", commits); 516 | } 517 | 518 | #[cfg(test)] 519 | mod tests { 520 | use super::*; 521 | extern crate git2; 522 | 523 | use chrono::DateTime; 524 | // Helper function to create a mock scan 525 | fn create_mock_scan() -> Scan { 526 | let rule = Rule { 527 | description: String::from("Stripe Access Token"), 528 | id: String::from("stripe-access-token"), 529 | regex: String::from(r"(?i)(sk|pk)_(test|live)_[0-9a-z]{10,32}"), 530 | // entropy: Some(0.5), 531 | keywords: vec![ 532 | String::from("sk_test"), 533 | String::from("pk_test"), 534 | String::from("sk_live"), 535 | String::from("pk_live"), 536 | ], 537 | allowlist: None, 538 | }; 539 | let ruleslist: Vec = vec![rule]; 540 | 541 | let allowlist = Allowlist { 542 | paths: vec![], 543 | commits: vec![], 544 | regex_target: String::from("match"), 545 | regexes: vec![], 546 | stopwords: vec![], 547 | }; 548 | 549 | let scan = Scan { 550 | allowlist, 551 | ruleslist, 552 | 553 | threads: Some(50), 554 | chunk: Some(10), 555 | }; 556 | scan 557 | } 558 | 559 | // test detect_file 560 | static PATH: &str = "tests/files/testdir/test.txt"; 561 | #[test] 562 | fn test_detect_file() { 563 | let scan = create_mock_scan(); 564 | let content = "twilio_api_key = SK12345678901234567890123456789012"; 565 | let commit_info = CommitInfo { 566 | repo: 
"example/repo".to_string(), 567 | commit: git2::Oid::from_str("1234567890abcdef1234567890abcdef12345678").unwrap(), 568 | author: "John Doe".to_string(), 569 | email: "johndoe@example.com".to_string(), 570 | commit_message: "Example commit message".to_string(), 571 | date: DateTime::parse_from_rfc3339("2023-05-26T12:34:56+00:00") 572 | .unwrap() 573 | .into(), 574 | files: vec![ 575 | ("/path/to/file1".to_string(), "File 1 contents".to_string()), 576 | ("/path/to/file2".to_string(), "File 2 contents".to_string()), 577 | ], 578 | }; 579 | // Call the detect_file function 580 | let result = detect_file( 581 | PATH, 582 | content, 583 | &scan.ruleslist, 584 | &scan.allowlist, 585 | &commit_info, 586 | scan.threads, 587 | ); 588 | 589 | // Assert that the result is as expected 590 | let output = result.unwrap(); 591 | assert_eq!(output.len(), 0); 592 | } 593 | // test detect_by_regex 594 | 595 | #[test] 596 | fn test_detect_by_regex() { 597 | let rules = Rule { 598 | description: "Digits".to_string(), 599 | id: "key".to_string(), 600 | regex: r"\d+".to_string(), 601 | // entropy: None, 602 | keywords: vec![], 603 | allowlist: None, 604 | }; 605 | let contents = "123\n456\n789\naaaaaxwsd\ntoken=wkwk121"; 606 | let allowlist = Allowlist { 607 | commits: vec![], 608 | paths: vec![], 609 | regex_target: String::new(), 610 | regexes: vec![], 611 | stopwords: vec![], 612 | }; 613 | 614 | let result = detect_by_regex(PATH, &rules, contents, &allowlist, ""); 615 | 616 | assert_eq!(result.len(), 4); 617 | assert_eq!(result[0], (1, "123", "123")); 618 | assert_eq!(result[1], (2, "456", "456")); 619 | assert_eq!(result[2], (3, "789", "789")); 620 | assert_eq!(result[3], (5, "token=wkwk121", "121")); 621 | } 622 | 623 | #[test] 624 | fn test_detect_by_regex_with_rules_allowlist_regex_target_match() { 625 | let rules = Rule { 626 | description: "Digits".to_string(), 627 | id: "key".to_string(), 628 | regex: r"\d+".to_string(), 629 | // entropy: None, 630 | keywords: vec![], 631 | 
allowlist: Some(Allowlist { 632 | commits: vec![], 633 | paths: vec!["tests/files/test90.txt".to_string()], 634 | regex_target: "match".to_string(), 635 | regexes: vec![], 636 | stopwords: vec!["token".to_string()], 637 | }), 638 | }; 639 | let contents = "123\n456\n789\naaaaaxwsd\ntoken=wkwk121"; 640 | let allowlist = Allowlist { 641 | commits: vec![], 642 | paths: vec![], 643 | regex_target: String::new(), 644 | regexes: vec![], 645 | stopwords: vec![], 646 | }; 647 | 648 | let result = detect_by_regex(PATH, &rules, contents, &allowlist, ""); 649 | println!("{:?}", result); 650 | assert_eq!(result.len(), 4); 651 | assert_eq!(result[0], (1, "123", "123")); 652 | assert_eq!(result[1], (2, "456", "456")); 653 | assert_eq!(result[2], (3, "789", "789")); 654 | assert_eq!(result[3], (5, "token=wkwk121", "121")); 655 | } 656 | 657 | #[test] 658 | fn test_detect_by_regex_with_rules_allowlist_regex_target_line() { 659 | let rules = Rule { 660 | description: "Digits".to_string(), 661 | id: "key".to_string(), 662 | regex: r"\d+".to_string(), 663 | // entropy: None, 664 | keywords: vec![], 665 | allowlist: Some(Allowlist { 666 | commits: vec![], 667 | paths: vec!["tests/files/test90.txt".to_string()], 668 | regex_target: "line".to_string(), 669 | regexes: vec![], 670 | stopwords: vec!["token".to_string()], 671 | }), 672 | }; 673 | let contents = "123\n456\n789\naaaaaxwsd\ntoken=wkwk121"; 674 | let allowlist = Allowlist { 675 | commits: vec![], 676 | paths: vec![], 677 | regex_target: String::new(), 678 | regexes: vec![], 679 | stopwords: vec![], 680 | }; 681 | 682 | let result = detect_by_regex(PATH, &rules, contents, &allowlist, ""); 683 | println!("{:?}", result); 684 | assert_eq!(result.len(), 3); 685 | assert_eq!(result[0], (1, "123", "123")); 686 | assert_eq!(result[1], (2, "456", "456")); 687 | assert_eq!(result[2], (3, "789", "789")); 688 | } 689 | 690 | #[test] 691 | fn test_detect_by_regex_with_global_allowlist() { 692 | let rules = Rule { 693 | description: 
"Digits".to_string(), 694 | id: "key".to_string(), 695 | regex: r"\d+".to_string(), 696 | // entropy: None, 697 | keywords: vec![], 698 | allowlist: Some(Allowlist { 699 | commits: vec![], 700 | paths: vec!["tests/files/test90.txt".to_string()], 701 | regex_target: "line".to_string(), 702 | regexes: vec![], 703 | stopwords: vec!["token".to_string()], 704 | }), 705 | }; 706 | let contents = "123\n456\n789\naaaaaxwsd\ntoken=wkwk121\nclient22222\n22"; 707 | let allowlist = Allowlist { 708 | commits: vec![], 709 | paths: vec![], 710 | regex_target: "line".to_string(), 711 | regexes: vec![], 712 | stopwords: vec!["client".to_string()], 713 | }; 714 | 715 | let result = detect_by_regex(PATH, &rules, contents, &allowlist, ""); 716 | assert_eq!(result.len(), 4); 717 | assert_eq!(result[0], (1, "123", "123")); 718 | assert_eq!(result[1], (2, "456", "456")); 719 | assert_eq!(result[2], (3, "789", "789")); 720 | assert_eq!(result[3], (7, "22", "22")); 721 | } 722 | } 723 | -------------------------------------------------------------------------------- /src/service/git_service.rs: -------------------------------------------------------------------------------- 1 | extern crate chrono; 2 | extern crate git2; 3 | use chrono::{DateTime, FixedOffset, TimeZone, Utc}; 4 | 5 | use git2::{BranchType, Repository, StatusOptions}; 6 | use std::sync::{Arc, Mutex}; 7 | use rayon::prelude::*; 8 | 9 | use crate::models::{CommitInfo, Leak, Results, Scan}; 10 | use std::collections::HashSet; 11 | use std::error::Error; 12 | use std::fs; 13 | use std::fs::File; 14 | 15 | use crate::errors::CustomError; 16 | use crate::service::detect_service::{detect_file, detect_uncommitted_file}; 17 | use crate::utils::git_util::{ 18 | config_commit_info, is_valid_date_format, load_all_commits, load_commits_by_conditions, 19 | parse_date_to_datetime, 20 | }; 21 | 22 | use std::io::{BufRead, BufReader, Read}; 23 | 24 | /// Handles a single commit by scanning its content. 
25 | /// 26 | /// # Arguments 27 | /// 28 | /// * `repo` - A `Repository` object representing the Git repository. 29 | /// * `commit_id` - The ID of the commit to handle, provided as a string. 30 | /// * `scan` - A `Scan` object representing the scanning configuration. 31 | /// 32 | /// # Returns 33 | /// 34 | /// A `Result` containing the scanning results (`Results`) if successful, 35 | /// otherwise an error (`Box`). 36 | pub fn handle_single_commit( 37 | repo: Repository, 38 | commit_id: &str, 39 | scan: Scan, 40 | user: &str, 41 | ) -> Result> { 42 | let commit = repo.find_commit(git2::Oid::from_str(commit_id)?)?; 43 | if !user.is_empty() && user != commit.author().name().unwrap_or("") { 44 | return Ok(Results::new()); 45 | } 46 | let commit_info = config_commit_info(&repo, &commit, &scan)?; 47 | let commits_list = vec![commit_info]; 48 | 49 | // Handle the commit information and perform the scan 50 | handle_commit_info(&commits_list, scan) 51 | } 52 | 53 | /// Handles multiple commits by scanning their content. 54 | /// 55 | /// # Arguments 56 | /// 57 | /// * `repo` - A `Repository` object representing the Git repository. 58 | /// * `commit_ids` - An array slice of commit IDs to handle, provided as strings. 59 | /// * `scan` - A `Scan` object representing the scanning configuration. 60 | /// 61 | /// # Returns 62 | /// 63 | /// A `Result` containing the scanning results (`Results`) if successful, 64 | /// otherwise an error (`Box`). 
65 | pub fn handle_multiple_commits( 66 | repo: Repository, 67 | commit_ids: &[&str], 68 | scan: Scan, 69 | user: &str, 70 | ) -> Result> { 71 | let mut commits_list = vec![]; 72 | 73 | // Iterate over each commit ID 74 | for commit_id in commit_ids { 75 | let commit = repo.find_commit(git2::Oid::from_str(commit_id)?)?; 76 | if user.is_empty() || user == commit.author().name().unwrap_or("") { 77 | let commit_info = config_commit_info(&repo, &commit, &scan)?; 78 | commits_list.push(commit_info); 79 | } 80 | } 81 | if commits_list.is_empty() { 82 | return Ok(Results::new()); 83 | } 84 | // Handle the commit information and perform the scan 85 | handle_commit_info(&commits_list, scan) 86 | } 87 | 88 | /// Handles commits from a file by scanning their content. 89 | /// 90 | /// # Arguments 91 | /// 92 | /// * `repo` - A `Repository` object representing the Git repository. 93 | /// * `file_name` - The name of the file containing commit IDs, provided as a string. 94 | /// * `scan` - A `Scan` object representing the scanning configuration. 95 | /// 96 | /// # Returns 97 | /// 98 | /// A `Result` containing the scanning results (`Results`) if successful, 99 | /// otherwise an error (`Box`). 
100 | pub fn handle_commits_file( 101 | repo: Repository, 102 | file_name: &str, 103 | scan: Scan, 104 | user: &str, 105 | ) -> Result> { 106 | // Open the commits file 107 | let file = fs::File::open(file_name).expect("Failed to open commits file"); 108 | let reader = BufReader::new(file); 109 | 110 | let mut commits: Vec = Vec::new(); 111 | 112 | // Read each line from the file, stopping at the first error 113 | for line in reader.lines().map_while(Result::ok) { 114 | commits.push(line); 115 | } 116 | 117 | // Convert commit IDs to a vector of string slices 118 | let commit_ids: Vec<&str> = commits.iter().map(|s| s.as_str()).collect(); 119 | 120 | // Handle multiple commits using the commit IDs and perform the scan 121 | handle_multiple_commits(repo, &commit_ids, scan, user) 122 | } 123 | 124 | /// Handles commits within a specified time range by scanning their content. 125 | /// 126 | /// # Arguments 127 | /// 128 | /// * `repo` - A `Repository` object representing the Git repository. 129 | /// * `since` - The starting time of the commit range, provided as a string. 130 | /// * `until` - The ending time of the commit range, provided as a string. 131 | /// * `scan` - A `Scan` object representing the scanning configuration. 132 | /// 133 | /// # Returns 134 | /// 135 | /// A `Result` containing the scanning results (`Results`) if successful, 136 | /// otherwise an error (`Box`). 
137 | #[allow(deprecated)] 138 | pub fn handle_commit_range_by_time( 139 | repo: Repository, 140 | since: &str, 141 | until: &str, 142 | scan: Scan, 143 | user: &str, 144 | ) -> Result> { 145 | let excluded_commits: Vec = vec![]; 146 | let is_since_rfc3339 = DateTime::parse_from_rfc3339(since).is_ok(); 147 | let is_until_rfc3339 = DateTime::parse_from_rfc3339(until).is_ok(); 148 | 149 | let is_since_date = is_valid_date_format(since); 150 | let is_until_date = is_valid_date_format(until); 151 | 152 | if is_since_date && is_until_date { 153 | // Convert since and until to start_time and end_time 154 | let start_time = match parse_date_to_datetime(since, "start") { 155 | Ok(datetime) => datetime.with_timezone(&FixedOffset::east(0)), 156 | Err(err) => { 157 | return Err(err); 158 | } 159 | }; 160 | 161 | let end_time = match parse_date_to_datetime(until, "until") { 162 | Ok(datetime) => datetime.with_timezone(&FixedOffset::east(0)), 163 | Err(err) => { 164 | return Err(err); 165 | } 166 | }; 167 | 168 | handle_multiple_commits_by_time(&repo, &excluded_commits, start_time, end_time, scan, user) 169 | } else if is_since_rfc3339 && is_until_rfc3339 { 170 | let start_time = DateTime::parse_from_rfc3339(since).unwrap(); 171 | let end_time = DateTime::parse_from_rfc3339(until).unwrap(); 172 | 173 | handle_multiple_commits_by_time(&repo, &excluded_commits, start_time, end_time, scan, user) 174 | } else { 175 | return Err(Box::new(CustomError::InvalidDateFormat)); 176 | } 177 | } 178 | 179 | /// Handles multiple commits within a specified time range by scanning their content. 180 | /// 181 | /// # Arguments 182 | /// 183 | /// * `repo` - A reference to a `Repository` object representing the Git repository. 184 | /// * `excluded_commits` - An array slice of excluded commit IDs, provided as `git2::Oid`. 185 | /// * `start_time` - The starting time of the commit range, provided as `DateTime`. 186 | /// * `end_time` - The ending time of the commit range, provided as `DateTime`. 
187 | /// * `scan` - A `Scan` object representing the scanning configuration. 188 | /// 189 | /// # Returns 190 | /// 191 | /// A `Result` containing the scanning results (`Results`) if successful, 192 | /// otherwise an error (`Box`). 193 | #[allow(deprecated)] 194 | pub fn handle_multiple_commits_by_time( 195 | repo: &Repository, 196 | excluded_commits: &[git2::Oid], 197 | start_time: DateTime, 198 | end_time: DateTime, 199 | scan: Scan, 200 | user: &str, 201 | ) -> Result> { 202 | // Get the head commit 203 | let head = repo.head()?; 204 | let obj = head.peel(git2::ObjectType::Commit)?; 205 | let commit = if let Some(commit) = obj.as_commit() { 206 | commit.clone() 207 | } else { 208 | return Err(Box::new(CustomError::ObjectConvertFail)); 209 | }; 210 | 211 | // Create a revision walker and set sorting options 212 | let mut revwalk = repo.revwalk()?; 213 | revwalk.push(commit.id())?; 214 | revwalk.set_sorting(git2::Sort::TOPOLOGICAL)?; 215 | 216 | let mut commits = Vec::new(); 217 | let excluded_commits: HashSet<_> = excluded_commits.iter().cloned().collect(); 218 | 219 | // Iterate over each commit ID in the revision walker 220 | for commit_id in revwalk { 221 | let oid = commit_id?; 222 | if excluded_commits.contains(&oid) { 223 | continue; // Skip excluded commits 224 | } 225 | 226 | let commit = repo.find_commit(oid)?; 227 | 228 | if user.is_empty() || user == commit.author().name().unwrap_or("") { 229 | // Get the commit's time and convert it to the appropriate time zone 230 | let commit_time = Utc.timestamp(commit.time().seconds(), 0); 231 | let commit_offset = FixedOffset::west(commit.time().offset_minutes() * 60); 232 | let commit_date = commit_offset.from_utc_datetime(&commit_time.naive_utc()); 233 | 234 | // Check if the commit is within the specified time range 235 | if commit_date >= start_time && commit_date <= end_time { 236 | let commit_info = config_commit_info(repo, &commit, &scan)?; 237 | commits.push(commit_info); 238 | } 239 | } 240 | } 241 | 
242 | // Handle the commit information and perform the scan 243 | handle_commit_info(&commits, scan) 244 | } 245 | 246 | /// Handles branches by name, scanning the commits in the matching branches. 247 | /// 248 | /// # Arguments 249 | /// 250 | /// * `repo` - A `Repository` object representing the Git repository. 251 | /// * `branch_name` - The name or partial name of the branches to match. 252 | /// * `scan` - A `Scan` object representing the scanning configuration. 253 | /// 254 | /// # Returns 255 | /// 256 | /// A `Result` containing the scanning results (`Results`) if successful, 257 | /// otherwise an error (`Box`). 258 | pub fn handle_branches_by_name( 259 | repo: Repository, 260 | branch_name: &str, 261 | scan: Scan, 262 | ) -> Result> { 263 | let branches = repo.branches(Some(BranchType::Local))?; 264 | 265 | let mut commits = Vec::new(); 266 | 267 | // Iterate over each branch in the repository 268 | for branch in branches { 269 | let (branch, _) = branch?; 270 | let branch_reference = branch.into_reference(); 271 | let branch_name_str = branch_reference.name().unwrap_or(""); 272 | 273 | // Check if the branch name contains the provided name or partial name 274 | if branch_name_str.contains(branch_name) { 275 | let commit_oid = branch_reference 276 | .target() 277 | .ok_or_else(|| git2::Error::from_str("Failed to get branch commit"))?; 278 | 279 | let commit = repo.find_commit(commit_oid)?; 280 | let commit_info = config_commit_info(&repo, &commit, &scan)?; 281 | 282 | commits.push(commit_info); 283 | } 284 | } 285 | 286 | // Handle the commit information and perform the scan 287 | handle_commit_info(&commits, scan) 288 | } 289 | 290 | /// Handles a commit range, scanning the commits between the specified commit IDs. 291 | /// 292 | /// # Arguments 293 | /// 294 | /// * `repo` - A `Repository` object representing the Git repository. 295 | /// * `commit_from` - An optional string representing the starting commit ID. 
296 | /// * `commit_to` - An optional string representing the ending commit ID. 297 | /// * `scan` - A `Scan` object representing the scanning configuration. 298 | /// 299 | /// # Returns 300 | /// 301 | /// A `Result` containing the scanning results (`Results`) if successful, 302 | /// otherwise an error (`Box`). 303 | pub fn handle_commit_range( 304 | repo: Repository, 305 | commit_from: Option, 306 | commit_to: Option, 307 | scan: Scan, 308 | user: &str, 309 | ) -> Result> { 310 | // Load all commits in the repository 311 | let all_commits = match load_all_commits(&repo) { 312 | Ok(all_commits) => all_commits, 313 | Err(_e) => { 314 | return Err(Box::new(CustomError::ObjectConvertFail)); 315 | } 316 | }; 317 | 318 | // Load the commits within the specified commit range 319 | let results = load_commits_by_conditions(commit_from, commit_to, &all_commits); 320 | let commit_ids: Vec<&str> = results.iter().map(|s| s.as_str()).collect(); 321 | 322 | // Handle multiple commits and perform the scan 323 | handle_multiple_commits(repo, &commit_ids, scan, user) 324 | } 325 | 326 | /// Handles uncommitted files in the repository and performs a scan for potential leaks. 327 | /// 328 | /// # Arguments 329 | /// 330 | /// * `repo` - A `Repository` object representing the repository. 331 | /// * `repo_path` - The path to the repository. 332 | /// * `scan` - A `Scan` object containing the rules, keywords, and allowlist for the scan. 333 | /// 334 | /// # Returns 335 | /// 336 | /// Returns a `Result` containing a `Results` object if the operation is successful, or an error if an error occurs during the process. 337 | /// 338 | /// # Errors 339 | /// 340 | /// This function may return an error if any of the following operations fail: 341 | /// 342 | /// * Opening a file for reading. 343 | /// * Reading the contents of a file. 344 | /// * Detecting uncommitted files using `detect_uncommitted_file` function. 
345 | /// 346 | pub fn handle_uncommitted_files( 347 | repo: Repository, 348 | repo_path: &str, 349 | scan: Scan, 350 | ) -> Result> { 351 | let mut options = StatusOptions::new(); 352 | options.include_untracked(true); 353 | options.include_unmodified(false); 354 | options.exclude_submodules(true); 355 | 356 | let statuses = repo.statuses(Some(&mut options))?; 357 | 358 | let mut uncommitted_files = Vec::new(); 359 | for entry in statuses.iter() { 360 | if let Some(path) = entry.path() { 361 | let ab_path = format!("{}/{}", repo_path, path); 362 | let mut file = File::open(ab_path)?; 363 | let mut contents = String::new(); 364 | file.read_to_string(&mut contents)?; 365 | uncommitted_files.push((path.to_string(), contents)); 366 | } 367 | } 368 | let mut results = Vec::new(); 369 | for (path, content) in uncommitted_files.iter() { 370 | let result = detect_uncommitted_file( 371 | content, 372 | path, 373 | &scan.ruleslist, 374 | &scan.allowlist, 375 | scan.threads, 376 | ); 377 | if let Ok(output) = result { 378 | if !output.is_empty() { 379 | results.push(output); 380 | } 381 | } else if let Err(err) = result { 382 | return Err(err); 383 | } 384 | } 385 | let flattened: Vec = results.into_iter().flatten().collect(); 386 | let returns = Results { 387 | commits_number: 0, 388 | outputs: flattened, 389 | }; 390 | Ok(returns) 391 | } 392 | 393 | /// Handles all commits in the repository and performs a scan for potential leaks. 394 | /// 395 | /// # Arguments 396 | /// 397 | /// * `repo` - A `Repository` object representing the repository. 398 | /// * `scan` - A `Scan` object containing the rules, keywords, and allowlist for the scan. 399 | /// * `user` - A string representing the user performing the scan. 400 | /// 401 | /// # Returns 402 | /// 403 | /// Returns a `Result` containing a `Results` object if the operation is successful, or an error if an error occurs during the process. 
404 | /// 405 | /// # Errors 406 | /// 407 | /// This function may return an error if any of the following operations fail: 408 | /// 409 | /// * Loading all commits in the repository using the `load_all_commits` function. 410 | /// * Handling multiple commits using the `handle_multiple_commits` function. 411 | /// 412 | pub fn handle_all_commits( 413 | repo: Repository, 414 | scan: Scan, 415 | user: &str, 416 | ) -> Result> { 417 | // Load all commits in the repository 418 | let all_commits = match load_all_commits(&repo) { 419 | Ok(all_commits) => all_commits, 420 | Err(_) => { 421 | return Err(Box::new(CustomError::ObjectConvertFail)); 422 | } 423 | }; 424 | let commit_ids: Vec<&str> = all_commits.iter().map(|s| s.as_str()).collect(); 425 | handle_multiple_commits(repo, &commit_ids, scan, user) 426 | } 427 | 428 | /// Handle the commit information by searching for secrets in the commit files. 429 | /// 430 | /// 431 | /// # Arguments 432 | /// 433 | /// * `commit_info_list` - A slice of `CommitInfo` objects representing the commit information. 434 | /// * `scan` - A `Scan` object containing the rules, keywords, and allowlist for secret detection. 435 | /// 436 | /// # Errors 437 | /// 438 | /// This function returns an `Err` variant if any error occurs during the secret detection process. 439 | /// The error type is a boxed `dyn Error`, which allows for returning different types of error objects. 
440 | /// 441 | pub fn handle_commit_info( 442 | commit_info_list: &[CommitInfo], 443 | scan: Scan, 444 | ) -> Result> { 445 | let ruleslist = scan.ruleslist; 446 | let allowlist = scan.allowlist; 447 | let threads = scan.threads; 448 | let chunk=scan.chunk.unwrap_or(10); 449 | let results: Arc>> = Arc::new(Mutex::new(Vec::new())); 450 | 451 | commit_info_list.par_iter().for_each(|commit_info| { 452 | let commit_results: Vec = commit_info 453 | .files 454 | .par_chunks(chunk) 455 | .flat_map(|files_chunk| { 456 | files_chunk 457 | .iter() 458 | .filter_map(|(file, content)| { 459 | match detect_file(content, file, &ruleslist, &allowlist, commit_info, threads) { 460 | Ok(output) => Some(output), 461 | Err(_) => None, 462 | } 463 | }) 464 | .flatten() 465 | .collect::>() 466 | }) 467 | .collect(); 468 | 469 | let mut results = results.lock().unwrap(); 470 | results.extend(commit_results); 471 | }); 472 | 473 | let flattened: Vec = results 474 | .lock() 475 | .unwrap() 476 | .clone(); 477 | 478 | let returns = Results { 479 | commits_number: commit_info_list.len(), 480 | outputs: flattened, 481 | }; 482 | 483 | Ok(returns) 484 | } 485 | 486 | // NOTE: The commented-out function can be tested after specifying the repo file 487 | // #[cfg(test)] 488 | // mod tests { 489 | // use super::*; 490 | // static VALID_PATH: &str = "tests/TestGitOperation"; 491 | 492 | // // Helper function to create a mock repository 493 | // fn create_mock_repository() -> Repository { 494 | // let repo = match load_repository(VALID_PATH) { 495 | // Ok(repo) => repo, 496 | // Err(e) => { 497 | // panic!("Failed to load repository"); 498 | // } 499 | // }; 500 | // repo 501 | // } 502 | 503 | // // Helper function to create a mock scan 504 | // fn create_mock_scan() -> Scan { 505 | // let rule = Rule { 506 | // description: String::from("Stripe Access Token"), 507 | // id: String::from("stripe-access-token"), 508 | // regex: String::from(r"(?i)(sk|pk)_(test|live)_[0-9a-z]{10,32}"), 509 | 510 | 
// keywords: vec![String::from("sk_test"), String::from("pk_test"),String::from("sk_live"), String::from("pk_live")], 511 | // allowlist: None, 512 | // }; 513 | // let ruleslist:Vec=vec![rule]; 514 | 515 | // let keywords = vec![ 516 | // String::from("pk_live"), 517 | // String::from("sk_live"), 518 | // String::from("sk_test"), 519 | // String::from("pk_test"),]; 520 | 521 | // let allowlist = Allowlist { 522 | // paths: vec![], 523 | // commits: vec![ ], 524 | // regex_target: String::from("match"), 525 | // regexes: vec![ ], 526 | // stopwords: vec![], 527 | // }; 528 | 529 | // let scan=Scan{ 530 | // allowlist, 531 | // ruleslist, 532 | // keywords 533 | // }; 534 | // scan 535 | // } 536 | 537 | // // test handle_single_commit 538 | // #[test] 539 | // fn test_handle_single_commit() { 540 | // let repo = create_mock_repository(); 541 | // let scan = create_mock_scan(); 542 | // let result = handle_single_commit(repo, "8bdca802af0514ce29947e20c6be1719974ad866", scan,""); 543 | // assert!(result.is_ok()); 544 | // match result { 545 | // Ok(output_items) => { 546 | // assert_eq!(5, output_items.outputs[0].line_number); 547 | // } 548 | // Err(err) => { 549 | // println!("Error: {}", err); 550 | // assert!(false); 551 | // } 552 | // } 553 | // } 554 | 555 | // // test handle_multiple_commits 556 | // #[test] 557 | // fn test_handle_multiple_commits() { 558 | 559 | // let repo = create_mock_repository(); 560 | // let commit_ids = vec!["8bdca802af0514ce29947e20c6be1719974ad866", "25bc64b31ee8920e1cb1f4ea287b174df5cd9782",]; 561 | // let scan = create_mock_scan(); 562 | // let result = handle_multiple_commits(repo, &commit_ids, scan,""); 563 | 564 | // assert!(result.is_ok()); 565 | // match result { 566 | // Ok(output_items) => { 567 | // assert_eq!(2, output_items.commits_number); 568 | // } 569 | // Err(err) => { 570 | // println!("Error: {}", err); 571 | // assert!(false); 572 | // } 573 | // } 574 | // } 575 | 576 | // // test handle_commits_file 577 | // 
#[test] 578 | // fn test_handle_commits_file() { 579 | 580 | // let repo = create_mock_repository(); 581 | // let file_name = "tests/files/commits.txt"; 582 | // let scan = create_mock_scan(); 583 | 584 | // // Perform the handle_commits_file function 585 | // let result = handle_commits_file(repo , file_name, scan,""); 586 | 587 | // assert!(result.is_ok()); 588 | // match result { 589 | // Ok(output_items) => { 590 | // assert_eq!(2, output_items.commits_number); 591 | // } 592 | // Err(err) => { 593 | // println!("Error: {}", err); 594 | // assert!(false); 595 | // } 596 | // } 597 | // } 598 | 599 | // // test handle_commit_range_by_time 600 | // #[test] 601 | // fn test_handle_commit_range_by_time() { 602 | // let repo = create_mock_repository(); 603 | // let since = "2023-05-20T00:00:00Z"; 604 | // let until = "2023-05-26T00:00:00Z"; 605 | // let scan = create_mock_scan(); 606 | // let result = handle_commit_range_by_time(repo, since, until, scan,""); 607 | 608 | // // Assert the result 609 | // assert!(result.is_ok()); 610 | // match result { 611 | // Ok(output_items) => { 612 | // assert_eq!(8, output_items.commits_number); 613 | // } 614 | // Err(err) => { 615 | // println!("Error: {}", err); 616 | // assert!(false); 617 | // } 618 | // } 619 | // } 620 | 621 | // // test test_handle_branches_by_name 622 | // #[test] 623 | // fn test_handle_branches_by_name() { 624 | // let repo = create_mock_repository(); 625 | // let branch_name = "secret"; 626 | // let scan = create_mock_scan(); 627 | // let result = handle_branches_by_name(repo, branch_name, scan); 628 | // assert!(result.is_ok()); 629 | // match result { 630 | // Ok(output_items) => { 631 | // assert_eq!(1, output_items.commits_number); 632 | // } 633 | // Err(err) => { 634 | // println!("Error: {}", err); 635 | // assert!(false); 636 | // } 637 | // } 638 | // } 639 | 640 | // // rest handle_commit_range 641 | // #[test] 642 | // fn test_handle_commit_range() { 643 | 644 | // let repo = 
create_mock_repository(); 645 | // let commit_from = Some("547b550d3ec4d1f24c12f7a4d4c8c0aaa045bd7b".to_string()); 646 | // let commit_to = Some("42c8c6a9c48bc4d9406750f4d15b0d0cd5ab7597".to_string()); 647 | // let scan = create_mock_scan(); 648 | // let result = handle_commit_range(repo, commit_from, commit_to, scan,""); 649 | 650 | // assert!(result.is_ok()); 651 | // match result { 652 | // Ok(output_items) => { 653 | // assert_eq!(4, output_items.commits_number); 654 | // } 655 | // Err(err) => { 656 | // println!("Error: {}", err); 657 | // assert!(false); 658 | // } 659 | // } 660 | // } 661 | // #[test] 662 | // fn test_handle_all_commits() { 663 | 664 | // let repo = create_mock_repository(); 665 | // let scan = create_mock_scan(); 666 | // let user = "sonichen"; 667 | 668 | // let result = handle_all_commits(repo, scan, user); 669 | // assert!(result.is_ok()); 670 | 671 | // } 672 | // } 673 | -------------------------------------------------------------------------------- /src/service/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod detect_service; 2 | pub mod git_service; 3 | pub mod db_service; -------------------------------------------------------------------------------- /src/utils/detect_utils.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::CustomError; 2 | use crate::models::{Allowlist, Config, CsvResult, Leak, Rule, Scan}; 3 | use csv::Writer; 4 | use git2::Repository; 5 | use regex::Regex; 6 | use serde_json::json; 7 | use std::collections::HashSet; 8 | use std::error::Error; 9 | use std::fs; 10 | use std::fs::{File, OpenOptions}; 11 | use std::io::{Seek, SeekFrom, Write}; 12 | use toml::{to_string_pretty, Value}; 13 | 14 | /// Loads the scan configuration based on the specified repository and configuration settings. 
15 | /// 16 | /// # Arguments 17 | /// 18 | /// * `repo` - A reference to the `Repository` object representing the target repository. 19 | /// * `config` - A reference to the `Config` object containing the scan configuration settings. 20 | /// 21 | /// # Returns 22 | /// 23 | /// Returns a `Result` containing the loaded `Scan` object if successful, or an error of type `Box` if any issues occur. 24 | /// 25 | pub fn load_config(repo: &Repository, config: &Config) -> Result> { 26 | let scan_result = if config.repo_config { 27 | // Load config from target repo. Config file must be ".gitleaks.toml" or "gitleaks.toml" 28 | let content = load_config_content_from_target_repo(repo)?; 29 | match content { 30 | Some(content) => load_config_from_target_repo(&content), 31 | None => { 32 | return Err(Box::new(CustomError::EmptyFileError)); 33 | } 34 | } 35 | } else { 36 | // Specify the search rule file. 37 | load_config_file(&config.config) 38 | }?; 39 | 40 | Ok(scan_result) 41 | } 42 | 43 | /// Loads the content of a configuration file (`.gitleaks.toml` or `gitleaks.toml`) from the target repository. 44 | /// 45 | /// # Arguments 46 | /// 47 | /// * `repo` - A reference to a `Repository` object representing the target repository. 48 | /// 49 | /// # Returns 50 | /// 51 | /// Returns a `Result` containing an `Option` with the content of the configuration file if found, or `None` if the configuration file is not found in any commit. 52 | /// 53 | /// # Errors 54 | /// 55 | /// This function may return an error if any error occurs during the repository traversal or object retrieval. 
56 | /// 57 | fn load_config_content_from_target_repo( 58 | repo: &Repository, 59 | ) -> Result, Box> { 60 | let head_commit = repo.head()?.peel_to_commit()?; 61 | let mut walker = repo.revwalk()?; 62 | walker.push(head_commit.id())?; 63 | 64 | // Iterate over all commits in the repository 65 | for commit_id in walker { 66 | let commit = repo.find_commit(commit_id?)?; 67 | let tree = commit.tree()?; 68 | 69 | // Iterate over all entries in the tree 70 | for entry in tree.iter() { 71 | let file_name = entry.name().unwrap_or(""); 72 | if file_name == ".gitleaks.toml" || file_name == "gitleaks.toml" { 73 | let blob = entry.to_object(repo)?.peel_to_blob()?; 74 | let content = String::from_utf8_lossy(blob.content()); 75 | return Ok(Some(content.into())); 76 | } 77 | } 78 | } 79 | 80 | Ok(None) 81 | } 82 | 83 | /// Loads the configuration file and extracts the allowlist, ruleslist. 84 | /// 85 | /// # Arguments 86 | /// 87 | /// * `config_file_path` - The path to the configuration file. 88 | /// * `repo_file_path` - The path of the repository file. 89 | /// 90 | /// # Returns 91 | /// 92 | /// Returns an `Ok` variant containing a tuple with the extracted allowlist, ruleslist, and keywords. 93 | /// 94 | /// # Errors 95 | /// 96 | /// Returns an `Err` variant if the configuration file cannot be loaded or if there are any errors during parsing. 
97 | /// 98 | pub fn load_config_file(config_file_path: &str) -> Result> { 99 | // Load config file 100 | let toml_str = fs::read_to_string(config_file_path) 101 | .map_err(|_| Box::new(CustomError::EmptyConfigFileError))?; 102 | 103 | // Parse config file 104 | let config_file_content: Value = toml::from_str(&toml_str)?; 105 | 106 | // Config allowlist 107 | let allowlist = config_allowlist(&config_file_content)?; 108 | 109 | // Config ruleslist and keywords 110 | let ruleslist= config_ruleslist_and_keywords(&config_file_content)?; 111 | 112 | let scan = Scan { 113 | allowlist, 114 | ruleslist, 115 | threads: None, 116 | chunk: None, 117 | }; 118 | 119 | Ok(scan) 120 | } 121 | 122 | /// Loads the configuration from the target repository. 123 | /// 124 | /// # Arguments 125 | /// 126 | /// * `toml_str` - A TOML string representing the configuration file from the target repository. 127 | /// 128 | /// # Returns 129 | /// 130 | /// Returns an `Ok` variant containing a tuple with the extracted allowlist, ruleslist, and keywords. 131 | /// 132 | /// # Errors 133 | /// 134 | /// Returns an `Err` variant if there are any errors during parsing or extraction. 135 | /// 136 | fn load_config_from_target_repo(toml_str: &str) -> Result> { 137 | // Load config file 138 | let config_file_content: Value = toml::from_str(toml_str)?; 139 | 140 | // Config allowlist 141 | let allowlist = config_allowlist(&config_file_content)?; 142 | 143 | // Config ruleslist and keywords 144 | let ruleslist= config_ruleslist_and_keywords(&config_file_content)?; 145 | 146 | let scan = Scan { 147 | allowlist, 148 | ruleslist, 149 | threads: None, 150 | chunk: None, 151 | }; 152 | 153 | Ok(scan) 154 | } 155 | 156 | /// Extracts the allowlist from the config file. 157 | /// 158 | /// # Arguments 159 | /// 160 | /// * `config_file_content` - The TOML content of the configuration file. 161 | /// * `repo_file_path` - The path of the repository file. 
162 | /// 163 | /// # Returns 164 | /// 165 | /// Returns an `Ok` variant containing the extracted `Allowlist` object. 166 | /// 167 | fn config_allowlist(config_file_content: &Value) -> Result> { 168 | let mut allowlist = Allowlist { 169 | paths: Vec::new(), 170 | commits: Vec::new(), 171 | regex_target: String::from(""), 172 | regexes: Vec::new(), 173 | stopwords: Vec::new(), 174 | }; 175 | 176 | // Get paths 177 | if let Some(file_list) = config_file_content 178 | .get("allowlist") 179 | .and_then(|v| v.get("paths").and_then(|v| v.as_array())) 180 | { 181 | for path in file_list.iter() { 182 | let path_str = path 183 | .as_str() 184 | .ok_or_else(|| Box::::from(CustomError::InvalidTomlFile))? 185 | .to_string(); 186 | allowlist.paths.push(path_str); 187 | } 188 | } 189 | 190 | // Get commit 191 | if let Some(regex_list) = config_file_content 192 | .get("allowlist") 193 | .and_then(|v| v.get("commits").and_then(|v| v.as_array())) 194 | { 195 | allowlist.commits = regex_list 196 | .iter() 197 | .filter_map(|r| r.as_str()) 198 | .map(|s| s.to_string()) 199 | .collect(); 200 | } 201 | 202 | // Get regex target (default to "match") 203 | if let Some(target) = config_file_content 204 | .get("allowlist") 205 | .and_then(|v| v.get("regexTarget").and_then(|v| v.as_str())) 206 | { 207 | allowlist.regex_target = target.to_string(); 208 | } 209 | 210 | // Get regexes 211 | if let Some(regex_list) = config_file_content 212 | .get("allowlist") 213 | .and_then(|v| v.get("regexes").and_then(|v| v.as_array())) 214 | { 215 | allowlist.regexes = regex_list 216 | .iter() 217 | .filter_map(|r| r.as_str()) 218 | .map(|s| s.to_string()) 219 | .collect(); 220 | } 221 | 222 | // Get stopwords 223 | if let Some(stopwords_list) = config_file_content 224 | .get("allowlist") 225 | .and_then(|v| v.get("stopwords").and_then(|v| v.as_array())) 226 | { 227 | allowlist.stopwords = stopwords_list 228 | .iter() 229 | .filter_map(|r| r.as_str()) 230 | .map(|s| s.to_string()) 231 | .collect(); 232 | 
} 233 | 234 | Ok(allowlist) 235 | } 236 | 237 | /// Extracts the rules list and keywords from the config file. 238 | /// 239 | /// # Arguments 240 | /// 241 | /// * `config_file_content` - The TOML content of the configuration file. 242 | /// * `repo_file_path` - The path of the repository file. 243 | /// 244 | /// # Returns 245 | /// 246 | /// Returns a tuple containing the extracted `ruleslist` and `keywords`. 247 | /// * `ruleslist` - A vector of `Rule` objects representing the rules for detection. 248 | /// * `keywords` - A vector of strings representing the keywords used for detection. 249 | /// 250 | fn config_ruleslist_and_keywords( 251 | config_file_content: &Value, 252 | ) -> Result, Box> { 253 | let mut ruleslist = vec![]; 254 | 255 | let regex_array = config_file_content 256 | .get("rules") 257 | .and_then(|v| v.as_array()) 258 | .ok_or_else(|| Box::::from(CustomError::InvalidTomlFile))?; 259 | 260 | for rule in regex_array { 261 | let description = rule 262 | .get("description") 263 | .and_then(|v| v.as_str().map(|s| s.to_string())) 264 | .ok_or_else(|| Box::::from(CustomError::InvalidTomlFile))?; 265 | let id = rule 266 | .get("id") 267 | .and_then(|v| v.as_str().map(|s| s.to_string())) 268 | .ok_or_else(|| Box::::from(CustomError::InvalidTomlFile))?; 269 | let regex = rule 270 | .get("regex") 271 | .and_then(|v| v.as_str().map(|s| s.to_string())) 272 | .ok_or_else(|| Box::::from(CustomError::InvalidTomlFile))?; 273 | // let entropy: Option = rule.get("entropy").map(|e| e.as_float().unwrap()); 274 | let keywords_array = rule 275 | .get("keywords") 276 | .and_then(|v| v.as_array()) 277 | .ok_or_else(|| Box::::from(CustomError::InvalidTomlFile))?; 278 | 279 | let mut rules_allowlist = Allowlist { 280 | commits: vec![], 281 | paths: vec![], 282 | regex_target: String::new(), 283 | regexes: vec![], 284 | stopwords: vec![], 285 | }; 286 | 287 | if rule.get("allowlist").is_none() { 288 | let rule = Rule { 289 | description, 290 | id, 291 | regex, 292 | 
keywords: keywords_array 293 | .iter() 294 | .map(|kw| kw.as_str().unwrap().to_string()) 295 | .collect(), 296 | allowlist: None, 297 | }; 298 | ruleslist.push(rule); 299 | continue; 300 | } 301 | 302 | if let Some(allowlist_table) = rule.get("allowlist") { 303 | if let Some(commits_array) = allowlist_table.get("commits").and_then(|v| v.as_array()) { 304 | for commit in commits_array { 305 | if let Some(commit_str) = commit.as_str() { 306 | rules_allowlist.commits.push(commit_str.to_string()); 307 | } 308 | } 309 | } 310 | 311 | if let Some(paths_array) = allowlist_table.get("paths").and_then(|v| v.as_array()) { 312 | for path in paths_array { 313 | if let Some(path_str) = path.as_str() { 314 | rules_allowlist.paths.push(path_str.to_string()); 315 | } 316 | } 317 | } 318 | 319 | rules_allowlist.regex_target = allowlist_table 320 | .get("regexTarget") 321 | .and_then(|v| v.as_str()) 322 | .unwrap_or("") 323 | .to_string(); 324 | 325 | if let Some(regexes_array) = allowlist_table.get("regexes").and_then(|v| v.as_array()) { 326 | for regex in regexes_array { 327 | if let Some(regex_str) = regex.as_str() { 328 | rules_allowlist.regexes.push(regex_str.to_string()); 329 | } 330 | } 331 | } 332 | 333 | if let Some(stopwords_array) = 334 | allowlist_table.get("stopwords").and_then(|v| v.as_array()) 335 | { 336 | for stopword in stopwords_array { 337 | if let Some(stopword_str) = stopword.as_str() { 338 | rules_allowlist.stopwords.push(stopword_str.to_string()); 339 | } 340 | } 341 | } 342 | } 343 | 344 | let rule = Rule { 345 | description, 346 | id, 347 | regex, 348 | keywords: keywords_array 349 | .iter() 350 | .map(|kw| kw.as_str().unwrap().to_string()) 351 | .collect(), 352 | allowlist: Some(rules_allowlist), 353 | }; 354 | ruleslist.push(rule); 355 | } 356 | 357 | Ok(ruleslist) 358 | } 359 | 360 | /// Appends a rule to a TOML file. 361 | /// 362 | /// # Arguments 363 | /// 364 | /// * `rule` - A reference to the `Rule` object to be appended to the TOML file. 
365 | /// * `filename` - The name of the TOML file to which the rule should be appended. 366 | /// 367 | /// # Returns 368 | /// 369 | /// Returns `Ok(())` if the rule is successfully appended to the TOML file, or an error of type `Box` 370 | /// if any issues occur. 371 | /// 372 | /// # Errors 373 | /// 374 | /// This function can return an error if there are any issues during the file operations, such as opening the file, 375 | /// moving the file pointer, or writing the rule contents. 376 | /// 377 | pub fn append_rule_to_toml(rule: &Rule, filename: &str) -> Result<(), Box> { 378 | // Open the file with read, write, and append options 379 | let mut file = OpenOptions::new() 380 | .read(true) 381 | .append(true) 382 | .open(filename)?; 383 | 384 | // Move the file pointer to the end of the file 385 | file.seek(SeekFrom::End(0))?; 386 | 387 | // Write the start marker for a new [[rules]] section 388 | file.write_all(b"[[rules]]\n")?; 389 | 390 | // Serialize the Rule struct to a TOML string 391 | let toml_string = toml::to_string(rule)?; 392 | 393 | // Write the contents of the Rule 394 | file.write_all(toml_string.as_bytes())?; 395 | 396 | // Write a newline character to separate different [[rules]] 397 | file.write_all(b"\n")?; 398 | 399 | Ok(()) 400 | } 401 | 402 | /// Deletes a rule with the specified ID from a TOML file. 403 | /// 404 | /// # Arguments 405 | /// 406 | /// * `file_path` - A string slice representing the path to the TOML file. 407 | /// * `rule_id` - A string slice representing the ID of the rule to be deleted. 408 | /// 409 | /// # Returns 410 | /// 411 | /// Returns `Ok(())` if the rule with the specified ID is successfully deleted from the TOML file, or an error of 412 | /// type `Box` if any issues occur. 
413 | /// 414 | /// # Errors 415 | /// 416 | /// This function can return an error if there are any issues during the file operations, such as reading the file, 417 | /// parsing the TOML content, modifying the data, or writing the modified TOML to the file. 418 | /// 419 | pub fn delete_rule_by_id(file_path: &str, rule_id: &str) -> Result<(), Box> { 420 | // Read the content of the TOML file 421 | let toml_content = fs::read_to_string(file_path)?; 422 | 423 | // Parse the TOML content 424 | let mut toml_data: Value = toml::from_str(&toml_content)?; 425 | 426 | // Delete rules with the specified id 427 | if let Some(rules) = toml_data.get_mut("rules") { 428 | if let Some(rules_array) = rules.as_array_mut() { 429 | rules_array.retain(|rule| { 430 | if let Some(id) = rule.get("id") { 431 | // Delete the rule based on the id 432 | let rule_id_value = id.as_str().unwrap(); 433 | rule_id_value != rule_id 434 | } else { 435 | true 436 | } 437 | }); 438 | } 439 | } 440 | 441 | // Convert the modified TOML data back to a string 442 | let modified_toml = to_string_pretty(&toml_data)?; 443 | 444 | // Write the modified TOML to the file 445 | fs::write(file_path, modified_toml)?; 446 | 447 | Ok(()) 448 | } 449 | 450 | 451 | /// Updates a rule with the specified ID in a TOML file. 452 | /// 453 | /// # Arguments 454 | /// 455 | /// * `file_path` - A string slice representing the path to the TOML file. 456 | /// * `rule_id` - A string slice representing the ID of the rule to be updated. 457 | /// * `new_rule` - A reference to the updated `Rule` object. 458 | /// 459 | /// # Returns 460 | /// 461 | /// Returns `Ok(())` if the rule with the specified ID is successfully updated in the TOML file, or an error of 462 | /// type `Box` if any issues occur. 
463 | /// 464 | pub fn update_rule_by_id(file_path: &str, rule_id: &str, new_rule: &Rule) -> Result<(), Box> { 465 | 466 | let toml_content = fs::read_to_string(file_path)?; 467 | 468 | let mut toml_data: toml::Value = toml::from_str(&toml_content)?; 469 | 470 | // Update rules with the specified ID 471 | if let Some(rules) = toml_data.get_mut("rules") { 472 | if let Some(rules_array) = rules.as_array_mut() { 473 | for rule in rules_array.iter_mut() { 474 | if let Some(id) = rule.get("id") { 475 | let rule_id_value = id.as_str().unwrap(); 476 | if rule_id_value == rule_id { 477 | // Update the rule with the new values 478 | *rule = toml::value::Value::try_from(new_rule)?; 479 | break; 480 | } 481 | } 482 | } 483 | } 484 | } 485 | 486 | 487 | let modified_toml = toml::to_string_pretty(&toml_data)?; 488 | 489 | 490 | fs::write(file_path, modified_toml)?; 491 | 492 | Ok(()) 493 | } 494 | 495 | 496 | 497 | 498 | /// Writes a JSON report with the provided `Leak` results to the specified file path. 499 | /// 500 | /// # Arguments 501 | /// 502 | /// * `file_path` - The file path where the JSON report will be written. 503 | /// * `results` - A slice containing the `Leak` results to be included in the report. 504 | /// 505 | /// # Returns 506 | /// 507 | /// * `Result<(), Box>` - Returns `Ok(())` if the JSON report is written successfully, 508 | /// or an `Err` variant containing the error information. 509 | /// 510 | pub fn write_json_report(file_path: &str, results: &[Leak]) -> Result<(), Box> { 511 | let json_result = serde_json::to_string_pretty(results)?; 512 | let mut file = File::create(file_path)?; 513 | file.write_all(json_result.as_bytes())?; 514 | Ok(()) 515 | } 516 | 517 | /// Writes a SARIF report with the provided `Leak` results to the specified file path. 518 | /// 519 | /// # Arguments 520 | /// 521 | /// * `file_path` - The file path where the SARIF report will be written. 
522 | /// * `results` - A slice containing the `Leak` results to be included in the report. 523 | /// 524 | /// # Returns 525 | /// 526 | /// * `Result<(), Box>` - Returns `Ok(())` if the SARIF report is written successfully, 527 | /// or an `Err` variant containing the error information. 528 | /// 529 | pub fn write_sarif_report(file_path: &str, results: &[Leak]) -> Result<(), Box> { 530 | let sarif_result = convert_to_sarif(results)?; 531 | let mut file = File::create(file_path)?; 532 | file.write_all(sarif_result.as_bytes())?; 533 | Ok(()) 534 | } 535 | 536 | /// Converts the provided `Leak` results into a SARIF JSON string. 537 | /// 538 | /// # Arguments 539 | /// 540 | /// * `results` - A slice containing the `Leak` results to be converted. 541 | /// 542 | /// # Returns 543 | /// 544 | /// * `Result` - Returns a `String` containing the SARIF JSON if the conversion is 545 | /// successful, or an `Error` if the conversion fails. 546 | /// 547 | fn convert_to_sarif(results: &[Leak]) -> Result { 548 | let mut run_results = vec![]; 549 | for result in results { 550 | let location = json!({ 551 | "physicalLocation": { 552 | "artifactLocation": { 553 | "uri": result.file 554 | }, 555 | "region": { 556 | "startLine": result.line_number, 557 | "snippet": { 558 | "text": result.line 559 | } 560 | } 561 | } 562 | }); 563 | 564 | let run_result = json!({ 565 | "message": { 566 | "text": format!("{} {}", result.rule,"detected!") 567 | }, 568 | "properties": { 569 | "commit": result.commit, 570 | "offender": result.offender, 571 | "date": result.date, 572 | "author": result.author, 573 | "email": result.email, 574 | "commitMessage": result.commit_message, 575 | 576 | "repo": result.repo 577 | }, 578 | "locations": [location] 579 | }); 580 | 581 | run_results.push(run_result); 582 | } 583 | 584 | let sarif_json = json!({ 585 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 586 | "version": "2.1.0", 587 | "runs": [ 588 | { 589 | 
"tool": { 590 | "driver": { 591 | "name": "Gitleaks", 592 | "semanticVersion": "v6.2.0", 593 | "rules": [] 594 | } 595 | }, 596 | "results": run_results 597 | } 598 | ] 599 | }); 600 | 601 | serde_json::to_string_pretty(&sarif_json) 602 | } 603 | 604 | /// Writes a CSV report with the provided results to the specified file path. 605 | /// 606 | /// # Arguments 607 | /// 608 | /// * `file_path` - The file path where the CSV report will be written. 609 | /// * `results` - A slice containing the `Leak` results to be written to the CSV. 610 | /// 611 | /// # Returns 612 | /// 613 | /// * `Result<(), Box>` - Returns `Ok(())` if the CSV report is written successfully, 614 | /// or an `Err` variant containing the error information. 615 | pub fn write_csv_report(file_path: &str, results: &[Leak]) -> Result<(), Box> { 616 | let mut data: Vec = vec![]; 617 | for leak in results { 618 | let item = CsvResult { 619 | repo: leak.repo.clone(), 620 | line_number: leak.line_number, 621 | line: leak.line.clone(), 622 | offender: leak.offender.clone(), 623 | commit: leak.commit.clone(), 624 | rule: leak.rule.clone(), 625 | commit_message: leak.commit_message.clone(), 626 | author: leak.author.clone(), 627 | email: leak.email.clone(), 628 | file: leak.file.clone(), 629 | date: leak.date.clone(), 630 | }; 631 | data.push(item); 632 | } 633 | let file = File::create(file_path)?; 634 | let mut writer = Writer::from_writer(file); 635 | for item in data { 636 | writer.serialize(item)?; 637 | } 638 | writer.flush()?; 639 | 640 | Ok(()) 641 | } 642 | 643 | /// Check if the provided `path` is in the allowlist of paths. 644 | /// 645 | /// 646 | /// # Arguments 647 | /// 648 | /// * `path` - The path to check against the allowlist paths. 649 | /// * `allowlist_paths` - A slice of strings representing the allowlist paths. 650 | /// 651 | /// # Returns 652 | /// 653 | /// Returns `true` if the `path` is found in the allowlist paths, otherwise `false`. 
654 | /// 655 | pub fn is_path_in_allowlist(path: &str, allowlist_paths: &[String]) -> bool { 656 | for allowlist_path in allowlist_paths { 657 | if is_regex(allowlist_path) { 658 | let allowlist_regex = Regex::new(allowlist_path).unwrap(); 659 | if allowlist_regex.is_match(path) { 660 | return true; 661 | } 662 | } else { 663 | for allowlist_path in allowlist_paths { 664 | if allowlist_path == path { 665 | return true; 666 | } 667 | } 668 | } 669 | } 670 | false 671 | } 672 | 673 | /// Checks if a commit is present in the allowlist of commits. 674 | /// 675 | /// # Arguments 676 | /// 677 | /// * `commit` - The commit to check. 678 | /// * `allow_commits` - A slice containing the allowlist of commits. 679 | /// 680 | /// # Returns 681 | /// 682 | /// * `bool` - Returns `true` if the commit is found in the allowlist, otherwise `false`. 683 | /// 684 | pub fn is_commit_in_allowlist(commit: &str, allow_commits: &[String]) -> bool { 685 | for allowlist_commit in allow_commits { 686 | if commit == allowlist_commit { 687 | return true; 688 | } 689 | } 690 | false 691 | } 692 | 693 | /// Check if the provided `test_string` matches any of the regular expressions in the `regex_array`. 694 | /// 695 | /// # Arguments 696 | /// 697 | /// * `regex_array` - A vector of regular expression strings to check against the `test_string`. 698 | /// * `test_string` - The string to test against the regular expressions in `regex_array`. 699 | /// 700 | /// # Returns 701 | /// 702 | /// Returns `true` if the `test_string` matches any of the regular expressions in `regex_array`, otherwise `false`. 703 | /// 704 | pub fn is_string_matched(regex_array: &[String], test_string: &str) -> bool { 705 | for regex_str in regex_array.iter() { 706 | let regex = Regex::new(regex_str).unwrap(); 707 | if regex.is_match(test_string) { 708 | return true; 709 | } 710 | } 711 | false 712 | } 713 | 714 | /// Check if the provided `content` contains any of the strings in the given `array`. 
It is used to find stopswords. 715 | /// 716 | /// # Arguments 717 | /// 718 | /// * `array` - A vector of strings to check against the `content`. 719 | /// * `content` - The string to check for the presence of any of the strings in `array`. 720 | /// 721 | /// # Returns 722 | /// 723 | /// Returns `true` if any of the strings in `array` is found in the `content`, otherwise `false`. 724 | /// 725 | pub fn is_contains_strs(array: &[String], content: &str) -> bool { 726 | for item in array.iter() { 727 | if content.contains(item) { 728 | return true; 729 | } 730 | } 731 | false 732 | } 733 | 734 | /// Checks if a given text is a link. 735 | /// 736 | /// # Arguments 737 | /// 738 | /// * `text` - The text to check for links. 739 | /// 740 | /// # Returns 741 | /// 742 | /// * `bool` - Returns `true` if the text contains a link, otherwise `false`. 743 | /// 744 | pub fn is_link(text: &str) -> bool { 745 | let re = Regex::new(r"(?i)\b((?:https?://|www\.)\S+)\b").unwrap(); 746 | re.is_match(text) 747 | } 748 | 749 | /// Check if the given string is a regular expression. 750 | /// 751 | /// 752 | /// # Arguments 753 | /// 754 | /// * `s` - The string to check for regular expression syntax. 755 | /// 756 | /// # Returns 757 | /// 758 | /// Returns `true` if the string is a regular expression, otherwise `false`. 759 | /// 760 | fn is_regex(s: &str) -> bool { 761 | //TODO: Improve regular expression check 762 | s.starts_with('(') && s.ends_with('$')&&!s.starts_with('/') 763 | 764 | } 765 | 766 | /// Removes duplicates from `array1` based on the elements in `array2`. 767 | /// 768 | /// # Arguments 769 | /// 770 | /// * `array1` - The first vector containing elements to remove duplicates from. 771 | /// * `array2` - The second vector used to determine the duplicates. 772 | /// 773 | /// # Type Constraints 774 | /// 775 | /// `T` must implement the `Eq`, `std::hash::Hash`, and `Clone` traits. 
776 | /// 777 | /// # Returns 778 | /// 779 | /// Returns a new vector that contains the elements from `array1` without the duplicates 780 | /// that are present in `array2`. 781 | /// 782 | pub fn remove_duplicates( 783 | array1: Vec, 784 | array2: Vec, 785 | ) -> Vec { 786 | let set: HashSet<_> = array2.into_iter().collect(); 787 | array1.into_iter().filter(|x| !set.contains(x)).collect() 788 | } 789 | 790 | #[cfg(test)] 791 | mod tests { 792 | use super::*; 793 | static CONFIG_FILE_PATH: &str = "examples/test_gitleaks.toml"; 794 | 795 | fn mock_config_content() -> Value { 796 | toml::from_str::( 797 | r#" 798 | [[rules]] 799 | description = "Rule 1" 800 | id = "rule1" 801 | regex = "\\d+" 802 | entropy = 0.5 803 | keywords = ["keyword1", "keyword2"] 804 | 805 | [[rules]] 806 | description = "Rule 2" 807 | id = "rule2" 808 | regex = "[A-Z]+" 809 | entropy = 0.3 810 | keywords = ["keyword3"] 811 | 812 | [[rules]] 813 | description = "Rule 3" 814 | id = "rule3" 815 | regex = "[a-z]+" 816 | entropy = 0.2 817 | keywords = ["keyword4", "keyword5"] 818 | 819 | [[rules]] 820 | description = "Rule 4" 821 | id = "rule4" 822 | regex = "\\w+" 823 | entropy = 0.4 824 | keywords = ["keyword6"] 825 | "#, 826 | ) 827 | .unwrap() 828 | } 829 | 830 | fn mock_leaks() -> Vec { 831 | vec![Leak { 832 | line: "Sensitive information".to_string(), 833 | line_number: 42, 834 | offender: "John Doe".to_string(), 835 | commit: "abcd1234".to_string(), 836 | repo: "my-repo".to_string(), 837 | rule: "password_leak".to_string(), 838 | commit_message: "Fix security issue".to_string(), 839 | author: "John Doe".to_string(), 840 | email: "john@example.com".to_string(), 841 | file: "path/to/file.txt".to_string(), 842 | date: "2023-05-30".to_string(), 843 | }] 844 | } 845 | #[test] 846 | fn test_load_config() { 847 | let result = load_config_file(CONFIG_FILE_PATH); 848 | assert!(result.is_ok()); 849 | } 850 | 851 | #[test] 852 | fn test_config_allowlist_valid_config() { 853 | let result = 
config_allowlist(&mock_config_content()); 854 | assert!(result.is_ok()); 855 | } 856 | #[test] 857 | fn test_config_ruleslist_and_keywords() { 858 | let result = config_ruleslist_and_keywords(&mock_config_content()); 859 | 860 | assert!(result.is_ok()); 861 | let ruleslist = result.unwrap(); 862 | 863 | assert_eq!(ruleslist.len(), 4); 864 | 865 | let rule1 = &ruleslist[0]; 866 | assert_eq!(rule1.description, "Rule 1"); 867 | assert_eq!(rule1.id, "rule1"); 868 | assert_eq!(rule1.regex, "\\d+"); 869 | assert_eq!(rule1.keywords, vec!["keyword1", "keyword2"]); 870 | assert!(rule1.allowlist.is_none()); 871 | 872 | let rule2 = &ruleslist[1]; 873 | assert_eq!(rule2.description, "Rule 2"); 874 | assert_eq!(rule2.id, "rule2"); 875 | assert_eq!(rule2.regex, "[A-Z]+"); 876 | assert_eq!(rule2.keywords, vec!["keyword3"]); 877 | assert!(rule2.allowlist.is_none()); 878 | 879 | let rule3 = &ruleslist[2]; 880 | assert_eq!(rule3.description, "Rule 3"); 881 | assert_eq!(rule3.id, "rule3"); 882 | assert_eq!(rule3.regex, "[a-z]+"); 883 | assert_eq!(rule3.keywords, vec!["keyword4", "keyword5"]); 884 | assert!(rule3.allowlist.is_none()); 885 | 886 | let rule4 = &ruleslist[3]; 887 | assert_eq!(rule4.description, "Rule 4"); 888 | assert_eq!(rule4.id, "rule4"); 889 | assert_eq!(rule4.regex, "\\w+"); 890 | assert_eq!(rule4.keywords, vec!["keyword6"]); 891 | assert!(rule4.allowlist.is_none()); 892 | } 893 | 894 | #[test] 895 | fn test_write_rule_to_toml() { 896 | let rule = Rule { 897 | description: "Adafruit API Key".to_string(), 898 | id: "adafruit-api-key".to_string(), 899 | regex: r#"(?i)(?:adafruit)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"#.to_string(), 900 | keywords: vec!["adafruit".to_string()], 901 | allowlist: None, 902 | }; 903 | let result = append_rule_to_toml(&rule, CONFIG_FILE_PATH); 904 | assert!(result.is_ok()); 905 | } 906 | 907 | #[test] 908 | fn test_delete_rule_by_id() { 909 
| if let Err(err) = delete_rule_by_id(CONFIG_FILE_PATH, "adafruit-api-key") { 910 | eprintln!("Error: {}", err); 911 | } 912 | } 913 | 914 | #[test] 915 | fn test_update_rule_by_id() { 916 | let rule = Rule { 917 | description: "11111111111".to_string(), 918 | id: "stripe-access-token".to_string(), 919 | regex: r#"(?i)(?:adafruit)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"#.to_string(), 920 | keywords: vec!["adafruit".to_string()], 921 | allowlist: None, 922 | }; 923 | let result = update_rule_by_id( CONFIG_FILE_PATH,&rule.id,&rule,); 924 | 925 | assert!(result.is_ok()); 926 | } 927 | #[test] 928 | fn test_is_path_in_allowlist_regex_not_match() { 929 | let path = "/path/to/file.txt"; 930 | let allowlist_paths = vec!["/other/.*\\.txt".to_string()]; 931 | let result = is_path_in_allowlist(path, &allowlist_paths); 932 | assert_eq!(result, false); 933 | } 934 | 935 | #[test] 936 | fn test_is_path_in_allowlist_exact_match() { 937 | let path = "tests/files/gitleaks.toml"; 938 | let allowlist_paths = vec!["tests/files/gitleaks.toml".to_string()]; 939 | let result = is_path_in_allowlist(path, &allowlist_paths); 940 | assert_eq!(result, true); 941 | } 942 | 943 | #[test] 944 | fn test_is_string_matched_match() { 945 | let regex_array = vec!["^hello".to_string(), "world$".to_string()]; 946 | let test_string = "hello, world!"; 947 | let result = is_string_matched(®ex_array, test_string); 948 | assert_eq!(result, true); 949 | } 950 | 951 | #[test] 952 | fn test_is_string_matched_not_match() { 953 | let regex_array = vec!["^hello".to_string(), "world$".to_string()]; 954 | let test_string = "goodbye"; 955 | let result = is_string_matched(®ex_array, test_string); 956 | assert_eq!(result, false); 957 | } 958 | 959 | #[test] 960 | fn test_is_contains_strs_contains() { 961 | let array = vec![ 962 | "apple".to_string(), 963 | "banana".to_string(), 964 | "orange".to_string(), 965 | ]; 966 | 
let content = "I like to eat bananas"; 967 | let result = is_contains_strs(&array, content); 968 | assert_eq!(result, true); 969 | } 970 | 971 | #[test] 972 | fn test_is_contains_strs_not_contains() { 973 | let array = vec![ 974 | "apple".to_string(), 975 | "banana".to_string(), 976 | "orange".to_string(), 977 | ]; 978 | let content = "I like to eat grapes"; 979 | let result = is_contains_strs(&array, content); 980 | assert_eq!(result, false); 981 | } 982 | 983 | #[test] 984 | fn test_is_regex_valid_case() { 985 | let input = "(regex$"; 986 | let result = is_regex(input); 987 | assert_eq!(result, true); 988 | } 989 | 990 | #[test] 991 | fn test_is_regex_invalid_case() { 992 | let input = "(regex"; 993 | let result = is_regex(input); 994 | assert_eq!(result, false); 995 | } 996 | 997 | #[test] 998 | fn test_is_regex_empty_string() { 999 | let input = ""; 1000 | let result = is_regex(input); 1001 | assert_eq!(result, false); 1002 | } 1003 | 1004 | #[test] 1005 | fn test_remove_duplicates() { 1006 | // Test case 1 1007 | let array1 = vec![1, 1, 2, 3, 4, 5]; 1008 | let array2 = vec![3, 4, 5, 6, 7]; 1009 | let result = remove_duplicates(array1, array2); 1010 | assert_eq!(result, vec![1, 1, 2]); 1011 | } 1012 | 1013 | #[test] 1014 | fn test_is_link_with_valid_links() { 1015 | assert!(is_link("https://www.example.com")); 1016 | assert!(is_link("http://example.com")); 1017 | assert!(is_link("www.example.com")); 1018 | assert!(is_link("www.example.com/path")); 1019 | assert!(is_link("www.example.com?q=query")); 1020 | } 1021 | 1022 | #[test] 1023 | fn test_is_link_with_invalid_links() { 1024 | assert!(!is_link("example.com")); 1025 | assert!(!is_link("example.com/path")); 1026 | assert!(!is_link("example.com?q=query")); 1027 | assert!(!is_link("not a link")); 1028 | } 1029 | 1030 | // test report functions 1031 | #[test] 1032 | fn test_write_json_report() { 1033 | let temp_file = tempfile::NamedTempFile::new().unwrap(); 1034 | let file_path = 
temp_file.path().to_str().unwrap(); 1035 | 1036 | write_json_report(file_path, &&mock_leaks()).unwrap(); 1037 | 1038 | let json_content = fs::read_to_string(file_path).unwrap(); 1039 | 1040 | assert!(json_content.contains("Sensitive information")); 1041 | assert!(json_content.contains("path/to/file.txt")); 1042 | } 1043 | 1044 | #[test] 1045 | fn test_write_sarif_report() { 1046 | let temp_file = tempfile::NamedTempFile::new().unwrap(); 1047 | let file_path = temp_file.path().to_str().unwrap(); 1048 | 1049 | write_sarif_report(file_path, &mock_leaks()).unwrap(); 1050 | 1051 | let sarif_content = fs::read_to_string(file_path).unwrap(); 1052 | 1053 | assert!(sarif_content.contains("Sensitive information")); 1054 | assert!(sarif_content.contains("path/to/file.txt")); 1055 | 1056 | } 1057 | 1058 | #[test] 1059 | fn test_write_csv_report() { 1060 | let temp_file = tempfile::NamedTempFile::new().unwrap(); 1061 | let file_path = temp_file.path().to_str().unwrap(); 1062 | 1063 | write_csv_report(file_path, &&mock_leaks()).unwrap(); 1064 | 1065 | let csv_content = fs::read_to_string(file_path).unwrap(); 1066 | 1067 | assert!(csv_content.contains("Sensitive information")); 1068 | assert!(csv_content.contains("path/to/file.txt")); 1069 | } 1070 | } 1071 | -------------------------------------------------------------------------------- /src/utils/git_util.rs: -------------------------------------------------------------------------------- 1 | extern crate chrono; 2 | extern crate git2; 3 | use crate::errors::CustomError; 4 | use crate::models::{CommitInfo, Config, Scan}; 5 | use crate::utils::detect_utils::{is_commit_in_allowlist, is_link, is_path_in_allowlist}; 6 | use chrono::Local; 7 | use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc}; 8 | use git2::Repository; 9 | use regex::Regex; 10 | use std::error::Error; 11 | use std::fs; 12 | 13 | /// Loads a repository from the specified path. 
14 | /// 15 | /// # Arguments 16 | /// 17 | /// * `repo_path` - A string slice that represents the path to the repository. 18 | /// 19 | /// # Returns 20 | /// 21 | /// Returns a `Result` containing a `Repository` if the repository is loaded successfully, or an error if the repository fails to load. 22 | /// 23 | pub fn load_repository(repo_path: &str) -> Result> { 24 | let repo = match Repository::open(repo_path) { 25 | Ok(repo) => repo, 26 | Err(_) => { 27 | return Err(Box::new(CustomError::FailLoadRepo)); 28 | } 29 | }; 30 | 31 | Ok(repo) 32 | } 33 | 34 | /// Retrieves the name of the repository from the provided `Repository` object. 35 | /// 36 | /// # Arguments 37 | /// 38 | /// * `repo` - A reference to a `Repository` object. 39 | /// 40 | /// # Returns 41 | /// 42 | /// Returns a `Result` containing the name of the repository as a `String` if successful, or an error if the repository name is invalid or cannot be determined. 43 | /// 44 | /// # Errors 45 | /// 46 | /// This function may return the following errors: 47 | /// 48 | /// * `CustomError::InvalidRepoName` - Indicates that the repository name is invalid. 49 | /// 50 | pub fn config_repo_name(repo: &Repository) -> Result> { 51 | let repo_path = repo.path(); 52 | let repo_dir = repo_path.parent().ok_or(CustomError::InvalidRepoName)?; 53 | 54 | let repo_name = repo_dir 55 | .file_name() 56 | .and_then(|s| s.to_str()) 57 | .unwrap_or("") 58 | .to_string(); 59 | if repo_name.ends_with(".git") { 60 | Ok(repo_name[..repo_name.len() - 4].to_string()) 61 | } else { 62 | Ok(repo_name) 63 | } 64 | } 65 | 66 | /// Traverse the tree in batches and collect file paths and contents. 67 | /// 68 | /// Parameters: 69 | /// - `repo`: Reference to the repository. 70 | /// - `tree`: Reference to the tree object. 71 | /// - `path`: Path of the current tree. 72 | /// - `files`: Mutable vector to store the file paths and contents. 73 | /// - `scan`: Reference to the Scan object. 74 | /// - `commit_id`: Commit ID. 
75 | /// 76 | /// Returns: 77 | /// - `Ok(())`: If the traversal is successful. 78 | /// - `Err(Box)`: If an error occurs during traversal. 79 | pub fn traverse_tree( 80 | repo: &Repository, 81 | tree: &git2::Tree, 82 | path: &str, 83 | files: &mut Vec<(String, String)>, 84 | scan: &Scan, 85 | commit_id: git2::Oid, 86 | 87 | ) -> Result<(), Box> { 88 | for entry in tree.iter() { 89 | let entry_path = format!("{}/{}", path, entry.name().unwrap()); 90 | // Skip entry if it is in the allowlist paths, in the allowlist commits, or is an ignored path 91 | if (is_path_in_allowlist(&entry_path, &scan.allowlist.paths)) 92 | || (is_commit_in_allowlist(&commit_id.to_string(), &scan.allowlist.commits)) 93 | || is_ignored_path(&entry_path) 94 | { 95 | continue; 96 | } 97 | if entry.kind() == Some(git2::ObjectType::Blob) { 98 | let blob = repo 99 | .find_blob(entry.id()) 100 | .map_err(|_| CustomError::ObjectNotFound)?; 101 | let content = String::from_utf8_lossy(blob.content()); 102 | files.push((entry_path, content.to_string())); 103 | } else if entry.kind() == Some(git2::ObjectType::Tree) { 104 | let subtree = repo 105 | .find_tree(entry.id()) 106 | .map_err(|_| CustomError::RepoInternalError)?; 107 | traverse_tree(repo, &subtree, &entry_path, files,scan,commit_id)?; 108 | } 109 | } 110 | Ok(()) 111 | } 112 | 113 | /// skip the files or directories begin with "." 114 | fn is_ignored_path(path: &str) -> bool { 115 | let path_segments: Vec<&str> = path.split('/').collect(); 116 | for item in path_segments { 117 | if item.starts_with('.') { 118 | return true; 119 | } 120 | } 121 | false 122 | } 123 | 124 | /// Retrieves commit information from the given `Repository` and `Commit`. 125 | /// 126 | /// # Arguments 127 | /// 128 | /// * `repo` - A reference to a `Repository` object. 129 | /// * `commit` - A reference to a `Commit` object representing the commit to retrieve information from. 
130 | /// 131 | /// # Returns 132 | /// 133 | /// Returns a `Result` containing a `CommitInfo` struct if the retrieval is successful, or an error if an error occurs during the retrieval. 134 | /// 135 | /// # Errors 136 | /// 137 | /// This function may return the following errors: 138 | /// 139 | /// * `CustomError::InvalidRepoName` - Indicates that the repository name is invalid. 140 | /// * `CustomError::ObjectNotFound` - Indicates that an object in the repository is not found. 141 | /// * `CustomError::RepoInternalError` - Indicates an internal error in the repository. 142 | /// 143 | #[allow(deprecated)] 144 | pub fn config_commit_info( 145 | repo: &Repository, 146 | commit: &git2::Commit, 147 | scan: &Scan, 148 | ) -> Result> { 149 | // Config info 150 | let commit_id = commit.id(); 151 | let author = commit.author(); 152 | let email = author.email().unwrap_or("").to_string(); 153 | let commit_message = commit.message().unwrap_or("").to_string(); 154 | let date = Utc.timestamp(commit.time().seconds(), 0); 155 | let offset = FixedOffset::west(commit.time().offset_minutes() * 60); 156 | let date = offset.from_utc_datetime(&date.naive_utc()); 157 | let mut files = Vec::new(); 158 | 159 | let repo_name = match config_repo_name(repo) { 160 | Ok(repo_name) => repo_name, 161 | Err(_) => { 162 | return Err(Box::new(CustomError::InvalidRepoName)); 163 | } 164 | }; 165 | 166 | // Retrieve the tree of the commit 167 | let tree = commit.tree().map_err(|_| CustomError::ObjectNotFound)?; 168 | 169 | // Traverse the tree to get the file paths and content 170 | traverse_tree( 171 | repo, 172 | &tree, 173 | "", 174 | &mut files, 175 | scan, 176 | commit_id, 177 | 178 | ) 179 | .map_err(|_| CustomError::RepoInternalError)?; 180 | let commit_info = CommitInfo { 181 | repo: repo_name, 182 | commit: commit_id, 183 | author: author.name().unwrap_or("").to_string(), 184 | email, 185 | commit_message, 186 | date, 187 | files, 188 | }; 189 | 190 | Ok(commit_info) 191 | } 192 | 193 | 
/// Loads all commit IDs from the repository in topological order.
///
/// # Arguments
///
/// * `repo` - A reference to a `Repository` object representing the repository.
///
/// # Returns
///
/// Returns a `Result` containing a vector of commit IDs (`Vec<String>`) if the operation is successful, or an error if an error occurs during the process.
///
/// # Errors
///
/// This function may return the following errors:
///
/// * `CustomError::AccessWalkerError` - Indicates an error occurred while accessing the revision walker.
/// * `CustomError::PushWalkerHeadError` - Indicates an error occurred while pushing the head commit to the revision walker or setting the sorting order.
/// * `CustomError::WalkerSortError` - Indicates an error occurred while iterating the revision walker.
/// * `CustomError::RepoCommitError` - Indicates an error occurred while finding a commit in the repository.
///
pub fn load_all_commits(repo: &Repository) -> Result<Vec<String>, Box<dyn Error>> {
    let mut revwalk = repo.revwalk().map_err(|_| CustomError::AccessWalkerError)?;

    // Walk the whole history starting at HEAD, in topological order.
    revwalk
        .push_head()
        .map_err(|_| CustomError::PushWalkerHeadError)?;
    revwalk
        .set_sorting(git2::Sort::TOPOLOGICAL)
        .map_err(|_| CustomError::PushWalkerHeadError)?;

    let mut commits = Vec::new();

    for oid in revwalk {
        let oid = oid.map_err(|_| CustomError::WalkerSortError)?;
        // `find_commit` validates that the oid actually refers to a commit.
        let commit = repo
            .find_commit(oid)
            .map_err(|_| CustomError::RepoCommitError)?;
        commits.push(commit.id().to_string());
    }

    Ok(commits)
}

/// Loads a subset of commits based on specified conditions.
///
/// # Arguments
///
/// * `commit_from` - An optional string representing the start commit ID.
/// * `commit_to` - An optional string representing the end commit ID.
242 | /// * `commits` - A slice of strings representing the available commit IDs. 243 | /// 244 | /// # Returns 245 | /// 246 | /// Returns a vector of commit IDs as strings, representing the subset of commits based on 247 | /// the specified conditions. If the start commit is after the end commit or if either commit 248 | /// is not found in the input commits, an empty vector is returned. 249 | /// 250 | pub fn load_commits_by_conditions( 251 | commit_from: Option, 252 | commit_to: Option, 253 | commits: &[String], 254 | ) -> Vec { 255 | match (commit_from, commit_to) { 256 | (Some(start_commit), Some(end_commit)) => { 257 | let start_index = commits.iter().position(|commit| *commit == start_commit); 258 | let end_index = commits.iter().position(|commit| *commit == end_commit); 259 | 260 | if let (Some(start), Some(end)) = (start_index, end_index) { 261 | if start <= end { 262 | commits[start..=end].to_vec() 263 | } else { 264 | Vec::new() 265 | } 266 | } else { 267 | Vec::new() 268 | } 269 | } 270 | _ => Vec::new(), 271 | } 272 | } 273 | 274 | /// Loads all commit IDs from the given `Repository`. 275 | /// 276 | /// # Arguments 277 | /// 278 | /// * `repo` - A reference to a `Repository` object. 279 | /// 280 | /// # Returns 281 | /// 282 | /// Returns a `Result` containing a vector of commit IDs as strings if the loading is successful, or an error if an error occurs during the loading. 283 | /// 284 | /// # Errors 285 | /// 286 | /// This function may return the following errors: 287 | /// 288 | /// * `CustomError::AccessWalkerError` - Indicates an error in accessing the commit walker. 289 | /// * `CustomError::PushWalkerHeadError` - Indicates an error in pushing the head to the commit walker. 290 | /// * `CustomError::WalkerSortError` - Indicates an error in sorting the commit walker. 291 | /// * `CustomError::RepoCommitError` - Indicates an error in finding a commit in the repository. 
292 | /// 293 | pub fn load_all_object_ids(repo: &Repository) -> Result, Box> { 294 | let mut object_ids = Vec::new(); 295 | let odb = repo.odb().map_err(|_| CustomError::ObjectNotAccess)?; 296 | 297 | odb.foreach(|id| { 298 | object_ids.push(*id); 299 | true 300 | }) 301 | .map_err(|_| CustomError::RepoInternalError)?; 302 | 303 | Ok(object_ids) 304 | } 305 | 306 | /// Parses a date string into a `DateTime` object. 307 | /// 308 | /// # Arguments 309 | /// 310 | /// * `input` - A string slice representing the date to parse. The expected format is "%Y-%m-%d". 311 | /// * `mytype` - A string slice indicating the type of datetime to create. It can be either "start" or any other value. 312 | /// 313 | /// # Returns 314 | /// 315 | /// Returns a `Result` containing a `DateTime` object if the parsing is successful, or an error if an error occurs during the parsing. 316 | /// 317 | /// # Errors 318 | /// 319 | /// This function may return the following errors: 320 | /// 321 | /// * `CustomError::InvalidDateFormat` - Indicates that the input date format is invalid. 322 | /// * `CustomError::InvalidTimeFormat` - Indicates that the time format is invalid. 
323 | /// 324 | pub fn parse_date_to_datetime(input: &str, mytype: &str) -> Result, Box> { 325 | let date = 326 | NaiveDate::parse_from_str(input, "%Y-%m-%d").map_err(|_| CustomError::InvalidDateFormat)?; 327 | 328 | let time: NaiveTime; 329 | if mytype == "start" { 330 | if let Some(t) = NaiveTime::from_hms_opt(0, 0, 0) { 331 | time = t; 332 | } else { 333 | return Err(Box::new(CustomError::InvalidTimeFormat)); 334 | } 335 | } else if let Some(t) = NaiveTime::from_hms_opt(23, 59, 59) { 336 | time = t; 337 | } else { 338 | return Err(Box::new(CustomError::InvalidTimeFormat)); 339 | } 340 | 341 | let datetime = NaiveDateTime::new(date, time); 342 | // let datetime_utc = DateTime::from_utc(datetime, Utc); 343 | let datetime_utc = DateTime::from_naive_utc_and_offset(datetime, Utc); 344 | 345 | Ok(datetime_utc) 346 | } 347 | 348 | /// Checks if the input string has a valid date format of "YYYY-MM-DD". 349 | /// 350 | /// # Arguments 351 | /// 352 | /// * `input` - The string to be checked for date format validity. 353 | /// 354 | /// # Returns 355 | /// 356 | /// Returns `true` if the input string has a valid date format, otherwise `false`. 357 | pub fn is_valid_date_format(input: &str) -> bool { 358 | if let Ok(date) = NaiveDate::parse_from_str(input, "%Y-%m-%d") { 359 | let formatted = date.format("%Y-%m-%d").to_string(); 360 | return formatted == input; 361 | } 362 | false 363 | } 364 | 365 | /// Loads the content of a configuration file (`.gitleaks.toml` or `gitleaks.toml`) from the target repository. 366 | /// 367 | /// # Arguments 368 | /// 369 | /// * `repo` - A reference to a `Repository` object representing the target repository. 370 | /// 371 | /// # Returns 372 | /// 373 | /// Returns a `Result` containing an `Option` with the content of the configuration file if found, or `None` if the configuration file is not found in any commit. 
374 | /// 375 | /// # Errors 376 | /// 377 | /// This function may return an error if any error occurs during the repository traversal or object retrieval. 378 | /// 379 | pub fn load_config_content_from_target_repo( 380 | repo: &Repository, 381 | ) -> Result, Box> { 382 | let head_commit = repo.head()?.peel_to_commit()?; 383 | let mut walker = repo.revwalk()?; 384 | walker.push(head_commit.id())?; 385 | 386 | // Iterate over all commits in the repository 387 | for commit_id in walker { 388 | let commit = repo.find_commit(commit_id?)?; 389 | let tree = commit.tree()?; 390 | 391 | // Iterate over all entries in the tree 392 | for entry in tree.iter() { 393 | let file_name = entry.name().unwrap_or(""); 394 | if file_name == ".gitleaks.toml" || file_name == "gitleaks.toml" { 395 | let blob = entry.to_object(repo)?.peel_to_blob()?; 396 | let content = String::from_utf8_lossy(blob.content()); 397 | return Ok(Some(content.into())); 398 | } 399 | } 400 | } 401 | 402 | Ok(None) 403 | } 404 | 405 | /// Extracts the repository name from a given URL. 406 | /// 407 | /// # Arguments 408 | /// 409 | /// * `url` - A string slice representing the URL of the repository. 410 | /// 411 | /// # Returns 412 | /// 413 | /// Returns an `Option` containing the extracted repository name if it matches the expected format, or `None` if the extraction fails. 414 | /// 415 | pub fn extract_repo_name(url: &str) -> Option { 416 | let re = Regex::new(r"/([^/]+)\.git$").unwrap(); 417 | if let Some(captures) = re.captures(url) { 418 | if let Some(repo_name) = captures.get(1) { 419 | return Some(repo_name.as_str().to_string()); 420 | } 421 | } 422 | None 423 | } 424 | 425 | /// Clones or loads a repository based on the provided configuration. 426 | /// 427 | /// # Arguments 428 | /// 429 | /// * `config` - A reference to a `Config` object containing the repository information. 
430 | /// 431 | /// # Returns 432 | /// 433 | /// Returns a `Result` containing a `Repository` object if the operation is successful, or an error if an error occurs during cloning or loading. 434 | /// 435 | /// # Errors 436 | /// 437 | /// This function may return the following errors: 438 | /// 439 | /// * `CustomError::FailDeteleDir` - Indicates that the directory removal operation failed. 440 | /// * `CustomError::FailCreateDir` - Indicates that the directory creation operation failed. 441 | /// * `CustomError::FailCloneRepo` - Indicates that the repository cloning operation failed. 442 | /// * `CustomError::FailLoadRepo` - Indicates that the repository loading operation failed. 443 | /// 444 | #[warn(clippy::needless_return)] 445 | pub fn clone_or_load_repository(config: &Config) -> Result> { 446 | if is_link(&config.repo) { 447 | let repo_path = match &config.disk { 448 | Some(disk) => disk.to_string(), 449 | None => { 450 | let dest = "workplace/"; 451 | let mut repo_path = String::new(); 452 | if let Some(name) = extract_repo_name(&config.repo) { 453 | repo_path = format!("{}{}", dest, name); 454 | } 455 | 456 | if fs::metadata(&repo_path).is_ok() { 457 | match fs::remove_dir_all(&repo_path) { 458 | Ok(_) => {} 459 | Err(_) => { 460 | return Err(Box::new(CustomError::FailDeleteDir)); 461 | } 462 | } 463 | } 464 | 465 | match fs::create_dir(&repo_path) { 466 | Ok(_) => {} 467 | Err(_) => { 468 | return Err(Box::new(CustomError::FailCreateDir)); 469 | } 470 | } 471 | repo_path 472 | } 473 | }; 474 | match Repository::clone(&config.repo, repo_path) { 475 | Ok(repo) => { 476 | println!( 477 | "\x1b[34m[INFO]\x1b[0m[{}] Clone repo ...", 478 | Local::now().format("%Y-%m-%d %H:%M:%S"), 479 | ); 480 | 481 | Ok(repo) 482 | } 483 | Err(_) => Err(Box::new(CustomError::FailCloneRepo)), 484 | } 485 | } else { 486 | match load_repository(&config.repo) { 487 | Ok(repo) => { 488 | println!( 489 | "\x1b[34m[INFO]\x1b[0m[{}] Clone repo ...", 490 | 
Local::now().format("%Y-%m-%d %H:%M:%S"), 491 | ); 492 | 493 | Ok(repo) 494 | } 495 | 496 | Err(_) => Err(Box::new(CustomError::FailLoadRepo)), 497 | } 498 | } 499 | } 500 | 501 | // NOTE: The commented-out function can be tested after specifying the repo file 502 | #[cfg(test)] 503 | mod tests { 504 | 505 | use super::*; 506 | // static VALID_PATH: &str = "D:/Workplace/Git/TestGitOperation"; 507 | // static INVALID_PATH: &str = "D:/Workplace/Git/TestGitOperation222"; 508 | 509 | // // test load_repository 510 | // #[test] 511 | // fn test_load_repository_valid_path() { 512 | // let result = load_repository(VALID_PATH); 513 | // assert!(result.is_ok()); 514 | // } 515 | 516 | // #[test] 517 | // fn test_load_repository_invalid_path() { 518 | // let result = load_repository(INVALID_PATH); 519 | // assert!(result.is_err()); 520 | // } 521 | 522 | // NOTE: The commented-out function can be tested after specifying the repo file 523 | // // test config_repo_name 524 | // #[test] 525 | // fn test_config_repo_name_valid_repo() { 526 | // let repo = match load_repository(VALID_PATH) { 527 | // Ok(repo) => repo, 528 | // Err(_) => { 529 | // panic!("Failed to load repository"); 530 | // } 531 | // }; 532 | // let result = match config_repo_name(&repo) { 533 | // Ok(result) => result, 534 | // Err(e) => { 535 | // panic!("Error:{}", e); 536 | // } 537 | // }; 538 | // assert_eq!(result, "TestGitOperation"); 539 | // } 540 | 541 | // // test load_all_commits 542 | // #[test] 543 | // fn test_load_all_commits_valid_repository() { 544 | // let repo = match Repository::init(VALID_PATH) { 545 | // Ok(repo) => repo, 546 | // Err(e) => { 547 | // eprintln!("{}", e); 548 | // panic!("Failed to initialize repository"); 549 | // } 550 | // }; 551 | 552 | // let result = load_all_commits(&repo); 553 | 554 | // assert!(result.is_ok()); 555 | // let commits = result.unwrap(); 556 | // assert!(commits.contains(&"9e2fe5fc27b1bb8bd4de5574f8d9010164427051".to_string())); 557 | // } 558 | 559 
| // // test load_commits_by_conditions 560 | // #[test] 561 | // fn test_load_commits_by_conditions_valid_conditions() { 562 | // let commits = vec![ 563 | // "commit1".to_string(), 564 | // "commit2".to_string(), 565 | // "commit3".to_string(), 566 | // "commit4".to_string(), 567 | // "commit5".to_string(), 568 | // ]; 569 | // let commit_from = Some("commit2".to_string()); 570 | // let commit_to = Some("commit4".to_string()); 571 | 572 | // let result = load_commits_by_conditions(commit_from, commit_to, &commits); 573 | 574 | // assert_eq!( 575 | // result, 576 | // vec![ 577 | // "commit2".to_string(), 578 | // "commit3".to_string(), 579 | // "commit4".to_string(), 580 | // ] 581 | // ); 582 | // } 583 | 584 | // // test load_all_object_ids 585 | // #[test] 586 | // fn test_load_all_object_ids_valid_repository() { 587 | // let repo = match Repository::init(VALID_PATH) { 588 | // Ok(repo) => repo, 589 | // Err(e) => { 590 | // eprintln!("{}", e); 591 | // panic!("Failed to initialize repository"); 592 | // } 593 | // }; 594 | 595 | // let oid1 = repo.blob("Content 1".as_bytes()).unwrap(); 596 | // let oid2 = repo.blob("Content 2".as_bytes()).unwrap(); 597 | // let oid3 = repo.blob("Content 3".as_bytes()).unwrap(); 598 | 599 | // let result = load_all_object_ids(&repo); 600 | 601 | // assert!(result.is_ok()); 602 | // let object_ids = result.unwrap(); 603 | // assert!(object_ids.contains(&oid1)); 604 | // assert!(object_ids.contains(&oid2)); 605 | // assert!(object_ids.contains(&oid3)); 606 | // } 607 | 608 | // test parse_date_to_datetime 609 | #[test] 610 | fn test_parse_date_to_datetime_valid_input_start() { 611 | let valid_input = "2023-05-25"; 612 | let mytype = "start"; 613 | let result = parse_date_to_datetime(valid_input, mytype); 614 | assert!(result.is_ok()); 615 | assert_eq!(result.unwrap().to_rfc3339(), "2023-05-25T00:00:00+00:00"); 616 | } 617 | 618 | #[test] 619 | fn test_parse_date_to_datetime_valid_input_end() { 620 | let valid_input = 
"2023-05-25"; 621 | let mytype = "end"; 622 | let result = parse_date_to_datetime(valid_input, mytype); 623 | assert!(result.is_ok()); 624 | assert_eq!(result.unwrap().to_rfc3339(), "2023-05-25T23:59:59+00:00"); 625 | } 626 | 627 | #[test] 628 | fn test_parse_date_to_datetime_invalid_input() { 629 | let invalid_input = "2023-05-32"; 630 | let mytype = "start"; 631 | let result = parse_date_to_datetime(invalid_input, mytype); 632 | assert!(result.is_err()); 633 | } 634 | 635 | // test is_valid_date_format 636 | #[test] 637 | fn test_is_valid_date_format_valid_input() { 638 | let valid_input = "2023-05-25"; 639 | let result = is_valid_date_format(valid_input); 640 | assert!(result); 641 | } 642 | 643 | #[test] 644 | fn test_is_valid_date_format_invalid_input() { 645 | let invalid_input = "2023-05-32"; 646 | let result = is_valid_date_format(invalid_input); 647 | assert!(!result); 648 | } 649 | 650 | // test extract_repo_name 651 | #[test] 652 | fn test_extract_repo_name() { 653 | // Test with a valid URL 654 | let url = "https://github.com/user/repo.git"; 655 | let result = extract_repo_name(url); 656 | assert_eq!(result, Some("repo".to_owned())); 657 | 658 | // Test with a URL without ".git" extension 659 | let url = "https://github.com/user/repo"; 660 | let result = extract_repo_name(url); 661 | assert_eq!(result, None); 662 | } 663 | } 664 | -------------------------------------------------------------------------------- /src/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod detect_utils; 2 | pub mod git_util; --------------------------------------------------------------------------------