├── .devcontainer ├── Dockerfile └── devcontainer.json ├── .github ├── dependabot.yml └── workflows │ └── base.yml ├── .gitignore ├── BUCK ├── CODE-OF-CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE ├── LICENSE-MIT ├── README.md ├── assets └── image-20230605104420197.png ├── examples └── test_gitleaks.toml ├── gitleaks.toml └── src ├── api.rs ├── entity ├── mod.rs └── models.rs ├── errors.rs ├── lib.rs ├── main.rs ├── routes ├── rules.rs └── scan.rs ├── service ├── db_service.rs ├── detect_service.rs ├── git_service.rs └── mod.rs └── utils ├── detect_utils.rs ├── git_util.rs └── mod.rs /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM almalinux:8.10-20250307 2 | 3 | # Install tools and dependencies 4 | RUN dnf update -y \ 5 | && dnf group install -y "Development Tools" \ 6 | && dnf install -y glibc-langpack-en sudo tzdata \ 7 | && dnf install -y curl openssl-devel 8 | 9 | # Create and switch to user 10 | ARG USERNAME 11 | ARG USER_UID 12 | RUN useradd -m -s /bin/bash -u $USER_UID $USERNAME \ 13 | && mkdir -p /etc/sudoers.d \ 14 | && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ 15 | && chmod 0440 /etc/sudoers.d/$USERNAME 16 | USER $USERNAME 17 | 18 | # Install Rust, set environment variable 19 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 20 | ENV PATH="/home/$USERNAME/.cargo/bin:${PATH}" 21 | 22 | # Create and set permissions for workspace directory 23 | USER root 24 | RUN mkdir -p /workspace && chown $USERNAME:$USERNAME /workspace 25 | USER $USERNAME 26 | 27 | WORKDIR /workspace 28 | 29 | ENV TZ=Asia/Shanghai 30 | ENV LANG=en_US.UTF-8 31 | ENV LC_ALL=en_US.UTF-8 32 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sensleak Development", 3 | "build": { 4 | "dockerfile": 
"./Dockerfile", 5 | "args": { 6 | "USERNAME": "rust", // Used for creating "/home/$USERNAME" directory within container 7 | "USER_UID": "1000" 8 | } 9 | }, 10 | "workspaceFolder": "/workspace", 11 | "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind", 12 | "mounts": [], 13 | "remoteUser": "rust", 14 | "customizations": { 15 | "vscode": { 16 | "extensions": [ 17 | "fill-labs.dependi", 18 | "ms-azuretools.vscode-docker", 19 | "saoudrizwan.claude-dev", 20 | "tamasfe.even-better-toml" 21 | ] 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "cargo" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /.github/workflows/base.yml: -------------------------------------------------------------------------------- 1 | # Based on https://github.com/actions-rs/meta/blob/master/recipes/quickstart.md 2 | # 3 | # History: 4 | # 1. 
2023-02-14: Created at 2023-02-14T16:00:00Z by Quanyi Ma 5 | # 6 | # 7 | # 8 | 9 | on: [ push, pull_request ] 10 | 11 | name: Base GitHub Action for Check, Test and Lints 12 | 13 | jobs: 14 | check: 15 | name: Check 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v2 19 | - uses: actions-rs/toolchain@v1 20 | with: 21 | profile: minimal 22 | toolchain: stable 23 | override: true 24 | - uses: actions-rs/cargo@v1 25 | with: 26 | command: check 27 | 28 | test: 29 | name: Test Suite 30 | runs-on: ubuntu-latest 31 | steps: 32 | - uses: actions/checkout@v2 33 | - uses: actions-rs/toolchain@v1 34 | with: 35 | profile: minimal 36 | toolchain: stable 37 | override: true 38 | - uses: actions-rs/cargo@v1 39 | with: 40 | command: test 41 | 42 | clippy: 43 | name: Clippy 44 | runs-on: ubuntu-latest 45 | steps: 46 | - uses: actions/checkout@v2 47 | - uses: actions-rs/toolchain@v1 48 | with: 49 | profile: minimal 50 | toolchain: stable 51 | override: true 52 | - run: rustup component add clippy 53 | - uses: actions-rs/cargo@v1 54 | with: 55 | command: clippy 56 | args: -- -D warnings -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | 13 | # MSVC Windows builds of rustc generate these, which store debugging information 14 | *.pdb 15 | 16 | # Compiled binary file 17 | .gitignore 18 | 19 | log.sh -------------------------------------------------------------------------------- /BUCK: -------------------------------------------------------------------------------- 1 | 
load("@prelude//rust:cargo_package.bzl", "cargo") 2 | 3 | # package definitions 4 | filegroup( 5 | name = "sensleak-0.3.0.crate", 6 | srcs = glob(["src/**/*.rs"]), 7 | ) 8 | 9 | pkg_deps = [ 10 | "//third-party:actix-cors", 11 | "//third-party:actix-web", 12 | "//third-party:assert_cmd", 13 | "//third-party:chrono", 14 | "//third-party:clap", 15 | "//third-party:csv", 16 | "//third-party:env_logger", 17 | "//third-party:git2", 18 | "//third-party:log", 19 | "//third-party:mockito", 20 | "//third-party:postgres", 21 | "//third-party:rayon", 22 | "//third-party:regex", 23 | "//third-party:sea-orm", 24 | "//third-party:serde", 25 | "//third-party:serde_json", 26 | "//third-party:tempfile", 27 | "//third-party:tokio", 28 | "//third-party:toml", 29 | "//third-party:utoipa", 30 | "//third-party:utoipa-swagger-ui", 31 | "//third-party:walkdir", 32 | ] 33 | 34 | # targets 35 | cargo.rust_library( 36 | name = "sensleak", 37 | srcs = [":sensleak-0.3.0.crate"], 38 | crate_root = "sensleak-0.3.0.crate/src/lib.rs", 39 | edition = "2024", 40 | deps = pkg_deps, 41 | visibility = ["PUBLIC"], 42 | ) 43 | 44 | cargo.rust_binary( 45 | name = "api", 46 | srcs = [":sensleak-0.3.0.crate"], 47 | crate_root = "sensleak-0.3.0.crate/src/api.rs", 48 | edition = "2024", 49 | deps = [":sensleak"] + pkg_deps, 50 | visibility = ["PUBLIC"], 51 | ) 52 | 53 | cargo.rust_binary( 54 | name = "scan", 55 | srcs = [":sensleak-0.3.0.crate"], 56 | crate_root = "sensleak-0.3.0.crate/src/main.rs", 57 | edition = "2024", 58 | deps = [":sensleak"] + pkg_deps, 59 | visibility = ["PUBLIC"], 60 | ) 61 | -------------------------------------------------------------------------------- /CODE-OF-CONDUCT.md: -------------------------------------------------------------------------------- 1 | # CODE OF CONDUCT 2 | 3 | This code of conduct outlines the expected behavior of all members of Open Rust Initiative to ensure a safe, productive, and inclusive environment for everyone. 
4 | 5 | All members of Open Rust Initiative, including employees, contractors, interns, volunteers, and anyone else represents the company, are expected to behave in a professional, respectful, considerate, and collaborative manner. Harassment, discrimination, or toxic behavior of any kind will not be tolerated. 6 | 7 | Open Rust Initiative is committed to providing an environment free of harassment and discrimination for everyone, regardless of gender, gender identity and expression, sexual orientation, disability, physical appearance, body size, race, age, or religion. We do not tolerate harassment of participants in any form. Harassment includes offensive comments related to these characteristics, as well as deliberate intimidation, stalking, following, harassing photography or recording, sustained disruption of talks or other events, inappropriate physical contact, and unwelcome sexual attention. 8 | 9 | If you experience or witness unacceptable behavior, see something that makes you feel unsafe, or have concerns about the well-being of a participant, please report it to Eli Ma or Charles Feng immediately. All reports will be handled confidentially. 10 | 11 | We value diverse opinions, skills, and experiences. We strive to build an inclusive environment where everyone feels safe and respected. Together, we can achieve great things. 12 | 13 | THANK YOU FOR YOUR COOPERATION IN ADVANCING OUR COMMITMENT TO INCLUSION AND RESPECT. 14 | 15 | Responsibilities 16 | 17 | All members of Open Rust Initiative are expected to: 18 | 19 | - Treat all people with respect and consideration, valuing a diversity of views and opinions. 20 | • Communicate openly and thoughtfully. 21 | • Avoid personal attacks directed at other participants. 22 | • Be mindful of your surroundings and your fellow participants. Alert Eli Ma if you notice a dangerous situation or someone in distress. 23 | • Respect personal space and property. 
24 | • Refrain from demeaning, discriminatory, or harassing behavior, speech, and imagery. 25 | • Be considerate in your use of space and resources. For example, avoid excessive noise from conversations, laptops, and other electronic devices. Be courteous when taking up shared space such as tables and walkways. 26 | • Follow the instructions of Open Rust Initiative staff and security. 27 | • Avoid using language that reinforces social and cultural structures of domination related to gender, gender identity and expression, sexual orientation, disability, physical appearance, body size, race, age, religion, or other personal characteristics. 28 | 29 | Consequences 30 | 31 | Failure to comply with this Code of Conduct may result in disciplinary action, including removal from Open Rust Initiative spaces and events and prohibition from future participation. 32 | 33 | Contact Information 34 | 35 | If you have questions or concerns about this Code of Conduct, contact Eli Ma or Charles Feng. 36 | 37 | # Enforcement 38 | 39 | Open Rust Initiative prioritizes creating a safe and positive experience for everyone. We do not tolerate harassment or discrimination of any kind. 40 | 41 | We expect participants to follow these rules at all Open Rust Initiative venues and events. Open Rust Initiative staff will enforce this Code of Conduct. 42 | 43 | If a participant engages in harassing or discriminatory behavior, Open Rust Initiative staff will take reasonable action they deem appropriate, including warning the offender, expulsion from an event, or banning them from future events. 44 | 45 | At their discretion, Open Rust Initiative staff may report offenders to local law enforcement. Open Rust Initiative staff may take action against participants for other behaviors that violate this Code of Conduct or negatively impact the safety and inclusion of event participants. 
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTING 2 | 3 | Thank you for your interest in contributing to this project. There are many ways you can contribute, from writing tutorials or blog posts, improving the documentation, submitting bug reports and feature requests, all the way to developing code which can be incorporated into the project. 4 | 5 | As a contributor, you agree to abide by the Code of Conduct enforced in this community. 6 | 7 | ## How to contribute 8 | 9 | Here are some guidelines for contributing to this project: 10 | 11 | 1. Report issues/bugs: If you find any issues or bugs in the project, please report them by creating an issue on the issue tracker. Describe the issue in detail and also mention the steps to reproduce it. The more details you provide, the easier it will be for me to investigate and fix the issue. 12 | 2. Suggest enhancements: If you have an idea to enhance or improve this project, you can suggest it by creating an issue on the issue tracker. Explain your enhancement in detail along with its use cases and benefits. I appreciate well-thought-out enhancement suggestions. 13 | 3. Contribute code: If you want to develop and contribute code, follow these steps: 14 | - Fork the repository and clone it locally. 15 | - Create a new branch for your feature/bugfix. 16 | - Make necessary changes and commit them with proper commit messages. 17 | - Push your changes to your fork and create a pull request. 18 | - I will review your changes and merge the PR if found suitable. Please ensure your code is properly formatted and follows the same style as the existing codebase. 19 | 1. Write tutorials/blog posts: You can contribute by writing tutorials or blog posts to help users get started with this project. Submit your posts on the issue tracker for review and inclusion. 
High quality posts that provide value to users are highly appreciated. 20 | 2. Improve documentation: If you find any gaps in the documentation or think any part can be improved, you can make changes to files in the documentation folder and submit a PR. Ensure the documentation is up-to-date with the latest changes. 21 | 22 | Your contributions are highly appreciated. Feel free to ask any questions if you have any doubts or facing issues while contributing. The more you contribute, the more you will learn and improve your skills. -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sensleak" 3 | version = "0.3.0" 4 | edition = "2024" 5 | 6 | [[bin]] 7 | name = "scan" 8 | path = "src/main.rs" 9 | 10 | [[bin]] 11 | name = "api" 12 | path = "src/api.rs" 13 | 14 | [dependencies] 15 | actix-cors = "0.7" 16 | actix-web = "4.10" 17 | assert_cmd = "2.0" 18 | chrono = "0.4" 19 | clap = { version = "4.5", features = ["derive"] } 20 | csv = "1.3" 21 | env_logger = "0.11" 22 | git2 = "0.20" 23 | log = "0.4" 24 | mockito = "1.7" 25 | postgres = { version = "0.19" } 26 | rayon = "1.10" 27 | regex = "1.11" 28 | sea-orm = { version = "1.1", features = [ 29 | "runtime-tokio-rustls", 30 | "sqlx-postgres", 31 | ] } 32 | serde = { version = "1.0", features = ["derive"] } 33 | serde_json = "1.0" 34 | tempfile = "3.19" 35 | tokio = { version = "1.44", features = ["full"] } 36 | toml = "0.8" 37 | utoipa = { version = "5.3", features = ["actix_extras"] } 38 | utoipa-swagger-ui = { version = "9.0", features = ["actix-web"] } 39 | walkdir = "2.5" 40 | zip = "=2.4.2" # ! 
41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 open-rust-initiative 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 - 2023 Open Rust Initiative 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sensleak - scan git repo secrets 2 | 3 | sensleak is a Rust-based tool that scans Git repositories for sensitive data, specifically targeting sensitive information such as passwords, API keys, certificates, and private keys embedded within code. 4 | 5 | ## Background 6 | 7 | Many developers store sensitive information such as keys and certificates in their code, which poses security risks. 
Therefore, there are commercial services like GitGuardian scanning GitHub and GitLab, as well as open-source components like truffleHog and Gitleaks that support similar functionalities. 8 | 9 | ## Feature 10 | 11 | - **Enhanced Security.** Develop the tool in Rust to ensure improved security and memory safety. 12 | - **Command-line Interface**. Create a user-friendly command-line tool that generates a comprehensive test report. 13 | - **REST API with Access Control**. Enable the tool to run as a service and provide access control through a REST API. Utilize Swagger to generate API documentation. 14 | - **Concurrent Scanning**. Utilize a thread pool to control concurrent scanning of secrets, thereby improving overall efficiency. 15 | - **Batch Processing**. Implement batch processing of files to further optimize the scanning process and enhance efficiency. 16 | 17 | ## Technology 18 | 19 | - Development Language: Rust 20 | - Command-line Interaction: [clap.rs](https://github.com/clap-rs/clap) 21 | - Git Repository Operations: [git2](https://github.com/rust-lang/git2-rs) 22 | - Web Framework: [actix-web](https://actix.rs) 23 | - Auto-generated OpenAPI Documentation: [utoipa](https://github.com/juhaku/utoipa) 24 | 25 | ## Usage 26 | 27 | ### CLI Usage 28 | 29 | Running the tool in the command-line interface (CLI) to perform sensitive data checks. 
30 | 31 | ``` 32 | cargo run --bin scan -- -help 33 | ``` 34 | 35 | ```shell 36 | Usage: scan [OPTIONS] --repo 37 | 38 | Options: 39 | --repo Target repository 40 | --config Config path [default: gitleaks.toml] 41 | --threads Maximum number of threads sensleak spawns [default: 10] 42 | --chunk The number of files processed in each batch [default: 10] 43 | --report Path to write json leaks file 44 | --report-format json, csv, sarif [default: json] 45 | -v, --verbose Show verbose output from scan 46 | --pretty Pretty print json if leaks are present 47 | --commit sha of commit to scan 48 | --commits comma separated list of a commits to scan 49 | --commits-file file of new line separated list of a commits to scan 50 | --commit-since Scan commits more recent than a specific date. Ex: '2006-01-02' or '2023-01-02T15:04:05-0700' format 51 | --commit-until Scan commits older than a specific date. Ex: '2006-01-02' or '2006-10-02T15:04:05-0700' format 52 | --commit-from Commit to start scan from 53 | --commit-to Commit to stop scan 54 | --branch Branch to scan 55 | --uncommitted Run sensleak on uncommitted code 56 | --user Set user to scan [default: ] 57 | --repo-config Load config from target repo. Config file must be ".gitleaks.toml" or "gitleaks.toml" 58 | --debug log debug messages 59 | --disk Clones repo(s) to disk 60 | --to-db Output to database 61 | -h, --help Print help (see more with '--help') 62 | -V, --version Print version 63 | 64 | run 'cargo run --bin api' to get REST API. 65 | Repository: https://github.com/open-rust-initiative/sensleak-rs 66 | 67 | ``` 68 | 69 | Example: 70 | 71 | Test https://github.com/sonichen/Expiry-Reminder-Assistant.git 72 | 73 | ```shell 74 | $ cargo run --bin scan -- --repo="D:/Workplace/Java/project/ExpiryReminderAssistant" -v --pretty 75 | ``` 76 | 77 | ```shell 78 | [INFO][2023-06-05 09:59:59] Clone repo ... 
79 | [ 80 | Leak { 81 | line: " String secret = \"1708b0314f18f420d3fe8128652af43c\"; //自己小程序的SECRET", 82 | line_number: 67, 83 | offender: "secret = \"1708b0314f18f420d3fe8128652af43c\"", 84 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 85 | repo: "ExpiryReminderAssistant", 86 | rule: "Generic API Key", 87 | commit_message: "submit code\n", 88 | author: "sonichen", 89 | email: "1606673007@qq.com", 90 | file: "/backend/src/main/java/com/cyj/controller/login/WXLoginController.java", 91 | date: "2023-05-31 18:09:42 -08:00", 92 | }, 93 | Leak { 94 | line: " businessException.apiResponse = apiResponse;", 95 | line_number: 64, 96 | offender: "apiResponse = apiResponse;", 97 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 98 | repo: "ExpiryReminderAssistant", 99 | rule: "Generic API Key", 100 | commit_message: "submit code\n", 101 | author: "sonichen", 102 | email: "1606673007@qq.com", 103 | file: "/backend/src/main/java/com/cyj/exception/BusinessException.java", 104 | date: "2023-05-31 18:09:42 -08:00", 105 | }, 106 | Leak { 107 | line: "// app_secret:bm92ZWk2WFdoR3RkV3ZiUk5SUnVXUT09", 108 | line_number: 5, 109 | offender: "secret:bm92ZWk2WFdoR3RkV3ZiUk5SUnVXUT09", 110 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 111 | repo: "ExpiryReminderAssistant", 112 | rule: "Generic API Key", 113 | commit_message: "submit code\n", 114 | author: "sonichen", 115 | email: "1606673007@qq.com", 116 | file: "/backend/src/main/java/com/cyj/utils/constants/DevelopConstants.java", 117 | date: "2023-05-31 18:09:42 -08:00", 118 | }, 119 | Leak { 120 | line: " public static final String APP_SECRET=\"bm92ZWk2WFdoR3RkV3ZiUk5SUnVXUT09\";", 121 | line_number: 7, 122 | offender: "SECRET=\"bm92ZWk2WFdoR3RkV3ZiUk5SUnVXUT09\"", 123 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 124 | repo: "ExpiryReminderAssistant", 125 | rule: "Generic API Key", 126 | commit_message: "submit code\n", 127 | author: "sonichen", 128 | email: "1606673007@qq.com", 129 | file: 
"/backend/src/main/java/com/cyj/utils/constants/DevelopConstants.java", 130 | date: "2023-05-31 18:09:42 -08:00", 131 | }, 132 | Leak { 133 | line: "// public static final String APPSECRET = \"94f391d306875101822ffa1b2c3cff09\";", 134 | line_number: 17, 135 | offender: "SECRET = \"94f391d306875101822ffa1b2c3cff09\"", 136 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 137 | repo: "ExpiryReminderAssistant", 138 | rule: "Generic API Key", 139 | commit_message: "submit code\n", 140 | author: "sonichen", 141 | email: "1606673007@qq.com", 142 | file: "/backend/src/main/java/com/cyj/utils/secret/AuthUtil.java", 143 | date: "2023-05-31 18:09:42 -08:00", 144 | }, 145 | Leak { 146 | line: " secret: \"c6e1180dda3eaca49f3d7ed912718e4d\" #小程序密钥", 147 | line_number: 36, 148 | offender: "secret: \"c6e1180dda3eaca49f3d7ed912718e4d\"", 149 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 150 | repo: "ExpiryReminderAssistant", 151 | rule: "Generic API Key", 152 | commit_message: "submit code\n", 153 | author: "sonichen", 154 | email: "1606673007@qq.com", 155 | file: "/backend/src/main/resources/application.yaml", 156 | date: "2023-05-31 18:09:42 -08:00", 157 | }, 158 | Leak { 159 | line: " secret: \"c6e1180dda3eaca49f3d7ed912718e4d\" #小程序密钥", 160 | line_number: 36, 161 | offender: "secret: \"c6e1180dda3eaca49f3d7ed912718e4d\"", 162 | commit: "410eb5a84408d3e63edb4d0975e5516e56f6ea6a", 163 | repo: "ExpiryReminderAssistant", 164 | rule: "Generic API Key", 165 | commit_message: "submit code\n", 166 | author: "sonichen", 167 | email: "1606673007@qq.com", 168 | file: "/backend/target/classes/application.yaml", 169 | date: "2023-05-31 18:09:42 -08:00", 170 | }, 171 | ] 172 | [WARN][2023-06-05 10:00:02]7 leaks detected. 1 commits scanned in 1.2538834s 173 | ``` 174 | 175 | ### API Document 176 | 177 | Run the following code to read the project document. 
178 | 179 | ```shell 180 | cargo run --bin api 181 | ``` 182 | 183 | The API document is located at http://localhost:7000/swagger-ui/#/ 184 | 185 | ### Project Document 186 | 187 | Run the following code to read the project document. 188 | 189 | ```shell 190 | cargo doc --document-private-items --open 191 | ``` 192 | 193 | ### Configuration 194 | 195 | Use the [gitleaks configuration](https://github.com/gitleaks/gitleaks#configuration) in this project. The difference is that in this project, the paths need to start with a "/". 196 | 197 | ```toml 198 | # Title for the gitleaks configuration file. 199 | title = "Gitleaks title" 200 | 201 | # Extend the base (this) configuration. When you extend a configuration 202 | # the base rules take precedence over the extended rules. I.e., if there are 203 | # duplicate rules in both the base configuration and the extended configuration 204 | # the base rules will override the extended rules. 205 | # Another thing to know with extending configurations is you can chain together 206 | # multiple configuration files to a depth of 2. Allowlist arrays are appended 207 | # and can contain duplicates. 208 | # useDefault and path can NOT be used at the same time. Choose one. 209 | [extend] 210 | # useDefault will extend the base configuration with the default gitleaks config: 211 | # https://github.com/zricethezav/gitleaks/blob/master/config/gitleaks.toml 212 | useDefault = true 213 | # or you can supply a path to a configuration. Path is relative to where gitleaks 214 | # was invoked, not the location of the base config. 215 | path = "common_config.toml" 216 | 217 | # An array of tables that contain information that define instructions 218 | # on how to detect secrets 219 | [[rules]] 220 | 221 | # Unique identifier for this rule 222 | id = "awesome-rule-1" 223 | 224 | # Short human readable description of the rule. 225 | description = "awesome rule 1" 226 | 227 | # Golang regular expression used to detect secrets. 
Note Golang's regex engine 228 | # does not support lookaheads. 229 | regex = '''one-go-style-regex-for-this-rule''' 230 | 231 | # Golang regular expression used to match paths. This can be used as a standalone rule or it can be used 232 | # in conjunction with a valid `regex` entry. 233 | path = '''a-file-path-regex''' 234 | 235 | # Array of strings used for metadata and reporting purposes. 236 | tags = ["tag","another tag"] 237 | 238 | # Int used to extract secret from regex match and used as the group that will have 239 | # its entropy checked if `entropy` is set. 240 | secretGroup = 3 241 | 242 | # Float representing the minimum shannon entropy a regex group must have to be considered a secret. 243 | entropy = 3.5 244 | 245 | # Keywords are used for pre-regex check filtering. Rules that contain 246 | # keywords will perform a quick string compare check to make sure the 247 | # keyword(s) are in the content being scanned. Ideally these values should 248 | # either be part of the idenitifer or unique strings specific to the rule's regex 249 | # (introduced in v8.6.0) 250 | keywords = [ 251 | "auth", 252 | "password", 253 | "token", 254 | ] 255 | 256 | # You can include an allowlist table for a single rule to reduce false positives or ignore commits 257 | # with known/rotated secrets 258 | [rules.allowlist] 259 | description = "ignore commit A" 260 | commits = [ "commit-A", "commit-B"] 261 | paths = [ 262 | '''\go\.mod''', 263 | '''\go\.sum''' 264 | ] 265 | # note: (rule) regexTarget defaults to check the _Secret_ in the finding. 266 | # if regexTarget is not specified then _Secret_ will be used. 267 | # Acceptable values for regexTarget are "match" and "line" 268 | regexTarget = "match" 269 | regexes = [ 270 | '''process''', 271 | '''getenv''', 272 | ] 273 | # note: stopwords targets the extracted secret, not the entire regex match 274 | # like 'regexes' does. 
(stopwords introduced in 8.8.0) 275 | stopwords = [ 276 | '''client''', 277 | '''endpoint''', 278 | ] 279 | 280 | 281 | # This is a global allowlist which has a higher order of precedence than rule-specific allowlists. 282 | # If a commit listed in the `commits` field below is encountered then that commit will be skipped and no 283 | # secrets will be detected for said commit. The same logic applies for regexes and paths. 284 | [allowlist] 285 | description = "global allow list" 286 | commits = [ "commit-A", "commit-B", "commit-C"] 287 | paths = [ 288 | '''gitleaks\.toml''', 289 | '''(.*?)(jpg|gif|doc)''' 290 | ] 291 | 292 | # note: (global) regexTarget defaults to check the _Secret_ in the finding. 293 | # if regexTarget is not specified then _Secret_ will be used. 294 | # Acceptable values for regexTarget are "match" and "line" 295 | regexTarget = "match" 296 | 297 | regexes = [ 298 | '''219-09-9999''', 299 | '''078-05-1120''', 300 | '''(9[0-9]{2}|666)-\d{2}-\d{4}''', 301 | ] 302 | # note: stopwords targets the extracted secret, not the entire regex match 303 | # like 'regexes' does. (stopwords introduced in 8.8.0) 304 | stopwords = [ 305 | '''client''', 306 | '''endpoint''', 307 | ] 308 | ``` 309 | 310 | ## Contributing 311 | 312 | The project relies on community contributions and aims to simplify getting started. To use sensleak, clone the repo, install dependencies, and run sensleak. Pick an issue, make changes, and submit a pull request for community review. 313 | 314 | To contribute to rkos, you should: 315 | 316 | - Familiarize yourself with the [Code of Conduct](https://github.com/open-rust-initiative/rkos/blob/main/CODE-OF-CONDUCT.md). sensleak-rs has a strict policy against abusive, unethical, or illegal behavior. 317 | - Review the [Contributing Guidelines](https://github.com/open-rust-initiative/rkos/blob/main/CONTRIBUTING.md). This document outlines the process for submitting bug reports, feature requests, and pull requests to sensleak-rs. 
318 | - Sign the [Developer Certificate of Origin](https://developercertificate.org) (DCO) by adding a `Signed-off-by` line to your commit messages. This certifies that you wrote or have the right to submit the code you are contributing to the project. 319 | - Choose an issue to work on. Issues labeled `good first issue` are suitable for newcomers. You can also look for issues marked `help wanted`. 320 | - Fork the sensleak-rs repository and create a branch for your changes. 321 | - Make your changes and commit them with a clear commit message. 322 | - Push your changes to GitHub and open a pull request. 323 | - Respond to any feedback on your pull request. The sensleak-rs maintainers will review your changes and may request modifications before merging. 324 | - Once your pull request is merged, you will be listed as a contributor in the project repository and documentation. 325 | 326 | To comply with the requirements, contributors must include both a `Signed-off-by` line and a PGP signature in their commit messages. You can find more information about how to generate a PGP key [here](https://docs.github.com/en/github/authenticating-to-github/managing-commit-signature-verification/generating-a-new-gpg-key). 327 | 328 | Git even has a `-s` command line option to append this automatically to your commit message, and `-S` to sign your commit with your PGP key. For example: 329 | 330 | ```shell 331 | $ git commit -S -s -m 'This is my commit message' 332 | ``` 333 | 334 | ## License 335 | 336 | sensleak-rs is licensed under this licensed: 337 | 338 | - MIT LICENSE ( https://opensource.org/licenses/MIT) 339 | 340 | ## References 341 | 342 | 1. [What is Gitleaks and how to use it?](https://akashchandwani.medium.com/what-is-gitleaks-and-how-to-use-it-a05f2fb5b034) 343 | 2. 
[Gitleaks.tools](https://github.com/gitleaks/gitleaks) 344 | -------------------------------------------------------------------------------- /assets/image-20230605104420197.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crates-pro/sensleak-rs/4006679cc127fa47ad1ae6d5ec0a0437c0fa82c1/assets/image-20230605104420197.png -------------------------------------------------------------------------------- /examples/test_gitleaks.toml: -------------------------------------------------------------------------------- 1 | [[rules]] 2 | description = "11111111111" 3 | id = "stripe-access-token" 4 | keywords = ["adafruit"] 5 | regex = '''(?i)(?:adafruit)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)''' 6 | 7 | [[rules]] 8 | description = "Generic API Key" 9 | entropy = 3.5 10 | id = "generic-api-key" 11 | keywords = [ 12 | "key", 13 | "api", 14 | "token", 15 | "secret", 16 | "client", 17 | "passwd", 18 | "password", 19 | "auth", 20 | "access", 21 | ] 22 | regex = '''(?i)(?:key|api|token|secret|client|passwd|password|auth|access)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([0-9a-z\-_.=]{10,150})(?:['|\"|\n|\r|\s|\x60|;]|$)''' 23 | secretGroup = 1 24 | 25 | [[rules]] 26 | description = "Digit" 27 | id = "key" 28 | keywords = ["1212"] 29 | regex = '\d+' 30 | secretGroup = 1 31 | 32 | [rules.allowlist] 33 | regexTarget = "line" 34 | stopwords = ["token"] 35 | 36 | [[rules]] 37 | description = "Adafruit API Key" 38 | id = "adafruit-api-key" 39 | keywords = ["adafruit"] 40 | regex = '''(?i)(?:adafruit)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)''' 41 | 42 | [[rules]] 43 | description = "Adafruit API Key" 44 | id = "adafruit-api-key" 45 | keywords = ["adafruit"] 46 | regex = 
'''(?i)(?:adafruit)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)''' 47 | 48 | [allowlist] 49 | description = "global allow lists" 50 | paths = [ 51 | "(.*?)(jpg|gif|doc|docx|zip|xls|pdf|bin|svg|socket)$", 52 | "gradle.lockfile", 53 | "node_modules", 54 | "package-lock.json", 55 | ] 56 | stopwords = ["token"] 57 | -------------------------------------------------------------------------------- /src/api.rs: -------------------------------------------------------------------------------- 1 | use sensleak::start; 2 | #[tokio::main] 3 | async fn main() -> Result<(), Box> { 4 | println!("The API document is located at http://localhost:7000/swagger-ui/#/"); 5 | start().await?; 6 | Ok(()) 7 | } 8 | 9 | -------------------------------------------------------------------------------- /src/entity/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod models; 2 | -------------------------------------------------------------------------------- /src/entity/models.rs: -------------------------------------------------------------------------------- 1 | use chrono::{DateTime, FixedOffset}; 2 | use clap::Parser; 3 | use serde::{Deserialize, Serialize}; 4 | use utoipa::{ToSchema}; 5 | use sea_orm::{entity::prelude::*, ActiveValue}; 6 | /// Represents the configuration for sensleaks tool. 7 | #[derive(Parser, Debug)] 8 | #[command( 9 | author = "yjchen", 10 | version = "0.3.0", 11 | about = "sensleaks-rs", 12 | long_about = "sensleaks: A tool to detect sensitive information in Git repository", 13 | after_help = "run 'cargo run --bin api' to get REST API.\nRepository: https://github.com/open-rust-initiative/sensleak-rs" 14 | )] 15 | #[derive(Deserialize, Serialize)] 16 | pub struct Config { 17 | /// Target repository. 
18 | #[arg(long)] 19 | pub repo: String, 20 | 21 | /// Config path 22 | #[arg(long, default_value = "gitleaks.toml")] 23 | pub config: String, 24 | 25 | /// Maximum number of threads sensleak spawns 26 | #[arg(long, default_value = "10")] 27 | pub threads: Option, 28 | 29 | /// The number of files processed in each batch 30 | #[arg(long, default_value = "10")] 31 | pub chunk: Option, 32 | 33 | /// Path to write json leaks file. 34 | #[arg(long)] 35 | pub report: Option, 36 | 37 | /// json, csv, sarif 38 | #[arg(long, default_value = "json")] 39 | pub report_format: Option, 40 | 41 | /// Show verbose output from scan. 42 | #[arg(short, long, default_value = "false")] 43 | pub verbose: bool, 44 | 45 | /// Pretty print json if leaks are present. 46 | #[arg(long, default_value = "false")] 47 | pub pretty: bool, 48 | 49 | /// sha of commit to scan 50 | #[arg(long)] 51 | pub commit: Option, 52 | 53 | /// comma separated list of a commits to scan 54 | #[arg(long)] 55 | pub commits: Option, 56 | 57 | /// file of new line separated list of a commits to scan 58 | #[arg(long)] 59 | pub commits_file: Option, 60 | 61 | /// Scan commits more recent than a specific date. Ex: '2006-01-02' or '2023-01-02T15:04:05-0700' format. 62 | #[arg(long)] 63 | pub commit_since: Option, 64 | 65 | /// Scan commits older than a specific date. Ex: '2006-01-02' or '2006-10-02T15:04:05-0700' format. 66 | #[arg(long)] 67 | pub commit_until: Option, 68 | 69 | /// Commit to start scan from 70 | #[arg(long)] 71 | pub commit_from: Option, 72 | 73 | /// Commit to stop scan 74 | #[arg(long)] 75 | pub commit_to: Option, 76 | 77 | /// Branch to scan 78 | #[arg(long)] 79 | pub branch: Option, 80 | 81 | /// Run sensleak on uncommitted code 82 | #[arg(long, default_value = "false")] 83 | // pub uncommitted: bool , 84 | pub uncommitted: bool, 85 | 86 | /// Set user to scan 87 | #[arg(long, default_value = "")] 88 | pub user: Option, 89 | 90 | /// Load config from target repo. 
Config file must be ".gitleaks.toml" or "gitleaks.toml" 91 | #[arg(long)] 92 | pub repo_config: bool, 93 | 94 | /// log debug messages. 95 | #[arg(long, default_value = "false")] 96 | pub debug: bool, 97 | 98 | /// Clones repo(s) to disk. 99 | #[arg(long)] 100 | pub disk: Option, 101 | 102 | /// Output to database 103 | #[arg(long)] 104 | pub to_db: bool, 105 | // /// Start API 106 | // #[arg(long, default_value = "false")] 107 | // pub api: bool, 108 | } 109 | impl Default for Config { 110 | fn default() -> Self { 111 | Config { 112 | repo: String::default(), 113 | config: String::from("gitleaks.toml"), 114 | threads: Some(50), 115 | chunk: Some(10), 116 | report: None, 117 | report_format: Some(String::from("json")), 118 | verbose: false, 119 | pretty: false, 120 | commit: None, 121 | commits: None, 122 | commits_file: None, 123 | commit_since: None, 124 | commit_until: None, 125 | commit_from: None, 126 | commit_to: None, 127 | branch: None, 128 | uncommitted: false, 129 | user: Some("".to_string()), 130 | repo_config: false, 131 | debug: false, 132 | disk: None, 133 | to_db: false, 134 | // api: false, 135 | } 136 | } 137 | } 138 | 139 | /// # An array of tables that contain information that define instructions on how to detect secrets. 140 | #[derive(Debug, Serialize, Clone, Deserialize,ToSchema)] 141 | pub struct Rule { 142 | /// Short human readable description of the rule. 143 | pub description: String, 144 | 145 | /// Unique identifier for this rule. 146 | pub id: String, 147 | 148 | /// Regular expression used to detect secrets. 149 | pub regex: String, 150 | 151 | // /// Float representing the minimum shannon entropy a regex group must have to be considered a secret. 152 | // pub entropy: Option, 153 | 154 | /// Keywords are used for pre-regex check filtering. Rules that contain keywords will perform a quick string compare check to make sure the keyword(s) are in the content being scanned. 
Ideally these values should either be part of the idenitifer or unique strings specific to the rule's regex 155 | pub keywords: Vec, 156 | 157 | /// You can include an allowlist table for a single rule to reduce false positives or ignore commits with known/rotated secrets. 158 | pub allowlist: Option, 159 | } 160 | 161 | impl Rule { 162 | pub fn new() -> Rule { 163 | Rule { 164 | description: String::from("11"), 165 | id: String::from("11"), 166 | regex: String::from("(?i)(?:key|api|token|secret|client|passwd|password|auth|access)"), 167 | // entropy: Some(3.1), 168 | keywords: Vec::new(), 169 | allowlist: None, 170 | } 171 | } 172 | } 173 | 174 | impl Default for Rule { 175 | fn default() -> Self { 176 | Self::new() 177 | } 178 | } 179 | 180 | /// Skip the allowlist 181 | #[derive(Debug, Deserialize, Serialize, Clone,ToSchema)] 182 | pub struct Allowlist { 183 | /// Skip the paths. 184 | pub paths: Vec, 185 | 186 | /// Skip the commits. 187 | pub commits: Vec, 188 | 189 | /// Acceptable values for regexTarget are "match" and "line". 190 | pub regex_target: String, 191 | 192 | /// Skip the secrets that satisfy the regexes. 193 | pub regexes: Vec, 194 | 195 | /// Skip the secrets that contain the stopwords. 
196 | pub stopwords: Vec, 197 | } 198 | impl Allowlist { 199 | pub fn new() -> Allowlist { 200 | Allowlist { 201 | paths: Vec::new(), 202 | commits: Vec::new(), 203 | regex_target: String::from("match"), 204 | regexes: Vec::new(), 205 | stopwords: Vec::new(), 206 | } 207 | } 208 | } 209 | impl Default for Allowlist { 210 | fn default() -> Self { 211 | Self::new() 212 | } 213 | } 214 | 215 | /// Sea-orm Entity 216 | #[derive(Clone, Debug, PartialEq, Eq, DeriveEntityModel)] 217 | #[sea_orm(table_name = "leaks")] 218 | pub struct Model { 219 | #[sea_orm(primary_key)] 220 | pub id: i32, 221 | pub line: String, 222 | pub line_number: u32, 223 | pub offender: String, 224 | pub commit: String, 225 | pub repo: String, 226 | pub rule: String, 227 | pub commit_message: String, 228 | pub author: String, 229 | pub email: String, 230 | pub file: String, 231 | pub date: String, 232 | } 233 | 234 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 235 | pub enum Relation {} 236 | 237 | impl ActiveModelBehavior for ActiveModel {} 238 | 239 | /// Represents an item in the scanned output. 240 | #[derive(Debug, Serialize, Deserialize, Clone, ToSchema)] 241 | pub struct Leak { 242 | /// The line containing the sensitive information. 243 | pub line: String, 244 | 245 | /// The line number where the sensitive information is found. 246 | pub line_number: u32, 247 | 248 | /// The sensitive information detected. 249 | pub offender: String, 250 | 251 | /// The commit info. 252 | pub commit: String, 253 | 254 | /// The repository where the sensitive information is found. 255 | pub repo: String, 256 | 257 | /// The rule used to detect the sensitive information. 258 | pub rule: String, 259 | 260 | /// The commit message associated with the sensitive information. 261 | pub commit_message: String, 262 | 263 | /// The author of the commit. 264 | pub author: String, 265 | 266 | /// The email of the commit author. 
267 | pub email: String, 268 | 269 | /// The file path where the sensitive information is found. 270 | pub file: String, 271 | 272 | /// The date of the commit. 273 | pub date: String, 274 | } 275 | 276 | impl Leak { 277 | pub fn to_active_model(&self) -> ActiveModel { 278 | ActiveModel { 279 | line: ActiveValue::set(self.line.clone()), 280 | line_number: ActiveValue::set(self.line_number), 281 | offender: ActiveValue::set(self.offender.clone()), 282 | commit: ActiveValue::set(self.commit.clone()), 283 | repo: ActiveValue::set(self.repo.clone()), 284 | rule: ActiveValue::set(self.rule.clone()), 285 | commit_message: ActiveValue::set(self.commit_message.clone()), 286 | author: ActiveValue::set(self.author.clone()), 287 | email: ActiveValue::set(self.email.clone()), 288 | file: ActiveValue::set(self.file.clone()), 289 | date: ActiveValue::set(self.date.clone()), 290 | ..Default::default() 291 | } 292 | } 293 | } 294 | 295 | /// The scan condition 296 | #[derive(Debug, Clone)] 297 | pub struct Scan { 298 | /// allow list 299 | pub allowlist: Allowlist, 300 | 301 | /// the rules list 302 | pub ruleslist: Vec, 303 | 304 | pub threads: Option, 305 | pub chunk: Option, 306 | } 307 | impl Scan { 308 | pub fn new() -> Self { 309 | Scan { 310 | allowlist: Allowlist::new(), 311 | ruleslist: Vec::new(), 312 | // keywords:Vec::new(), 313 | threads: Some(10), 314 | chunk: Some(10), 315 | } 316 | } 317 | } 318 | 319 | impl Default for Scan { 320 | fn default() -> Self { 321 | Self::new() 322 | } 323 | } 324 | 325 | /// The commit info 326 | #[derive(Debug, Clone)] 327 | pub struct CommitInfo { 328 | /// repo name 329 | pub repo: String, 330 | 331 | /// commit id 332 | pub commit: git2::Oid, 333 | 334 | /// author name 335 | pub author: String, 336 | 337 | /// the email of author 338 | pub email: String, 339 | 340 | /// commit message 341 | pub commit_message: String, 342 | 343 | /// commit date 344 | pub date: DateTime, 345 | 346 | /// file 347 | pub files: Vec<(String, String)>, 
348 | } 349 | 350 | /// The Results of the project 351 | #[derive(Debug)] 352 | pub struct Results { 353 | /// The number of commits being scanned 354 | pub commits_number: usize, 355 | 356 | /// The leaks 357 | pub outputs: Vec, 358 | } 359 | impl Results { 360 | pub fn new() -> Self { 361 | Results { 362 | commits_number: 0, 363 | outputs: Vec::new(), 364 | } 365 | } 366 | } 367 | impl Default for Results { 368 | fn default() -> Self { 369 | Self::new() 370 | } 371 | } 372 | /// CSV Object 373 | #[derive(Debug, Serialize, Deserialize)] 374 | pub struct CsvResult { 375 | /// The line containing the sensitive information. 376 | pub line: String, 377 | 378 | /// The line number where the sensitive information is found. 379 | pub line_number: u32, 380 | 381 | /// The sensitive information detected. 382 | pub offender: String, 383 | 384 | /// The commit info. 385 | pub commit: String, 386 | 387 | /// The repository where the sensitive information is found. 388 | pub repo: String, 389 | 390 | /// The rule used to detect the sensitive information. 391 | pub rule: String, 392 | 393 | /// The commit message associated with the sensitive information. 394 | pub commit_message: String, 395 | 396 | /// The author of the commit. 397 | pub author: String, 398 | 399 | /// The email of the commit author. 400 | pub email: String, 401 | 402 | /// The file path where the sensitive information is found. 403 | pub file: String, 404 | 405 | /// The date of the commit. 
406 | pub date: String, 407 | } 408 | 409 | /// Config to connect to the database 410 | #[derive(Debug, Default,Serialize, Deserialize)] 411 | pub struct ConnectDbConfig { 412 | /// The host of the database 413 | pub host: String, 414 | /// The user of the database 415 | pub user: String, 416 | /// The password of the database 417 | pub password: String, 418 | /// The name of the database 419 | pub dbname: String, 420 | /// The port of the database 421 | pub port: String, 422 | } 423 | 424 | impl ConnectDbConfig { 425 | /// Translate the config to connection url 426 | pub fn to_connection_url(&self) -> String { 427 | format!( 428 | "postgresql://{}:{}@{}:{}/{}", 429 | self.user, self.password, self.host, self.port, self.dbname 430 | ) 431 | } 432 | pub fn new() -> Self { 433 | ConnectDbConfig { 434 | host: String::from(""), 435 | user: String::from(""), 436 | password: String::from(""), 437 | dbname: String::from(""), 438 | port: String::from(""), 439 | } 440 | } 441 | } 442 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::fmt; 3 | 4 | /// CustomError represents custom errors that can occur in the application. 5 | #[derive(Debug)] 6 | pub enum CustomError { 7 | EmptyFileError, 8 | EmptyConfigFileError, 9 | 10 | FailDeleteDir, 11 | FailCreateDir, 12 | FailLoadRepo, 13 | FailCloneRepo, 14 | InvalidRepoName, 15 | ObjectNotFound, 16 | RepoInternalError, 17 | ObjectNotAccess, 18 | ObjectConvertFail, 19 | AccessWalkerError, 20 | RepoCommitError, 21 | WalkerSortError, 22 | PushWalkerHeadError, 23 | InvalidDateFormat, 24 | InvalidTimeFormat, 25 | InvalidTomlFile, 26 | 27 | ExportCsvError, 28 | ExportSarifError, 29 | ExportJsonError, 30 | } 31 | 32 | impl fmt::Display for CustomError { 33 | /// Formats the error message for display. 
34 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 35 | let error_message = match *self { 36 | CustomError::EmptyFileError => "Empty file", 37 | CustomError::EmptyConfigFileError => "Empty Config file", 38 | 39 | CustomError::ExportCsvError => "Export CSV Error", 40 | CustomError::ExportSarifError => "Export Sarif Error", 41 | CustomError::ExportJsonError => "Export Json Error", 42 | 43 | CustomError::FailDeleteDir => "Failed to delete directory", 44 | CustomError::FailCreateDir => "Failed to create directory", 45 | CustomError::FailLoadRepo => "Failed to load repository", 46 | CustomError::FailCloneRepo => "Failed to clone repository", 47 | CustomError::InvalidRepoName => "Invalid repository name", 48 | CustomError::RepoInternalError => "Internal error within the repository", 49 | CustomError::ObjectNotFound => { 50 | "Failure to find a blob or tree object in the repository" 51 | } 52 | CustomError::ObjectNotAccess => "Failed to access the repository's object database", 53 | CustomError::ObjectConvertFail => "Failed to convert object to commit", 54 | CustomError::AccessWalkerError => "Failure to create or access the revision walker", 55 | CustomError::RepoCommitError => "Failed to find a commit in the repository", 56 | CustomError::WalkerSortError => { 57 | "Failed to set the sorting order of the revision walker" 58 | } 59 | CustomError::PushWalkerHeadError => { 60 | "Failed to push the HEAD reference to the revision walker" 61 | } 62 | CustomError::InvalidDateFormat => "Invalid date format", 63 | CustomError::InvalidTimeFormat => "Invalid time format", 64 | CustomError::InvalidTomlFile => "Invalid TOML file", 65 | }; 66 | write!(f, "{}", error_message) 67 | } 68 | } 69 | 70 | impl Error for CustomError {} 71 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | mod errors; 2 | 3 | mod utils { 4 | pub mod detect_utils; 5 | pub 
mod git_util; 6 | } 7 | 8 | pub mod entity{ 9 | pub mod models; 10 | } 11 | 12 | pub mod service{ 13 | pub mod detect_service; 14 | pub mod git_service; 15 | pub mod db_service; 16 | } 17 | 18 | pub use entity::models; 19 | pub use errors::*; 20 | pub use utils::detect_utils; 21 | pub use utils::git_util; 22 | pub use git_util::*; 23 | pub use models::*; 24 | 25 | use actix_web::{App, HttpServer}; 26 | use actix_cors::Cors; 27 | use utoipa::OpenApi; 28 | use utoipa_swagger_ui::SwaggerUi; 29 | 30 | mod routes{ 31 | pub mod scan; 32 | pub mod rules; 33 | } 34 | pub use routes::scan::*; 35 | pub use routes::rules::*; 36 | 37 | use crate::routes::*; 38 | 39 | pub async fn start() -> Result<(), Box> { 40 | #[derive(OpenApi)] 41 | #[openapi( 42 | paths( 43 | scan::scan_repo, 44 | rules::get_all, 45 | rules::add_rules, 46 | rules::delete_rules_by_id, 47 | rules::update_rules 48 | ), 49 | components( 50 | schemas(ConfigDto,ScanResponse,RulesDto,JsonResponse,Rule,Allowlist,Leak) 51 | ), 52 | 53 | tags( 54 | (name = "scan", description = "Scan Git repositories API"), 55 | (name = "rules", description = "Rules management API"), 56 | ) 57 | )] 58 | struct ApiDoc; 59 | 60 | HttpServer::new(|| { 61 | let cors = Cors::default() 62 | .allow_any_origin() 63 | .allow_any_method() 64 | .allow_any_header(); 65 | 66 | App::new() 67 | .wrap(cors) 68 | .service( 69 | SwaggerUi::new("/swagger-ui/{_:.*}") 70 | .url("/api-docs/openapi.json", ApiDoc::openapi()) 71 | ) 72 | .service(scan_repo) 73 | .service(rules::get_all) 74 | .service(rules::add_rules) 75 | .service(rules::delete_rules_by_id) 76 | .service(rules::update_rules) 77 | }) 78 | .bind("0.0.0.0:7000")? 
79 | .run() 80 | .await?; 81 | 82 | Ok(()) 83 | } 84 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use sensleak::service::detect_service::sensleaks; 2 | 3 | /// The entry of the project 4 | #[tokio::main] 5 | async fn main() { 6 | sensleaks().await; 7 | } 8 | 9 | -------------------------------------------------------------------------------- /src/routes/rules.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::detect_utils::*; 2 | use crate::models::{Allowlist, Rule}; 3 | use actix_web::{post, web, HttpResponse, Responder}; 4 | use serde::{Deserialize, Serialize}; 5 | use utoipa::ToSchema; 6 | 7 | /// Rules Dto 8 | #[derive(Serialize, Deserialize, Debug, ToSchema)] 9 | pub struct RulesDto { 10 | config: String, 11 | rule: Option, 12 | rule_id: Option, 13 | } 14 | 15 | /// The response object 16 | #[derive(Serialize, ToSchema)] 17 | pub struct JsonResponse { 18 | code: usize, 19 | allowlist: Option, 20 | ruleslist: Option>, 21 | message: Option, 22 | } 23 | 24 | /// Load the rules 25 | /// 26 | /// Load the allowlists and ruleslist. 
27 | #[utoipa::path( 28 | post, 29 | path = "/rules/get_all", 30 | request_body = RulesDto, 31 | responses( 32 | (status = 200, description = "success", body = JsonResponse), 33 | (status = 400, description = "fail", body = JsonResponse) 34 | ) 35 | )] 36 | #[post("/rules/get_all")] 37 | pub async fn get_all(body: web::Json) -> impl Responder { 38 | match load_config_file(&body.config) { 39 | Ok(scan) => HttpResponse::Ok().json(JsonResponse { 40 | code: 200, 41 | allowlist: Some(scan.allowlist), 42 | ruleslist: Some(scan.ruleslist), 43 | message: None, 44 | }), 45 | Err(err) => HttpResponse::BadRequest().json(JsonResponse { 46 | code: 400, 47 | message: Some(err.to_string()), 48 | allowlist: None, 49 | ruleslist: None, 50 | }), 51 | } 52 | } 53 | 54 | /// Add rules. 55 | /// 56 | /// Add one single rule. 57 | #[utoipa::path( 58 | post, 59 | path = "/rules/add_rules", 60 | request_body = RulesDto, 61 | responses( 62 | (status = 200, description = "success", body = JsonResponse), 63 | (status = 400, description = "fail", body = JsonResponse) 64 | ) 65 | )] 66 | #[post("/rules/add_rules")] 67 | pub async fn add_rules(body: web::Json) -> impl Responder { 68 | let rule: Rule = match &body.rule { 69 | Some(value) => value.clone(), 70 | None => { 71 | return HttpResponse::BadRequest().json(JsonResponse { 72 | code: 400, 73 | message: Some("It is not a Rule struct".to_string()), 74 | allowlist: None, 75 | ruleslist: None, 76 | }) 77 | } 78 | }; 79 | 80 | match append_rule_to_toml(&rule, &body.config) { 81 | Ok(_) => HttpResponse::Ok().json(JsonResponse { 82 | code: 200, 83 | message: Some("success".to_string()), 84 | allowlist: None, 85 | ruleslist: None, 86 | }), 87 | Err(err) => HttpResponse::BadRequest().json(JsonResponse { 88 | code: 400, 89 | message: Some(err.to_string()), 90 | allowlist: None, 91 | ruleslist: None, 92 | }), 93 | } 94 | } 95 | 96 | /// Delete rules. 97 | /// 98 | /// Delete one rule by id. 
99 | #[utoipa::path( 100 | post, 101 | path = "/rules/delete_rules_by_id", 102 | request_body = RulesDto, 103 | responses( 104 | (status = 200, description = "success", body = JsonResponse), 105 | (status = 400, description = "fail", body = JsonResponse) 106 | ) 107 | )] 108 | #[post("/rules/delete_rules_by_id")] 109 | pub async fn delete_rules_by_id(body: web::Json) -> impl Responder { 110 | let rule_id = match &body.rule_id { 111 | Some(value) => value.clone(), 112 | None => { 113 | return HttpResponse::BadRequest().json(JsonResponse { 114 | code: 400, 115 | message: Some("It is not a rule id".to_string()), 116 | allowlist: None, 117 | ruleslist: None, 118 | }) 119 | } 120 | }; 121 | 122 | match delete_rule_by_id(&body.config, &rule_id) { 123 | Ok(_) => HttpResponse::Ok().json(JsonResponse { 124 | code: 200, 125 | message: Some("success".to_string()), 126 | allowlist: None, 127 | ruleslist: None, 128 | }), 129 | Err(err) => HttpResponse::BadRequest().json(JsonResponse { 130 | code: 400, 131 | message: Some(err.to_string()), 132 | allowlist: None, 133 | ruleslist: None, 134 | }), 135 | } 136 | } 137 | 138 | /// Update rules. 139 | /// 140 | /// Update one rule by id. 
141 | #[utoipa::path( 142 | post, 143 | path = "/rules/update", 144 | request_body = RulesDto, 145 | responses( 146 | (status = 200, description = "success", body = JsonResponse), 147 | (status = 400, description = "fail", body = JsonResponse) 148 | ) 149 | )] 150 | #[post("/rules/update")] 151 | pub async fn update_rules(body: web::Json) -> impl Responder { 152 | let rule_id = match &body.rule_id { 153 | Some(value) => value.clone(), 154 | None => { 155 | return HttpResponse::BadRequest().json(JsonResponse { 156 | code: 400, 157 | message: Some("It is not a rule id".to_string()), 158 | allowlist: None, 159 | ruleslist: None, 160 | }) 161 | } 162 | }; 163 | 164 | let rule: Rule = match &body.rule { 165 | Some(value) => value.clone(), 166 | None => { 167 | return HttpResponse::BadRequest().json(JsonResponse { 168 | code: 400, 169 | message: Some("It is not a Rule struct".to_string()), 170 | allowlist: None, 171 | ruleslist: None, 172 | }) 173 | } 174 | }; 175 | 176 | match update_rule_by_id(&body.config, &rule_id, &rule) { 177 | Ok(_) => HttpResponse::Ok().json(JsonResponse { 178 | code: 200, 179 | message: Some("success".to_string()), 180 | allowlist: None, 181 | ruleslist: None, 182 | }), 183 | Err(err) => HttpResponse::BadRequest().json(JsonResponse { 184 | code: 400, 185 | message: Some(err.to_string()), 186 | allowlist: None, 187 | ruleslist: None, 188 | }), 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /src/routes/scan.rs: -------------------------------------------------------------------------------- 1 | use actix_web::{post, web, HttpResponse, Responder}; 2 | use serde::{Deserialize, Serialize}; 3 | use utoipa::ToSchema; 4 | 5 | use crate::service::detect_service::detect; 6 | use crate::{Config, Leak}; 7 | 8 | /// The scan configuration 9 | #[derive(Deserialize, Serialize, ToSchema)] 10 | pub struct ConfigDto { 11 | /// Target repository. 
12 | pub repo: String, 13 | /// Config path 14 | pub config: String, 15 | /// Maximum number of threads sensleak spawns 16 | pub report: Option, 17 | /// The number of git files processed in each batch 18 | pub report_format: Option, 19 | /// Path to write json leaks file. 20 | pub repo_config: Option, 21 | /// json, csv, sarif 22 | pub threads: Option, 23 | /// Show verbose output from scan. 24 | pub chunk: Option, 25 | /// Pretty print json if leaks are present. 26 | pub commit: Option, 27 | /// comma separated list of a commits to scan 28 | pub commits: Option, 29 | /// file of new line separated list of a commits to scan 30 | pub commits_file: Option, 31 | /// Scan commits more recent than a specific date. Ex: '2006-01-02' or '2023-01-02T15:04:05-0700' format. 32 | pub commit_since: Option, 33 | /// Scan commits older than a specific date. Ex: '2006-01-02' or '2006-10-02T15:04:05-0700' format. 34 | pub commit_until: Option, 35 | 36 | /// Commit to start scan from 37 | pub commit_from: Option, 38 | /// Commit to stop scan 39 | pub commit_to: Option, 40 | /// Branch to scan 41 | pub branch: Option, 42 | /// Run sensleak on uncommitted code 43 | pub uncommitted: Option, 44 | /// Set user to scan 45 | pub user: Option, 46 | 47 | /// Clones repo(s) to disk. 48 | pub disk: Option, 49 | 50 | /// Output to database 51 | pub to_db: bool, 52 | } 53 | 54 | /// The return results of the scan. 55 | #[derive(Deserialize, Serialize, ToSchema)] 56 | pub struct ScanResponse { 57 | /// 200-success, 400-fail 58 | code: usize, 59 | /// the leaks number 60 | leaks_number: Option, 61 | /// the number of scanned commits 62 | commits_number: Option, 63 | /// leaks 64 | leaks: Option>, 65 | /// message 66 | message: Option, 67 | } 68 | 69 | /// Scan the repo. 70 | /// 71 | /// Scan Git repositories for sensitive data. 
72 | #[utoipa::path( 73 | post, 74 | path = "/scan", 75 | request_body = ConfigDto, 76 | responses( 77 | (status = 200, description = "success", body = ScanResponse), 78 | (status = 400, description = "fail", body = ScanResponse) 79 | ) 80 | )] 81 | #[post("/scan")] 82 | pub async fn scan_repo(json_config: web::Json) -> impl Responder { 83 | let mut config: Config = Default::default(); 84 | config.repo = json_config.repo.clone(); 85 | config.config = json_config.config.clone(); 86 | config.report = json_config.report.clone(); 87 | config.threads = json_config.threads; 88 | config.chunk = json_config.chunk; 89 | config.report_format = json_config.report_format.clone(); 90 | config.commit = json_config.commit.clone(); 91 | config.commits = json_config.commits.clone(); 92 | config.commit_from = json_config.commit_from.clone(); 93 | config.commit_to = json_config.commit_to.clone(); 94 | config.commit_since = json_config.commit_since.clone(); 95 | config.commits_file = json_config.commits_file.clone(); 96 | config.branch = json_config.branch.clone(); 97 | config.uncommitted = false; 98 | config.user = json_config.user.clone(); 99 | config.disk = json_config.disk.clone(); 100 | config.repo_config = json_config.repo_config.unwrap_or(false); 101 | config.to_db = json_config.to_db; 102 | 103 | match detect(config).await { 104 | Ok(results) => HttpResponse::Ok().json(ScanResponse { 105 | code: 200, 106 | leaks_number: Some(results.outputs.len()), 107 | commits_number: Some(results.commits_number), 108 | leaks: Some(results.outputs), 109 | message: None, 110 | }), 111 | Err(err) => HttpResponse::BadRequest().json(ScanResponse { 112 | code: 400, 113 | message: Some(err.to_string()), 114 | leaks_number: None, 115 | commits_number: None, 116 | leaks: None, 117 | }), 118 | } 119 | } 120 | 121 | 122 | 123 | #[cfg(test)] 124 | mod tests { 125 | // use super::*; 126 | // use actix_web::{test, web, App}; 127 | // use actix_web::http::StatusCode; 128 | 129 | // #[actix_web::test] 130 
| // async fn test_scan_repo_success() { 131 | // let config = ConfigDto { 132 | // repo: String::from("example/repo"), 133 | // config: String::from("example/config"), 134 | // report: Some(String::from("example/report")), 135 | // report_format: Some(String::from("json")), 136 | // repo_config: Some(true), 137 | // threads: Some(4), 138 | // chunk: Some(10), 139 | // commit: Some(String::from("abcd1234")), 140 | // commits: Some(String::from("commit1,commit2")), 141 | // commits_file: Some(String::from("path/to/file")), 142 | // commit_since: Some(String::from("2023-01-01")), 143 | // commit_until: Some(String::from("2023-01-31")), 144 | // commit_from: Some(String::from("abcd1234")), 145 | // commit_to: Some(String::from("efgh5678")), 146 | // branch: Some(String::from("main")), 147 | // uncommitted: Some(false), 148 | // user: Some(String::from("john")), 149 | // disk: Some(String::from("path/to/disk")), 150 | // to_db: false, 151 | // }; 152 | // 153 | // let app = test::init_service( 154 | // App::new().service(scan_repo) 155 | // ).await; 156 | // 157 | // let req = test::TestRequest::post() 158 | // .uri("/scan") 159 | // .set_json(&config) 160 | // .to_request(); 161 | // 162 | // let resp = test::call_service(&app, req).await; 163 | // assert_eq!(resp.status(), StatusCode::OK); 164 | // 165 | // let body: ScanResponse = test::read_body_json(resp).await; 166 | // assert_eq!(body.code, 200); 167 | // assert_eq!(body.leaks_number, Some(10)); 168 | // assert_eq!(body.commits_number, Some(2)); 169 | // assert_eq!(body.message, None); 170 | // } 171 | } 172 | -------------------------------------------------------------------------------- /src/service/db_service.rs: -------------------------------------------------------------------------------- 1 | use crate::models::{ConnectDbConfig, Entity as Leaks, Leak}; 2 | use chrono::Local; 3 | use sea_orm::*; 4 | use std::env; 5 | 6 | /// Sets up the database connection using the application's configuration settings. 
7 | /// 8 | /// This function attempts to establish a connection to the database using environment variables for the database configuration. 9 | /// It reads configuration values such as host, port, user, password, and database name from environment variables 10 | /// and uses them to construct the database URL. 11 | /// 12 | /// # Returns 13 | /// 14 | /// Returns a `Result`: 15 | /// - `Ok(DatabaseConnection)` if the connection is successfully established. 16 | /// - `Err(DbErr)` if there is an error connecting to the database. 17 | pub async fn set_up_db() -> Result { 18 | let config = get_db_config(); 19 | let db_url = config.to_connection_url(); 20 | let db = Database::connect(&db_url).await?; 21 | Ok(db) 22 | } 23 | 24 | /// Inserts a vector of `Leak` entities into the database and ensures that the `Leaks` table exists. 25 | /// 26 | /// This function first checks if the `Leaks` table exists in the database and creates it if not. 27 | /// Then, it proceeds to insert the provided vector of `Leak` entities into the `Leaks` table. 28 | /// 29 | /// # Arguments 30 | /// 31 | /// * `_leaks` - A reference to a vector of `Leak` entities to be inserted into the database. 32 | /// 33 | /// # Returns 34 | /// 35 | /// Returns a `Result<(), DbErr>` indicating the outcome of the operation: 36 | /// - `Ok(())` if the insertion is successful and the `Leaks` table is either found or successfully created. 37 | /// - `Err(DbErr)` if there is an error during the table check/creation or insertion process. 
38 | pub async fn insert_leaks(_leaks: &[Leak]) -> Result<(), DbErr> { 39 | let db = match set_up_db().await { 40 | Ok(db) => db, 41 | Err(err) => panic!("{}", err), 42 | }; 43 | 44 | // Check if the table Leaks exists and create it if not 45 | let builder = db.get_database_backend(); 46 | let schema = Schema::new(builder); 47 | 48 | let stmt = schema 49 | .create_table_from_entity(Leaks) 50 | .if_not_exists() 51 | .to_owned(); 52 | 53 | let stmt = builder.build(&stmt); 54 | 55 | db.execute(stmt).await?; 56 | 57 | println!( 58 | "\x1b[34m[INFO]\x1b[0m[{}] Create Success ...", 59 | Local::now().format("%Y-%m-%d %H:%M:%S"), 60 | ); 61 | 62 | // Insert leaks 63 | for leak in _leaks.iter() { 64 | let active_model = leak.to_active_model(); 65 | 66 | let insert_result = Leaks::insert(active_model) 67 | .exec(&db) 68 | .await?; 69 | println!("Inserted leak with result: {:?}", insert_result); 70 | } 71 | 72 | println!( 73 | "\x1b[34m[INFO]\x1b[0m[{}] Insert Success ...", 74 | Local::now().format("%Y-%m-%d %H:%M:%S"), 75 | ); 76 | 77 | Ok(()) 78 | } 79 | 80 | /// Retrieves database connection configuration from environment variables. 81 | /// 82 | /// This function constructs a `ConnectDbConfig` struct with database connection details 83 | /// such as host, port, username, password, and database name, reading the values from 84 | /// environment variables. If an environment variable is not set, it defaults to a predefined value. 85 | /// 86 | /// # Returns 87 | /// 88 | /// Returns a `ConnectDbConfig` struct populated with the database connection details. 
89 | fn get_db_config() -> ConnectDbConfig { 90 | ConnectDbConfig { 91 | host: env::var("PG_HOST").unwrap_or("localhost".to_string()), 92 | port: env::var("PG_PORT").unwrap_or("5432".to_string()), 93 | user: env::var("PG_USER").unwrap_or("postgres".to_string()), 94 | password: env::var("PG_PASSWORD").unwrap_or("postgres".to_string()), 95 | dbname: env::var("PG_DBNAME").unwrap_or("postgres".to_string()) 96 | } 97 | } -------------------------------------------------------------------------------- /src/service/detect_service.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::CustomError; 2 | use crate::models::{Allowlist, CommitInfo, Config, Leak, Results, Rule, Scan}; 3 | use crate::service::git_service::*; 4 | use crate::utils::detect_utils::{ 5 | is_commit_in_allowlist, is_contains_strs, is_link, is_path_in_allowlist, is_string_matched, 6 | load_config, remove_duplicates, write_csv_report, write_json_report, write_sarif_report, 7 | }; 8 | use crate::utils::git_util::{clone_or_load_repository, extract_repo_name}; 9 | use crate::service::db_service::insert_leaks; 10 | use chrono::Local; 11 | use clap::Parser; 12 | use git2::Repository; 13 | use rayon::ThreadPoolBuilder; 14 | use regex::Regex; 15 | use std::error::Error; 16 | use std::fs; 17 | use std::sync::{Arc, Mutex}; 18 | use std::time::Instant; 19 | 20 | /// Starts the Git detector application. 21 | pub async fn sensleaks() { 22 | let args = Config::parse(); 23 | 24 | match detect(args).await { 25 | Ok(results) => results, 26 | Err(err) => { 27 | eprintln!("Application: {}", err); 28 | std::process::exit(0); 29 | } 30 | }; 31 | } 32 | 33 | /// Searches for sensitive information in a repository. 34 | /// 35 | /// # Arguments 36 | /// 37 | /// * `config` - A `Config` struct containing the configuration settings for the detection process. 
38 | /// 39 | /// # Returns 40 | /// 41 | /// Returns the detection results as a `Result` containing the scan results or an error. 42 | /// 43 | pub async fn detect(config: Config) -> Result> { 44 | // load repo and record the time of clone repo 45 | let start_clone_repo = Instant::now(); 46 | let repo = clone_or_load_repository(&config)?; 47 | let duration_repo: std::time::Duration = Instant::now().duration_since(start_clone_repo); 48 | 49 | // load scan, which contains allowlist, ruleslist, keywords 50 | let mut scan = load_config(&repo, &config)?; 51 | 52 | // Set threads and chunk in scan 53 | scan.threads = config.threads; 54 | scan.chunk = config.chunk; 55 | 56 | // Record the start time of the scan 57 | let start_scan = Instant::now(); 58 | 59 | // Scan 60 | let results = process_scan(&config, repo, scan)?; 61 | 62 | // To output content in the console. 63 | config_info_after_detect(&config, &results, start_scan, duration_repo).await?; 64 | 65 | Ok(results) 66 | } 67 | 68 | /// Processes the scan based on the provided configuration, repository, and scan settings. 69 | /// 70 | /// # Arguments 71 | /// 72 | /// * `config` - A reference to the `Config` object containing the scan configuration settings. 73 | /// * `repo` - The `Repository` object representing the repository to scan. 74 | /// * `scan` - The `Scan` object containing additional scan settings such as allowlist, ruleslist, and keywords. 75 | /// 76 | /// # Returns 77 | /// 78 | /// Returns the scan results as a `Result` containing the `Results` or an error. 79 | fn process_scan(config: &Config, repo: Repository, scan: Scan) -> Result> { 80 | // Scan the files that have not been submitted. 
81 | if config.uncommitted { 82 | return handle_uncommitted_files(repo, &config.repo, scan); 83 | } 84 | 85 | match ( 86 | &config.commit, 87 | &config.commits, 88 | &config.commits_file, 89 | &config.commit_since, 90 | &config.commit_until, 91 | &config.commit_from, 92 | &config.commit_to, 93 | &config.uncommitted, 94 | &config.user, 95 | &config.branch, 96 | ) { 97 | (Some(commit), _, _, _, _, _, _, _, Some(user), _) => { 98 | handle_single_commit(repo, commit, scan, user) 99 | } 100 | (_, Some(commits), _, _, _, _, _, _, Some(user), _) => { 101 | let commit_ids: Vec<&str> = commits.split(',').collect(); 102 | handle_multiple_commits(repo, &commit_ids, scan, user) 103 | } 104 | (_, _, Some(file_path), _, _, _, _, _, Some(user), _) => { 105 | handle_commits_file(repo, file_path, scan, user) 106 | } 107 | (_, _, _, Some(since), Some(until), _, _, _, Some(user), _) => { 108 | handle_commit_range_by_time(repo, since, until, scan, user) 109 | } 110 | (_, _, _, _, _, Some(commit_from), Some(commit_to), _, Some(user), _) => { 111 | handle_commit_range( 112 | repo, 113 | Some(commit_from.clone()), 114 | Some(commit_to.clone()), 115 | scan, 116 | user, 117 | ) 118 | } 119 | (_, _, _, _, _, _, _, _, Some(_user), Some(branch)) => { 120 | handle_branches_by_name(repo, branch, scan) 121 | } 122 | (_, _, _, _, _, _, _, _, Some(user), _) => handle_all_commits(repo, scan, user), 123 | 124 | _ => handle_all_commits(repo, scan, ""), 125 | } 126 | } 127 | 128 | /// Detects leaks in the provided file contents based on the specified rules and configurations. 129 | /// 130 | /// 131 | /// The function utilizes a thread pool to execute detection operations concurrently, improving performance. 132 | /// Detected leaks are stored in a shared mutable vector wrapped in an `Arc`. 133 | /// 134 | /// # Arguments 135 | /// 136 | /// * `contents` - The contents of the file to be scanned for leaks. 137 | /// * `path` - The path to the file being scanned. 
138 | /// * `ruleslist` - A slice of `Rule` objects representing the rules to be applied during the detection process. 139 | /// * `allowlist` - An `Allowlist` object containing patterns to exclude from the detection process. 140 | /// * `commit_info` - A reference to the `CommitInfo` object containing information about the commit associated with the file. 141 | /// * `threads` - An optional `usize` value specifying the number of threads to use in the thread pool. Default is 50. 142 | /// 143 | /// # Returns 144 | /// 145 | /// Returns a `Result` containing a cloned vector of `Leak` objects representing the detected leaks, or an error. 146 | /// 147 | /// # Errors 148 | /// 149 | /// This function can return an error if there are any issues during the detection process. 150 | /// 151 | pub fn detect_file( 152 | contents: &str, 153 | path: &str, 154 | ruleslist: &[Rule], 155 | allowlist: &Allowlist, 156 | commit_info: &CommitInfo, 157 | threads: Option, 158 | ) -> Result, Box> { 159 | // Create a shared mutable vector to store detection results 160 | let detect_info: Arc>> = Arc::new(Mutex::new(Vec::new())); 161 | 162 | // Create a thread pool with the setting threads 163 | let thread_pool = ThreadPoolBuilder::new() 164 | .num_threads(threads.unwrap_or(50)) 165 | .build() 166 | .unwrap(); 167 | 168 | // Use the thread pool to execute the detection operations 169 | thread_pool.scope(|s| { 170 | for rule in ruleslist { 171 | // Check if the contents contain any keywords from the rule 172 | if is_contains_strs(&rule.keywords, contents) { 173 | let cloned_path = path.to_string(); 174 | let cloned_rule = rule.clone(); 175 | let cloned_contents = contents.to_string(); 176 | let cloned_allowlist = allowlist.clone(); 177 | let cloned_commits = commit_info.commit.to_string(); 178 | let cloned_commit_info = commit_info.clone(); 179 | let detect_info_clone = Arc::clone(&detect_info); 180 | 181 | // Spawn a thread to perform the detection using regex 182 | s.spawn(move |_| { 
183 | let results = detect_by_regex( 184 | &cloned_path, 185 | &cloned_rule, 186 | &cloned_contents, 187 | &cloned_allowlist, 188 | &cloned_commits, 189 | ); 190 | 191 | // Acquire the lock for detection results and update the vector 192 | let mut detect_info = detect_info_clone.lock().unwrap(); 193 | for (line_number, line, matched) in results.iter() { 194 | let output_item = Leak { 195 | line: line.to_string(), 196 | line_number: *line_number as u32, 197 | offender: matched.to_string(), 198 | commit: cloned_commit_info.commit.to_string(), 199 | repo: cloned_commit_info.repo.to_string(), 200 | rule: cloned_rule.description.to_string(), 201 | commit_message: cloned_commit_info.commit_message.to_string(), 202 | author: cloned_commit_info.author.to_string(), 203 | email: cloned_commit_info.email.to_string(), 204 | file: cloned_path.to_string(), 205 | date: cloned_commit_info.date.to_string(), 206 | }; 207 | detect_info.push(output_item); 208 | } 209 | }); 210 | } 211 | } 212 | }); 213 | 214 | // Acquire the lock for detection results and return a clone of the results 215 | let detect_info = detect_info.lock().unwrap(); 216 | Ok(detect_info.clone()) 217 | } 218 | 219 | /// Searches a string for matches of a given regular expression and returns a vector of tuples. 220 | /// 221 | /// # Arguments 222 | /// 223 | /// * `path` - The path to the file being searched. This is used for allowlist checks. 224 | /// * `rules` - A `Rule` object representing the rule to apply during the detection process. It contains the regular expression to match against. 225 | /// * `contents` - A string containing the contents to search for matches. 226 | /// * `allowlist` - An `Allowlist` object containing the allowlist configurations. 227 | /// 228 | /// # Returns 229 | /// 230 | /// A vector of tuples `(usize, &str, &str)`, where each tuple represents a match found in the string. 
231 | /// The first element of the tuple is the line number (1-indexed), the second element is the matched line, and the third element is the matched substring. 232 | /// 233 | fn detect_by_regex<'a>( 234 | path: &str, 235 | rules: &Rule, 236 | contents: &'a str, 237 | allowlist: &Allowlist, 238 | commits: &str, 239 | ) -> Vec<(usize, &'a str, &'a str)> { 240 | // Create a regular expression object. 241 | let regex = Regex::new(&rules.regex).unwrap(); 242 | 243 | // Iterate over the lines in the string. 244 | let results: Vec<(usize, &str, &str)> = contents 245 | .lines() 246 | .enumerate() 247 | .filter_map(|(i, line)| { 248 | // Match the regular expression against each line. 249 | regex 250 | .captures(line) 251 | .and_then(|captures| captures.get(0)) 252 | .map(|matched| (i + 1, line, matched.as_str())) 253 | }) 254 | .collect(); 255 | if results.is_empty() { 256 | return Vec::new(); 257 | } 258 | 259 | // The secrets that should be skipped 260 | let mut filtered_results: Vec<(usize, &str, &str)> = Vec::new(); 261 | 262 | // Handle global allowlist 263 | if allowlist.regex_target == "line" { 264 | for (line_number, line, matched) in &results { 265 | if (allowlist.regexes.is_empty() || allowlist.stopwords.is_empty()) 266 | && (is_string_matched(&allowlist.regexes, line) 267 | || is_contains_strs(&allowlist.stopwords, line)) 268 | { 269 | filtered_results.push((*line_number, line, matched)); 270 | } 271 | } 272 | } else { 273 | for (line_number, line, matched) in &results { 274 | if (allowlist.regexes.is_empty() || allowlist.stopwords.is_empty()) 275 | && (is_string_matched(&allowlist.regexes, matched) 276 | || is_contains_strs(&allowlist.stopwords, matched)) 277 | { 278 | filtered_results.push((*line_number, line, matched)); 279 | } 280 | } 281 | } 282 | 283 | // Handle rules.allowlist 284 | if let Some(rules_allowlist) = &rules.allowlist { 285 | // check commits and paths 286 | if (is_path_in_allowlist(path, &rules_allowlist.paths)) 287 | || 
(is_commit_in_allowlist(commits, &rules_allowlist.commits)) 288 | { 289 | return vec![]; 290 | } 291 | 292 | // check regexes and stopwords 293 | if rules_allowlist.regex_target == "line" { 294 | for (line_number, line, matched) in &results { 295 | if (rules_allowlist.regexes.is_empty() || rules_allowlist.stopwords.is_empty()) 296 | && (is_string_matched(&rules_allowlist.regexes, line) 297 | || is_contains_strs(&rules_allowlist.stopwords, line)) 298 | { 299 | filtered_results.push((*line_number, line, matched)); 300 | } 301 | } 302 | } else { 303 | for (line_number, line, matched) in &results { 304 | if (rules_allowlist.regexes.is_empty() || rules_allowlist.stopwords.is_empty()) 305 | && (is_string_matched(&rules_allowlist.regexes, matched) 306 | || is_contains_strs(&rules_allowlist.stopwords, matched)) 307 | { 308 | filtered_results.push((*line_number, line, matched)); 309 | } 310 | } 311 | } 312 | } 313 | 314 | if filtered_results.is_empty() { 315 | results 316 | } else { 317 | remove_duplicates(results, filtered_results) 318 | } 319 | } 320 | 321 | /// Detects uncommitted files for sensitive information leaks. 322 | /// 323 | /// # Arguments 324 | /// 325 | /// * `contents` - A string slice representing the contents of the file. 326 | /// * `path` - A string slice representing the path of the file. 327 | /// * `ruleslist` - A reference to a slice of `Rule` objects to match against. 328 | /// * `allowlist` - A reference to an `Allowlist` object for paths that should be skipped. 329 | /// 330 | /// # Returns 331 | /// 332 | /// Returns a `Result` containing a vector of `Leak` objects if sensitive information leaks are detected, 333 | /// or an empty vector if no leaks are found. 
334 | pub fn detect_uncommitted_file( 335 | contents: &str, 336 | path: &str, 337 | ruleslist: &[Rule], 338 | allowlist: &Allowlist, 339 | threads: Option, 340 | ) -> Result, Box> { 341 | // Create a shared mutable vector to store detection results 342 | let detect_info: Arc>> = Arc::new(Mutex::new(Vec::new())); 343 | 344 | // Create a thread pool with the setting threads 345 | let thread_pool = ThreadPoolBuilder::new() 346 | .num_threads(threads.unwrap_or(50)) 347 | .build() 348 | .unwrap(); 349 | 350 | // Use the thread pool to execute the detection operations 351 | thread_pool.scope(|s| { 352 | for rule in ruleslist { 353 | // Check if the contents contain any keywords from the rule 354 | if is_contains_strs(&rule.keywords, contents) { 355 | let cloned_path = path.to_string(); 356 | let cloned_rule = rule.clone(); 357 | let cloned_contents = contents.to_string(); 358 | let cloned_allowlist = allowlist.clone(); 359 | let detect_info_clone = Arc::clone(&detect_info); 360 | 361 | // Spawn a thread to perform the detection using regex 362 | s.spawn(move |_| { 363 | let results = detect_by_regex( 364 | &cloned_path, 365 | &cloned_rule, 366 | &cloned_contents, 367 | &cloned_allowlist, 368 | "", 369 | ); 370 | 371 | // Acquire the lock for detection results and update the vector 372 | let mut detect_info = detect_info_clone.lock().unwrap(); 373 | for (line_number, line, matched) in results.iter() { 374 | let output_item = Leak { 375 | line: line.to_string(), 376 | line_number: *line_number as u32, 377 | offender: matched.to_string(), 378 | commit: "".to_string(), 379 | repo: "".to_string(), 380 | rule: cloned_rule.description.to_string(), 381 | commit_message: "".to_string(), 382 | author: "".to_string(), 383 | email: "".to_string(), 384 | file: cloned_path.to_string(), 385 | date: "".to_string(), 386 | }; 387 | detect_info.push(output_item); 388 | } 389 | }); 390 | } 391 | } 392 | }); 393 | 394 | // Acquire the lock for detection results and return a clone of the 
results 395 | let detect_info = detect_info.lock().unwrap(); 396 | Ok(detect_info.clone()) 397 | } 398 | 399 | /// Handles post-detection configuration information and performs actions based on the configuration settings. 400 | /// 401 | /// # Arguments 402 | /// 403 | /// * `config` - A reference to the `Config` object containing the scan configuration settings. 404 | /// * `results` - A reference to the `Results` object containing the detection results. 405 | /// * `start_scan` - The start time of the scan as an `Instant` object. 406 | /// * `duration_repo` - The duration of the repository scanning process as a `std::time::Duration` object. 407 | /// 408 | /// # Returns 409 | /// 410 | /// Returns `Ok(())` if the post-detection actions are performed successfully, or an error of type `Box` if any issues occur. 411 | /// 412 | /// # Errors 413 | /// 414 | /// This function can return an error if there are any issues during the post-detection actions, such as writing reports. 415 | /// 416 | async fn config_info_after_detect( 417 | config: &Config, 418 | results: &Results, 419 | start_scan: Instant, 420 | duration_repo: std::time::Duration, 421 | ) -> Result<(), Box> { 422 | // Calculate the scan duration 423 | let duration_scan = Instant::now().duration_since(start_scan); 424 | 425 | // If the verbose flag is set, print the scan results to the console 426 | if config.verbose { 427 | if config.pretty { 428 | println!("{:#?}", results.outputs); 429 | } else { 430 | println!("{:?}", results.outputs); 431 | } 432 | } 433 | 434 | // If the debug flag is set, print the scan results to the console 435 | if config.debug { 436 | debug_info(duration_repo, duration_scan, results.commits_number); 437 | } 438 | 439 | // Output to database 440 | if config.to_db { 441 | insert_leaks(&results.outputs).await?; 442 | } 443 | 444 | // Write output report 445 | if let Some(report) = &config.report { 446 | if let Some(format) = &config.report_format { 447 | if format == "sarif" { 448 | 
if write_sarif_report(report, &results.outputs).is_err() { 449 | return Err(Box::new(CustomError::ExportSarifError)); 450 | } 451 | } else if format == "csv" { 452 | if write_csv_report(report, &results.outputs).is_err() { 453 | return Err(Box::new(CustomError::ExportCsvError)); 454 | } 455 | } else if write_json_report(report, &results.outputs).is_err() { 456 | return Err(Box::new(CustomError::ExportJsonError)); 457 | } 458 | }; 459 | } 460 | 461 | println!( 462 | "\x1b[38;5;208m[WARN]\x1b[0m[{}]{} leaks detected. {} commits scanned in {:?}", 463 | Local::now().format("%Y-%m-%d %H:%M:%S"), 464 | results.outputs.len(), 465 | results.commits_number, 466 | duration_scan 467 | ); 468 | 469 | match &config.disk { 470 | Some(_disk) => {} 471 | None => { 472 | if is_link(&config.repo) { 473 | let dest = "workplace/"; 474 | let mut repo_path = String::new(); 475 | if let Some(name) = extract_repo_name(&config.repo) { 476 | repo_path = format!("{}{}", dest, name); 477 | } 478 | match fs::remove_dir_all(repo_path) { 479 | Ok(_) => {} 480 | Err(e) => eprintln!("Delete dir fail: {}", e), 481 | } 482 | } 483 | } 484 | }; 485 | Ok(()) 486 | } 487 | 488 | /// Prints debug information. 489 | /// 490 | /// # Arguments 491 | /// 492 | /// * `total_clone_time` - The total time taken for repository cloning, represented as a `Duration` object. 493 | /// * `total_scan_time` - The total time taken for the scan, represented as a `Duration` object. 494 | /// * `commits` - The number of commits. 
495 | fn debug_info( 496 | total_clone_time: std::time::Duration, 497 | total_scan_time: std::time::Duration, 498 | commits: usize, 499 | ) { 500 | let timestamp = Local::now().format("%Y-%m-%dT%H:%M:%S%.3f%:z").to_string(); 501 | println!( 502 | "\x1b[34m[DEBUG]\x1b[0m[{}] -------------------------", 503 | timestamp 504 | ); 505 | println!( 506 | "\x1b[34m[DEBUG]\x1b[0m[{}] | Times and Commit Counts|", 507 | timestamp 508 | ); 509 | println!( 510 | "\x1b[34m[DEBUG]\x1b[0m[{}] -------------------------", 511 | timestamp 512 | ); 513 | println!("totalScanTime: {:?}", total_scan_time); 514 | println!("totalCloneTime: {:?}", total_clone_time); 515 | println!("totalCommits: {}", commits); 516 | } 517 | 518 | #[cfg(test)] 519 | mod tests { 520 | use super::*; 521 | extern crate git2; 522 | 523 | use chrono::DateTime; 524 | // Helper function to create a mock scan 525 | fn create_mock_scan() -> Scan { 526 | let rule = Rule { 527 | description: String::from("Stripe Access Token"), 528 | id: String::from("stripe-access-token"), 529 | regex: String::from(r"(?i)(sk|pk)_(test|live)_[0-9a-z]{10,32}"), 530 | // entropy: Some(0.5), 531 | keywords: vec![ 532 | String::from("sk_test"), 533 | String::from("pk_test"), 534 | String::from("sk_live"), 535 | String::from("pk_live"), 536 | ], 537 | allowlist: None, 538 | }; 539 | let ruleslist: Vec = vec![rule]; 540 | 541 | let allowlist = Allowlist { 542 | paths: vec![], 543 | commits: vec![], 544 | regex_target: String::from("match"), 545 | regexes: vec![], 546 | stopwords: vec![], 547 | }; 548 | 549 | let scan = Scan { 550 | allowlist, 551 | ruleslist, 552 | 553 | threads: Some(50), 554 | chunk: Some(10), 555 | }; 556 | scan 557 | } 558 | 559 | // test detect_file 560 | static PATH: &str = "tests/files/testdir/test.txt"; 561 | #[test] 562 | fn test_detect_file() { 563 | let scan = create_mock_scan(); 564 | let content = "twilio_api_key = SK12345678901234567890123456789012"; 565 | let commit_info = CommitInfo { 566 | repo: 
"example/repo".to_string(), 567 | commit: git2::Oid::from_str("1234567890abcdef1234567890abcdef12345678").unwrap(), 568 | author: "John Doe".to_string(), 569 | email: "johndoe@example.com".to_string(), 570 | commit_message: "Example commit message".to_string(), 571 | date: DateTime::parse_from_rfc3339("2023-05-26T12:34:56+00:00") 572 | .unwrap() 573 | .into(), 574 | files: vec![ 575 | ("/path/to/file1".to_string(), "File 1 contents".to_string()), 576 | ("/path/to/file2".to_string(), "File 2 contents".to_string()), 577 | ], 578 | }; 579 | // Call the detect_file function 580 | let result = detect_file( 581 | PATH, 582 | content, 583 | &scan.ruleslist, 584 | &scan.allowlist, 585 | &commit_info, 586 | scan.threads, 587 | ); 588 | 589 | // Assert that the result is as expected 590 | let output = result.unwrap(); 591 | assert_eq!(output.len(), 0); 592 | } 593 | // test detect_by_regex 594 | 595 | #[test] 596 | fn test_detect_by_regex() { 597 | let rules = Rule { 598 | description: "Digits".to_string(), 599 | id: "key".to_string(), 600 | regex: r"\d+".to_string(), 601 | // entropy: None, 602 | keywords: vec![], 603 | allowlist: None, 604 | }; 605 | let contents = "123\n456\n789\naaaaaxwsd\ntoken=wkwk121"; 606 | let allowlist = Allowlist { 607 | commits: vec![], 608 | paths: vec![], 609 | regex_target: String::new(), 610 | regexes: vec![], 611 | stopwords: vec![], 612 | }; 613 | 614 | let result = detect_by_regex(PATH, &rules, contents, &allowlist, ""); 615 | 616 | assert_eq!(result.len(), 4); 617 | assert_eq!(result[0], (1, "123", "123")); 618 | assert_eq!(result[1], (2, "456", "456")); 619 | assert_eq!(result[2], (3, "789", "789")); 620 | assert_eq!(result[3], (5, "token=wkwk121", "121")); 621 | } 622 | 623 | #[test] 624 | fn test_detect_by_regex_with_rules_allowlist_regex_target_match() { 625 | let rules = Rule { 626 | description: "Digits".to_string(), 627 | id: "key".to_string(), 628 | regex: r"\d+".to_string(), 629 | // entropy: None, 630 | keywords: vec![], 631 | 
allowlist: Some(Allowlist { 632 | commits: vec![], 633 | paths: vec!["tests/files/test90.txt".to_string()], 634 | regex_target: "match".to_string(), 635 | regexes: vec![], 636 | stopwords: vec!["token".to_string()], 637 | }), 638 | }; 639 | let contents = "123\n456\n789\naaaaaxwsd\ntoken=wkwk121"; 640 | let allowlist = Allowlist { 641 | commits: vec![], 642 | paths: vec![], 643 | regex_target: String::new(), 644 | regexes: vec![], 645 | stopwords: vec![], 646 | }; 647 | 648 | let result = detect_by_regex(PATH, &rules, contents, &allowlist, ""); 649 | println!("{:?}", result); 650 | assert_eq!(result.len(), 4); 651 | assert_eq!(result[0], (1, "123", "123")); 652 | assert_eq!(result[1], (2, "456", "456")); 653 | assert_eq!(result[2], (3, "789", "789")); 654 | assert_eq!(result[3], (5, "token=wkwk121", "121")); 655 | } 656 | 657 | #[test] 658 | fn test_detect_by_regex_with_rules_allowlist_regex_target_line() { 659 | let rules = Rule { 660 | description: "Digits".to_string(), 661 | id: "key".to_string(), 662 | regex: r"\d+".to_string(), 663 | // entropy: None, 664 | keywords: vec![], 665 | allowlist: Some(Allowlist { 666 | commits: vec![], 667 | paths: vec!["tests/files/test90.txt".to_string()], 668 | regex_target: "line".to_string(), 669 | regexes: vec![], 670 | stopwords: vec!["token".to_string()], 671 | }), 672 | }; 673 | let contents = "123\n456\n789\naaaaaxwsd\ntoken=wkwk121"; 674 | let allowlist = Allowlist { 675 | commits: vec![], 676 | paths: vec![], 677 | regex_target: String::new(), 678 | regexes: vec![], 679 | stopwords: vec![], 680 | }; 681 | 682 | let result = detect_by_regex(PATH, &rules, contents, &allowlist, ""); 683 | println!("{:?}", result); 684 | assert_eq!(result.len(), 3); 685 | assert_eq!(result[0], (1, "123", "123")); 686 | assert_eq!(result[1], (2, "456", "456")); 687 | assert_eq!(result[2], (3, "789", "789")); 688 | } 689 | 690 | #[test] 691 | fn test_detect_by_regex_with_global_allowlist() { 692 | let rules = Rule { 693 | description: 
"Digits".to_string(), 694 | id: "key".to_string(), 695 | regex: r"\d+".to_string(), 696 | // entropy: None, 697 | keywords: vec![], 698 | allowlist: Some(Allowlist { 699 | commits: vec![], 700 | paths: vec!["tests/files/test90.txt".to_string()], 701 | regex_target: "line".to_string(), 702 | regexes: vec![], 703 | stopwords: vec!["token".to_string()], 704 | }), 705 | }; 706 | let contents = "123\n456\n789\naaaaaxwsd\ntoken=wkwk121\nclient22222\n22"; 707 | let allowlist = Allowlist { 708 | commits: vec![], 709 | paths: vec![], 710 | regex_target: "line".to_string(), 711 | regexes: vec![], 712 | stopwords: vec!["client".to_string()], 713 | }; 714 | 715 | let result = detect_by_regex(PATH, &rules, contents, &allowlist, ""); 716 | assert_eq!(result.len(), 4); 717 | assert_eq!(result[0], (1, "123", "123")); 718 | assert_eq!(result[1], (2, "456", "456")); 719 | assert_eq!(result[2], (3, "789", "789")); 720 | assert_eq!(result[3], (7, "22", "22")); 721 | } 722 | } 723 | -------------------------------------------------------------------------------- /src/service/git_service.rs: -------------------------------------------------------------------------------- 1 | extern crate chrono; 2 | extern crate git2; 3 | use chrono::{DateTime, FixedOffset, TimeZone, Utc}; 4 | 5 | use git2::{BranchType, Repository, StatusOptions}; 6 | use std::sync::{Arc, Mutex}; 7 | use rayon::prelude::*; 8 | 9 | use crate::models::{CommitInfo, Leak, Results, Scan}; 10 | use std::collections::HashSet; 11 | use std::error::Error; 12 | use std::fs; 13 | use std::fs::File; 14 | 15 | use crate::errors::CustomError; 16 | use crate::service::detect_service::{detect_file, detect_uncommitted_file}; 17 | use crate::utils::git_util::{ 18 | config_commit_info, is_valid_date_format, load_all_commits, load_commits_by_conditions, 19 | parse_date_to_datetime, 20 | }; 21 | 22 | use std::io::{BufRead, BufReader, Read}; 23 | 24 | /// Handles a single commit by scanning its content. 
25 | /// 26 | /// # Arguments 27 | /// 28 | /// * `repo` - A `Repository` object representing the Git repository. 29 | /// * `commit_id` - The ID of the commit to handle, provided as a string. 30 | /// * `scan` - A `Scan` object representing the scanning configuration. 31 | /// 32 | /// # Returns 33 | /// 34 | /// A `Result` containing the scanning results (`Results`) if successful, 35 | /// otherwise an error (`Box`). 36 | pub fn handle_single_commit( 37 | repo: Repository, 38 | commit_id: &str, 39 | scan: Scan, 40 | user: &str, 41 | ) -> Result> { 42 | let commit = repo.find_commit(git2::Oid::from_str(commit_id)?)?; 43 | if !user.is_empty() && user != commit.author().name().unwrap_or("") { 44 | return Ok(Results::new()); 45 | } 46 | let commit_info = config_commit_info(&repo, &commit, &scan)?; 47 | let commits_list = vec![commit_info]; 48 | 49 | // Handle the commit information and perform the scan 50 | handle_commit_info(&commits_list, scan) 51 | } 52 | 53 | /// Handles multiple commits by scanning their content. 54 | /// 55 | /// # Arguments 56 | /// 57 | /// * `repo` - A `Repository` object representing the Git repository. 58 | /// * `commit_ids` - An array slice of commit IDs to handle, provided as strings. 59 | /// * `scan` - A `Scan` object representing the scanning configuration. 60 | /// 61 | /// # Returns 62 | /// 63 | /// A `Result` containing the scanning results (`Results`) if successful, 64 | /// otherwise an error (`Box`). 
65 | pub fn handle_multiple_commits( 66 | repo: Repository, 67 | commit_ids: &[&str], 68 | scan: Scan, 69 | user: &str, 70 | ) -> Result> { 71 | let mut commits_list = vec![]; 72 | 73 | // Iterate over each commit ID 74 | for commit_id in commit_ids { 75 | let commit = repo.find_commit(git2::Oid::from_str(commit_id)?)?; 76 | if user.is_empty() || user == commit.author().name().unwrap_or("") { 77 | let commit_info = config_commit_info(&repo, &commit, &scan)?; 78 | commits_list.push(commit_info); 79 | } 80 | } 81 | if commits_list.is_empty() { 82 | return Ok(Results::new()); 83 | } 84 | // Handle the commit information and perform the scan 85 | handle_commit_info(&commits_list, scan) 86 | } 87 | 88 | /// Handles commits from a file by scanning their content. 89 | /// 90 | /// # Arguments 91 | /// 92 | /// * `repo` - A `Repository` object representing the Git repository. 93 | /// * `file_name` - The name of the file containing commit IDs, provided as a string. 94 | /// * `scan` - A `Scan` object representing the scanning configuration. 95 | /// 96 | /// # Returns 97 | /// 98 | /// A `Result` containing the scanning results (`Results`) if successful, 99 | /// otherwise an error (`Box`). 
100 | pub fn handle_commits_file( 101 | repo: Repository, 102 | file_name: &str, 103 | scan: Scan, 104 | user: &str, 105 | ) -> Result> { 106 | // Open the commits file 107 | let file = fs::File::open(file_name).expect("Failed to open commits file"); 108 | let reader = BufReader::new(file); 109 | 110 | let mut commits: Vec = Vec::new(); 111 | 112 | // Read each line from the file, stopping at the first error 113 | for line in reader.lines().map_while(Result::ok) { 114 | commits.push(line); 115 | } 116 | 117 | // Convert commit IDs to a vector of string slices 118 | let commit_ids: Vec<&str> = commits.iter().map(|s| s.as_str()).collect(); 119 | 120 | // Handle multiple commits using the commit IDs and perform the scan 121 | handle_multiple_commits(repo, &commit_ids, scan, user) 122 | } 123 | 124 | /// Handles commits within a specified time range by scanning their content. 125 | /// 126 | /// # Arguments 127 | /// 128 | /// * `repo` - A `Repository` object representing the Git repository. 129 | /// * `since` - The starting time of the commit range, provided as a string. 130 | /// * `until` - The ending time of the commit range, provided as a string. 131 | /// * `scan` - A `Scan` object representing the scanning configuration. 132 | /// 133 | /// # Returns 134 | /// 135 | /// A `Result` containing the scanning results (`Results`) if successful, 136 | /// otherwise an error (`Box`). 
137 | #[allow(deprecated)] 138 | pub fn handle_commit_range_by_time( 139 | repo: Repository, 140 | since: &str, 141 | until: &str, 142 | scan: Scan, 143 | user: &str, 144 | ) -> Result> { 145 | let excluded_commits: Vec = vec![]; 146 | let is_since_rfc3339 = DateTime::parse_from_rfc3339(since).is_ok(); 147 | let is_until_rfc3339 = DateTime::parse_from_rfc3339(until).is_ok(); 148 | 149 | let is_since_date = is_valid_date_format(since); 150 | let is_until_date = is_valid_date_format(until); 151 | 152 | if is_since_date && is_until_date { 153 | // Convert since and until to start_time and end_time 154 | let start_time = match parse_date_to_datetime(since, "start") { 155 | Ok(datetime) => datetime.with_timezone(&FixedOffset::east(0)), 156 | Err(err) => { 157 | return Err(err); 158 | } 159 | }; 160 | 161 | let end_time = match parse_date_to_datetime(until, "until") { 162 | Ok(datetime) => datetime.with_timezone(&FixedOffset::east(0)), 163 | Err(err) => { 164 | return Err(err); 165 | } 166 | }; 167 | 168 | handle_multiple_commits_by_time(&repo, &excluded_commits, start_time, end_time, scan, user) 169 | } else if is_since_rfc3339 && is_until_rfc3339 { 170 | let start_time = DateTime::parse_from_rfc3339(since).unwrap(); 171 | let end_time = DateTime::parse_from_rfc3339(until).unwrap(); 172 | 173 | handle_multiple_commits_by_time(&repo, &excluded_commits, start_time, end_time, scan, user) 174 | } else { 175 | return Err(Box::new(CustomError::InvalidDateFormat)); 176 | } 177 | } 178 | 179 | /// Handles multiple commits within a specified time range by scanning their content. 180 | /// 181 | /// # Arguments 182 | /// 183 | /// * `repo` - A reference to a `Repository` object representing the Git repository. 184 | /// * `excluded_commits` - An array slice of excluded commit IDs, provided as `git2::Oid`. 185 | /// * `start_time` - The starting time of the commit range, provided as `DateTime`. 186 | /// * `end_time` - The ending time of the commit range, provided as `DateTime`. 
187 | /// * `scan` - A `Scan` object representing the scanning configuration. 188 | /// 189 | /// # Returns 190 | /// 191 | /// A `Result` containing the scanning results (`Results`) if successful, 192 | /// otherwise an error (`Box`). 193 | #[allow(deprecated)] 194 | pub fn handle_multiple_commits_by_time( 195 | repo: &Repository, 196 | excluded_commits: &[git2::Oid], 197 | start_time: DateTime, 198 | end_time: DateTime, 199 | scan: Scan, 200 | user: &str, 201 | ) -> Result> { 202 | // Get the head commit 203 | let head = repo.head()?; 204 | let obj = head.peel(git2::ObjectType::Commit)?; 205 | let commit = if let Some(commit) = obj.as_commit() { 206 | commit.clone() 207 | } else { 208 | return Err(Box::new(CustomError::ObjectConvertFail)); 209 | }; 210 | 211 | // Create a revision walker and set sorting options 212 | let mut revwalk = repo.revwalk()?; 213 | revwalk.push(commit.id())?; 214 | revwalk.set_sorting(git2::Sort::TOPOLOGICAL)?; 215 | 216 | let mut commits = Vec::new(); 217 | let excluded_commits: HashSet<_> = excluded_commits.iter().cloned().collect(); 218 | 219 | // Iterate over each commit ID in the revision walker 220 | for commit_id in revwalk { 221 | let oid = commit_id?; 222 | if excluded_commits.contains(&oid) { 223 | continue; // Skip excluded commits 224 | } 225 | 226 | let commit = repo.find_commit(oid)?; 227 | 228 | if user.is_empty() || user == commit.author().name().unwrap_or("") { 229 | // Get the commit's time and convert it to the appropriate time zone 230 | let commit_time = Utc.timestamp(commit.time().seconds(), 0); 231 | let commit_offset = FixedOffset::west(commit.time().offset_minutes() * 60); 232 | let commit_date = commit_offset.from_utc_datetime(&commit_time.naive_utc()); 233 | 234 | // Check if the commit is within the specified time range 235 | if commit_date >= start_time && commit_date <= end_time { 236 | let commit_info = config_commit_info(repo, &commit, &scan)?; 237 | commits.push(commit_info); 238 | } 239 | } 240 | } 241 | 
242 | // Handle the commit information and perform the scan 243 | handle_commit_info(&commits, scan) 244 | } 245 | 246 | /// Handles branches by name, scanning the commits in the matching branches. 247 | /// 248 | /// # Arguments 249 | /// 250 | /// * `repo` - A `Repository` object representing the Git repository. 251 | /// * `branch_name` - The name or partial name of the branches to match. 252 | /// * `scan` - A `Scan` object representing the scanning configuration. 253 | /// 254 | /// # Returns 255 | /// 256 | /// A `Result` containing the scanning results (`Results`) if successful, 257 | /// otherwise an error (`Box`). 258 | pub fn handle_branches_by_name( 259 | repo: Repository, 260 | branch_name: &str, 261 | scan: Scan, 262 | ) -> Result> { 263 | let branches = repo.branches(Some(BranchType::Local))?; 264 | 265 | let mut commits = Vec::new(); 266 | 267 | // Iterate over each branch in the repository 268 | for branch in branches { 269 | let (branch, _) = branch?; 270 | let branch_reference = branch.into_reference(); 271 | let branch_name_str = branch_reference.name().unwrap_or(""); 272 | 273 | // Check if the branch name contains the provided name or partial name 274 | if branch_name_str.contains(branch_name) { 275 | let commit_oid = branch_reference 276 | .target() 277 | .ok_or_else(|| git2::Error::from_str("Failed to get branch commit"))?; 278 | 279 | let commit = repo.find_commit(commit_oid)?; 280 | let commit_info = config_commit_info(&repo, &commit, &scan)?; 281 | 282 | commits.push(commit_info); 283 | } 284 | } 285 | 286 | // Handle the commit information and perform the scan 287 | handle_commit_info(&commits, scan) 288 | } 289 | 290 | /// Handles a commit range, scanning the commits between the specified commit IDs. 291 | /// 292 | /// # Arguments 293 | /// 294 | /// * `repo` - A `Repository` object representing the Git repository. 295 | /// * `commit_from` - An optional string representing the starting commit ID. 
296 | /// * `commit_to` - An optional string representing the ending commit ID. 297 | /// * `scan` - A `Scan` object representing the scanning configuration. 298 | /// 299 | /// # Returns 300 | /// 301 | /// A `Result` containing the scanning results (`Results`) if successful, 302 | /// otherwise an error (`Box`). 303 | pub fn handle_commit_range( 304 | repo: Repository, 305 | commit_from: Option, 306 | commit_to: Option, 307 | scan: Scan, 308 | user: &str, 309 | ) -> Result> { 310 | // Load all commits in the repository 311 | let all_commits = match load_all_commits(&repo) { 312 | Ok(all_commits) => all_commits, 313 | Err(_e) => { 314 | return Err(Box::new(CustomError::ObjectConvertFail)); 315 | } 316 | }; 317 | 318 | // Load the commits within the specified commit range 319 | let results = load_commits_by_conditions(commit_from, commit_to, &all_commits); 320 | let commit_ids: Vec<&str> = results.iter().map(|s| s.as_str()).collect(); 321 | 322 | // Handle multiple commits and perform the scan 323 | handle_multiple_commits(repo, &commit_ids, scan, user) 324 | } 325 | 326 | /// Handles uncommitted files in the repository and performs a scan for potential leaks. 327 | /// 328 | /// # Arguments 329 | /// 330 | /// * `repo` - A `Repository` object representing the repository. 331 | /// * `repo_path` - The path to the repository. 332 | /// * `scan` - A `Scan` object containing the rules, keywords, and allowlist for the scan. 333 | /// 334 | /// # Returns 335 | /// 336 | /// Returns a `Result` containing a `Results` object if the operation is successful, or an error if an error occurs during the process. 337 | /// 338 | /// # Errors 339 | /// 340 | /// This function may return an error if any of the following operations fail: 341 | /// 342 | /// * Opening a file for reading. 343 | /// * Reading the contents of a file. 344 | /// * Detecting uncommitted files using `detect_uncommitted_file` function. 
345 | /// 346 | pub fn handle_uncommitted_files( 347 | repo: Repository, 348 | repo_path: &str, 349 | scan: Scan, 350 | ) -> Result> { 351 | let mut options = StatusOptions::new(); 352 | options.include_untracked(true); 353 | options.include_unmodified(false); 354 | options.exclude_submodules(true); 355 | 356 | let statuses = repo.statuses(Some(&mut options))?; 357 | 358 | let mut uncommitted_files = Vec::new(); 359 | for entry in statuses.iter() { 360 | if let Some(path) = entry.path() { 361 | let ab_path = format!("{}/{}", repo_path, path); 362 | let mut file = File::open(ab_path)?; 363 | let mut contents = String::new(); 364 | file.read_to_string(&mut contents)?; 365 | uncommitted_files.push((path.to_string(), contents)); 366 | } 367 | } 368 | let mut results = Vec::new(); 369 | for (path, content) in uncommitted_files.iter() { 370 | let result = detect_uncommitted_file( 371 | content, 372 | path, 373 | &scan.ruleslist, 374 | &scan.allowlist, 375 | scan.threads, 376 | ); 377 | if let Ok(output) = result { 378 | if !output.is_empty() { 379 | results.push(output); 380 | } 381 | } else if let Err(err) = result { 382 | return Err(err); 383 | } 384 | } 385 | let flattened: Vec = results.into_iter().flatten().collect(); 386 | let returns = Results { 387 | commits_number: 0, 388 | outputs: flattened, 389 | }; 390 | Ok(returns) 391 | } 392 | 393 | /// Handles all commits in the repository and performs a scan for potential leaks. 394 | /// 395 | /// # Arguments 396 | /// 397 | /// * `repo` - A `Repository` object representing the repository. 398 | /// * `scan` - A `Scan` object containing the rules, keywords, and allowlist for the scan. 399 | /// * `user` - A string representing the user performing the scan. 400 | /// 401 | /// # Returns 402 | /// 403 | /// Returns a `Result` containing a `Results` object if the operation is successful, or an error if an error occurs during the process. 
404 | /// 405 | /// # Errors 406 | /// 407 | /// This function may return an error if any of the following operations fail: 408 | /// 409 | /// * Loading all commits in the repository using the `load_all_commits` function. 410 | /// * Handling multiple commits using the `handle_multiple_commits` function. 411 | /// 412 | pub fn handle_all_commits( 413 | repo: Repository, 414 | scan: Scan, 415 | user: &str, 416 | ) -> Result> { 417 | // Load all commits in the repository 418 | let all_commits = match load_all_commits(&repo) { 419 | Ok(all_commits) => all_commits, 420 | Err(_) => { 421 | return Err(Box::new(CustomError::ObjectConvertFail)); 422 | } 423 | }; 424 | let commit_ids: Vec<&str> = all_commits.iter().map(|s| s.as_str()).collect(); 425 | handle_multiple_commits(repo, &commit_ids, scan, user) 426 | } 427 | 428 | /// Handle the commit information by searching for secrets in the commit files. 429 | /// 430 | /// 431 | /// # Arguments 432 | /// 433 | /// * `commit_info_list` - A slice of `CommitInfo` objects representing the commit information. 434 | /// * `scan` - A `Scan` object containing the rules, keywords, and allowlist for secret detection. 435 | /// 436 | /// # Errors 437 | /// 438 | /// This function returns an `Err` variant if any error occurs during the secret detection process. 439 | /// The error type is a boxed `dyn Error`, which allows for returning different types of error objects. 
440 | /// 441 | pub fn handle_commit_info( 442 | commit_info_list: &[CommitInfo], 443 | scan: Scan, 444 | ) -> Result> { 445 | let ruleslist = scan.ruleslist; 446 | let allowlist = scan.allowlist; 447 | let threads = scan.threads; 448 | let chunk=scan.chunk.unwrap_or(10); 449 | let results: Arc>> = Arc::new(Mutex::new(Vec::new())); 450 | 451 | commit_info_list.par_iter().for_each(|commit_info| { 452 | let commit_results: Vec = commit_info 453 | .files 454 | .par_chunks(chunk) 455 | .flat_map(|files_chunk| { 456 | files_chunk 457 | .iter() 458 | .filter_map(|(file, content)| { 459 | match detect_file(content, file, &ruleslist, &allowlist, commit_info, threads) { 460 | Ok(output) => Some(output), 461 | Err(_) => None, 462 | } 463 | }) 464 | .flatten() 465 | .collect::>() 466 | }) 467 | .collect(); 468 | 469 | let mut results = results.lock().unwrap(); 470 | results.extend(commit_results); 471 | }); 472 | 473 | let flattened: Vec = results 474 | .lock() 475 | .unwrap() 476 | .clone(); 477 | 478 | let returns = Results { 479 | commits_number: commit_info_list.len(), 480 | outputs: flattened, 481 | }; 482 | 483 | Ok(returns) 484 | } 485 | 486 | // NOTE: The commented-out function can be tested after specifying the repo file 487 | // #[cfg(test)] 488 | // mod tests { 489 | // use super::*; 490 | // static VALID_PATH: &str = "tests/TestGitOperation"; 491 | 492 | // // Helper function to create a mock repository 493 | // fn create_mock_repository() -> Repository { 494 | // let repo = match load_repository(VALID_PATH) { 495 | // Ok(repo) => repo, 496 | // Err(e) => { 497 | // panic!("Failed to load repository"); 498 | // } 499 | // }; 500 | // repo 501 | // } 502 | 503 | // // Helper function to create a mock scan 504 | // fn create_mock_scan() -> Scan { 505 | // let rule = Rule { 506 | // description: String::from("Stripe Access Token"), 507 | // id: String::from("stripe-access-token"), 508 | // regex: String::from(r"(?i)(sk|pk)_(test|live)_[0-9a-z]{10,32}"), 509 | 510 | 
// keywords: vec![String::from("sk_test"), String::from("pk_test"),String::from("sk_live"), String::from("pk_live")], 511 | // allowlist: None, 512 | // }; 513 | // let ruleslist:Vec=vec![rule]; 514 | 515 | // let keywords = vec![ 516 | // String::from("pk_live"), 517 | // String::from("sk_live"), 518 | // String::from("sk_test"), 519 | // String::from("pk_test"),]; 520 | 521 | // let allowlist = Allowlist { 522 | // paths: vec![], 523 | // commits: vec![ ], 524 | // regex_target: String::from("match"), 525 | // regexes: vec![ ], 526 | // stopwords: vec![], 527 | // }; 528 | 529 | // let scan=Scan{ 530 | // allowlist, 531 | // ruleslist, 532 | // keywords 533 | // }; 534 | // scan 535 | // } 536 | 537 | // // test handle_single_commit 538 | // #[test] 539 | // fn test_handle_single_commit() { 540 | // let repo = create_mock_repository(); 541 | // let scan = create_mock_scan(); 542 | // let result = handle_single_commit(repo, "8bdca802af0514ce29947e20c6be1719974ad866", scan,""); 543 | // assert!(result.is_ok()); 544 | // match result { 545 | // Ok(output_items) => { 546 | // assert_eq!(5, output_items.outputs[0].line_number); 547 | // } 548 | // Err(err) => { 549 | // println!("Error: {}", err); 550 | // assert!(false); 551 | // } 552 | // } 553 | // } 554 | 555 | // // test handle_multiple_commits 556 | // #[test] 557 | // fn test_handle_multiple_commits() { 558 | 559 | // let repo = create_mock_repository(); 560 | // let commit_ids = vec!["8bdca802af0514ce29947e20c6be1719974ad866", "25bc64b31ee8920e1cb1f4ea287b174df5cd9782",]; 561 | // let scan = create_mock_scan(); 562 | // let result = handle_multiple_commits(repo, &commit_ids, scan,""); 563 | 564 | // assert!(result.is_ok()); 565 | // match result { 566 | // Ok(output_items) => { 567 | // assert_eq!(2, output_items.commits_number); 568 | // } 569 | // Err(err) => { 570 | // println!("Error: {}", err); 571 | // assert!(false); 572 | // } 573 | // } 574 | // } 575 | 576 | // // test handle_commits_file 577 | // 
#[test] 578 | // fn test_handle_commits_file() { 579 | 580 | // let repo = create_mock_repository(); 581 | // let file_name = "tests/files/commits.txt"; 582 | // let scan = create_mock_scan(); 583 | 584 | // // Perform the handle_commits_file function 585 | // let result = handle_commits_file(repo , file_name, scan,""); 586 | 587 | // assert!(result.is_ok()); 588 | // match result { 589 | // Ok(output_items) => { 590 | // assert_eq!(2, output_items.commits_number); 591 | // } 592 | // Err(err) => { 593 | // println!("Error: {}", err); 594 | // assert!(false); 595 | // } 596 | // } 597 | // } 598 | 599 | // // test handle_commit_range_by_time 600 | // #[test] 601 | // fn test_handle_commit_range_by_time() { 602 | // let repo = create_mock_repository(); 603 | // let since = "2023-05-20T00:00:00Z"; 604 | // let until = "2023-05-26T00:00:00Z"; 605 | // let scan = create_mock_scan(); 606 | // let result = handle_commit_range_by_time(repo, since, until, scan,""); 607 | 608 | // // Assert the result 609 | // assert!(result.is_ok()); 610 | // match result { 611 | // Ok(output_items) => { 612 | // assert_eq!(8, output_items.commits_number); 613 | // } 614 | // Err(err) => { 615 | // println!("Error: {}", err); 616 | // assert!(false); 617 | // } 618 | // } 619 | // } 620 | 621 | // // test test_handle_branches_by_name 622 | // #[test] 623 | // fn test_handle_branches_by_name() { 624 | // let repo = create_mock_repository(); 625 | // let branch_name = "secret"; 626 | // let scan = create_mock_scan(); 627 | // let result = handle_branches_by_name(repo, branch_name, scan); 628 | // assert!(result.is_ok()); 629 | // match result { 630 | // Ok(output_items) => { 631 | // assert_eq!(1, output_items.commits_number); 632 | // } 633 | // Err(err) => { 634 | // println!("Error: {}", err); 635 | // assert!(false); 636 | // } 637 | // } 638 | // } 639 | 640 | // // rest handle_commit_range 641 | // #[test] 642 | // fn test_handle_commit_range() { 643 | 644 | // let repo = 
create_mock_repository(); 645 | // let commit_from = Some("547b550d3ec4d1f24c12f7a4d4c8c0aaa045bd7b".to_string()); 646 | // let commit_to = Some("42c8c6a9c48bc4d9406750f4d15b0d0cd5ab7597".to_string()); 647 | // let scan = create_mock_scan(); 648 | // let result = handle_commit_range(repo, commit_from, commit_to, scan,""); 649 | 650 | // assert!(result.is_ok()); 651 | // match result { 652 | // Ok(output_items) => { 653 | // assert_eq!(4, output_items.commits_number); 654 | // } 655 | // Err(err) => { 656 | // println!("Error: {}", err); 657 | // assert!(false); 658 | // } 659 | // } 660 | // } 661 | // #[test] 662 | // fn test_handle_all_commits() { 663 | 664 | // let repo = create_mock_repository(); 665 | // let scan = create_mock_scan(); 666 | // let user = "sonichen"; 667 | 668 | // let result = handle_all_commits(repo, scan, user); 669 | // assert!(result.is_ok()); 670 | 671 | // } 672 | // } 673 | -------------------------------------------------------------------------------- /src/service/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod detect_service; 2 | pub mod git_service; 3 | pub mod db_service; -------------------------------------------------------------------------------- /src/utils/detect_utils.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::CustomError; 2 | use crate::models::{Allowlist, Config, CsvResult, Leak, Rule, Scan}; 3 | use csv::Writer; 4 | use git2::Repository; 5 | use regex::Regex; 6 | use serde_json::json; 7 | use std::collections::HashSet; 8 | use std::error::Error; 9 | use std::fs; 10 | use std::fs::{File, OpenOptions}; 11 | use std::io::{Seek, SeekFrom, Write}; 12 | use toml::{to_string_pretty, Value}; 13 | 14 | /// Loads the scan configuration based on the specified repository and configuration settings. 
15 | /// 16 | /// # Arguments 17 | /// 18 | /// * `repo` - A reference to the `Repository` object representing the target repository. 19 | /// * `config` - A reference to the `Config` object containing the scan configuration settings. 20 | /// 21 | /// # Returns 22 | /// 23 | /// Returns a `Result` containing the loaded `Scan` object if successful, or an error of type `Box` if any issues occur. 24 | /// 25 | pub fn load_config(repo: &Repository, config: &Config) -> Result> { 26 | let scan_result = if config.repo_config { 27 | // Load config from target repo. Config file must be ".gitleaks.toml" or "gitleaks.toml" 28 | let content = load_config_content_from_target_repo(repo)?; 29 | match content { 30 | Some(content) => load_config_from_target_repo(&content), 31 | None => { 32 | return Err(Box::new(CustomError::EmptyFileError)); 33 | } 34 | } 35 | } else { 36 | // Specify the search rule file. 37 | load_config_file(&config.config) 38 | }?; 39 | 40 | Ok(scan_result) 41 | } 42 | 43 | /// Loads the content of a configuration file (`.gitleaks.toml` or `gitleaks.toml`) from the target repository. 44 | /// 45 | /// # Arguments 46 | /// 47 | /// * `repo` - A reference to a `Repository` object representing the target repository. 48 | /// 49 | /// # Returns 50 | /// 51 | /// Returns a `Result` containing an `Option` with the content of the configuration file if found, or `None` if the configuration file is not found in any commit. 52 | /// 53 | /// # Errors 54 | /// 55 | /// This function may return an error if any error occurs during the repository traversal or object retrieval. 
56 | /// 57 | fn load_config_content_from_target_repo( 58 | repo: &Repository, 59 | ) -> Result, Box> { 60 | let head_commit = repo.head()?.peel_to_commit()?; 61 | let mut walker = repo.revwalk()?; 62 | walker.push(head_commit.id())?; 63 | 64 | // Iterate over all commits in the repository 65 | for commit_id in walker { 66 | let commit = repo.find_commit(commit_id?)?; 67 | let tree = commit.tree()?; 68 | 69 | // Iterate over all entries in the tree 70 | for entry in tree.iter() { 71 | let file_name = entry.name().unwrap_or(""); 72 | if file_name == ".gitleaks.toml" || file_name == "gitleaks.toml" { 73 | let blob = entry.to_object(repo)?.peel_to_blob()?; 74 | let content = String::from_utf8_lossy(blob.content()); 75 | return Ok(Some(content.into())); 76 | } 77 | } 78 | } 79 | 80 | Ok(None) 81 | } 82 | 83 | /// Loads the configuration file and extracts the allowlist, ruleslist. 84 | /// 85 | /// # Arguments 86 | /// 87 | /// * `config_file_path` - The path to the configuration file. 88 | /// * `repo_file_path` - The path of the repository file. 89 | /// 90 | /// # Returns 91 | /// 92 | /// Returns an `Ok` variant containing a tuple with the extracted allowlist, ruleslist, and keywords. 93 | /// 94 | /// # Errors 95 | /// 96 | /// Returns an `Err` variant if the configuration file cannot be loaded or if there are any errors during parsing. 
97 | /// 98 | pub fn load_config_file(config_file_path: &str) -> Result> { 99 | // Load config file 100 | let toml_str = fs::read_to_string(config_file_path) 101 | .map_err(|_| Box::new(CustomError::EmptyConfigFileError))?; 102 | 103 | // Parse config file 104 | let config_file_content: Value = toml::from_str(&toml_str)?; 105 | 106 | // Config allowlist 107 | let allowlist = config_allowlist(&config_file_content)?; 108 | 109 | // Config ruleslist and keywords 110 | let ruleslist= config_ruleslist_and_keywords(&config_file_content)?; 111 | 112 | let scan = Scan { 113 | allowlist, 114 | ruleslist, 115 | threads: None, 116 | chunk: None, 117 | }; 118 | 119 | Ok(scan) 120 | } 121 | 122 | /// Loads the configuration from the target repository. 123 | /// 124 | /// # Arguments 125 | /// 126 | /// * `toml_str` - A TOML string representing the configuration file from the target repository. 127 | /// 128 | /// # Returns 129 | /// 130 | /// Returns an `Ok` variant containing a tuple with the extracted allowlist, ruleslist, and keywords. 131 | /// 132 | /// # Errors 133 | /// 134 | /// Returns an `Err` variant if there are any errors during parsing or extraction. 135 | /// 136 | fn load_config_from_target_repo(toml_str: &str) -> Result> { 137 | // Load config file 138 | let config_file_content: Value = toml::from_str(toml_str)?; 139 | 140 | // Config allowlist 141 | let allowlist = config_allowlist(&config_file_content)?; 142 | 143 | // Config ruleslist and keywords 144 | let ruleslist= config_ruleslist_and_keywords(&config_file_content)?; 145 | 146 | let scan = Scan { 147 | allowlist, 148 | ruleslist, 149 | threads: None, 150 | chunk: None, 151 | }; 152 | 153 | Ok(scan) 154 | } 155 | 156 | /// Extracts the allowlist from the config file. 157 | /// 158 | /// # Arguments 159 | /// 160 | /// * `config_file_content` - The TOML content of the configuration file. 161 | /// * `repo_file_path` - The path of the repository file. 
162 | /// 163 | /// # Returns 164 | /// 165 | /// Returns an `Ok` variant containing the extracted `Allowlist` object. 166 | /// 167 | fn config_allowlist(config_file_content: &Value) -> Result> { 168 | let mut allowlist = Allowlist { 169 | paths: Vec::new(), 170 | commits: Vec::new(), 171 | regex_target: String::from(""), 172 | regexes: Vec::new(), 173 | stopwords: Vec::new(), 174 | }; 175 | 176 | // Get paths 177 | if let Some(file_list) = config_file_content 178 | .get("allowlist") 179 | .and_then(|v| v.get("paths").and_then(|v| v.as_array())) 180 | { 181 | for path in file_list.iter() { 182 | let path_str = path 183 | .as_str() 184 | .ok_or_else(|| Box::::from(CustomError::InvalidTomlFile))? 185 | .to_string(); 186 | allowlist.paths.push(path_str); 187 | } 188 | } 189 | 190 | // Get commit 191 | if let Some(regex_list) = config_file_content 192 | .get("allowlist") 193 | .and_then(|v| v.get("commits").and_then(|v| v.as_array())) 194 | { 195 | allowlist.commits = regex_list 196 | .iter() 197 | .filter_map(|r| r.as_str()) 198 | .map(|s| s.to_string()) 199 | .collect(); 200 | } 201 | 202 | // Get regex target (default to "match") 203 | if let Some(target) = config_file_content 204 | .get("allowlist") 205 | .and_then(|v| v.get("regexTarget").and_then(|v| v.as_str())) 206 | { 207 | allowlist.regex_target = target.to_string(); 208 | } 209 | 210 | // Get regexes 211 | if let Some(regex_list) = config_file_content 212 | .get("allowlist") 213 | .and_then(|v| v.get("regexes").and_then(|v| v.as_array())) 214 | { 215 | allowlist.regexes = regex_list 216 | .iter() 217 | .filter_map(|r| r.as_str()) 218 | .map(|s| s.to_string()) 219 | .collect(); 220 | } 221 | 222 | // Get stopwords 223 | if let Some(stopwords_list) = config_file_content 224 | .get("allowlist") 225 | .and_then(|v| v.get("stopwords").and_then(|v| v.as_array())) 226 | { 227 | allowlist.stopwords = stopwords_list 228 | .iter() 229 | .filter_map(|r| r.as_str()) 230 | .map(|s| s.to_string()) 231 | .collect(); 232 | 
} 233 | 234 | Ok(allowlist) 235 | } 236 | 237 | /// Extracts the rules list and keywords from the config file. 238 | /// 239 | /// # Arguments 240 | /// 241 | /// * `config_file_content` - The TOML content of the configuration file. 242 | /// * `repo_file_path` - The path of the repository file. 243 | /// 244 | /// # Returns 245 | /// 246 | /// Returns a tuple containing the extracted `ruleslist` and `keywords`. 247 | /// * `ruleslist` - A vector of `Rule` objects representing the rules for detection. 248 | /// * `keywords` - A vector of strings representing the keywords used for detection. 249 | /// 250 | fn config_ruleslist_and_keywords( 251 | config_file_content: &Value, 252 | ) -> Result, Box> { 253 | let mut ruleslist = vec![]; 254 | 255 | let regex_array = config_file_content 256 | .get("rules") 257 | .and_then(|v| v.as_array()) 258 | .ok_or_else(|| Box::::from(CustomError::InvalidTomlFile))?; 259 | 260 | for rule in regex_array { 261 | let description = rule 262 | .get("description") 263 | .and_then(|v| v.as_str().map(|s| s.to_string())) 264 | .ok_or_else(|| Box::::from(CustomError::InvalidTomlFile))?; 265 | let id = rule 266 | .get("id") 267 | .and_then(|v| v.as_str().map(|s| s.to_string())) 268 | .ok_or_else(|| Box::::from(CustomError::InvalidTomlFile))?; 269 | let regex = rule 270 | .get("regex") 271 | .and_then(|v| v.as_str().map(|s| s.to_string())) 272 | .ok_or_else(|| Box::::from(CustomError::InvalidTomlFile))?; 273 | // let entropy: Option = rule.get("entropy").map(|e| e.as_float().unwrap()); 274 | let keywords_array = rule 275 | .get("keywords") 276 | .and_then(|v| v.as_array()) 277 | .ok_or_else(|| Box::::from(CustomError::InvalidTomlFile))?; 278 | 279 | let mut rules_allowlist = Allowlist { 280 | commits: vec![], 281 | paths: vec![], 282 | regex_target: String::new(), 283 | regexes: vec![], 284 | stopwords: vec![], 285 | }; 286 | 287 | if rule.get("allowlist").is_none() { 288 | let rule = Rule { 289 | description, 290 | id, 291 | regex, 292 | 
keywords: keywords_array 293 | .iter() 294 | .map(|kw| kw.as_str().unwrap().to_string()) 295 | .collect(), 296 | allowlist: None, 297 | }; 298 | ruleslist.push(rule); 299 | continue; 300 | } 301 | 302 | if let Some(allowlist_table) = rule.get("allowlist") { 303 | if let Some(commits_array) = allowlist_table.get("commits").and_then(|v| v.as_array()) { 304 | for commit in commits_array { 305 | if let Some(commit_str) = commit.as_str() { 306 | rules_allowlist.commits.push(commit_str.to_string()); 307 | } 308 | } 309 | } 310 | 311 | if let Some(paths_array) = allowlist_table.get("paths").and_then(|v| v.as_array()) { 312 | for path in paths_array { 313 | if let Some(path_str) = path.as_str() { 314 | rules_allowlist.paths.push(path_str.to_string()); 315 | } 316 | } 317 | } 318 | 319 | rules_allowlist.regex_target = allowlist_table 320 | .get("regexTarget") 321 | .and_then(|v| v.as_str()) 322 | .unwrap_or("") 323 | .to_string(); 324 | 325 | if let Some(regexes_array) = allowlist_table.get("regexes").and_then(|v| v.as_array()) { 326 | for regex in regexes_array { 327 | if let Some(regex_str) = regex.as_str() { 328 | rules_allowlist.regexes.push(regex_str.to_string()); 329 | } 330 | } 331 | } 332 | 333 | if let Some(stopwords_array) = 334 | allowlist_table.get("stopwords").and_then(|v| v.as_array()) 335 | { 336 | for stopword in stopwords_array { 337 | if let Some(stopword_str) = stopword.as_str() { 338 | rules_allowlist.stopwords.push(stopword_str.to_string()); 339 | } 340 | } 341 | } 342 | } 343 | 344 | let rule = Rule { 345 | description, 346 | id, 347 | regex, 348 | keywords: keywords_array 349 | .iter() 350 | .map(|kw| kw.as_str().unwrap().to_string()) 351 | .collect(), 352 | allowlist: Some(rules_allowlist), 353 | }; 354 | ruleslist.push(rule); 355 | } 356 | 357 | Ok(ruleslist) 358 | } 359 | 360 | /// Appends a rule to a TOML file. 361 | /// 362 | /// # Arguments 363 | /// 364 | /// * `rule` - A reference to the `Rule` object to be appended to the TOML file. 
365 | /// * `filename` - The name of the TOML file to which the rule should be appended. 366 | /// 367 | /// # Returns 368 | /// 369 | /// Returns `Ok(())` if the rule is successfully appended to the TOML file, or an error of type `Box` 370 | /// if any issues occur. 371 | /// 372 | /// # Errors 373 | /// 374 | /// This function can return an error if there are any issues during the file operations, such as opening the file, 375 | /// moving the file pointer, or writing the rule contents. 376 | /// 377 | pub fn append_rule_to_toml(rule: &Rule, filename: &str) -> Result<(), Box> { 378 | // Open the file with read, write, and append options 379 | let mut file = OpenOptions::new() 380 | .read(true) 381 | .append(true) 382 | .open(filename)?; 383 | 384 | // Move the file pointer to the end of the file 385 | file.seek(SeekFrom::End(0))?; 386 | 387 | // Write the start marker for a new [[rules]] section 388 | file.write_all(b"[[rules]]\n")?; 389 | 390 | // Serialize the Rule struct to a TOML string 391 | let toml_string = toml::to_string(rule)?; 392 | 393 | // Write the contents of the Rule 394 | file.write_all(toml_string.as_bytes())?; 395 | 396 | // Write a newline character to separate different [[rules]] 397 | file.write_all(b"\n")?; 398 | 399 | Ok(()) 400 | } 401 | 402 | /// Deletes a rule with the specified ID from a TOML file. 403 | /// 404 | /// # Arguments 405 | /// 406 | /// * `file_path` - A string slice representing the path to the TOML file. 407 | /// * `rule_id` - A string slice representing the ID of the rule to be deleted. 408 | /// 409 | /// # Returns 410 | /// 411 | /// Returns `Ok(())` if the rule with the specified ID is successfully deleted from the TOML file, or an error of 412 | /// type `Box` if any issues occur. 
413 | /// 414 | /// # Errors 415 | /// 416 | /// This function can return an error if there are any issues during the file operations, such as reading the file, 417 | /// parsing the TOML content, modifying the data, or writing the modified TOML to the file. 418 | /// 419 | pub fn delete_rule_by_id(file_path: &str, rule_id: &str) -> Result<(), Box> { 420 | // Read the content of the TOML file 421 | let toml_content = fs::read_to_string(file_path)?; 422 | 423 | // Parse the TOML content 424 | let mut toml_data: Value = toml::from_str(&toml_content)?; 425 | 426 | // Delete rules with the specified id 427 | if let Some(rules) = toml_data.get_mut("rules") { 428 | if let Some(rules_array) = rules.as_array_mut() { 429 | rules_array.retain(|rule| { 430 | if let Some(id) = rule.get("id") { 431 | // Delete the rule based on the id 432 | let rule_id_value = id.as_str().unwrap(); 433 | rule_id_value != rule_id 434 | } else { 435 | true 436 | } 437 | }); 438 | } 439 | } 440 | 441 | // Convert the modified TOML data back to a string 442 | let modified_toml = to_string_pretty(&toml_data)?; 443 | 444 | // Write the modified TOML to the file 445 | fs::write(file_path, modified_toml)?; 446 | 447 | Ok(()) 448 | } 449 | 450 | 451 | /// Updates a rule with the specified ID in a TOML file. 452 | /// 453 | /// # Arguments 454 | /// 455 | /// * `file_path` - A string slice representing the path to the TOML file. 456 | /// * `rule_id` - A string slice representing the ID of the rule to be updated. 457 | /// * `new_rule` - A reference to the updated `Rule` object. 458 | /// 459 | /// # Returns 460 | /// 461 | /// Returns `Ok(())` if the rule with the specified ID is successfully updated in the TOML file, or an error of 462 | /// type `Box` if any issues occur. 
463 | /// 464 | pub fn update_rule_by_id(file_path: &str, rule_id: &str, new_rule: &Rule) -> Result<(), Box> { 465 | 466 | let toml_content = fs::read_to_string(file_path)?; 467 | 468 | let mut toml_data: toml::Value = toml::from_str(&toml_content)?; 469 | 470 | // Update rules with the specified ID 471 | if let Some(rules) = toml_data.get_mut("rules") { 472 | if let Some(rules_array) = rules.as_array_mut() { 473 | for rule in rules_array.iter_mut() { 474 | if let Some(id) = rule.get("id") { 475 | let rule_id_value = id.as_str().unwrap(); 476 | if rule_id_value == rule_id { 477 | // Update the rule with the new values 478 | *rule = toml::value::Value::try_from(new_rule)?; 479 | break; 480 | } 481 | } 482 | } 483 | } 484 | } 485 | 486 | 487 | let modified_toml = toml::to_string_pretty(&toml_data)?; 488 | 489 | 490 | fs::write(file_path, modified_toml)?; 491 | 492 | Ok(()) 493 | } 494 | 495 | 496 | 497 | 498 | /// Writes a JSON report with the provided `Leak` results to the specified file path. 499 | /// 500 | /// # Arguments 501 | /// 502 | /// * `file_path` - The file path where the JSON report will be written. 503 | /// * `results` - A slice containing the `Leak` results to be included in the report. 504 | /// 505 | /// # Returns 506 | /// 507 | /// * `Result<(), Box>` - Returns `Ok(())` if the JSON report is written successfully, 508 | /// or an `Err` variant containing the error information. 509 | /// 510 | pub fn write_json_report(file_path: &str, results: &[Leak]) -> Result<(), Box> { 511 | let json_result = serde_json::to_string_pretty(results)?; 512 | let mut file = File::create(file_path)?; 513 | file.write_all(json_result.as_bytes())?; 514 | Ok(()) 515 | } 516 | 517 | /// Writes a SARIF report with the provided `Leak` results to the specified file path. 518 | /// 519 | /// # Arguments 520 | /// 521 | /// * `file_path` - The file path where the SARIF report will be written. 
522 | /// * `results` - A slice containing the `Leak` results to be included in the report. 523 | /// 524 | /// # Returns 525 | /// 526 | /// * `Result<(), Box>` - Returns `Ok(())` if the SARIF report is written successfully, 527 | /// or an `Err` variant containing the error information. 528 | /// 529 | pub fn write_sarif_report(file_path: &str, results: &[Leak]) -> Result<(), Box> { 530 | let sarif_result = convert_to_sarif(results)?; 531 | let mut file = File::create(file_path)?; 532 | file.write_all(sarif_result.as_bytes())?; 533 | Ok(()) 534 | } 535 | 536 | /// Converts the provided `Leak` results into a SARIF JSON string. 537 | /// 538 | /// # Arguments 539 | /// 540 | /// * `results` - A slice containing the `Leak` results to be converted. 541 | /// 542 | /// # Returns 543 | /// 544 | /// * `Result` - Returns a `String` containing the SARIF JSON if the conversion is 545 | /// successful, or an `Error` if the conversion fails. 546 | /// 547 | fn convert_to_sarif(results: &[Leak]) -> Result { 548 | let mut run_results = vec![]; 549 | for result in results { 550 | let location = json!({ 551 | "physicalLocation": { 552 | "artifactLocation": { 553 | "uri": result.file 554 | }, 555 | "region": { 556 | "startLine": result.line_number, 557 | "snippet": { 558 | "text": result.line 559 | } 560 | } 561 | } 562 | }); 563 | 564 | let run_result = json!({ 565 | "message": { 566 | "text": format!("{} {}", result.rule,"detected!") 567 | }, 568 | "properties": { 569 | "commit": result.commit, 570 | "offender": result.offender, 571 | "date": result.date, 572 | "author": result.author, 573 | "email": result.email, 574 | "commitMessage": result.commit_message, 575 | 576 | "repo": result.repo 577 | }, 578 | "locations": [location] 579 | }); 580 | 581 | run_results.push(run_result); 582 | } 583 | 584 | let sarif_json = json!({ 585 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 586 | "version": "2.1.0", 587 | "runs": [ 588 | { 589 | 
"tool": { 590 | "driver": { 591 | "name": "Gitleaks", 592 | "semanticVersion": "v6.2.0", 593 | "rules": [] 594 | } 595 | }, 596 | "results": run_results 597 | } 598 | ] 599 | }); 600 | 601 | serde_json::to_string_pretty(&sarif_json) 602 | } 603 | 604 | /// Writes a CSV report with the provided results to the specified file path. 605 | /// 606 | /// # Arguments 607 | /// 608 | /// * `file_path` - The file path where the CSV report will be written. 609 | /// * `results` - A slice containing the `Leak` results to be written to the CSV. 610 | /// 611 | /// # Returns 612 | /// 613 | /// * `Result<(), Box>` - Returns `Ok(())` if the CSV report is written successfully, 614 | /// or an `Err` variant containing the error information. 615 | pub fn write_csv_report(file_path: &str, results: &[Leak]) -> Result<(), Box> { 616 | let mut data: Vec = vec![]; 617 | for leak in results { 618 | let item = CsvResult { 619 | repo: leak.repo.clone(), 620 | line_number: leak.line_number, 621 | line: leak.line.clone(), 622 | offender: leak.offender.clone(), 623 | commit: leak.commit.clone(), 624 | rule: leak.rule.clone(), 625 | commit_message: leak.commit_message.clone(), 626 | author: leak.author.clone(), 627 | email: leak.email.clone(), 628 | file: leak.file.clone(), 629 | date: leak.date.clone(), 630 | }; 631 | data.push(item); 632 | } 633 | let file = File::create(file_path)?; 634 | let mut writer = Writer::from_writer(file); 635 | for item in data { 636 | writer.serialize(item)?; 637 | } 638 | writer.flush()?; 639 | 640 | Ok(()) 641 | } 642 | 643 | /// Check if the provided `path` is in the allowlist of paths. 644 | /// 645 | /// 646 | /// # Arguments 647 | /// 648 | /// * `path` - The path to check against the allowlist paths. 649 | /// * `allowlist_paths` - A slice of strings representing the allowlist paths. 650 | /// 651 | /// # Returns 652 | /// 653 | /// Returns `true` if the `path` is found in the allowlist paths, otherwise `false`. 
654 | /// 655 | pub fn is_path_in_allowlist(path: &str, allowlist_paths: &[String]) -> bool { 656 | for allowlist_path in allowlist_paths { 657 | if is_regex(allowlist_path) { 658 | let allowlist_regex = Regex::new(allowlist_path).unwrap(); 659 | if allowlist_regex.is_match(path) { 660 | return true; 661 | } 662 | } else { 663 | for allowlist_path in allowlist_paths { 664 | if allowlist_path == path { 665 | return true; 666 | } 667 | } 668 | } 669 | } 670 | false 671 | } 672 | 673 | /// Checks if a commit is present in the allowlist of commits. 674 | /// 675 | /// # Arguments 676 | /// 677 | /// * `commit` - The commit to check. 678 | /// * `allow_commits` - A slice containing the allowlist of commits. 679 | /// 680 | /// # Returns 681 | /// 682 | /// * `bool` - Returns `true` if the commit is found in the allowlist, otherwise `false`. 683 | /// 684 | pub fn is_commit_in_allowlist(commit: &str, allow_commits: &[String]) -> bool { 685 | for allowlist_commit in allow_commits { 686 | if commit == allowlist_commit { 687 | return true; 688 | } 689 | } 690 | false 691 | } 692 | 693 | /// Check if the provided `test_string` matches any of the regular expressions in the `regex_array`. 694 | /// 695 | /// # Arguments 696 | /// 697 | /// * `regex_array` - A vector of regular expression strings to check against the `test_string`. 698 | /// * `test_string` - The string to test against the regular expressions in `regex_array`. 699 | /// 700 | /// # Returns 701 | /// 702 | /// Returns `true` if the `test_string` matches any of the regular expressions in `regex_array`, otherwise `false`. 703 | /// 704 | pub fn is_string_matched(regex_array: &[String], test_string: &str) -> bool { 705 | for regex_str in regex_array.iter() { 706 | let regex = Regex::new(regex_str).unwrap(); 707 | if regex.is_match(test_string) { 708 | return true; 709 | } 710 | } 711 | false 712 | } 713 | 714 | /// Check if the provided `content` contains any of the strings in the given `array`. 
It is used to find stopswords. 715 | /// 716 | /// # Arguments 717 | /// 718 | /// * `array` - A vector of strings to check against the `content`. 719 | /// * `content` - The string to check for the presence of any of the strings in `array`. 720 | /// 721 | /// # Returns 722 | /// 723 | /// Returns `true` if any of the strings in `array` is found in the `content`, otherwise `false`. 724 | /// 725 | pub fn is_contains_strs(array: &[String], content: &str) -> bool { 726 | for item in array.iter() { 727 | if content.contains(item) { 728 | return true; 729 | } 730 | } 731 | false 732 | } 733 | 734 | /// Checks if a given text is a link. 735 | /// 736 | /// # Arguments 737 | /// 738 | /// * `text` - The text to check for links. 739 | /// 740 | /// # Returns 741 | /// 742 | /// * `bool` - Returns `true` if the text contains a link, otherwise `false`. 743 | /// 744 | pub fn is_link(text: &str) -> bool { 745 | let re = Regex::new(r"(?i)\b((?:https?://|www\.)\S+)\b").unwrap(); 746 | re.is_match(text) 747 | } 748 | 749 | /// Check if the given string is a regular expression. 750 | /// 751 | /// 752 | /// # Arguments 753 | /// 754 | /// * `s` - The string to check for regular expression syntax. 755 | /// 756 | /// # Returns 757 | /// 758 | /// Returns `true` if the string is a regular expression, otherwise `false`. 759 | /// 760 | fn is_regex(s: &str) -> bool { 761 | //TODO: Improve regular expression check 762 | s.starts_with('(') && s.ends_with('$')&&!s.starts_with('/') 763 | 764 | } 765 | 766 | /// Removes duplicates from `array1` based on the elements in `array2`. 767 | /// 768 | /// # Arguments 769 | /// 770 | /// * `array1` - The first vector containing elements to remove duplicates from. 771 | /// * `array2` - The second vector used to determine the duplicates. 772 | /// 773 | /// # Type Constraints 774 | /// 775 | /// `T` must implement the `Eq`, `std::hash::Hash`, and `Clone` traits. 
776 | /// 777 | /// # Returns 778 | /// 779 | /// Returns a new vector that contains the elements from `array1` without the duplicates 780 | /// that are present in `array2`. 781 | /// 782 | pub fn remove_duplicates( 783 | array1: Vec, 784 | array2: Vec, 785 | ) -> Vec { 786 | let set: HashSet<_> = array2.into_iter().collect(); 787 | array1.into_iter().filter(|x| !set.contains(x)).collect() 788 | } 789 | 790 | #[cfg(test)] 791 | mod tests { 792 | use super::*; 793 | static CONFIG_FILE_PATH: &str = "examples/test_gitleaks.toml"; 794 | 795 | fn mock_config_content() -> Value { 796 | toml::from_str::( 797 | r#" 798 | [[rules]] 799 | description = "Rule 1" 800 | id = "rule1" 801 | regex = "\\d+" 802 | entropy = 0.5 803 | keywords = ["keyword1", "keyword2"] 804 | 805 | [[rules]] 806 | description = "Rule 2" 807 | id = "rule2" 808 | regex = "[A-Z]+" 809 | entropy = 0.3 810 | keywords = ["keyword3"] 811 | 812 | [[rules]] 813 | description = "Rule 3" 814 | id = "rule3" 815 | regex = "[a-z]+" 816 | entropy = 0.2 817 | keywords = ["keyword4", "keyword5"] 818 | 819 | [[rules]] 820 | description = "Rule 4" 821 | id = "rule4" 822 | regex = "\\w+" 823 | entropy = 0.4 824 | keywords = ["keyword6"] 825 | "#, 826 | ) 827 | .unwrap() 828 | } 829 | 830 | fn mock_leaks() -> Vec { 831 | vec![Leak { 832 | line: "Sensitive information".to_string(), 833 | line_number: 42, 834 | offender: "John Doe".to_string(), 835 | commit: "abcd1234".to_string(), 836 | repo: "my-repo".to_string(), 837 | rule: "password_leak".to_string(), 838 | commit_message: "Fix security issue".to_string(), 839 | author: "John Doe".to_string(), 840 | email: "john@example.com".to_string(), 841 | file: "path/to/file.txt".to_string(), 842 | date: "2023-05-30".to_string(), 843 | }] 844 | } 845 | #[test] 846 | fn test_load_config() { 847 | let result = load_config_file(CONFIG_FILE_PATH); 848 | assert!(result.is_ok()); 849 | } 850 | 851 | #[test] 852 | fn test_config_allowlist_valid_config() { 853 | let result = 
config_allowlist(&mock_config_content()); 854 | assert!(result.is_ok()); 855 | } 856 | #[test] 857 | fn test_config_ruleslist_and_keywords() { 858 | let result = config_ruleslist_and_keywords(&mock_config_content()); 859 | 860 | assert!(result.is_ok()); 861 | let ruleslist = result.unwrap(); 862 | 863 | assert_eq!(ruleslist.len(), 4); 864 | 865 | let rule1 = &ruleslist[0]; 866 | assert_eq!(rule1.description, "Rule 1"); 867 | assert_eq!(rule1.id, "rule1"); 868 | assert_eq!(rule1.regex, "\\d+"); 869 | assert_eq!(rule1.keywords, vec!["keyword1", "keyword2"]); 870 | assert!(rule1.allowlist.is_none()); 871 | 872 | let rule2 = &ruleslist[1]; 873 | assert_eq!(rule2.description, "Rule 2"); 874 | assert_eq!(rule2.id, "rule2"); 875 | assert_eq!(rule2.regex, "[A-Z]+"); 876 | assert_eq!(rule2.keywords, vec!["keyword3"]); 877 | assert!(rule2.allowlist.is_none()); 878 | 879 | let rule3 = &ruleslist[2]; 880 | assert_eq!(rule3.description, "Rule 3"); 881 | assert_eq!(rule3.id, "rule3"); 882 | assert_eq!(rule3.regex, "[a-z]+"); 883 | assert_eq!(rule3.keywords, vec!["keyword4", "keyword5"]); 884 | assert!(rule3.allowlist.is_none()); 885 | 886 | let rule4 = &ruleslist[3]; 887 | assert_eq!(rule4.description, "Rule 4"); 888 | assert_eq!(rule4.id, "rule4"); 889 | assert_eq!(rule4.regex, "\\w+"); 890 | assert_eq!(rule4.keywords, vec!["keyword6"]); 891 | assert!(rule4.allowlist.is_none()); 892 | } 893 | 894 | #[test] 895 | fn test_write_rule_to_toml() { 896 | let rule = Rule { 897 | description: "Adafruit API Key".to_string(), 898 | id: "adafruit-api-key".to_string(), 899 | regex: r#"(?i)(?:adafruit)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"#.to_string(), 900 | keywords: vec!["adafruit".to_string()], 901 | allowlist: None, 902 | }; 903 | let result = append_rule_to_toml(&rule, CONFIG_FILE_PATH); 904 | assert!(result.is_ok()); 905 | } 906 | 907 | #[test] 908 | fn test_delete_rule_by_id() { 909 
| if let Err(err) = delete_rule_by_id(CONFIG_FILE_PATH, "adafruit-api-key") { 910 | eprintln!("Error: {}", err); 911 | } 912 | } 913 | 914 | #[test] 915 | fn test_update_rule_by_id() { 916 | let rule = Rule { 917 | description: "11111111111".to_string(), 918 | id: "stripe-access-token".to_string(), 919 | regex: r#"(?i)(?:adafruit)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:=|\|\|:|<=|=>|:)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"#.to_string(), 920 | keywords: vec!["adafruit".to_string()], 921 | allowlist: None, 922 | }; 923 | let result = update_rule_by_id( CONFIG_FILE_PATH,&rule.id,&rule,); 924 | 925 | assert!(result.is_ok()); 926 | } 927 | #[test] 928 | fn test_is_path_in_allowlist_regex_not_match() { 929 | let path = "/path/to/file.txt"; 930 | let allowlist_paths = vec!["/other/.*\\.txt".to_string()]; 931 | let result = is_path_in_allowlist(path, &allowlist_paths); 932 | assert_eq!(result, false); 933 | } 934 | 935 | #[test] 936 | fn test_is_path_in_allowlist_exact_match() { 937 | let path = "tests/files/gitleaks.toml"; 938 | let allowlist_paths = vec!["tests/files/gitleaks.toml".to_string()]; 939 | let result = is_path_in_allowlist(path, &allowlist_paths); 940 | assert_eq!(result, true); 941 | } 942 | 943 | #[test] 944 | fn test_is_string_matched_match() { 945 | let regex_array = vec!["^hello".to_string(), "world$".to_string()]; 946 | let test_string = "hello, world!"; 947 | let result = is_string_matched(®ex_array, test_string); 948 | assert_eq!(result, true); 949 | } 950 | 951 | #[test] 952 | fn test_is_string_matched_not_match() { 953 | let regex_array = vec!["^hello".to_string(), "world$".to_string()]; 954 | let test_string = "goodbye"; 955 | let result = is_string_matched(®ex_array, test_string); 956 | assert_eq!(result, false); 957 | } 958 | 959 | #[test] 960 | fn test_is_contains_strs_contains() { 961 | let array = vec![ 962 | "apple".to_string(), 963 | "banana".to_string(), 964 | "orange".to_string(), 965 | ]; 966 | 
let content = "I like to eat bananas"; 967 | let result = is_contains_strs(&array, content); 968 | assert_eq!(result, true); 969 | } 970 | 971 | #[test] 972 | fn test_is_contains_strs_not_contains() { 973 | let array = vec![ 974 | "apple".to_string(), 975 | "banana".to_string(), 976 | "orange".to_string(), 977 | ]; 978 | let content = "I like to eat grapes"; 979 | let result = is_contains_strs(&array, content); 980 | assert_eq!(result, false); 981 | } 982 | 983 | #[test] 984 | fn test_is_regex_valid_case() { 985 | let input = "(regex$"; 986 | let result = is_regex(input); 987 | assert_eq!(result, true); 988 | } 989 | 990 | #[test] 991 | fn test_is_regex_invalid_case() { 992 | let input = "(regex"; 993 | let result = is_regex(input); 994 | assert_eq!(result, false); 995 | } 996 | 997 | #[test] 998 | fn test_is_regex_empty_string() { 999 | let input = ""; 1000 | let result = is_regex(input); 1001 | assert_eq!(result, false); 1002 | } 1003 | 1004 | #[test] 1005 | fn test_remove_duplicates() { 1006 | // Test case 1 1007 | let array1 = vec![1, 1, 2, 3, 4, 5]; 1008 | let array2 = vec![3, 4, 5, 6, 7]; 1009 | let result = remove_duplicates(array1, array2); 1010 | assert_eq!(result, vec![1, 1, 2]); 1011 | } 1012 | 1013 | #[test] 1014 | fn test_is_link_with_valid_links() { 1015 | assert!(is_link("https://www.example.com")); 1016 | assert!(is_link("http://example.com")); 1017 | assert!(is_link("www.example.com")); 1018 | assert!(is_link("www.example.com/path")); 1019 | assert!(is_link("www.example.com?q=query")); 1020 | } 1021 | 1022 | #[test] 1023 | fn test_is_link_with_invalid_links() { 1024 | assert!(!is_link("example.com")); 1025 | assert!(!is_link("example.com/path")); 1026 | assert!(!is_link("example.com?q=query")); 1027 | assert!(!is_link("not a link")); 1028 | } 1029 | 1030 | // test report functions 1031 | #[test] 1032 | fn test_write_json_report() { 1033 | let temp_file = tempfile::NamedTempFile::new().unwrap(); 1034 | let file_path = 
temp_file.path().to_str().unwrap(); 1035 | 1036 | write_json_report(file_path, &&mock_leaks()).unwrap(); 1037 | 1038 | let json_content = fs::read_to_string(file_path).unwrap(); 1039 | 1040 | assert!(json_content.contains("Sensitive information")); 1041 | assert!(json_content.contains("path/to/file.txt")); 1042 | } 1043 | 1044 | #[test] 1045 | fn test_write_sarif_report() { 1046 | let temp_file = tempfile::NamedTempFile::new().unwrap(); 1047 | let file_path = temp_file.path().to_str().unwrap(); 1048 | 1049 | write_sarif_report(file_path, &mock_leaks()).unwrap(); 1050 | 1051 | let sarif_content = fs::read_to_string(file_path).unwrap(); 1052 | 1053 | assert!(sarif_content.contains("Sensitive information")); 1054 | assert!(sarif_content.contains("path/to/file.txt")); 1055 | 1056 | } 1057 | 1058 | #[test] 1059 | fn test_write_csv_report() { 1060 | let temp_file = tempfile::NamedTempFile::new().unwrap(); 1061 | let file_path = temp_file.path().to_str().unwrap(); 1062 | 1063 | write_csv_report(file_path, &&mock_leaks()).unwrap(); 1064 | 1065 | let csv_content = fs::read_to_string(file_path).unwrap(); 1066 | 1067 | assert!(csv_content.contains("Sensitive information")); 1068 | assert!(csv_content.contains("path/to/file.txt")); 1069 | } 1070 | } 1071 | -------------------------------------------------------------------------------- /src/utils/git_util.rs: -------------------------------------------------------------------------------- 1 | extern crate chrono; 2 | extern crate git2; 3 | use crate::errors::CustomError; 4 | use crate::models::{CommitInfo, Config, Scan}; 5 | use crate::utils::detect_utils::{is_commit_in_allowlist, is_link, is_path_in_allowlist}; 6 | use chrono::Local; 7 | use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc}; 8 | use git2::Repository; 9 | use regex::Regex; 10 | use std::error::Error; 11 | use std::fs; 12 | 13 | /// Loads a repository from the specified path. 
14 | /// 15 | /// # Arguments 16 | /// 17 | /// * `repo_path` - A string slice that represents the path to the repository. 18 | /// 19 | /// # Returns 20 | /// 21 | /// Returns a `Result` containing a `Repository` if the repository is loaded successfully, or an error if the repository fails to load. 22 | /// 23 | pub fn load_repository(repo_path: &str) -> Result> { 24 | let repo = match Repository::open(repo_path) { 25 | Ok(repo) => repo, 26 | Err(_) => { 27 | return Err(Box::new(CustomError::FailLoadRepo)); 28 | } 29 | }; 30 | 31 | Ok(repo) 32 | } 33 | 34 | /// Retrieves the name of the repository from the provided `Repository` object. 35 | /// 36 | /// # Arguments 37 | /// 38 | /// * `repo` - A reference to a `Repository` object. 39 | /// 40 | /// # Returns 41 | /// 42 | /// Returns a `Result` containing the name of the repository as a `String` if successful, or an error if the repository name is invalid or cannot be determined. 43 | /// 44 | /// # Errors 45 | /// 46 | /// This function may return the following errors: 47 | /// 48 | /// * `CustomError::InvalidRepoName` - Indicates that the repository name is invalid. 49 | /// 50 | pub fn config_repo_name(repo: &Repository) -> Result> { 51 | let repo_path = repo.path(); 52 | let repo_dir = repo_path.parent().ok_or(CustomError::InvalidRepoName)?; 53 | 54 | let repo_name = repo_dir 55 | .file_name() 56 | .and_then(|s| s.to_str()) 57 | .unwrap_or("") 58 | .to_string(); 59 | if repo_name.ends_with(".git") { 60 | Ok(repo_name[..repo_name.len() - 4].to_string()) 61 | } else { 62 | Ok(repo_name) 63 | } 64 | } 65 | 66 | /// Traverse the tree in batches and collect file paths and contents. 67 | /// 68 | /// Parameters: 69 | /// - `repo`: Reference to the repository. 70 | /// - `tree`: Reference to the tree object. 71 | /// - `path`: Path of the current tree. 72 | /// - `files`: Mutable vector to store the file paths and contents. 73 | /// - `scan`: Reference to the Scan object. 74 | /// - `commit_id`: Commit ID. 
75 | /// 76 | /// Returns: 77 | /// - `Ok(())`: If the traversal is successful. 78 | /// - `Err(Box)`: If an error occurs during traversal. 79 | pub fn traverse_tree( 80 | repo: &Repository, 81 | tree: &git2::Tree, 82 | path: &str, 83 | files: &mut Vec<(String, String)>, 84 | scan: &Scan, 85 | commit_id: git2::Oid, 86 | 87 | ) -> Result<(), Box> { 88 | for entry in tree.iter() { 89 | let entry_path = format!("{}/{}", path, entry.name().unwrap()); 90 | // Skip entry if it is in the allowlist paths, in the allowlist commits, or is an ignored path 91 | if (is_path_in_allowlist(&entry_path, &scan.allowlist.paths)) 92 | || (is_commit_in_allowlist(&commit_id.to_string(), &scan.allowlist.commits)) 93 | || is_ignored_path(&entry_path) 94 | { 95 | continue; 96 | } 97 | if entry.kind() == Some(git2::ObjectType::Blob) { 98 | let blob = repo 99 | .find_blob(entry.id()) 100 | .map_err(|_| CustomError::ObjectNotFound)?; 101 | let content = String::from_utf8_lossy(blob.content()); 102 | files.push((entry_path, content.to_string())); 103 | } else if entry.kind() == Some(git2::ObjectType::Tree) { 104 | let subtree = repo 105 | .find_tree(entry.id()) 106 | .map_err(|_| CustomError::RepoInternalError)?; 107 | traverse_tree(repo, &subtree, &entry_path, files,scan,commit_id)?; 108 | } 109 | } 110 | Ok(()) 111 | } 112 | 113 | /// skip the files or directories begin with "." 114 | fn is_ignored_path(path: &str) -> bool { 115 | let path_segments: Vec<&str> = path.split('/').collect(); 116 | for item in path_segments { 117 | if item.starts_with('.') { 118 | return true; 119 | } 120 | } 121 | false 122 | } 123 | 124 | /// Retrieves commit information from the given `Repository` and `Commit`. 125 | /// 126 | /// # Arguments 127 | /// 128 | /// * `repo` - A reference to a `Repository` object. 129 | /// * `commit` - A reference to a `Commit` object representing the commit to retrieve information from. 
130 | /// 131 | /// # Returns 132 | /// 133 | /// Returns a `Result` containing a `CommitInfo` struct if the retrieval is successful, or an error if an error occurs during the retrieval. 134 | /// 135 | /// # Errors 136 | /// 137 | /// This function may return the following errors: 138 | /// 139 | /// * `CustomError::InvalidRepoName` - Indicates that the repository name is invalid. 140 | /// * `CustomError::ObjectNotFound` - Indicates that an object in the repository is not found. 141 | /// * `CustomError::RepoInternalError` - Indicates an internal error in the repository. 142 | /// 143 | #[allow(deprecated)] 144 | pub fn config_commit_info( 145 | repo: &Repository, 146 | commit: &git2::Commit, 147 | scan: &Scan, 148 | ) -> Result> { 149 | // Config info 150 | let commit_id = commit.id(); 151 | let author = commit.author(); 152 | let email = author.email().unwrap_or("").to_string(); 153 | let commit_message = commit.message().unwrap_or("").to_string(); 154 | let date = Utc.timestamp(commit.time().seconds(), 0); 155 | let offset = FixedOffset::west(commit.time().offset_minutes() * 60); 156 | let date = offset.from_utc_datetime(&date.naive_utc()); 157 | let mut files = Vec::new(); 158 | 159 | let repo_name = match config_repo_name(repo) { 160 | Ok(repo_name) => repo_name, 161 | Err(_) => { 162 | return Err(Box::new(CustomError::InvalidRepoName)); 163 | } 164 | }; 165 | 166 | // Retrieve the tree of the commit 167 | let tree = commit.tree().map_err(|_| CustomError::ObjectNotFound)?; 168 | 169 | // Traverse the tree to get the file paths and content 170 | traverse_tree( 171 | repo, 172 | &tree, 173 | "", 174 | &mut files, 175 | scan, 176 | commit_id, 177 | 178 | ) 179 | .map_err(|_| CustomError::RepoInternalError)?; 180 | let commit_info = CommitInfo { 181 | repo: repo_name, 182 | commit: commit_id, 183 | author: author.name().unwrap_or("").to_string(), 184 | email, 185 | commit_message, 186 | date, 187 | files, 188 | }; 189 | 190 | Ok(commit_info) 191 | } 192 | 193 | 
/// Loads all commit IDs from the repository in topological order.
///
/// # Arguments
///
/// * `repo` - A reference to a `Repository` object representing the repository.
///
/// # Returns
///
/// Returns a `Result` containing a vector of commit IDs (`Vec<String>`) if the operation is successful, or an error if an error occurs during the process.
///
/// # Errors
///
/// This function may return the following errors:
///
/// * `CustomError::AccessWalkerError` - Indicates an error occurred while accessing the revision walker.
/// * `CustomError::PushWalkerHeadError` - Indicates an error occurred while pushing the head commit to the revision walker or setting the sorting order.
/// * `CustomError::WalkerSortError` - Indicates an error occurred while iterating the revision walker.
/// * `CustomError::RepoCommitError` - Indicates an error occurred while finding a commit in the repository.
///
pub fn load_all_commits(repo: &Repository) -> Result<Vec<String>, Box<dyn Error>> {
    let mut revwalk = repo.revwalk().map_err(|_| CustomError::AccessWalkerError)?;

    // Walk the whole history starting at HEAD, in topological order.
    revwalk
        .push_head()
        .map_err(|_| CustomError::PushWalkerHeadError)?;
    revwalk
        .set_sorting(git2::Sort::TOPOLOGICAL)
        .map_err(|_| CustomError::PushWalkerHeadError)?;

    let mut commits = Vec::new();

    for oid in revwalk {
        let oid = oid.map_err(|_| CustomError::WalkerSortError)?;
        // `find_commit` validates that the oid actually refers to a commit.
        let commit = repo
            .find_commit(oid)
            .map_err(|_| CustomError::RepoCommitError)?;
        commits.push(commit.id().to_string());
    }

    Ok(commits)
}

/// Loads a subset of commits based on specified conditions.
///
/// # Arguments
///
/// * `commit_from` - An optional string representing the start commit ID.
/// * `commit_to` - An optional string representing the end commit ID.
242 | /// * `commits` - A slice of strings representing the available commit IDs. 243 | /// 244 | /// # Returns 245 | /// 246 | /// Returns a vector of commit IDs as strings, representing the subset of commits based on 247 | /// the specified conditions. If the start commit is after the end commit or if either commit 248 | /// is not found in the input commits, an empty vector is returned. 249 | /// 250 | pub fn load_commits_by_conditions( 251 | commit_from: Option, 252 | commit_to: Option, 253 | commits: &[String], 254 | ) -> Vec { 255 | match (commit_from, commit_to) { 256 | (Some(start_commit), Some(end_commit)) => { 257 | let start_index = commits.iter().position(|commit| *commit == start_commit); 258 | let end_index = commits.iter().position(|commit| *commit == end_commit); 259 | 260 | if let (Some(start), Some(end)) = (start_index, end_index) { 261 | if start <= end { 262 | commits[start..=end].to_vec() 263 | } else { 264 | Vec::new() 265 | } 266 | } else { 267 | Vec::new() 268 | } 269 | } 270 | _ => Vec::new(), 271 | } 272 | } 273 | 274 | /// Loads all commit IDs from the given `Repository`. 275 | /// 276 | /// # Arguments 277 | /// 278 | /// * `repo` - A reference to a `Repository` object. 279 | /// 280 | /// # Returns 281 | /// 282 | /// Returns a `Result` containing a vector of commit IDs as strings if the loading is successful, or an error if an error occurs during the loading. 283 | /// 284 | /// # Errors 285 | /// 286 | /// This function may return the following errors: 287 | /// 288 | /// * `CustomError::AccessWalkerError` - Indicates an error in accessing the commit walker. 289 | /// * `CustomError::PushWalkerHeadError` - Indicates an error in pushing the head to the commit walker. 290 | /// * `CustomError::WalkerSortError` - Indicates an error in sorting the commit walker. 291 | /// * `CustomError::RepoCommitError` - Indicates an error in finding a commit in the repository. 
292 | /// 293 | pub fn load_all_object_ids(repo: &Repository) -> Result, Box> { 294 | let mut object_ids = Vec::new(); 295 | let odb = repo.odb().map_err(|_| CustomError::ObjectNotAccess)?; 296 | 297 | odb.foreach(|id| { 298 | object_ids.push(*id); 299 | true 300 | }) 301 | .map_err(|_| CustomError::RepoInternalError)?; 302 | 303 | Ok(object_ids) 304 | } 305 | 306 | /// Parses a date string into a `DateTime` object. 307 | /// 308 | /// # Arguments 309 | /// 310 | /// * `input` - A string slice representing the date to parse. The expected format is "%Y-%m-%d". 311 | /// * `mytype` - A string slice indicating the type of datetime to create. It can be either "start" or any other value. 312 | /// 313 | /// # Returns 314 | /// 315 | /// Returns a `Result` containing a `DateTime` object if the parsing is successful, or an error if an error occurs during the parsing. 316 | /// 317 | /// # Errors 318 | /// 319 | /// This function may return the following errors: 320 | /// 321 | /// * `CustomError::InvalidDateFormat` - Indicates that the input date format is invalid. 322 | /// * `CustomError::InvalidTimeFormat` - Indicates that the time format is invalid. 
323 | /// 324 | pub fn parse_date_to_datetime(input: &str, mytype: &str) -> Result, Box> { 325 | let date = 326 | NaiveDate::parse_from_str(input, "%Y-%m-%d").map_err(|_| CustomError::InvalidDateFormat)?; 327 | 328 | let time: NaiveTime; 329 | if mytype == "start" { 330 | if let Some(t) = NaiveTime::from_hms_opt(0, 0, 0) { 331 | time = t; 332 | } else { 333 | return Err(Box::new(CustomError::InvalidTimeFormat)); 334 | } 335 | } else if let Some(t) = NaiveTime::from_hms_opt(23, 59, 59) { 336 | time = t; 337 | } else { 338 | return Err(Box::new(CustomError::InvalidTimeFormat)); 339 | } 340 | 341 | let datetime = NaiveDateTime::new(date, time); 342 | // let datetime_utc = DateTime::from_utc(datetime, Utc); 343 | let datetime_utc = DateTime::from_naive_utc_and_offset(datetime, Utc); 344 | 345 | Ok(datetime_utc) 346 | } 347 | 348 | /// Checks if the input string has a valid date format of "YYYY-MM-DD". 349 | /// 350 | /// # Arguments 351 | /// 352 | /// * `input` - The string to be checked for date format validity. 353 | /// 354 | /// # Returns 355 | /// 356 | /// Returns `true` if the input string has a valid date format, otherwise `false`. 357 | pub fn is_valid_date_format(input: &str) -> bool { 358 | if let Ok(date) = NaiveDate::parse_from_str(input, "%Y-%m-%d") { 359 | let formatted = date.format("%Y-%m-%d").to_string(); 360 | return formatted == input; 361 | } 362 | false 363 | } 364 | 365 | /// Loads the content of a configuration file (`.gitleaks.toml` or `gitleaks.toml`) from the target repository. 366 | /// 367 | /// # Arguments 368 | /// 369 | /// * `repo` - A reference to a `Repository` object representing the target repository. 370 | /// 371 | /// # Returns 372 | /// 373 | /// Returns a `Result` containing an `Option` with the content of the configuration file if found, or `None` if the configuration file is not found in any commit. 
374 | /// 375 | /// # Errors 376 | /// 377 | /// This function may return an error if any error occurs during the repository traversal or object retrieval. 378 | /// 379 | pub fn load_config_content_from_target_repo( 380 | repo: &Repository, 381 | ) -> Result, Box> { 382 | let head_commit = repo.head()?.peel_to_commit()?; 383 | let mut walker = repo.revwalk()?; 384 | walker.push(head_commit.id())?; 385 | 386 | // Iterate over all commits in the repository 387 | for commit_id in walker { 388 | let commit = repo.find_commit(commit_id?)?; 389 | let tree = commit.tree()?; 390 | 391 | // Iterate over all entries in the tree 392 | for entry in tree.iter() { 393 | let file_name = entry.name().unwrap_or(""); 394 | if file_name == ".gitleaks.toml" || file_name == "gitleaks.toml" { 395 | let blob = entry.to_object(repo)?.peel_to_blob()?; 396 | let content = String::from_utf8_lossy(blob.content()); 397 | return Ok(Some(content.into())); 398 | } 399 | } 400 | } 401 | 402 | Ok(None) 403 | } 404 | 405 | /// Extracts the repository name from a given URL. 406 | /// 407 | /// # Arguments 408 | /// 409 | /// * `url` - A string slice representing the URL of the repository. 410 | /// 411 | /// # Returns 412 | /// 413 | /// Returns an `Option` containing the extracted repository name if it matches the expected format, or `None` if the extraction fails. 414 | /// 415 | pub fn extract_repo_name(url: &str) -> Option { 416 | let re = Regex::new(r"/([^/]+)\.git$").unwrap(); 417 | if let Some(captures) = re.captures(url) { 418 | if let Some(repo_name) = captures.get(1) { 419 | return Some(repo_name.as_str().to_string()); 420 | } 421 | } 422 | None 423 | } 424 | 425 | /// Clones or loads a repository based on the provided configuration. 426 | /// 427 | /// # Arguments 428 | /// 429 | /// * `config` - A reference to a `Config` object containing the repository information. 
430 | /// 431 | /// # Returns 432 | /// 433 | /// Returns a `Result` containing a `Repository` object if the operation is successful, or an error if an error occurs during cloning or loading. 434 | /// 435 | /// # Errors 436 | /// 437 | /// This function may return the following errors: 438 | /// 439 | /// * `CustomError::FailDeteleDir` - Indicates that the directory removal operation failed. 440 | /// * `CustomError::FailCreateDir` - Indicates that the directory creation operation failed. 441 | /// * `CustomError::FailCloneRepo` - Indicates that the repository cloning operation failed. 442 | /// * `CustomError::FailLoadRepo` - Indicates that the repository loading operation failed. 443 | /// 444 | #[warn(clippy::needless_return)] 445 | pub fn clone_or_load_repository(config: &Config) -> Result> { 446 | if is_link(&config.repo) { 447 | let repo_path = match &config.disk { 448 | Some(disk) => disk.to_string(), 449 | None => { 450 | let dest = "workplace/"; 451 | let mut repo_path = String::new(); 452 | if let Some(name) = extract_repo_name(&config.repo) { 453 | repo_path = format!("{}{}", dest, name); 454 | } 455 | 456 | if fs::metadata(&repo_path).is_ok() { 457 | match fs::remove_dir_all(&repo_path) { 458 | Ok(_) => {} 459 | Err(_) => { 460 | return Err(Box::new(CustomError::FailDeleteDir)); 461 | } 462 | } 463 | } 464 | 465 | match fs::create_dir(&repo_path) { 466 | Ok(_) => {} 467 | Err(_) => { 468 | return Err(Box::new(CustomError::FailCreateDir)); 469 | } 470 | } 471 | repo_path 472 | } 473 | }; 474 | match Repository::clone(&config.repo, repo_path) { 475 | Ok(repo) => { 476 | println!( 477 | "\x1b[34m[INFO]\x1b[0m[{}] Clone repo ...", 478 | Local::now().format("%Y-%m-%d %H:%M:%S"), 479 | ); 480 | 481 | Ok(repo) 482 | } 483 | Err(_) => Err(Box::new(CustomError::FailCloneRepo)), 484 | } 485 | } else { 486 | match load_repository(&config.repo) { 487 | Ok(repo) => { 488 | println!( 489 | "\x1b[34m[INFO]\x1b[0m[{}] Clone repo ...", 490 | 
Local::now().format("%Y-%m-%d %H:%M:%S"), 491 | ); 492 | 493 | Ok(repo) 494 | } 495 | 496 | Err(_) => Err(Box::new(CustomError::FailLoadRepo)), 497 | } 498 | } 499 | } 500 | 501 | // NOTE: The commented-out function can be tested after specifying the repo file 502 | #[cfg(test)] 503 | mod tests { 504 | 505 | use super::*; 506 | // static VALID_PATH: &str = "D:/Workplace/Git/TestGitOperation"; 507 | // static INVALID_PATH: &str = "D:/Workplace/Git/TestGitOperation222"; 508 | 509 | // // test load_repository 510 | // #[test] 511 | // fn test_load_repository_valid_path() { 512 | // let result = load_repository(VALID_PATH); 513 | // assert!(result.is_ok()); 514 | // } 515 | 516 | // #[test] 517 | // fn test_load_repository_invalid_path() { 518 | // let result = load_repository(INVALID_PATH); 519 | // assert!(result.is_err()); 520 | // } 521 | 522 | // NOTE: The commented-out function can be tested after specifying the repo file 523 | // // test config_repo_name 524 | // #[test] 525 | // fn test_config_repo_name_valid_repo() { 526 | // let repo = match load_repository(VALID_PATH) { 527 | // Ok(repo) => repo, 528 | // Err(_) => { 529 | // panic!("Failed to load repository"); 530 | // } 531 | // }; 532 | // let result = match config_repo_name(&repo) { 533 | // Ok(result) => result, 534 | // Err(e) => { 535 | // panic!("Error:{}", e); 536 | // } 537 | // }; 538 | // assert_eq!(result, "TestGitOperation"); 539 | // } 540 | 541 | // // test load_all_commits 542 | // #[test] 543 | // fn test_load_all_commits_valid_repository() { 544 | // let repo = match Repository::init(VALID_PATH) { 545 | // Ok(repo) => repo, 546 | // Err(e) => { 547 | // eprintln!("{}", e); 548 | // panic!("Failed to initialize repository"); 549 | // } 550 | // }; 551 | 552 | // let result = load_all_commits(&repo); 553 | 554 | // assert!(result.is_ok()); 555 | // let commits = result.unwrap(); 556 | // assert!(commits.contains(&"9e2fe5fc27b1bb8bd4de5574f8d9010164427051".to_string())); 557 | // } 558 | 559 
| // // test load_commits_by_conditions 560 | // #[test] 561 | // fn test_load_commits_by_conditions_valid_conditions() { 562 | // let commits = vec![ 563 | // "commit1".to_string(), 564 | // "commit2".to_string(), 565 | // "commit3".to_string(), 566 | // "commit4".to_string(), 567 | // "commit5".to_string(), 568 | // ]; 569 | // let commit_from = Some("commit2".to_string()); 570 | // let commit_to = Some("commit4".to_string()); 571 | 572 | // let result = load_commits_by_conditions(commit_from, commit_to, &commits); 573 | 574 | // assert_eq!( 575 | // result, 576 | // vec![ 577 | // "commit2".to_string(), 578 | // "commit3".to_string(), 579 | // "commit4".to_string(), 580 | // ] 581 | // ); 582 | // } 583 | 584 | // // test load_all_object_ids 585 | // #[test] 586 | // fn test_load_all_object_ids_valid_repository() { 587 | // let repo = match Repository::init(VALID_PATH) { 588 | // Ok(repo) => repo, 589 | // Err(e) => { 590 | // eprintln!("{}", e); 591 | // panic!("Failed to initialize repository"); 592 | // } 593 | // }; 594 | 595 | // let oid1 = repo.blob("Content 1".as_bytes()).unwrap(); 596 | // let oid2 = repo.blob("Content 2".as_bytes()).unwrap(); 597 | // let oid3 = repo.blob("Content 3".as_bytes()).unwrap(); 598 | 599 | // let result = load_all_object_ids(&repo); 600 | 601 | // assert!(result.is_ok()); 602 | // let object_ids = result.unwrap(); 603 | // assert!(object_ids.contains(&oid1)); 604 | // assert!(object_ids.contains(&oid2)); 605 | // assert!(object_ids.contains(&oid3)); 606 | // } 607 | 608 | // test parse_date_to_datetime 609 | #[test] 610 | fn test_parse_date_to_datetime_valid_input_start() { 611 | let valid_input = "2023-05-25"; 612 | let mytype = "start"; 613 | let result = parse_date_to_datetime(valid_input, mytype); 614 | assert!(result.is_ok()); 615 | assert_eq!(result.unwrap().to_rfc3339(), "2023-05-25T00:00:00+00:00"); 616 | } 617 | 618 | #[test] 619 | fn test_parse_date_to_datetime_valid_input_end() { 620 | let valid_input = 
"2023-05-25"; 621 | let mytype = "end"; 622 | let result = parse_date_to_datetime(valid_input, mytype); 623 | assert!(result.is_ok()); 624 | assert_eq!(result.unwrap().to_rfc3339(), "2023-05-25T23:59:59+00:00"); 625 | } 626 | 627 | #[test] 628 | fn test_parse_date_to_datetime_invalid_input() { 629 | let invalid_input = "2023-05-32"; 630 | let mytype = "start"; 631 | let result = parse_date_to_datetime(invalid_input, mytype); 632 | assert!(result.is_err()); 633 | } 634 | 635 | // test is_valid_date_format 636 | #[test] 637 | fn test_is_valid_date_format_valid_input() { 638 | let valid_input = "2023-05-25"; 639 | let result = is_valid_date_format(valid_input); 640 | assert!(result); 641 | } 642 | 643 | #[test] 644 | fn test_is_valid_date_format_invalid_input() { 645 | let invalid_input = "2023-05-32"; 646 | let result = is_valid_date_format(invalid_input); 647 | assert!(!result); 648 | } 649 | 650 | // test extract_repo_name 651 | #[test] 652 | fn test_extract_repo_name() { 653 | // Test with a valid URL 654 | let url = "https://github.com/user/repo.git"; 655 | let result = extract_repo_name(url); 656 | assert_eq!(result, Some("repo".to_owned())); 657 | 658 | // Test with a URL without ".git" extension 659 | let url = "https://github.com/user/repo"; 660 | let result = extract_repo_name(url); 661 | assert_eq!(result, None); 662 | } 663 | } 664 | -------------------------------------------------------------------------------- /src/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod detect_utils; 2 | pub mod git_util; --------------------------------------------------------------------------------